package folioxml.core;
/**
* Implements unicode-compliant line number and column counting. Since some line terminators span multiple characters, this requires state tracking.
* Lines and columns are 0-based. Used by TokenReaderBase to parse completed matches.
*
* @author nathanael
*/
public class LineColTracker {

    /** 0-based index of the current line. */
    private long _line = 0;

    /** 0-based offset of the current position from the last line terminator. */
    private long _col = 0;

    /** Number of characters consumed so far, line terminators included. */
    private long _char = 0;

    /**
     * True when the most recently processed character was a CR. The CR has already been
     * counted as a line break, so an immediately following LF must not start another line.
     * This flag survives between calls to {@code add}, so a CR LF pair split across two
     * calls is still counted as a single terminator.
     */
    private boolean skipLF = false;

    /**
     * Creates a tracker positioned at line 0, column 0, character 0.
     */
    public LineColTracker() {
    }

    /**
     * Captures the current position in a new {@code TokenInfo}.
     *
     * @return a new TokenInfo whose col, line and charIndex fields hold the current
     *         values of {@link #col()}, {@link #line()} and {@link #chars()}
     */
    public TokenInfo getTokenInfo() {
        TokenInfo t = new TokenInfo();
        t.col = col();
        t.line = line();
        t.charIndex = chars();
        return t;
    }

    /**
     * The index of the current line. 0-based.
     *
     * @return the current line index
     */
    public long line() {
        return _line;
    }

    /**
     * Overwrites the current line index. The pending-CR state used for CR LF handling
     * is left untouched.
     *
     * @param newValue the new line index
     * @return this tracker, for chaining
     */
    public LineColTracker line(long newValue) {
        _line = newValue;
        return this;
    }

    /**
     * The character index from the last line terminator. First character on a line = 0.
     * Counted in {@code char} values, i.e. a surrogate pair advances the column by two.
     *
     * @return the current column index
     */
    public long col() {
        return _col;
    }

    /**
     * Overwrites the current column index. The pending-CR state used for CR LF handling
     * is left untouched.
     *
     * @param newValue the new column index
     * @return this tracker, for chaining
     */
    public LineColTracker col(long newValue) {
        _col = newValue;
        return this;
    }

    /**
     * Character index from the beginning of the file, including line terminators. 0-based.
     *
     * @return the number of characters consumed so far
     */
    public long chars() {
        return _char;
    }

    /**
     * Overwrites the current character index. The pending-CR state used for CR LF handling
     * is left untouched.
     *
     * @param newValue the new character index
     * @return this tracker, for chaining
     */
    public LineColTracker chars(long newValue) {
        _char = newValue;
        return this;
    }

    /**
     * Advances the tracker over every character of {@code cs}.
     *
     * @param cs the text to consume
     * @return this tracker, for chaining
     */
    public LineColTracker add(CharSequence cs) {
        return add(cs, 0, cs.length());
    }

    /**
     * Advances the tracker over the characters of {@code text} in the half-open range
     * {@code [startIndex, endIndex)}. Nothing is consumed when {@code startIndex >= endIndex}.
     * The indexes are not validated here; they are passed straight to
     * {@link CharSequence#charAt(int)}.
     *
     * <p>Every character advances the character index. A line terminator advances the line
     * index and resets the column to 0; any other character advances the column. An LF that
     * directly follows a CR belongs to the same terminator: it advances only the character index.
     *
     * @param text       the text to read from
     * @param startIndex index of the first character to consume (inclusive)
     * @param endIndex   index after the last character to consume (exclusive)
     * @return this tracker, for chaining
     */
    public LineColTracker add(CharSequence text, int startIndex, int endIndex) {
        for (int i = startIndex; i < endIndex; i++) {
            final char c = text.charAt(i);
            _char++; // every character counts toward the absolute index, terminators included

            // Second half of a CR LF pair: the CR already moved us to the next line.
            if (c == '\n' && skipLF) {
                skipLF = false;
                continue;
            }
            skipLF = (c == '\r'); // remember a CR so that a directly following LF is skipped

            if (isLineTerminator(c)) {
                _line++;
                _col = 0;
            } else {
                _col++;
            }
        }
        return this;
    }

    /**
     * Tells whether {@code c} is one of the single-character line terminators from the Unicode
     * newline guidelines: CR, LF, VT (U+000B), FF (U+000C), NEL (U+0085), LS (U+2028) or
     * PS (U+2029). The two-character CR LF sequence is handled by the caller.
     */
    private static boolean isLineTerminator(char c) {
        switch (c) {
            case '\r':
            case '\n':
            case '\u000b':
            case '\u000c':
            case '\u0085':
            case '\u2028':
            case '\u2029':
                return true;
            default:
                return false;
        }
    }
}