package folioxml.core;
import folioxml.core.TokenBase.TokenType;
import java.io.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Reads a series of {@link TokenBase} tokens from the specified {@link Reader}.
 * <p>
 * Each call to {@code read} asks the base class for the next match against the text, entity,
 * tag and comment patterns, then copies the matched markup and the corresponding
 * {@link TokenType} into a caller-supplied token instance.
 * <p>
 * NOTE(review): an earlier revision of this comment said the reader "fetches DI and FI
 * preprocessor includes using the specified IIncludeResolutionService". Nothing in this class
 * does that; confirm whether include resolution lives in another reader.
 *
 * @param <T> the concrete token type that {@code read} fills in and returns
 * @author nathanael
 */
public class TokenBaseReader<T extends TokenBase> extends folioxml.core.TokenReaderBase {

    /**
     * Opens the given file and reads it using the UTF-8 encoding.
     * <p>
     * NOTE(review): the stream opened here is not closed by this class; presumably the base
     * class offers a way to close the underlying reader - confirm.
     *
     * @param path the file to tokenize
     * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
     * @throws FileNotFoundException        if the file cannot be opened for reading
     * @throws IOException                  declared for callers; see the base class
     */
    public TokenBaseReader(File path) throws UnsupportedEncodingException, FileNotFoundException, IOException {
        this(new InputStreamReader(new FileInputStream(path), "UTF-8"));
    }

    /**
     * Creates a reader over the given character stream using {@code READ_SIZE_DEFAULT}
     * as the read block size.
     *
     * @param reader the character stream to tokenize
     */
    public TokenBaseReader(Reader reader) {
        this(reader, READ_SIZE_DEFAULT);
    }

    /**
     * Creates a reader over the given character stream.
     *
     * @param reader        the character stream to tokenize
     * @param readBlockSize block size handed to the base class
     */
    public TokenBaseReader(Reader reader, int readBlockSize) {
        super(reader, readBlockSize);
    }

    // Compiled once and never reassigned: read() identifies the token type by comparing
    // Matcher.pattern() against these exact instances.
    private static final Pattern rComment = Pattern.compile("^" + TokenBase.RegexComment, Pattern.DOTALL);
    private static final Pattern rText = Pattern.compile("^" + TokenBase.RegexText);
    private static final Pattern rTag = Pattern.compile("^" + TokenBase.RegexTag);
    private static final Pattern rEntity = Pattern.compile("^" + TokenBase.RegexEntity);

    /**
     * An array of the patterns we look for, in the correct order.
     */
    private static final Pattern[] tokenPatterns = {rText, rEntity, rTag, rComment};

    /**
     * Returns the patterns this reader looks for, in order: text, entity, tag, comment.
     * The returned array is shared by all instances and must not be modified.
     *
     * @return the shared pattern array
     */
    protected Pattern[] getTokenPatterns() {
        return tokenPatterns;
    }

    /**
     * Reads the next token from the input into {@code blankToken}.
     * <p>
     * The token's {@code markup} is set to the matched text and its {@code type} to the
     * {@link TokenType} of the pattern that matched; for tags,
     * {@code parseTagFromMatcher} is also invoked. If the match came from a pattern other
     * than the four defined in this class, {@code type} is left as it was.
     *
     * @param blankToken the token instance to populate
     * @return {@code blankToken} after it has been populated, or {@code null} at end of input
     * @throws IOException            declared for the underlying read
     * @throws InvalidMarkupException declared for markup that cannot be tokenized or parsed
     */
    public T read(T blankToken) throws IOException, InvalidMarkupException {
        //Store current position. After getNextMatch() is called, these values will be incremented to the *next* token.
        //Currently unused because the debugging-info assignment below is disabled; the call is
        //kept so that any effect it has on the tracker is unchanged.
        TokenInfo ti = tracker.getTokenInfo();

        Matcher m = getNextMatch();
        if (m == null) {
            return null; //eof
        }

        //Widen to TokenBase for member access; the same object is returned below as T.
        TokenBase t = blankToken;

        //Set the matched markup
        t.markup = m.group();

        //Set the type
        Pattern matched = m.pattern();
        if (matched == rComment) {
            t.type = TokenType.Comment;
        } else if (matched == rText) {
            t.type = TokenType.Text;
        } else if (matched == rEntity) {
            t.type = TokenType.Entity;
        } else if (matched == rTag) {
            t.type = TokenType.Tag;
            t.parseTagFromMatcher(m);
        }

        //Save debugging info
        //t.info = ti;
        //t.info.length = m.end() - m.start();
        //TODO: Xml parsing needs line numbers too... We should probably add .info to TokenBase.

        //Move the inherited index to the end of this match.
        index = m.end();

        //Return the caller's instance directly; no unchecked cast is needed.
        return blankToken;
    }
}