TokenBaseReader.java example

Explorer

folioxml-master
- commandline
  - src
    - folioxml
      - command
        Main.java
      - export
        ExportRunner.java
  - testsrc
    - folioxml
      - export
        TestExportRunner.java
- contrib
  - folioxml-lucene
    - src
      - folioxml
        export
        plugins
        ResolveHyperlinks.java
        lucene
        FieldCollector.java
        IndexFieldOpts.java
        IndexFieldOptsProvider.java
        InfobaseFieldOptsSet.java
        InfobaseSetIndexer.java
        analysis
        AnalyzerPicker.java
        DynamicAnalyzer.java
        ListAnalyzer.java
        ListTokenizer.java
        LowercaseKeywordAnalyzer.java
        folio
        FolioEnuAnalyzer.java
        FolioEnuPhraseAnalyzer.java
        FolioEnuTokenizer.java
        LookAroundCharTokenizer.java
        TokenCombiner.java
        folioQueryParser
        QueryParser.java
        QueryToken.java
        QueryTokenReader.java
    - testsrc
      - apache
        lucene
        CharTokenizer.java
      - folioxml
        directexport
        SimultaneousTest.java
        lucene
        analysis
        folio
        TokenCombinerTest.java
        folioQueryParser
        QueryParserTest.java
        tests
        Indexer.java
- core
  - folioxml
- diff_match_patch
  - oldtest
    - name
      - fraser
        neil
        plaintext
        diff_match_patch_test.java
  - src
    - name
      - fraser
        neil
        plaintext
        diff_match_patch.java

package folioxml.core;

import folioxml.core.TokenBase.TokenType;

import java.io.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads a series of {@link TokenBase} tokens from the specified Reader input stream.
 * Each call to {@code read} classifies the next match as text, entity, tag or comment
 * markup and fills in the caller-supplied token accordingly.
 *
 * @param <T> the concrete token type the caller supplies to, and gets back from, {@code read}
 * @author nathanael
 */
public class TokenBaseReader<T extends TokenBase> extends folioxml.core.TokenReaderBase {

    /**
     * Opens the given file and reads it using the UTF-8 encoding.
     *
     * @param path the file to read tokens from
     * @throws FileNotFoundException if the file cannot be opened for reading
     * @throws UnsupportedEncodingException if the UTF-8 charset name is not supported
     * @throws IOException declared for callers; not raised directly by this constructor
     */
    public TokenBaseReader(File path) throws UnsupportedEncodingException, FileNotFoundException, IOException {
        this(new InputStreamReader(new FileInputStream(path), "UTF-8"));
    }

    /**
     * Reads from the given reader using the default block size ({@code READ_SIZE_DEFAULT}).
     *
     * @param reader the character source to tokenize
     */
    public TokenBaseReader(Reader reader) {
        this(reader, READ_SIZE_DEFAULT);
    }

    /**
     * Reads from the given reader; both arguments are forwarded unchanged to the base class.
     *
     * @param reader        the character source to tokenize
     * @param readBlockSize the block size handed to the base class constructor
     */
    public TokenBaseReader(Reader reader, int readBlockSize) {
        super(reader, readBlockSize);
    }

    // Compiled once and shared by all instances. Each pattern is anchored with "^".
    // The comment pattern uses DOTALL so that '.' also matches line terminators.
    private static final Pattern rComment = Pattern.compile("^" + TokenBase.RegexComment, Pattern.DOTALL);
    private static final Pattern rText = Pattern.compile("^" + TokenBase.RegexText);
    private static final Pattern rTag = Pattern.compile("^" + TokenBase.RegexTag);
    private static final Pattern rEntity = Pattern.compile("^" + TokenBase.RegexEntity);

    /**
     * An array of the patterns we look for, in the correct order.
     * NOTE(review): how the order is consumed is decided by the base class, which is not
     * visible from this file.
     */
    private static final Pattern[] tokenPatterns = new Pattern[]{rText, rEntity, rTag, rComment};

    /**
     * Returns the shared pattern array (text, entity, tag, comment).
     * The array itself is returned, not a copy, so callers must not modify it.
     */
    protected Pattern[] getTokenPatterns() {
        return tokenPatterns;
    }

    /**
     * Reads the next token from the input and stores it in the supplied token instance.
     * The token's {@code markup} is set to the matched text and its {@code type} is set
     * according to which pattern matched; tag matches are additionally parsed through
     * {@code parseTagFromMatcher}.
     *
     * @param blankToken the token instance to populate; must not be null when input remains
     * @return {@code blankToken} after it has been populated, or {@code null} at eof
     *         (when {@code getNextMatch()} yields no match)
     * @throws IOException            declared by this method; presumably propagated from the
     *                                base-class read in {@code getNextMatch()} (not visible here)
     * @throws InvalidMarkupException declared by this method; presumably raised while matching
     *                                or parsing tag markup (not visible here)
     */
    public T read(T blankToken) throws IOException, InvalidMarkupException {

        //Store current position. After getNextMatch() is called, these values will be incremented to the *next* token.
        //Currently unused: kept for the debugging-info TODO below.
        TokenInfo ti = tracker.getTokenInfo();

        //Fetch the next match from the input
        Matcher m = getNextMatch();

        if (m == null) return null; //eof

        //View the token through its bound type to reach the TokenBase members
        TokenBase t = blankToken;

        //Set the matched markup
        t.markup = m.group();

        //Set the type. Pattern instances are compared by identity against the shared constants.
        Pattern matched = m.pattern();
        if (matched == rComment) {
            t.type = TokenType.Comment;
        } else if (matched == rText) {
            t.type = TokenType.Text;
        } else if (matched == rEntity) {
            t.type = TokenType.Entity;
        } else if (matched == rTag) {
            t.type = TokenType.Tag;
            t.parseTagFromMatcher(m);
        }

        //Save debugging info
        //t.info = ti;
        //t.info.length = m.end() - m.start();
        //TODO: Xml parsing needs line numbers too... We should probably add .info to TokenBase.

        //Record where this match ended (index is declared outside this class)
        index = m.end();

        //blankToken is already a T, so no unchecked cast is required
        return blankToken;

    }

}