EDITokenizer.java example

Explorer

edireader-master
- edireader
  - src
    - main
      - java
        com
        berryworks
        edireader
        Ansi999Generator.java
        AnsiFAGenerator.java
        AnsiReader.java
        DefaultXMLTags.java
        EDIAbstractReader.java
        EDIAttributes.java
        EDIParser.java
        EDIParserFactory.java
        EDIReader.java
        EDIReaderFactory.java
        EDIReaderWrapper.java
        EDISyntaxException.java
        EdifactCONTRLGenerator.java
        EdifactReader.java
        EdifactReaderWithCONTRL.java
        ParserRegistry.java
        Plugin.java
        PluginController.java
        ReplyGenerator.java
        StandardReader.java
        SyntaxDescriptor.java
        TransactionCallback.java
        UNHReader.java
        XMLTags.java
        benchmark
        AnsiUnitOfWork.java
        Benchmark.java
        BenchmarkUnitOfWork.java
        EDITestData.java
        demo
        AnsiTransactionExtractor.java
        EDIAck.java
        EDIScanner.java
        EDISplitter.java
        EDITransform.java
        EDItoXML.java
        EDItoXML_Variations.java
        EDItoXPATH.java
        error
        EDISyntaxExceptionHandler.java
        ErrorMessages.java
        GroupControlNumberException.java
        GroupCountException.java
        InterchangeControlNumberException.java
        MissingMandatoryElementException.java
        RecoverableSyntaxException.java
        SegmentCountException.java
        TransactionControlNumberException.java
        TransactionCountException.java
        formatter
        Formatter.java
        FormatterHandler.java
        FormatterParser.java
        option
        Option.java
        Options.java
        SupportsOptions.java
        plugin
        ANSI_110.java
        ANSI_210.java
        ANSI_277.java
        ANSI_810.java
        ANSI_824.java
        ANSI_834.java
        ANSI_834_X_004010.java
        ANSI_834_X_005010.java
        ANSI_835.java
        ANSI_850.java
        ANSI_856.java
        ANSI_870.java
        ANSI_872.java
        ANSI_997.java
        ANSI_997_X_004010.java
        AbstractPluginControllerFactory.java
        CompositeAwarePlugin.java
        EANCOMSigning.java
        EDIFACT_AUTACK.java
        EDIFACT_FINCAN.java
        EDIFACT_MEDRPT.java
        EDIFACT_MSCONS.java
        EDIFACT_ORDERS.java
        EDIFACT_UTILMD.java
        EDIFACT_UTILTS.java
        LoopContext.java
        LoopDescriptor.java
        LoopStack.java
        PluginControllerFactory.java
        PluginControllerFactoryInterface.java
        PluginControllerImpl.java
        PluginPreparation.java
        splitter
        ClosingDetails.java
        HandlerFactory.java
        SplittingHandler.java
        tokenizer
        AbstractTokenizer.java
        EDITokenizer.java
        SourcePosition.java
        Token.java
        TokenImpl.java
        Tokenizer.java
        util
        BranchingWriter.java
        CommandLine.java
        ContentHandlerBase64Encoder.java
        DateTimeGenerator.java
        FileSequenceNameGenerator.java
        FileUtil.java
        FixedLength.java
        Splitter.java
        XmlFormatter.java
        base64
        AbstractDecoder.java
        AbstractEncoder.java
        AbstractEncoderDecoder.java
        DecoderBackEnd.java
        DecoderFrontEnd.java
        EncoderBackEnd.java
        EncoderFrontEnd.java
        StringBase64Decoder.java
        dom
        AbstractElementList.java
        ChildElements.java
        DocumentUtil.java
        DomBuildingSaxHandler.java
        XPathElements.java
        sax
        ContextAwareSaxAdapter.java
        EDIReaderSAXAdapter.java
        QueuedContentHandler.java
        SAXCharacters.java
        SAXEndDocument.java
        SAXEndElement.java
        SAXEndOfStreamMarker.java
        SAXObject.java
        SAXObjectHandler.java
        SAXObjectReader.java
        SAXStartDocument.java
        SAXStartElement.java
        SerializableAttributes.java
        StackedContentHandler.java
    - test
      - java
        com
        berryworks
        edireader
        ABCReader.java
        AnsiFAGeneratorTest.java
        AnsiReaderTest.java
        CustomXMLTagsTest.java
        DefaultXMLTagsTest.java
        EDIAbstractReaderTest.java
        EDIReaderWrapperTest.java
        EdifactReaderTest.java
        LoopDescriptorTest.java
        ParserRegistryTest.java
        PluginTest.java
        StandardReaderTest.java
        SyntaxDescriptorTest.java
        demo
        EDItoXMLTest.java
        option
        TestOption.java
        plugin
        ANSI_850_X_003999.java
        PluginControllerImplTest.java
        tokenizer
        TestCharBufferBasics.java
        TestEDITokenizer.java
        TestTokenImpl.java
        util
        CommandLineTest.java
        Conversion.java
        EDItoXMLTestCase.java
        FileSequenceNameGeneratorTest.java
        FixedLengthTest.java
        HtmlExtractCodes.java
        ResourceEquipped.java
        ResourcesPath.java
        SplitterTest.java
        TestBranchingWriter.java
        VerboseTestCase.java
        base64
        PublicDomainBase64.java
        TestDecoding.java
        TestEncoding.java
        TestPublicDomainEncoding.java
        dom
        DomBuildingSaxHandlerTest.java
        sax
        EDIReaderSAXAdapterTest.java
        QueuedContentHandlerTest.java
        SerializableAttributesTest.java

/*
 * Copyright 2005-2015 by BerryWorks Software, LLC. All rights reserved.
 *
 * This file is part of EDIReader. You may obtain a license for its use directly from
 * BerryWorks Software, and you may also choose to use this software under the terms of the
 * GPL version 3. Other products in the EDIReader software suite are available only by licensing
 * with BerryWorks. Only those files bearing the GPL statement below are available under the GPL.
 *
 * EDIReader is free software: you can redistribute it and/or modify it under the terms of the
 * GNU General Public License as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * EDIReader is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with EDIReader.  If not,
 * see <http://www.gnu.org/licenses/>.
 */

package com.berryworks.edireader.tokenizer;

import com.berryworks.edireader.EDIReader;

import java.io.IOException;
import java.io.Reader;
import java.nio.CharBuffer;

/**
 * Interprets EDI input as a sequence of primitive syntactic tokens.
 * <p>
 * As an EDI interchange is parsed, the parser uses a Tokenizer to advance through the
 * input EDI stream one token at a time. A call to <code>nextToken()</code> causes the tokenizer to advance
 * past the next token and return a <code>Token</code> instance describing that token.
 * <p>
 * This implementation of Tokenizer uses CharBuffer instead of char[].
 */
public class EDITokenizer extends AbstractTokenizer {

    /**
     * Capacity, in chars, of the internal buffer. This is also the upper bound on
     * how many chars can be buffered ahead of the current position at any one time.
     */
    public static final int BUFFER_SIZE = 1000;

    /**
     * Chars that have been read from the input source but not yet consumed by
     * getChar(). Between method calls the buffer is kept in "drain" mode: the
     * unread data lies between position() and limit().
     */
    private final CharBuffer charBuffer = CharBuffer.wrap(new char[BUFFER_SIZE]);

    /**
     * Creates a tokenizer that reads EDI data from the given source.
     *
     * @param source reader supplying the EDI input
     */
    public EDITokenizer(Reader source) {
        super(source);
        // A freshly wrapped buffer reports its whole capacity as remaining;
        // flipping it makes it report "no unread data" until the first read.
        charBuffer.flip();
        if (EDIReader.debug)
            trace("Constructed a new EDITokenizer");
    }

    /**
     * Creates a tokenizer that reads EDI data from the given source, after first
     * delivering chars that the caller has already read from that source.
     *
     * @param source  reader supplying the EDI input
     * @param preRead chars already consumed from the source that must be tokenized
     *                before any further chars are read; may be null or empty
     * @throws IllegalArgumentException if preRead is longer than the internal buffer
     */
    public EDITokenizer(Reader source, char[] preRead) {
        this(source);
        if (preRead == null || preRead.length == 0)
            return;

        if (preRead.length > charBuffer.capacity())
            throw new IllegalArgumentException("Attempt to create EDITokenizer with " + preRead.length +
                    " pre-read chars, which is greater than the internal buffer size of " + charBuffer.capacity());

        // Switch to fill mode, load the pre-read chars, and switch back to drain mode.
        charBuffer.clear();
        charBuffer.put(preRead);
        charBuffer.flip();
    }

    /**
     * Returns a String representation of the current state of the tokenizer
     * for testing and debugging purposes.
     *
     * @return String representation
     */
    @Override
    public String toString() {
        return "tokenizer state:"
                + " segmentCount=" + segmentCount
                + " charCount=" + charCount
                + " segTokenCount=" + segTokenCount
                + " segCharCount=" + segCharCount
                + " currentToken=" + currentToken
                + " buffer.limit=" + charBuffer.limit()
                + " buffer.position=" + charBuffer.position();
    }

    /**
     * Gets the next character of input. Sets cChar and cClass.
     * <p>
     * If the current char was previously put back (the unGot flag is set), it is
     * simply delivered again. Otherwise the current char is first copied to the
     * output writer and/or the recording, as applicable, and then a fresh char is
     * taken from the buffer, refilling the buffer from the input source if it is empty.
     * At end of input cClass is set to EOF and cChar is left unchanged.
     *
     * @throws IOException for problem reading EDI data
     */
    public void getChar() throws IOException {
        if (unGot) {
            // The current character has been "put back" with ungetChar()
            // after having been seen with getChar(). Therefore, this call
            // to getChar() can simply reget the current character.
            unGot = false;
            charCount++;
            segCharCount++;
            return;
        }

        // Before moving on to a fresh character, copy the current one to the
        // outputWriter (unless we are at end of file or writing is suspended)
        // and to the recorder, if either is active.
        if (outputWriter != null && !endOfFile && !writingSuspended) {
            outputWriter.write(cChar);
        }
        if (recorderOn)
            recording.append(cChar);

        if (charBuffer.remaining() == 0) {
            readUntilBufferProvidesAtLeast(1);
        }

        if (endOfFile) {
            cClass = CharacterClass.EOF;
            if (EDIReader.debug)
                trace("end-of-file encountered");
        } else {
            cChar = charBuffer.get();
            // The order of these comparisons matters when two syntax
            // characters happen to have the same value: the first match wins.
            if (cChar == delimiter)
                cClass = CharacterClass.DELIMITER;
            else if (cChar == subDelimiter)
                cClass = CharacterClass.SUB_DELIMITER;
            else if (cChar == release)
                cClass = CharacterClass.RELEASE;
            else if (cChar == terminator)
                cClass = CharacterClass.TERMINATOR;
            else if (cChar == repetitionSeparator)
                cClass = CharacterClass.REPEAT_DELIMITER;
            else
                cClass = CharacterClass.DATA;
        }
        charCount++;
        segCharCount++;
    }

    /**
     * Gets the remaining chars that have been read into the buffer
     * and not yet consumed. Chars previewed by lookahead(n) are not
     * considered to have been used and therefore are included among
     * the chars returned by getBuffered.
     * <p>
     * The use of getBuffered() is intended for only very special situations.
     * For example, if an input stream contains multiple fully independent EDI
     * interchanges -- perhaps from different EDI standards -- it is useful to
     * logically "start from scratch" on each successive interchange, with new
     * parser, tokenizer, buffer, etc, with any chars remaining in the buffer
     * from the previous interchange to be used as new data.
     * <p>
     * This is a best-effort operation: if the buffered chars cannot be obtained,
     * an empty array is returned rather than an exception being thrown.
     *
     * @return chars of unprocessed input data; never null, possibly empty
     */
    public char[] getBuffered() {
        final char[] nothing = new char[0];

        final int buffered = charBuffer.remaining();
        if (buffered == 0 && (!unGot || endOfFile)) {
            // Nothing is buffered. A char that has been put back counts as
            // pending data, except once end of file has been hit with an
            // empty buffer, in which case it is deliberately ignored.
            return nothing;
        }

        // A put-back char precedes the buffered chars and is part of the result.
        final int wanted = buffered + (unGot ? 1 : 0);
        try {
            return lookahead(wanted);
        } catch (Exception ignored) {
            // Deliberately best-effort (see Javadoc): report "nothing buffered"
            // instead of propagating a failure to the caller.
            return nothing;
        }
    }

    /**
     * Look ahead into the source of input chars and return the next n chars to
     * be seen, without disturbing the normal operation of getChar().
     * <p>
     * If fewer than n chars can be made available -- because the input ends first
     * or because n exceeds what the internal buffer can hold -- the unsatisfied
     * trailing positions of the result are filled with '?'.
     *
     * @param n number of chars to return; 0 yields an empty array
     * @return char[] of length n containing upcoming input chars
     * @throws IOException for problem reading EDI data
     */
    public char[] lookahead(int n) throws IOException {
        if (EDIReader.debug)
            trace("EDITokenizer.lookahead(" + n + ")");

        char[] rval = new char[n];
        if (n == 0) {
            // Nothing requested. Return before touching the tokenizer state;
            // in particular, do not consume a char that could not be stored.
            return rval;
        }

        // The 1st char is grabbed using the tokenizer's built-in
        // getChar() / ungetChar() mechanism. This allows things to work
        // properly whether or not the next char has already been gotten.
        getChar();
        rval[0] = cChar;
        ungetChar();

        // The minus 1 is because we have already filled the first char of the return value, so we only need n-1 more
        if (charBuffer.remaining() < n - 1) {
            if (EDIReader.debug)
                trace("Buffering more data to satisfy lookahead(" + n + ")");
            readUntilBufferProvidesAtLeast(n - 1);
        }

        // Copy the upcoming chars through a duplicate, which shares the buffer's
        // content but has its own position, so the real buffer is not advanced.
        int available = Math.min(charBuffer.remaining(), n - 1);
        charBuffer.duplicate().get(rval, 1, available);

        // If more lookahead chars were requested than were satisfied for any reason,
        // then fill the return value with '?' to the requested length.
        for (int j = 1 + available; j < n; j++) {
            rval[j] = '?';
        }

        return rval;
    }

    /**
     * Reads from the input source until the buffer holds at least the requested
     * number of unread chars, end of input is reached, or the buffer is full.
     * Sets endOfFile when the input source reports end of stream.
     * <p>
     * A request larger than the buffer capacity can never be met; it is treated as
     * a request to fill the buffer, so the caller must check remaining() afterwards.
     *
     * @param needed number of unread chars the caller would like to have buffered
     * @throws IOException for problem reading EDI data
     */
    private void readUntilBufferProvidesAtLeast(int needed) throws IOException {

        // Never aim for more than the buffer can hold. Without this cap, a full
        // buffer leaves no room to read into, every read returns 0, and the
        // retry loop below would spin forever.
        final int target = Math.min(needed, charBuffer.capacity());

        int remaining;
        while ((remaining = charBuffer.remaining()) < target) {
            if (EDIReader.debug)
                trace("Reading from input stream because at least " + needed +
                        " chars are needed and only " + remaining + " are avilalble");

            // Move the unread chars to the front and switch to fill mode.
            charBuffer.compact();
            int n;
            // Because remaining < target <= capacity, there is free space here,
            // so a 0 result means "no data right now" and the read is retried.
            do {
                n = inputReader.read(charBuffer);
            } while (n == 0);
            // Back to drain mode, whether or not anything was read.
            charBuffer.flip();

            if (n < 0) {
                if (EDIReader.debug)
                    trace("Hit end of file on the input stream");
                endOfFile = true;
                break;
            } else {
                if (EDIReader.debug)
                    trace("Number of chars read from input stream: " + n);
            }
        }
    }
}