MincodeParser.java example

Explorer

divolte-collector-master
- src
  - main
    - java
      - io
        divolte
        server
        AllowedMethodsHandler.java
        AsyncRequestBodyReceiver.java
        AvroRecordBuffer.java
        BrowserSource.java
        ChunkyByteBuffer.java
        ClientSideCookieEventHandler.java
        DivolteEvent.java
        DivolteIdentifier.java
        HttpSource.java
        IncomingRequestListener.java
        IncomingRequestProcessingPool.java
        IncomingRequestProcessor.java
        IncompleteRequestException.java
        JavaScriptHandler.java
        JsonContentHandler.java
        JsonEventHandler.java
        JsonSource.java
        Mapping.java
        MappingTestServer.java
        MoreCollectors.java
        PingHandler.java
        ProxyAdjacentPeerAddressHandler.java
        SchemaRegistry.java
        Server.java
        ShortTermDuplicateMemory.java
        UndertowEvent.java
        config
        BrowserSourceConfiguration.java
        DivolteConfiguration.java
        DurationDeserializer.java
        DurationFormatException.java
        FileStrategyConfiguration.java
        GlobalConfiguration.java
        HdfsConfiguration.java
        HdfsSinkConfiguration.java
        ImmutableProperties.java
        JavascriptConfiguration.java
        JsonSourceConfiguration.java
        KafkaConfiguration.java
        KafkaSinkConfiguration.java
        MapperConfiguration.java
        MappingConfiguration.java
        PropertiesDeserializer.java
        ServerConfiguration.java
        SinkConfiguration.java
        SinkTypeConfiguration.java
        SourceConfiguration.java
        UserAgentParserConfiguration.java
        ValidatedConfiguration.java
        constraint
        MappingSourceSinkReferencesMustExist.java
        OneSchemaPerSink.java
        SourceAndSinkNamesCannotCollide.java
        hdfs
        FileCreateAndSyncStrategy.java
        HdfsFlusher.java
        HdfsFlushingPool.java
        SimpleRollingFileStrategy.java
        ip2geo
        DatabaseLookupService.java
        ExternalDatabaseLookupService.java
        LookupService.java
        js
        Gzip.java
        GzippableHttpBody.java
        HttpBody.java
        JavaScriptResource.java
        Slf4jErrorManager.java
        TrackingJavaScriptResource.java
        kafka
        AvroRecordBufferSerializer.java
        DivolteIdentifierSerializer.java
        KafkaFlusher.java
        KafkaFlushingPool.java
        mincode
        MincodeFactory.java
        MincodeParser.java
        processing
        Item.java
        ItemProcessor.java
        ProcessingPool.java
        recordmapping
        AvroGenericRecordMapper.java
        DslRecordMapper.java
        DslRecordMapping.java
        JacksonSupport.java
        JsonPathSupport.java
        QueryStringParser.java
        SchemaMappingException.java
        UserAgentParserAndCache.java
        ValidationError.java
  - test
    - java
      - io
        divolte
        server
        BrowserLists.java
        ChunkyByteBufferInputStreamTest.java
        DivolteIdentifierTest.java
        DslRecordMapperTest.java
        JsonSourceTest.java
        ProxyAdjacentPeerAddressHandlerTest.java
        RequestChecksumTest.java
        SeleniumDisabledAutoPageViewEventTest.java
        SeleniumJavaScriptTest.java
        SeleniumTestBase.java
        ServerPingTest.java
        ServerSinkSourceConfigurationTest.java
        ServerTestUtils.java
        ShortTermDuplicateMemoryTest.java
        config
        ValidatedConfigurationTest.java
        hdfs
        HdfsFlusherTest.java
        js
        GzipTest.java
        TrackingJavaScriptResourceTest.java
        mincode
        MincodeFactoryTest.java
        MincodeParserSamplesTest.java
        MincodeParserTest.java
        recordmapping
        AvroGenericRecordMapperTest.java

/*
 * Copyright 2015 GoDataDriven B.V.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.divolte.server.mincode;

import com.fasterxml.jackson.core.Base64Variant;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.ObjectCodec;
import com.fasterxml.jackson.core.base.ParserBase;
import com.fasterxml.jackson.core.io.IOContext;

import javax.annotation.Nullable;
import javax.annotation.ParametersAreNonnullByDefault;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Objects;
import java.util.Optional;

@ParametersAreNonnullByDefault
public class MincodeParser extends ParserBase {

    /**
     * Record type queued after an empty-object record ({@code 'o'}) so that the
     * following call to {@link #nextToken()} closes the object straight away.
     * This must be the end-of-object marker {@code ')'}: while inside an object
     * any other pending type (including the end-of-array marker {@code '.'})
     * is treated as the start of a named field.
     */
    private static final Optional<Integer> PENDING_END_OBJECT = Optional.of((int)')');

    /** Source of the characters being parsed. */
    private final Reader reader;
    /** Characters read from {@link #reader}; set to null once released. */
    private char[] inputBuffer;
    /** Whether {@link #inputBuffer} is handed back to the I/O context on release. */
    private final boolean bufferRecyclable;

    /** Codec exposed via {@link #getCodec()}; may be absent. */
    @Nullable
    private ObjectCodec objectCodec;

    /**
     * Record type that has already been read (or synthesised) but whose
     * token has not been delivered yet; consumed by the next call to
     * {@link #nextToken()}.
     */
    private Optional<Integer> pendingRecordType = Optional.empty();

    /**
     * Creates a parser that reads Mincode records from a character stream.
     *
     * @param ctxt           I/O context; also supplies the input buffer.
     * @param parserFeatures parser feature flags, passed through to the base class.
     * @param objectCodec    codec to expose via {@link #getCodec()}, if any.
     * @param reader         source of characters; must not be null.
     */
    MincodeParser(final IOContext ctxt,
                  final int parserFeatures,
                  @Nullable final ObjectCodec objectCodec,
                  final Reader reader) {
        super(ctxt, parserFeatures);
        this.objectCodec = objectCodec;
        this.reader = Objects.requireNonNull(reader);
        // The buffer comes from the context, so it is always given back on release.
        this.bufferRecyclable = true;
        this.inputBuffer = ctxt.allocTokenBuffer();
        // Start with an empty window: nothing has been read yet.
        _inputPtr = 0;
        _inputEnd = 0;
    }

    /**
     * Refills the input buffer from the reader.
     *
     * <p>The running offsets are only advanced once new data has actually
     * been read. Advancing them on a failed read would count the final
     * buffer twice and skew the locations reported for end-of-input errors.
     *
     * @return true if at least one more character is now available; false if
     *         the reader produced no further characters, in which case
     *         {@link #_closeInput()} has been invoked.
     * @throws IOException if reading from (or closing) the reader fails.
     */
    private boolean _loadMore() throws IOException {
        if (null == reader) {
            return false;
        }
        final int count = reader.read(inputBuffer, 0, inputBuffer.length);
        if (count <= 0) {
            // Nothing further to read: leave the offsets untouched.
            _closeInput();
            return false;
        }
        // The previous buffer has been fully consumed; fold its length into
        // the running totals before exposing the new window.
        final int consumed = _inputEnd;
        _currInputProcessed += consumed;
        _currInputRowStart -= consumed;
        _inputPtr = 0;
        _inputEnd = count;
        return true;
    }

    /**
     * Reads the next character of input, refilling the buffer if necessary.
     *
     * @return the next character, or -1 if no more input is available.
     * @throws IOException if refilling the buffer fails.
     */
    private int nextChar() throws IOException {
        if (_inputPtr >= _inputEnd && !_loadMore()) {
            return -1;
        }
        return inputBuffer[_inputPtr++];
    }

    /**
     * Reads the next character of input, treating end-of-input as an error.
     *
     * @param eofMsg message fragment passed to the invalid-EOF report.
     * @return the next character.
     * @throws IOException if the input is exhausted or cannot be read.
     */
    private char nextChar(final String eofMsg) throws IOException {
        final int c = nextChar();
        if (c == -1) {
            _reportInvalidEOF(eofMsg, _currToken);
        }
        return (char) c;
    }

    /**
     * Reads a string payload into the text buffer. The string runs up to
     * (and consumes) the terminating {@code '!'}; a {@code '~'} escapes the
     * character that follows it.
     *
     * @throws IOException if the input ends before the terminator, or on read failure.
     */
    @Override
    protected void _finishString() throws IOException {
        // Fast path: look for the terminator within the data already buffered.
        final int start = _inputPtr;
        final int limit = _inputEnd;
        int scan = start;
        while (scan < limit) {
            final char ch = inputBuffer[scan];
            if ('!' == ch) {
                // Whole string is in the buffer and has no escapes, so the
                // text buffer can simply point at the input buffer.
                _textBuffer.resetWithShared(inputBuffer, start, scan - start);
                _inputPtr = scan + 1;
                return;
            }
            if ('~' == ch) {
                // An escape means the text has to be copied and unescaped.
                break;
            }
            ++scan;
        }

        // Slow path: the buffer ran out or an escape was found. Copy what has
        // been scanned so far and continue character by character.
        _textBuffer.resetWithCopy(inputBuffer, start, scan - start);
        _inputPtr = scan;

        char[] segment = _textBuffer.getCurrentSegment();
        int fill = _textBuffer.getCurrentSegmentSize();
        while (true) {
            if (_inputPtr >= _inputEnd && !_loadMore()) {
                _reportInvalidEOF(": was expecting end of string value", _currToken);
            }
            char ch = inputBuffer[_inputPtr++];
            if ('!' == ch) {
                // Terminator reached.
                break;
            }
            if ('~' == ch) {
                // The escaped character is taken literally.
                ch = nextChar(" in character escape sequence");
            }
            if (fill >= segment.length) {
                // Current segment is full; roll over to a fresh one.
                segment = _textBuffer.finishCurrentSegment();
                fill = 0;
            }
            segment[fill++] = ch;
        }
        _textBuffer.setCurrentLength(fill);
    }

    /**
     * Closes the reader, but only when the I/O context reports the source as
     * resource-managed or the {@code AUTO_CLOSE_SOURCE} feature is enabled.
     *
     * @throws IOException if closing the reader fails.
     */
    @Override
    protected void _closeInput() throws IOException {
        if (!_ioContext.isResourceManaged() && !isEnabled(Feature.AUTO_CLOSE_SOURCE)) {
            return;
        }
        reader.close();
    }

    /**
     * Releases the base-class buffers and, if it is recyclable and still
     * held, returns the input buffer to the I/O context.
     *
     * @throws IOException if the base class fails to release its buffers.
     */
    @Override
    protected void _releaseBuffers() throws IOException {
        super._releaseBuffers();
        if (!bufferRecyclable || null == inputBuffer) {
            return;
        }
        // Drop our reference before handing the buffer back.
        final char[] released = inputBuffer;
        inputBuffer = null;
        _ioContext.releaseTokenBuffer(released);
    }

    /**
     * @return the codec currently associated with this parser, or null if none is set.
     */
    @Override
    @Nullable
    public ObjectCodec getCodec() {
        return this.objectCodec;
    }

    /**
     * Replaces the codec associated with this parser.
     *
     * @param objectCodec the new codec; null clears it.
     */
    @Override
    public void setCodec(@Nullable final ObjectCodec objectCodec) {
        this.objectCodec = objectCodec;
    }

    /**
     * Writes any characters that have been buffered but not yet consumed to
     * the supplied writer. The read position is left unchanged.
     *
     * @param w destination for the unconsumed characters.
     * @return the difference between the buffer end and the read position;
     *         characters are only written when this is positive.
     * @throws IOException if writing fails.
     */
    @Override
    public int releaseBuffered(final Writer w) throws IOException {
        final int unread = _inputEnd - _inputPtr;
        if (unread <= 0) {
            return unread;
        }
        w.write(inputBuffer, _inputPtr, unread);
        return unread;
    }

    /**
     * @return the reader this parser takes its input from.
     */
    @Override
    public Object getInputSource() {
        return this.reader;
    }

    /**
     * Advances to the next token.
     *
     * <p>Input is a sequence of records, each introduced by a single type
     * character. Inside an object every record except the end-of-object
     * marker carries a field name between the type character and its
     * payload. Such a record is delivered as two tokens: the field name
     * first (with the type remembered as pending), then the value.
     *
     * @return the next token, or null once the parser is closed or the end
     *         of input has been reached at the root level.
     * @throws IOException if the input is malformed, ends prematurely, or
     *                     cannot be read.
     */
    @Override
    @Nullable
    public JsonToken nextToken() throws IOException {
        final JsonToken nextToken;
        if (_closed) {
            nextToken = null;
        } else {
            // Numeric state belongs to the token being left behind. Clear it
            // so that numeric accessors cannot return a stale value for a
            // later, non-numeric token.
            _numTypesValid = NR_UNKNOWN;

            // Snapshot the input position for token-location reporting.
            _tokenInputTotal = _currInputProcessed + _inputPtr - 1;
            _tokenInputRow = _currInputRow;
            _tokenInputCol = _inputPtr - _currInputRowStart - 1;

            // Cursor is positioned:
            //  - At the start of a record.
            //  - At the start of the name of a field if we are inside an
            //    object, the previous token was not FIELD_NAME, and the
            //    record is not an end-of-object.
            //  - At the start of the value of a field if:
            //      - The current token is FIELD_NAME

            // We may be in the middle of processing a record.
            // If so, the type has been preserved as the pending record type.
            final int recordType;
            if (pendingRecordType.isPresent()) {
                recordType = pendingRecordType.get();
                pendingRecordType = Optional.empty();
            } else {
                recordType = nextChar();
            }

            // If we just finished a field name, invalidate the
            // buffer containing the text for the field name.
            if (JsonToken.FIELD_NAME == _currToken) {
                _nameCopied = false;
            }

            if (-1 == recordType) {
                // End-of-file. This is checked before anything else so that
                // input truncated inside an object is reported as an
                // unexpected end of input, instead of attempting to read a
                // field name from an exhausted reader.
                _handleEOF();
                close();
                nextToken = null;
            } else if (')' == recordType) {
                // Special handling for end-of-object.
                // (This is the only record type while in an object that doesn't
                //  have a field name.)
                if (!_parsingContext.inObject()) {
                    _reportError("Unexpected end of object while not in object.");
                }
                nextToken = JsonToken.END_OBJECT;
                _parsingContext = _parsingContext.getParent();
            } else if (_currToken != JsonToken.FIELD_NAME
                        && _parsingContext.inObject()) {
                // If we're in an object but didn't just deliver the field name,
                // that means we've just started a new record and are positioned
                // over the name of the field.
                nextToken = JsonToken.FIELD_NAME;
                _finishString();
                _parsingContext.setCurrentName(_textBuffer.contentsAsString());
                // Remember the type so the value is produced by the next call.
                pendingRecordType = Optional.of(recordType);
            } else {
                // We're now positioned on the payload for the record, if it has any.
                switch (recordType) {
                    case 'a':
                        // Start of an array.
                        nextToken = JsonToken.START_ARRAY;
                        _parsingContext = _parsingContext.createChildArrayContext(_tokenInputRow, _tokenInputCol);
                        break;
                    case '.':
                        // End of an array.
                        if (!_parsingContext.inArray()) {
                            _reportError("Unexpected end of array while not in array.");
                        }
                        nextToken = JsonToken.END_ARRAY;
                        _parsingContext = _parsingContext.getParent();
                        break;
                    case '(':
                        // Start of an object.
                        nextToken = JsonToken.START_OBJECT;
                        _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol);
                        break;
                    case 'o':
                        // An empty object: open it now and queue
                        // PENDING_END_OBJECT as the record type for the next call.
                        nextToken = JsonToken.START_OBJECT;
                        _parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol);
                        pendingRecordType = PENDING_END_OBJECT;
                        break;
                    case 's':
                        // String.
                        nextToken = JsonToken.VALUE_STRING;
                        _finishString();
                        break;
                    case 't':
                        // Boolean true.
                        nextToken = JsonToken.VALUE_TRUE;
                        break;
                    case 'f':
                        // Boolean false.
                        nextToken = JsonToken.VALUE_FALSE;
                        break;
                    case 'n':
                        // Null.
                        nextToken = JsonToken.VALUE_NULL;
                        break;
                    case 'd':
                        // Base-36 integer.
                        nextToken = JsonToken.VALUE_NUMBER_INT;
                        nextIntegerRecord();
                        break;
                    case 'j':
                        // JSON-formatted number.
                        nextToken = nextNumberRecord();
                        break;
                    default:
                        // Unknown record type.
                        throw _constructError("Unknown record type encountered: " + (char)recordType);
                }
            }
        }
        _currToken = nextToken;
        return nextToken;
    }

    /**
     * Reads the payload of an integer record, which is written in base 36,
     * and stores it as the current numeric value.
     *
     * @throws IOException if the payload cannot be read or is not a valid
     *                     base-36 integer.
     */
    private void nextIntegerRecord() throws IOException {
        // The payload is delimited exactly like a string.
        _finishString();
        final String digits = _textBuffer.contentsAsString();
        try {
            setNumberValue(new BigInteger(digits, 36));
        } catch (final NumberFormatException e) {
            _wrapError("Invalid integer record", e);
        }
    }

    /**
     * Stores an integer as the current numeric value. The value is always
     * recorded as a BigInteger, and additionally as a long and an int when it
     * fits into those types.
     *
     * @param value the integer value of the current token.
     */
    private void setNumberValue(final BigInteger value) {
        _numTypesValid = NR_BIGINT;
        _numberBigInt = value;
        // Jackson expects all smaller types to be filled in, so do this for
        // each type the value fits into. bitLength() excludes the sign bit,
        // so a value fits a signed N-bit type exactly when bitLength() < N;
        // this is the same condition under which longValueExact() and
        // intValueExact() succeed, without using exceptions for control flow.
        final int bits = value.bitLength();
        if (bits < Long.SIZE) {
            _numberLong = value.longValue();
            _numTypesValid |= NR_LONG;
            if (bits < Integer.SIZE) {
                _numberInt = value.intValue();
                _numTypesValid |= NR_INT;
            }
        }
    }

    /**
     * Reads the payload of a JSON-formatted number record and stores it as
     * the current numeric value.
     *
     * @return {@code VALUE_NUMBER_INT} if the parsed decimal has a scale of
     *         zero, otherwise {@code VALUE_NUMBER_FLOAT}.
     * @throws IOException if the payload cannot be read or is not a valid number.
     */
    private JsonToken nextNumberRecord() throws IOException {
        // The payload is delimited exactly like a string.
        _finishString();
        final BigDecimal parsed;
        try {
            parsed = _textBuffer.contentsAsDecimal();
        } catch (final NumberFormatException e) {
            _wrapError("Invalid number record", e);
            // Unreachable.
            return null;
        }
        // Jackson uses floating point to mean a decimal and/or exponent is
        // present. The best available heuristic for that is a non-zero scale.
        if (0 != parsed.scale()) {
            // Unlike integer types, Jackson will convert to double/float on demand.
            _numTypesValid = NR_BIGDECIMAL;
            _numberBigDecimal = parsed;
            return JsonToken.VALUE_NUMBER_FLOAT;
        }
        setNumberValue(parsed.unscaledValue());
        return JsonToken.VALUE_NUMBER_INT;
    }

    /**
     * Returns the textual form of the current token: the field name for a
     * field-name token, the text buffer contents for string and number
     * tokens, and the token's own fixed text for anything else.
     *
     * @return the text, or null if there is no current token.
     */
    @Override
    @Nullable
    public String getText() throws IOException {
        final JsonToken current = _currToken;
        if (null == current) {
            return null;
        }
        switch (current) {
            case FIELD_NAME:
                return _parsingContext.getCurrentName();
            case VALUE_STRING:
            case VALUE_NUMBER_INT:
            case VALUE_NUMBER_FLOAT:
                return _textBuffer.contentsAsString();
            default:
                return current.asString();
        }
    }

    /**
     * Returns a character array holding the text of the current token. For
     * field names the name is copied into a reusable buffer on first access;
     * that buffer may be longer than the name itself.
     *
     * @return the backing character array, or null if there is no current token.
     */
    @Override
    @Nullable
    public char[] getTextCharacters() throws IOException {
        final JsonToken current = _currToken;
        if (null == current) {
            return null;
        }
        switch (current) {
            case VALUE_STRING:
            case VALUE_NUMBER_INT:
            case VALUE_NUMBER_FLOAT:
                return _textBuffer.getTextBuffer();
            case FIELD_NAME:
                if (!_nameCopied) {
                    // Copy the name once per token, growing the buffer if needed.
                    final String fieldName = _parsingContext.getCurrentName();
                    final int length = fieldName.length();
                    if (null == _nameCopyBuffer) {
                        _nameCopyBuffer = _ioContext.allocNameCopyBuffer(length);
                    } else if (_nameCopyBuffer.length < length) {
                        _nameCopyBuffer = new char[length];
                    }
                    fieldName.getChars(0, length, _nameCopyBuffer, 0);
                    _nameCopied = true;
                }
                return _nameCopyBuffer;
            default:
                return current.asCharArray();
        }
    }

    /**
     * Returns the number of characters in the text of the current token.
     *
     * @return the text length, or 0 if there is no current token.
     */
    @Override
    public int getTextLength() throws IOException {
        final JsonToken current = _currToken;
        if (null == current) {
            return 0;
        }
        switch (current) {
            case VALUE_STRING:
            case VALUE_NUMBER_INT:
            case VALUE_NUMBER_FLOAT:
                return _textBuffer.size();
            case FIELD_NAME:
                return _parsingContext.getCurrentName().length();
            default:
                return current.asCharArray().length;
        }
    }

    /**
     * Returns the offset at which the current token's text starts within the
     * array returned by {@link #getTextCharacters()}.
     *
     * @return the text buffer's offset for string and number tokens; 0 for
     *         every other token and when there is no current token.
     */
    @Override
    public int getTextOffset() throws IOException {
        final JsonToken current = _currToken;
        final boolean textBuffered = JsonToken.VALUE_STRING == current
                || JsonToken.VALUE_NUMBER_INT == current
                || JsonToken.VALUE_NUMBER_FLOAT == current;
        if (textBuffered) {
            return _textBuffer.getTextOffset();
        }
        return 0;
    }

    /**
     * Decodes the current string value as base64-encoded binary content.
     *
     * <p>The text buffer is only refreshed for some token types, so the
     * current token is checked first; otherwise leftover text from an
     * earlier token could be decoded by mistake.
     *
     * @param b64variant the base64 variant to decode with.
     * @return the decoded bytes.
     * @throws IOException if the current token is not a string value.
     */
    @Override
    public byte[] getBinaryValue(final Base64Variant b64variant) throws IOException {
        if (JsonToken.VALUE_STRING != _currToken) {
            _reportError("Current token (" + _currToken + ") not VALUE_STRING, can not access as binary");
        }
        return b64variant.decode(_textBuffer.contentsAsString());
    }
}