/*
 * Hibernate, Relational Persistence for Idiomatic Java
 *
 * JBoss, Home of Professional Open Source
 * Copyright 2011 Red Hat Inc. and/or its affiliates and other contributors
 * as indicated by the @authors tag. All rights reserved.
 * See the copyright.txt in the distribution for a
 * full listing of individual contributors.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License, v. 2.1.
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
 * You should have received a copy of the GNU Lesser General Public License,
 * v.2.1 along with this distribution; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301, USA.
 */
package org.hibernate.search.indexes.serialization.avro.impl;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.avro.Protocol;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.util.Utf8;

import org.hibernate.search.bridge.spi.ConversionContext;
import org.hibernate.search.bridge.util.impl.ContextualExceptionBridgeHelper;
import org.hibernate.search.indexes.serialization.spi.Deserializer;
import org.hibernate.search.indexes.serialization.spi.LuceneWorksBuilder;
import org.hibernate.search.indexes.serialization.spi.SerializableIndex;
import org.hibernate.search.indexes.serialization.spi.SerializableStore;
import org.hibernate.search.indexes.serialization.spi.SerializableTermVector;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

/**
 * Rebuilds Lucene works from an Avro-encoded message by replaying each
 * serialized operation on the given {@link LuceneWorksBuilder}.
 *
 * @author Emmanuel Bernard <emmanuel@hibernate.org>
 */
public class AvroDeserializer implements Deserializer {

	private static final Log log = LoggerFactory.make();
	private final Protocol protocol;
	private List<Utf8> classReferences;

	public AvroDeserializer(Protocol protocol) {
		this.protocol = protocol;
	}

	@Override
	public void deserialize(byte[] data, LuceneWorksBuilder hydrator) {
		ByteArrayInputStream inputStream = new ByteArrayInputStream( data );
		//the message starts with a two byte protocol version header (major, minor)
		int majorVersion = inputStream.read();
		int minorVersion = inputStream.read();
		if ( AvroSerializationProvider.getMajorVersion() != majorVersion ) {
			throw log.incompatibleProtocolVersion(
					majorVersion,
					minorVersion,
					AvroSerializationProvider.getMajorVersion(),
					AvroSerializationProvider.getMinorVersion()
			);
		}
		if ( AvroSerializationProvider.getMinorVersion() < minorVersion ) {
			//TODO what to do about it? Log each time? Once?
			if ( log.isTraceEnabled() ) {
				log.tracef(
						"Parsing message from a future protocol version. Some features might not be propagated. "
								+ "Message version: " + majorVersion + "." + minorVersion
								+ ". Current protocol version: " + AvroSerializationProvider.getMajorVersion()
								+ "." + AvroSerializationProvider.getMinorVersion()
				);
			}
		}
		//the remainder of the stream is the Avro binary payload
		Decoder decoder = DecoderFactory.get().binaryDecoder( inputStream, null );
		GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>( protocol.getType( "Message" ) );
		GenericRecord result;
		try {
			result = reader.read( null, decoder );
		}
		catch ( IOException e ) {
			throw log.unableToDeserializeAvroStream( e );
		}

		classReferences = asListOfString( result, "classReferences" );
		final List<GenericRecord> operations = asListOfGenericRecords( result, "operations" );
		final ConversionContext conversionContext = new ContextualExceptionBridgeHelper();
		for ( GenericRecord operation : operations ) {
			String schema = operation.getSchema().getName();
			if ( "OptimizeAll".equals( schema ) ) {
				hydrator.addOptimizeAll();
			}
			else if ( "PurgeAll".equals( schema ) ) {
				hydrator.addPurgeAllLuceneWork( asClass( operation, "class" ) );
			}
			else if ( "Delete".equals( schema ) ) {
				processId( operation, hydrator );
				hydrator.addDeleteLuceneWork( asClass( operation, "class" ), conversionContext );
			}
			else if ( "Add".equals( schema ) ) {
				buildLuceneDocument( asGenericRecord( operation, "document" ), hydrator );
				Map<String, String> analyzers = getAnalyzers( operation );
				processId( operation, hydrator );
				hydrator.addAddLuceneWork( asClass( operation, "class" ), analyzers, conversionContext );
			}
			else if ( "Update".equals( schema ) ) {
				buildLuceneDocument( asGenericRecord( operation, "document" ), hydrator );
				Map<String, String> analyzers = getAnalyzers( operation );
				processId( operation, hydrator );
				hydrator.addUpdateLuceneWork( asClass( operation, "class" ), analyzers, conversionContext );
			}
			else {
				throw log.cannotDeserializeOperation( schema );
			}
		}
	}

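	/**
	 * Entity class names are not repeated per operation: each operation stores
	 * an index into the message-level "classReferences" list deserialized above.
	 */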
	private String asClass(GenericRecord operation, String attribute) {
		Integer index = (Integer) operation.get( attribute );
		return classReferences.get( index ).toString();
	}

	private List<Utf8> asListOfString(GenericRecord result, String attribute) {
		return (List<Utf8>) result.get( attribute );
	}

	private void processId(GenericRecord operation, LuceneWorksBuilder hydrator) {
		GenericRecord id = (GenericRecord) operation.get( "id" );
		Object value = id.get( "value" );
		if ( value instanceof ByteBuffer ) {
			hydrator.addIdAsJavaSerialized( asByteArray( (ByteBuffer) value ) );
		}
		else if ( value instanceof Utf8 ) {
			hydrator.addId( value.toString() );
		}
		else {
			//the rest are serialized objects
			hydrator.addId( (Serializable) value );
		}
	}

	private Map<String, String> getAnalyzers(GenericRecord operation) {
		Map<?, ?> analyzersWithUtf8 = (Map<?, ?>) operation.get( "fieldToAnalyzerMap" );
		if ( analyzersWithUtf8 == null ) {
			return null;
		}
		Map<String, String> analyzers = new HashMap<String, String>( analyzersWithUtf8.size() );
		for ( Map.Entry<?, ?> entry : analyzersWithUtf8.entrySet() ) {
			analyzers.put( entry.getKey().toString(), entry.getValue().toString() );
		}
		return analyzers;
	}

	private void buildLuceneDocument(GenericRecord document, LuceneWorksBuilder hydrator) {
		hydrator.defineDocument( asFloat( document, "boost" ) );
		List<GenericRecord> fieldables = asListOfGenericRecords( document, "fieldables" );
		for ( GenericRecord field : fieldables ) {
			String schema = field.getSchema().getName();
			if ( "CustomFieldable".equals( schema ) ) {
				hydrator.addFieldable( asByteArray( field, "instance" ) );
			}
			else if ( "NumericIntField".equals( schema ) ) {
				hydrator.addIntNumericField(
						asInt( field, "value" ),
						asString( field, "name" ),
						asInt( field, "precisionStep" ),
						asStore( field ),
						asBoolean( field, "indexed" ),
						asFloat( field, "boost" ),
						asBoolean( field, "omitNorms" ),
						asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "NumericFloatField".equals( schema ) ) {
				hydrator.addFloatNumericField(
						asFloat( field, "value" ),
						asString( field, "name" ),
						asInt( field, "precisionStep" ),
						asStore( field ),
						asBoolean( field, "indexed" ),
						asFloat( field, "boost" ),
						asBoolean( field, "omitNorms" ),
						asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "NumericLongField".equals( schema ) ) {
				hydrator.addLongNumericField(
						asLong( field, "value" ),
						asString( field, "name" ),
						asInt( field, "precisionStep" ),
						asStore( field ),
						asBoolean( field, "indexed" ),
						asFloat( field, "boost" ),
						asBoolean( field, "omitNorms" ),
						asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "NumericDoubleField".equals( schema ) ) {
				hydrator.addDoubleNumericField(
						asDouble( field, "value" ),
						asString( field, "name" ),
						asInt( field, "precisionStep" ),
						asStore( field ),
						asBoolean( field, "indexed" ),
						asFloat( field, "boost" ),
						asBoolean( field, "omitNorms" ),
						asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "BinaryField".equals( schema ) ) {
				hydrator.addFieldWithBinaryData(
						asString( field, "name" ),
						asByteArray( field, "value" ),
						asInt( field, "offset" ),
						asInt( field, "length" ),
						asFloat( field, "boost" ),
						asBoolean( field, "omitNorms" ),
						asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "StringField".equals( schema ) ) {
				hydrator.addFieldWithStringData(
						asString( field, "name" ),
						asString( field, "value" ),
						asStore( field ),
						asIndex( field ),
						asTermVector( field ),
						asFloat( field, "boost" ),
						asBoolean( field, "omitNorms" ),
						asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "TokenStreamField".equals( schema ) ) {
				buildAttributes( field, "value", hydrator );
				hydrator.addFieldWithTokenStreamData(
						asString( field, "name" ),
						asTermVector( field ),
						asFloat( field, "boost" ),
						asBoolean( field, "omitNorms" ),
						asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else if ( "ReaderField".equals( schema ) ) {
				hydrator.addFieldWithSerializableReaderData(
						asString( field, "name" ),
						asByteArray( field, "value" ),
						asTermVector( field ),
						asFloat( field, "boost" ),
						asBoolean( field, "omitNorms" ),
						asBoolean( field, "omitTermFreqAndPositions" )
				);
			}
			else {
				throw log.cannotDeserializeField( schema );
			}
		}
	}

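	/**
	 * A serialized token stream is a list of tokens, each token in turn being
	 * the list of its attributes: every attribute is replayed on the hydrator,
	 * then the token is closed with {@code addToken()}.
	 */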
	private void buildAttributes(GenericRecord record, String field, LuceneWorksBuilder hydrator) {
		List<List<?>> tokens = (List<List<?>>) record.get( field );
		for ( List<?> token : tokens ) {
			for ( Object attribute : token ) {
				buildAttribute( attribute, hydrator );
			}
			hydrator.addToken();
		}
	}

	private void buildAttribute(Object element, LuceneWorksBuilder hydrator) {
		if ( element instanceof GenericRecord ) {
			GenericRecord record = (GenericRecord) element;
			String name = record.getSchema().getName();
			if ( "TokenTrackingAttribute".equals( name ) ) {
				hydrator.addTokenTrackingAttribute( (List<Integer>) record.get( "positions" ) );
			}
			else if ( "CharTermAttribute".equals( name ) ) {
				hydrator.addCharTermAttribute( (CharSequence) record.get( "sequence" ) );
			}
			else if ( "PayloadAttribute".equals( name ) ) {
				hydrator.addPayloadAttribute( asByteArray( record, "payload" ) );
			}
			else if ( "KeywordAttribute".equals( name ) ) {
				hydrator.addKeywordAttribute( asBoolean( record, "isKeyword" ) );
			}
			else if ( "PositionIncrementAttribute".equals( name ) ) {
				hydrator.addPositionIncrementAttribute( asInt( record, "positionIncrement" ) );
			}
			else if ( "FlagsAttribute".equals( name ) ) {
				hydrator.addFlagsAttribute( asInt( record, "flags" ) );
			}
			else if ( "TypeAttribute".equals( name ) ) {
				hydrator.addTypeAttribute( asString( record, "type" ) );
			}
			else if ( "OffsetAttribute".equals( name ) ) {
				hydrator.addOffsetAttribute( asInt( record, "startOffset" ), asInt( record, "endOffset" ) );
			}
			else {
				log.unknownAttributeSerializedRepresentation( name );
			}
		}
		else if ( element instanceof ByteBuffer ) {
			hydrator.addSerializedAttribute( asByteArray( (ByteBuffer) element ) );
		}
		else {
			log.unknownAttributeSerializedRepresentation( element.getClass().getName() );
		}
	}

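	// Typed accessors: Avro exposes record fields as Object (strings as Utf8);
	// the helpers below narrow each field to the Java type the builder expects.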
	private GenericRecord asGenericRecord(GenericRecord operation, String field) {
		return (GenericRecord) operation.get( field );
	}

	private List<GenericRecord> asListOfGenericRecords(GenericRecord result, String field) {
		return (List<GenericRecord>) result.get( field );
	}

	private float asFloat(GenericRecord record, String field) {
		return ( (Float) record.get( field ) ).floatValue();
	}

	private int asInt(GenericRecord record, String field) {
		return ( (Integer) record.get( field ) ).intValue();
	}

	private long asLong(GenericRecord record, String field) {
		return ( (Long) record.get( field ) ).longValue();
	}

	private double asDouble(GenericRecord record, String field) {
		return ( (Double) record.get( field ) ).doubleValue();
	}

	private String asString(GenericRecord record, String field) {
		return record.get( field ).toString();
	}

	private boolean asBoolean(GenericRecord record, String field) {
		return ( (Boolean) record.get( field ) ).booleanValue();
	}

	private SerializableStore asStore(GenericRecord field) {
		String string = field.get( "store" ).toString();
		return SerializableStore.valueOf( string );
	}

	private SerializableIndex asIndex(GenericRecord field) {
		String string = field.get( "index" ).toString();
		return SerializableIndex.valueOf( string );
	}

	private SerializableTermVector asTermVector(GenericRecord field) {
		String string = field.get( "termVector" ).toString();
		return SerializableTermVector.valueOf( string );
	}

	private byte[] asByteArray(GenericRecord operation, String field) {
		ByteBuffer buffer = (ByteBuffer) operation.get( field );
		return asByteArray( buffer );
	}

	private byte[] asByteArray(ByteBuffer buffer) {
		byte[] copy = new byte[buffer.remaining()];
		buffer.get( copy );
		return copy;
	}
}
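// Usage sketch (hypothetical variable names, for illustration only): the
// Protocol instance is expected to come from the matching AvroSerializationProvider, e.g.
//
//   Deserializer deserializer = new AvroDeserializer( protocol );
//   deserializer.deserialize( messageBytes, worksBuilder );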