/*
* Hibernate Search, full-text search for your domain model
*
* License: GNU Lesser General Public License (LGPL), version 2.1 or later
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
*/
package org.hibernate.search.indexes.serialization.avro.impl;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.avro.Protocol;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
import org.hibernate.search.backend.LuceneWork;
import org.hibernate.search.backend.spi.DeletionQuery;
import org.hibernate.search.indexes.serialization.spi.LuceneFieldContext;
import org.hibernate.search.indexes.serialization.spi.LuceneNumericFieldContext;
import org.hibernate.search.indexes.serialization.spi.Serializer;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;
import static org.hibernate.search.indexes.serialization.impl.SerializationHelper.toByteArray;
/**
 * Avro-based implementation of the {@link Serializer} contract.
 * <p>
 * The {@code add*} callbacks incrementally build Avro {@link GenericRecord}s matching
 * the schemas of the {@link Protocol} given at construction time; {@link #serialize()}
 * then encodes the accumulated operations into the binary wire format, prefixed with
 * the protocol version bytes.
 * <p>
 * Entity class names are deduplicated into a list and operations reference a class
 * by its index in that list.
 * This class is stateful and expects the usual callback sequence driven by a single
 * caller (fields, document, id, then the operation); it is not thread-safe.
 *
 * @author Emmanuel Bernard &lt;emmanuel@hibernate.org&gt;
 */
public class AvroSerializer implements Serializer {

	private static final Log log = LoggerFactory.make();

	private final Protocol protocol;

	//Entity class names referenced so far; operations point to a class via its
	//index in this list (see getClassReference).
	private final List<String> classReferences = new ArrayList<>();

	//Mutable state accumulated across the callback sequence:
	private GenericRecord idRecord;
	private List<GenericRecord> fieldables;
	private List<GenericRecord> operations;
	private GenericRecord document;

	public AvroSerializer(Protocol protocol) {
		this.protocol = protocol;
	}

	@Override
	public void luceneWorks(List<LuceneWork> works) {
		operations = new ArrayList<>( works.size() );
	}

	@Override
	public void addOptimizeAll() {
		operations.add( newRecord( "OptimizeAll" ) );
	}

	@Override
	public void addFlush() {
		operations.add( newRecord( "Flush" ) );
	}

	@Override
	public void addPurgeAll(String entityClassName) {
		GenericRecord purgeAll = newRecord( "PurgeAll" );
		purgeAll.put( "class", getClassReference( entityClassName ) );
		operations.add( purgeAll );
	}

	@Override
	public void addDeleteByQuery(String entityClassName, DeletionQuery deletionQuery) {
		GenericRecord deleteByQuery = newRecord( "DeleteByQuery" );
		deleteByQuery.put( "class", getClassReference( entityClassName ) );
		deleteByQuery.put( "key", deletionQuery.getQueryKey() );
		deleteByQuery.put( "query", Arrays.asList( deletionQuery.serialize() ) );
		operations.add( deleteByQuery );
	}

	/**
	 * Returns the index of the given entity class name in {@code classReferences},
	 * registering the name first if it is not yet present.
	 * The linear scan is intentional: a message typically references very few classes.
	 *
	 * @param entityClassName the fully qualified entity class name
	 * @return the index identifying this class within the serialized message
	 */
	private int getClassReference(String entityClassName) {
		int classRef = classReferences.indexOf( entityClassName );
		if ( classRef == -1 ) {
			classReferences.add( entityClassName );
			classRef = classReferences.size() - 1;
		}
		return classRef;
	}

	/**
	 * Creates a fresh record for the named schema of the configured protocol.
	 */
	private GenericRecord newRecord(String schemaName) {
		return new GenericData.Record( protocol.getType( schemaName ) );
	}

	/**
	 * Stores the pending "Id" record; it is consumed (and reset) by the next
	 * {@link #addDelete}, {@link #addAdd} or {@link #addUpdate} call.
	 */
	private void setIdRecord(Object value) {
		this.idRecord = newRecord( "Id" );
		idRecord.put( "value", value );
	}

	@Override
	public void addIdSerializedInJava(byte[] id) {
		setIdRecord( ByteBuffer.wrap( id ) );
	}

	@Override
	public void addIdAsInteger(int id) {
		setIdRecord( id );
	}

	@Override
	public void addIdAsLong(long id) {
		setIdRecord( id );
	}

	@Override
	public void addIdAsFloat(float id) {
		setIdRecord( id );
	}

	@Override
	public void addIdAsDouble(double id) {
		setIdRecord( id );
	}

	@Override
	public void addIdAsString(String id) {
		setIdRecord( id );
	}

	@Override
	public void addDelete(String entityClassName) {
		GenericRecord delete = newRecord( "Delete" );
		delete.put( "class", getClassReference( entityClassName ) );
		delete.put( "id", idRecord );
		operations.add( delete );
		idRecord = null;
	}

	@Override
	public void addAdd(String entityClassName, Map<String, String> fieldToAnalyzerMap) {
		operations.add( buildDocumentOperation( "Add", entityClassName, fieldToAnalyzerMap ) );
	}

	@Override
	public void addUpdate(String entityClassName, Map<String, String> fieldToAnalyzerMap) {
		operations.add( buildDocumentOperation( "Update", entityClassName, fieldToAnalyzerMap ) );
	}

	/**
	 * Builds the common shape of the "Add" and "Update" operations: class reference,
	 * pending id record, pending document and the field-to-analyzer overrides.
	 * Resets the pending id and document state as a side effect.
	 */
	private GenericRecord buildDocumentOperation(String schemaName, String entityClassName, Map<String, String> fieldToAnalyzerMap) {
		GenericRecord operation = newRecord( schemaName );
		operation.put( "class", getClassReference( entityClassName ) );
		operation.put( "id", idRecord );
		operation.put( "document", document );
		operation.put( "fieldToAnalyzerMap", fieldToAnalyzerMap );
		idRecord = null;
		clearDocument();
		return operation;
	}

	/**
	 * Encodes the accumulated operations into the Avro binary format.
	 * The payload starts with two raw version bytes (major, latest minor) so that
	 * readers can pick the matching protocol before decoding the Avro content.
	 *
	 * @return the serialized message
	 */
	@Override
	public byte[] serialize() {
		final ByteArrayOutputStream out = new ByteArrayOutputStream();
		out.write( KnownProtocols.MAJOR_VERSION );
		out.write( KnownProtocols.LATEST_MINOR_VERSION );
		Schema msgSchema = protocol.getType( "Message" );
		GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>( msgSchema );
		BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder( out, null );
		GenericRecord message = new GenericData.Record( msgSchema );
		message.put( "classReferences", classReferences );
		message.put( "operations", operations );
		operations = null;
		try {
			writer.write( message, encoder );
			encoder.flush();
		}
		catch (IOException e) {
			throw log.unableToSerializeInAvro( e );
		}
		return out.toByteArray();
	}

	@Override
	public void fields(List<IndexableField> fields) {
		fieldables = new ArrayList<>( fields.size() );
	}

	@Override
	public void addIntNumericField(int value, LuceneNumericFieldContext context) {
		GenericRecord numericField = createNumericField( "NumericIntField", context );
		numericField.put( "value", value );
		fieldables.add( numericField );
	}

	@Override
	public void addLongNumericField(long value, LuceneNumericFieldContext context) {
		GenericRecord numericField = createNumericField( "NumericLongField", context );
		numericField.put( "value", value );
		fieldables.add( numericField );
	}

	@Override
	public void addFloatNumericField(float value, LuceneNumericFieldContext context) {
		GenericRecord numericField = createNumericField( "NumericFloatField", context );
		numericField.put( "value", value );
		fieldables.add( numericField );
	}

	@Override
	public void addDoubleNumericField(double value, LuceneNumericFieldContext context) {
		GenericRecord numericField = createNumericField( "NumericDoubleField", context );
		numericField.put( "value", value );
		fieldables.add( numericField );
	}

	/**
	 * Creates a numeric field record of the given schema, populated with the
	 * attributes shared by all numeric field variants (the "value" entry is
	 * set by the caller).
	 */
	private GenericRecord createNumericField(String schemaName, LuceneNumericFieldContext context) {
		GenericRecord numericField = newRecord( schemaName );
		numericField.put( "name", context.getName() );
		numericField.put( "precisionStep", context.getPrecisionStep() );
		numericField.put( "store", context.getStore() );
		numericField.put( "indexed", context.isIndexed() );
		numericField.put( "boost", context.getBoost() );
		numericField.put( "omitNorms", context.getOmitNorms() );
		numericField.put( "omitTermFreqAndPositions", context.getOmitTermFreqAndPositions() );
		return numericField;
	}

	@Override
	public void addFieldWithBinaryData(LuceneFieldContext context) {
		GenericRecord field = createNormalField( "BinaryField", context );
		BytesRef binaryValue = context.getBinaryValue();
		field.put( "value", ByteBuffer.wrap( binaryValue.bytes, binaryValue.offset, binaryValue.length ) );
		//Following two attributes are meant for serialization format backwards compatibility:
		field.put( "offset", 0 );
		field.put( "length", binaryValue.length );
		fieldables.add( field );
	}

	/**
	 * Creates a field record of the given schema, populated with the attributes
	 * shared by all non-numeric field variants.
	 */
	private GenericRecord createNormalField(String schemaName, LuceneFieldContext context) {
		GenericRecord field = newRecord( schemaName );
		field.put( "name", context.getName() );
		field.put( "boost", context.getBoost() );
		field.put( "omitNorms", context.isOmitNorms() );
		field.put( "omitTermFreqAndPositions", context.isOmitTermFreqAndPositions() );
		return field;
	}

	@Override
	public void addFieldWithStringData(LuceneFieldContext context) {
		GenericRecord field = createNormalField( "StringField", context );
		field.put( "value", context.getStringValue() );
		field.put( "store", context.getStore() );
		field.put( "index", context.getIndex() );
		field.put( "termVector", context.getTermVector() );
		fieldables.add( field );
	}

	@Override
	public void addFieldWithTokenStreamData(LuceneFieldContext context) {
		GenericRecord field = createNormalField( "TokenStreamField", context );
		//The token stream is a list of "positions", each holding a list of attributes;
		//convert each attribute to its Avro representation preserving the nesting.
		List<List<AttributeImpl>> stream = context.getTokenStream().getStream();
		List<List<Object>> value = new ArrayList<>( stream.size() );
		for ( List<AttributeImpl> attrs : stream ) {
			List<Object> elements = new ArrayList<>( attrs.size() );
			for ( AttributeImpl attr : attrs ) {
				elements.add( buildAttributeImpl( attr ) );
			}
			value.add( elements );
		}
		field.put( "value", value );
		field.put( "termVector", context.getTermVector() );
		fieldables.add( field );
	}

	/**
	 * Converts a Lucene token stream attribute into its Avro record counterpart,
	 * falling back to plain Java serialization for unknown {@link Serializable}
	 * attributes.
	 * NOTE: an AttributeImpl may implement several of these interfaces, so the
	 * order of the checks is significant — keep CharTermAttributeImpl first.
	 *
	 * @throws RuntimeException (via the log factory) when the attribute is neither
	 * recognized nor serializable
	 */
	private Object buildAttributeImpl(final AttributeImpl attr) {
		if ( attr instanceof CharTermAttributeImpl ) {
			GenericRecord record = newRecord( "CharTermAttribute" );
			CharTermAttribute charAttr = (CharTermAttribute) attr;
			record.put( "sequence", charAttr.toString() );
			return record;
		}
		else if ( attr instanceof PayloadAttribute ) {
			GenericRecord record = newRecord( "PayloadAttribute" );
			PayloadAttribute payloadAttr = (PayloadAttribute) attr;
			//NOTE(review): getPayload() may return null for an attribute with no
			//payload set, which would NPE here — verify callers filter those out.
			BytesRef payload = payloadAttr.getPayload();
			record.put( "payload", ByteBuffer.wrap( payload.bytes, payload.offset, payload.length ) );
			return record;
		}
		else if ( attr instanceof KeywordAttribute ) {
			GenericRecord record = newRecord( "KeywordAttribute" );
			KeywordAttribute narrowedAttr = (KeywordAttribute) attr;
			record.put( "isKeyword", narrowedAttr.isKeyword() );
			return record;
		}
		else if ( attr instanceof PositionIncrementAttribute ) {
			GenericRecord record = newRecord( "PositionIncrementAttribute" );
			PositionIncrementAttribute narrowedAttr = (PositionIncrementAttribute) attr;
			record.put( "positionIncrement", narrowedAttr.getPositionIncrement() );
			return record;
		}
		else if ( attr instanceof FlagsAttribute ) {
			GenericRecord record = newRecord( "FlagsAttribute" );
			FlagsAttribute narrowedAttr = (FlagsAttribute) attr;
			record.put( "flags", narrowedAttr.getFlags() );
			return record;
		}
		else if ( attr instanceof TypeAttribute ) {
			GenericRecord record = newRecord( "TypeAttribute" );
			TypeAttribute narrowedAttr = (TypeAttribute) attr;
			record.put( "type", narrowedAttr.type() );
			return record;
		}
		else if ( attr instanceof OffsetAttribute ) {
			GenericRecord record = newRecord( "OffsetAttribute" );
			OffsetAttribute narrowedAttr = (OffsetAttribute) attr;
			record.put( "startOffset", narrowedAttr.startOffset() );
			record.put( "endOffset", narrowedAttr.endOffset() );
			return record;
		}
		else if ( attr instanceof Serializable ) {
			return ByteBuffer.wrap( toByteArray( (Serializable) attr ) );
		}
		else {
			throw log.attributeNotRecognizedNorSerializable( attr.getClass() );
		}
	}

	@Override
	public void addFieldWithSerializableReaderData(LuceneFieldContext context) {
		GenericRecord field = createNormalField( "ReaderField", context );
		field.put( "value", ByteBuffer.wrap( context.getReaderValue() ) );
		field.put( "termVector", context.getTermVector() );
		fieldables.add( field );
	}

	@Override
	public void addFieldWithSerializableFieldable(byte[] fieldable) {
		GenericRecord customFieldable = newRecord( "CustomFieldable" );
		customFieldable.put( "instance", ByteBuffer.wrap( fieldable ) );
		fieldables.add( customFieldable );
	}

	@Override
	public void addDocValuesFieldWithBinaryValue(LuceneFieldContext context) {
		GenericRecord record = newRecord( "BinaryDocValuesField" );
		record.put( "name", context.getName() );
		record.put( "type", context.getDocValuesType() );
		BytesRef binaryValue = context.getBinaryValue();
		record.put( "value", ByteBuffer.wrap( binaryValue.bytes, binaryValue.offset, binaryValue.length ) );
		//Kept for serialization format backwards compatibility:
		record.put( "offset", 0 );
		record.put( "length", binaryValue.length );
		fieldables.add( record );
	}

	@Override
	public void addDocValuesFieldWithNumericValue(long value, LuceneFieldContext context) {
		GenericRecord record = newRecord( "NumericDocValuesField" );
		record.put( "name", context.getName() );
		record.put( "type", context.getDocValuesType() );
		record.put( "value", value );
		fieldables.add( record );
	}

	@Override
	public void addDocument() {
		document = newRecord( "Document" );
		//backwards compatibility: we used to have a boost here in Lucene 3 / Hibernate Search 4.x
		//With Lucene 3 there was a notion of "Document level boost" which was then dropped.
		//Using the constant 1f doesn't hurt as it would be multiplied by the field boost,
		//which in the new design incorporates the factor.
		document.put( "boost", 1f );
		document.put( "fieldables", fieldables );
	}

	//Resets the per-operation document state once it has been attached to an operation.
	private void clearDocument() {
		document = null;
		fieldables = null;
	}
}