/*
* Hibernate, Relational Persistence for Idiomatic Java
*
* JBoss, Home of Professional Open Source
* Copyright 2011 Red Hat Inc. and/or its affiliates and other contributors
* as indicated by the @authors tag. All rights reserved.
* See the copyright.txt in the distribution for a
* full listing of individual contributors.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License, v. 2.1.
* This program is distributed in the hope that it will be useful, but WITHOUT A
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License,
* v.2.1 along with this distribution; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
package org.hibernate.search.test.serialization;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.Protocol;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericEnumSymbol;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.util.Utf8;
import org.junit.Test;
import static org.fest.assertions.Assertions.assertThat;
import static org.hibernate.search.test.serialization.AvroUtils.*;
/**
 * Sanity check for the Avro schemas and protocol backing the remote work
 * serialization codec (v1.0): parses every stand-alone {@code *.avro} schema
 * file, then builds a representative {@code Message} record covering all
 * operation and field variants, serializes it with a binary encoder and reads
 * it back, asserting the decoded content field by field.
 *
 * @author Emmanuel Bernard &lt;emmanuel@hibernate.org&gt;
 */
public class AvroTest {

	@Test
	@SuppressWarnings("unchecked") // decoded token-stream value is a List<List<Object>> per the TokenStreamField schema
	public void experimentWithAvro() throws Exception {
		String root = "org/hibernate/search/remote/codex/avro/v1_0/";

		// Every stand-alone schema file must parse on its own
		// (parseSchema/parseProtocol come from AvroUtils).
		parseSchema( root + "attribute/TokenTrackingAttribute.avro", "attribute/TokenTrackingAttribute" );
		parseSchema( root + "attribute/CharTermAttribute.avro", "attribute/CharTermAttribute" );
		parseSchema( root + "attribute/PayloadAttribute.avro", "attribute/PayloadAttribute" );
		parseSchema( root + "attribute/KeywordAttribute.avro", "attribute/KeywordAttribute" );
		parseSchema( root + "attribute/PositionIncrementAttribute.avro", "attribute/PositionIncrementAttribute" );
		parseSchema( root + "attribute/FlagsAttribute.avro", "attribute/FlagsAttribute" );
		parseSchema( root + "attribute/TypeAttribute.avro", "attribute/TypeAttribute" );
		parseSchema( root + "attribute/OffsetAttribute.avro", "attribute/OffsetAttribute" );
		parseSchema( root + "field/TermVector.avro", "field/TermVector" );
		parseSchema( root + "field/Index.avro", "field/Index" );
		parseSchema( root + "field/Store.avro", "field/Store" );
		parseSchema( root + "field/TokenStreamField.avro", "field/TokenStreamField" );
		parseSchema( root + "field/ReaderField.avro", "field/ReaderField" );
		parseSchema( root + "field/StringField.avro", "field/StringField" );
		parseSchema( root + "field/BinaryField.avro", "field/BinaryField" );
		parseSchema( root + "field/NumericIntField.avro", "field/NumericIntField" );
		parseSchema( root + "field/NumericLongField.avro", "field/NumericLongField" );
		parseSchema( root + "field/NumericFloatField.avro", "field/NumericFloatField" );
		parseSchema( root + "field/NumericDoubleField.avro", "field/NumericDoubleField" );
		parseSchema( root + "field/CustomFieldable.avro", "field/CustomFieldable" );
		parseSchema( root + "Document.avro", "Document" );
		parseSchema( root + "operation/Id.avro", "operation/Id" );
		parseSchema( root + "operation/OptimizeAll.avro", "operation/OptimizeAll" );
		parseSchema( root + "operation/PurgeAll.avro", "operation/PurgeAll" );
		parseSchema( root + "operation/Delete.avro", "operation/Delete" );
		parseSchema( root + "operation/Add.avro", "operation/Add" );
		parseSchema( root + "operation/Update.avro", "operation/Update" );
		parseSchema( root + "Message.avro", "Message" );

		// The protocol bundles all schemas; look up the named types used below.
		String filename = root + "Works.avpr";
		Protocol protocol = parseProtocol( filename, "Works" );
		final Schema tokenTrackingAttribute = protocol.getType( "TokenTrackingAttribute" );
		final Schema tokenStreamSchema = protocol.getType( "TokenStreamField" );
		final Schema readerSchema = protocol.getType( "ReaderField" );
		final Schema stringSchema = protocol.getType( "StringField" );
		final Schema binarySchema = protocol.getType( "BinaryField" );
		final Schema intFieldSchema = protocol.getType( "NumericIntField" );
		final Schema longFieldSchema = protocol.getType( "NumericLongField" );
		final Schema floatFieldSchema = protocol.getType( "NumericFloatField" );
		final Schema doubleFieldSchema = protocol.getType( "NumericDoubleField" );
		final Schema customFieldableSchema = protocol.getType( "CustomFieldable" );
		final Schema documentSchema = protocol.getType( "Document" );
		final Schema idSchema = protocol.getType( "Id" );
		final Schema optimizeAllSchema = protocol.getType( "OptimizeAll" );
		final Schema purgeAllSchema = protocol.getType( "PurgeAll" );
		final Schema deleteSchema = protocol.getType( "Delete" );
		final Schema addSchema = protocol.getType( "Add" );
		Schema messageSchema = protocol.getType( "Message" );

		final ByteArrayOutputStream out = new ByteArrayOutputStream();
		GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>( messageSchema );
		Encoder encoder = EncoderFactory.get().directBinaryEncoder( out, null );

		// Sample byte payload, reused wherever a serialized object is expected.
		byte[] serializableSample = new byte[10];
		for ( int i = 0; i < serializableSample.length; i++ ) {
			serializableSample[i] = (byte) i;
		}

		List<String> classReferences = new ArrayList<String>();
		classReferences.add( AvroTest.class.getName() );

		List<GenericRecord> fieldables = new ArrayList<GenericRecord>( 9 );
		// custom fieldable
		GenericRecord customFieldable = new GenericData.Record( customFieldableSchema );
		customFieldable.put( "instance", ByteBuffer.wrap( serializableSample ) );
		fieldables.add( customFieldable );
		// numeric fields: one of each primitive flavor
		GenericRecord numericField = createNumeric( intFieldSchema );
		numericField.put( "value", 3 );
		fieldables.add( numericField );
		numericField = createNumeric( longFieldSchema );
		numericField.put( "value", 3L );
		fieldables.add( numericField );
		numericField = createNumeric( floatFieldSchema );
		numericField.put( "value", 2.3f );
		fieldables.add( numericField );
		numericField = createNumeric( doubleFieldSchema );
		numericField.put( "value", 2.3d );
		fieldables.add( numericField );
		// regular fields: binary, string, token stream, reader
		GenericRecord field = createField( binarySchema );
		field.put( "offset", 0 );
		field.put( "length", 10 );
		field.put( "value", ByteBuffer.wrap( serializableSample ) );
		fieldables.add( field );
		field = createField( stringSchema );
		field.put( "value", stringSchema.getName() );
		field.put( "store", "YES" );
		field.put( "index", "ANALYZED" );
		field.put( "termVector", "WITH_OFFSETS" );
		fieldables.add( field );
		field = createField( tokenStreamSchema );
		// token stream value is a list of tokens, each token a list of attributes
		List<List<Object>> tokens = new ArrayList<List<Object>>();
		List<Object> attrs = new ArrayList<Object>();
		tokens.add( attrs );
		GenericData.Record attr = new GenericData.Record( tokenTrackingAttribute );
		List<Integer> positions = new ArrayList<Integer>();
		positions.add( 1 );
		positions.add( 2 );
		positions.add( 3 );
		positions.add( 4 );
		attr.put( "positions", positions );
		attrs.add( attr );
		attrs.add( ByteBuffer.wrap( serializableSample ) );
		field.put( "value", tokens );
		field.put( "termVector", "WITH_OFFSETS" );
		fieldables.add( field );
		field = createField( readerSchema );
		field.put( "value", ByteBuffer.wrap( serializableSample ) );
		field.put( "termVector", "WITH_OFFSETS" );
		fieldables.add( field );

		GenericRecord doc = new GenericData.Record( documentSchema );
		doc.put( "boost", 2.3f );
		doc.put( "fieldables", fieldables );

		// Add operation: byte-buffer id + document + analyzer map
		GenericRecord add = new GenericData.Record( addSchema );
		add.put( "class", classReferences.indexOf( AvroTest.class.getName() ) );
		GenericRecord id = new GenericData.Record( idSchema );
		id.put( "value", ByteBuffer.wrap( serializableSample ) );
		add.put( "id", id );
		add.put( "document", doc );
		Map<String, String> analyzers = new HashMap<String, String>();
		analyzers.put( "name", "ngram" );
		analyzers.put( "description", "porter" );
		add.put( "fieldToAnalyzerMap", analyzers );

		// Delete operation: exercises the Long branch of the id union
		GenericRecord delete = new GenericData.Record( deleteSchema );
		delete.put( "class", classReferences.indexOf( AvroTest.class.getName() ) );
		id = new GenericData.Record( idSchema );
		id.put( "value", Long.valueOf( 30L ) );
		delete.put( "id", id );

		GenericRecord purgeAll = new GenericData.Record( purgeAllSchema );
		purgeAll.put( "class", classReferences.indexOf( AvroTest.class.getName() ) );
		GenericRecord optimizeAll = new GenericData.Record( optimizeAllSchema );

		List<GenericRecord> operations = new ArrayList<GenericRecord>( 4 );
		operations.add( purgeAll );
		operations.add( optimizeAll );
		operations.add( delete );
		operations.add( add );
		GenericRecord message = new GenericData.Record( messageSchema );
		message.put( "classReferences", classReferences );
		message.put( "operations", operations );

		writer.write( message, encoder );
		encoder.flush();

		// Read the stream back and verify every decoded value.
		ByteArrayInputStream inputStream = new ByteArrayInputStream( out.toByteArray() );
		Decoder decoder = DecoderFactory.get().binaryDecoder( inputStream, null );
		GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>( messageSchema );
		while ( true ) {
			GenericRecord result;
			try {
				result = reader.read( null, decoder );
			}
			catch (EOFException eof) {
				// the binary decoder signals stream exhaustion via EOFException
				break;
			}
			assertThat( result ).isNotNull();
			// operations
			assertThat( result.get( "operations" ) ).isNotNull().isInstanceOf( List.class );
			List<?> ops = (List<?>) result.get( "operations" );
			assertThat( ops ).hasSize( 4 );
			// Delete
			assertThat( ops.get( 2 ) ).isInstanceOf( GenericRecord.class );
			GenericRecord deleteOp = (GenericRecord) ops.get( 2 );
			assertThat( deleteOp.getSchema().getName() ).isEqualTo( "Delete" );
			Object actual = ( (GenericRecord) deleteOp.get( "id" ) ).get( "value" );
			assertThat( actual ).isInstanceOf( Long.class );
			assertThat( actual ).isEqualTo( Long.valueOf( 30 ) );
			// Add
			assertThat( ops.get( 3 ) ).isInstanceOf( GenericRecord.class );
			GenericRecord addOp = (GenericRecord) ops.get( 3 );
			assertThat( addOp.getSchema().getName() ).isEqualTo( "Add" );
			actual = ( (GenericRecord) addOp.get( "id" ) ).get( "value" );
			assertThat( actual ).isInstanceOf( ByteBuffer.class );
			ByteBuffer bb = (ByteBuffer) actual;
			assertThat( bb.hasArray() ).isTrue();
			byte[] copy = new byte[bb.remaining()];
			bb.get( copy );
			assertThat( serializableSample ).isEqualTo( copy );
			// fieldToAnalyzerMap
			assertThat( addOp.get( "fieldToAnalyzerMap" ) ).isInstanceOf( Map.class );
			assertThat( (Map<?, ?>) addOp.get( "fieldToAnalyzerMap" ) ).hasSize( 2 );
			// document
			assertThat( addOp.get( "document" ) ).isNotNull();
			GenericRecord document = (GenericRecord) addOp.get( "document" );
			assertThat( document.get( "boost" ) ).isEqualTo( 2.3f );
			// fieldables: custom + 4 numerics + 4 regular fields
			assertThat( document.get( "fieldables" ) ).isNotNull().isInstanceOf( List.class );
			List<?> fields = (List<?>) document.get( "fieldables" );
			assertThat( fields ).hasSize( 9 );
			GenericRecord decodedField = (GenericRecord) fields.get( 0 );
			assertThat( decodedField.getSchema().getName() ).isEqualTo( "CustomFieldable" );
			// numeric fields
			decodedField = (GenericRecord) fields.get( 1 );
			assertThat( decodedField.getSchema().getName() ).isEqualTo( "NumericIntField" );
			assertThat( decodedField.get( "value" ) ).isEqualTo( 3 );
			assertNumericField( decodedField );
			decodedField = (GenericRecord) fields.get( 2 );
			assertThat( decodedField.getSchema().getName() ).isEqualTo( "NumericLongField" );
			assertThat( decodedField.get( "value" ) ).isEqualTo( 3L );
			assertNumericField( decodedField );
			decodedField = (GenericRecord) fields.get( 3 );
			assertThat( decodedField.getSchema().getName() ).isEqualTo( "NumericFloatField" );
			assertThat( decodedField.get( "value" ) ).isEqualTo( 2.3f );
			assertNumericField( decodedField );
			decodedField = (GenericRecord) fields.get( 4 );
			assertThat( decodedField.getSchema().getName() ).isEqualTo( "NumericDoubleField" );
			assertThat( decodedField.get( "value" ) ).isEqualTo( 2.3d );
			assertNumericField( decodedField );
			// regular fields
			decodedField = (GenericRecord) fields.get( 5 );
			assertThat( decodedField.getSchema().getName() ).isEqualTo( "BinaryField" );
			assertThat( decodedField.get( "value" ) ).isInstanceOf( ByteBuffer.class );
			assertField( decodedField );
			decodedField = (GenericRecord) fields.get( 6 );
			assertThat( decodedField.getSchema().getName() ).isEqualTo( "StringField" );
			// Avro decodes string values as Utf8, not java.lang.String
			assertThat( decodedField.get( "value" ) ).isInstanceOf( Utf8.class );
			assertTermVector( decodedField );
			assertIndexAndStore( decodedField );
			assertField( decodedField );
			decodedField = (GenericRecord) fields.get( 7 );
			assertThat( decodedField.getSchema().getName() ).isEqualTo( "TokenStreamField" );
			assertThat( decodedField.get( "value" ) ).isInstanceOf( List.class );
			List<List<Object>> decodedTokens = (List<List<Object>>) decodedField.get( "value" );
			assertThat( decodedTokens.get( 0 ) ).as( "Wrong attribute impl list" ).hasSize( 2 );
			Object attribute = decodedTokens.get( 0 ).get( 0 );
			assertThat( attribute ).isNotNull();
			assertTermVector( decodedField );
			assertField( decodedField );
			decodedField = (GenericRecord) fields.get( 8 );
			assertThat( decodedField.getSchema().getName() ).isEqualTo( "ReaderField" );
			assertThat( decodedField.get( "value" ) ).isInstanceOf( ByteBuffer.class );
			assertTermVector( decodedField );
			assertField( decodedField );
		}
	}

	/**
	 * Asserts that the decoded {@code termVector} is an enum symbol with value
	 * {@code WITH_OFFSETS}, matching what every field built above sets.
	 */
	private void assertTermVector(GenericRecord field) {
		assertThat( field.get( "termVector" ) ).isInstanceOf( GenericEnumSymbol.class );
		assertThat( field.get( "termVector" ).toString() ).isEqualTo( "WITH_OFFSETS" );
	}

	/**
	 * Asserts that the decoded {@code index} and {@code store} enums carry the
	 * values set on the string field ({@code ANALYZED} / {@code YES}).
	 */
	private void assertIndexAndStore(GenericRecord field) {
		assertThat( field.get( "index" ) ).isInstanceOf( GenericEnumSymbol.class );
		assertThat( field.get( "index" ).toString() ).isEqualTo( "ANALYZED" );
		assertThat( field.get( "store" ) ).isInstanceOf( GenericEnumSymbol.class );
		assertThat( field.get( "store" ).toString() ).isEqualTo( "YES" );
	}

	/**
	 * Asserts the common properties written by {@link #createField(Schema)}:
	 * the name equals the schema name, and boost/omit flags round-trip intact.
	 */
	private void assertField(GenericRecord field) {
		assertThat( field.get( "name" ) ).isInstanceOf( Utf8.class );
		assertThat( field.get( "name" ).toString() ).isEqualTo( field.getSchema().getName() );
		assertThat( field.get( "boost" ) ).isEqualTo( 2.3f );
		assertThat( field.get( "omitNorms" ) ).isEqualTo( true );
		assertThat( field.get( "omitTermFreqAndPositions" ) ).isEqualTo( true );
	}

	/**
	 * Builds a regular field record with the common properties checked by
	 * {@link #assertField(GenericRecord)}; the caller adds the type-specific
	 * {@code value} (and store/index/termVector where applicable).
	 */
	private GenericRecord createField(Schema schema) {
		GenericRecord field = new GenericData.Record( schema );
		field.put( "name", schema.getName() );
		field.put( "boost", 2.3f );
		field.put( "omitNorms", true );
		field.put( "omitTermFreqAndPositions", true );
		return field;
	}

	/**
	 * Asserts the common properties written by {@link #createNumeric(Schema)};
	 * the type-specific {@code value} is checked by the caller.
	 */
	private void assertNumericField(GenericRecord field) {
		assertThat( field.get( "name" ) ).isInstanceOf( Utf8.class );
		assertThat( field.get( "name" ).toString() ).isEqualTo( "int" );
		assertThat( field.get( "precisionStep" ) ).isEqualTo( 3 );
		assertThat( field.get( "boost" ) ).isEqualTo( 2.3f );
		assertThat( field.get( "indexed" ) ).isEqualTo( true );
		assertThat( field.get( "omitNorms" ) ).isEqualTo( true );
		assertThat( field.get( "omitTermFreqAndPositions" ) ).isEqualTo( true );
		assertThat( field.get( "store" ) ).isInstanceOf( GenericData.EnumSymbol.class );
		assertThat( field.get( "store" ).toString() ).isEqualTo( "YES" );
	}

	/**
	 * Builds a numeric field record with the common properties checked by
	 * {@link #assertNumericField(GenericRecord)}; the caller adds the
	 * type-specific {@code value}.
	 */
	private GenericRecord createNumeric(Schema schema) {
		GenericRecord numericField = new GenericData.Record( schema );
		numericField.put( "name", "int" );
		numericField.put( "precisionStep", 3 );
		numericField.put( "store", "YES" );
		numericField.put( "indexed", true );
		numericField.put( "boost", 2.3f );
		numericField.put( "omitNorms", true );
		numericField.put( "omitTermFreqAndPositions", true );
		return numericField;
	}
}