/*
* Hibernate, Relational Persistence for Idiomatic Java
*
* JBoss, Home of Professional Open Source
* Copyright 2011 Red Hat Inc. and/or its affiliates and other contributors
* as indicated by the @authors tag. All rights reserved.
* See the copyright.txt in the distribution for a
* full listing of individual contributors.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License, v. 2.1.
* This program is distributed in the hope that it will be useful, but WITHOUT A
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License,
* v.2.1 along with this distribution; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
package org.hibernate.search.test.serialization;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttributeImpl;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.AttributeImpl;
import org.apache.solr.handler.AnalysisRequestHandlerBase;
import org.junit.Test;
import org.hibernate.search.backend.AddLuceneWork;
import org.hibernate.search.backend.DeleteLuceneWork;
import org.hibernate.search.backend.LuceneWork;
import org.hibernate.search.backend.OptimizeLuceneWork;
import org.hibernate.search.backend.PurgeAllLuceneWork;
import org.hibernate.search.backend.UpdateLuceneWork;
import org.hibernate.search.indexes.serialization.avro.impl.AvroSerializationProvider;
import org.hibernate.search.indexes.serialization.impl.CopyTokenStream;
import org.hibernate.search.indexes.serialization.impl.PluggableSerializationLuceneWorkSerializer;
import org.hibernate.search.indexes.serialization.impl.SerializationHelper;
import org.hibernate.search.indexes.serialization.spi.LuceneWorkSerializer;
import org.hibernate.search.indexes.serialization.spi.SerializableTokenStream;
import org.hibernate.search.test.SearchTestCase;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;
import static org.fest.assertions.Assertions.assertThat;
/**
* @author Emmanuel Bernard <emmanuel@hibernate.org>
*/
public class SerializationTest extends SearchTestCase {
private static final Log log = LoggerFactory.make();
/**
 * Serializes the sample works with the Avro serialization provider, deserializes the
 * resulting bytes and checks that each deserialized work matches the work at the same
 * position in the original list.
 */
@Test
public void testAvroSerialization() throws Exception {
	final LuceneWorkSerializer serializer = new PluggableSerializationLuceneWorkSerializer(
			new AvroSerializationProvider(),
			getSearchFactoryImpl()
	);
	final List<LuceneWork> originals = buildWorks();
	final List<LuceneWork> roundTripped = serializer.toLuceneWorks( serializer.toSerializedModel( originals ) );

	assertThat( roundTripped ).hasSize( originals.size() );
	int position = 0;
	for ( LuceneWork original : originals ) {
		assertLuceneWork( original, roundTripped.get( position++ ) );
	}
}
/**
 * Logs (at debug level) how long it takes to serialize and deserialize the sample works
 * {@code loop} times with plain Java serialization and with the Avro based serializer,
 * together with the size of the message each one produces. Nothing is asserted.
 * <p>
 * Historical measurements:
 * <pre>
 * 20110815
 * Our Avro serializer is slower (1.6) than Java serialization, especially when the VM is
 * not warm (small loop value like 1000). It evens up on longer loops like 100000.
 *
 * Our Avro deserializer is slower (2.5) than Java serialization, especially when the VM is
 * not warm (small loop value like 1000). It evens up or beats the Java serialization on
 * longer loops like 100000.
 *
 * Test done after the initial implementation (in particular the schema is not part of the
 * message).
 *
 * With 1000000:
 * Java serialization: 28730
 * Java message size: 2509
 * Java deserialization: 82970
 * Avro serialization: 24245
 * Avro message size: 1064
 * Avro deserialization: 54444
 *
 *
 * 20110824
 * The new Work sample is bigger and Avro's layer has been optimized.
 * Our Avro serializer is faster (1.8 times) than Java serialization for 100000.
 *
 * Our Avro deserializer is faster (2.7 times) than Java serialization for 100000.
 *
 * The message size is 4.4 times smaller in Avro
 * (the schema is not part of the message).
 *
 * With 1000000:
 * Java serialization: 55786
 * Java message size: 4094
 * Java deserialization: 160764
 * Avro serialization: 30430
 * Avro message size: 929
 * Avro deserialization: 59255
 *
 * 20110826
 * Our Avro serializer is faster (1.7 times) than Java serialization for 100000.
 *
 * Our Avro deserializer is faster (2.7 times) than Java serialization for 100000.
 *
 * The message size is 6.6 times smaller in Avro
 * (the schema is not part of the message).
 *
 * With 1000000:
 * Java serialization: 52682
 * Java message size: 4094
 * Java de-serialization: 168595
 * Avro serialization: 30586
 * Avro message size: 617
 * Avro deserialization: 62141
 * </pre>
 */
@Test
public void testAvroSerializationPerf() throws Exception {
	final int loop = 10; //TODO do 10000 or 100000
	final LuceneWorkSerializer avroSerializer = new PluggableSerializationLuceneWorkSerializer(
			new AvroSerializationProvider(),
			getSearchFactoryImpl()
	);
	final List<LuceneWork> sampleWorks = buildWorks();
	long startNanos;

	// --- plain Java serialization ---
	byte[] javaPayload = null;
	startNanos = System.nanoTime();
	for ( int round = 0; round < loop; round++ ) {
		javaPayload = SerializationHelper.toByteArray( (Serializable) sampleWorks );
	}
	log.debug( "Java serialization: " + millisSince( startNanos ) );
	log.debug( "Java message size: " + javaPayload.length );

	startNanos = System.nanoTime();
	List<LuceneWork> javaRoundTrip = null;
	for ( int round = 0; round < loop; round++ ) {
		javaRoundTrip = (List<LuceneWork>) SerializationHelper.toSerializable(
				javaPayload,
				Thread.currentThread().getContextClassLoader()
		);
	}
	log.debug( "Java de-serialization: " + millisSince( startNanos ) );

	// --- Avro serialization ---
	byte[] avroPayload = null;
	startNanos = System.nanoTime();
	for ( int round = 0; round < loop; round++ ) {
		avroPayload = avroSerializer.toSerializedModel( sampleWorks );
	}
	log.debug( "Avro serialization: " + millisSince( startNanos ) );
	log.debug( "Avro message size: " + avroPayload.length );

	List<LuceneWork> avroRoundTrip = null;
	startNanos = System.nanoTime();
	for ( int round = 0; round < loop; round++ ) {
		avroRoundTrip = avroSerializer.toLuceneWorks( avroPayload );
	}
	log.debug( "Avro deserialization: " + millisSince( startNanos ) );

	//make sure the compiler does not cheat
	log.debug( avroRoundTrip == javaRoundTrip );
}

/**
 * Returns the whole number of milliseconds elapsed between {@code startNanos}
 * (a {@link System#nanoTime()} reading) and now.
 */
private static long millisSince(long startNanos) {
	final long stopNanos = System.nanoTime();
	return ( stopNanos - startNanos ) / 1000000;
}
/**
 * Builds the list of sample works used by the tests: optimize, purge-all and delete works,
 * an add work carrying a document of numeric fields, an update work carrying a document
 * with string, binary, reader and token stream fields, and an add work with an empty document.
 */
private List<LuceneWork> buildWorks() throws Exception {
	final List<LuceneWork> result = new ArrayList<LuceneWork>();

	result.add( OptimizeLuceneWork.INSTANCE );
	result.add( OptimizeLuceneWork.INSTANCE );
	result.add( new OptimizeLuceneWork( RemoteEntity.class ) ); //class won't be sent over
	result.add( new PurgeAllLuceneWork( RemoteEntity.class ) );
	result.add( new PurgeAllLuceneWork( RemoteEntity.class ) );

	// deletions keyed by a Long, a String and a URL identifier
	result.add( new DeleteLuceneWork( 123L, "123", RemoteEntity.class ) );
	result.add( new DeleteLuceneWork( "Sissi", "Sissi", RemoteEntity.class ) );
	final String site = "http://emmanuelbernard.com";
	result.add( new DeleteLuceneWork( new URL( site ), site, RemoteEntity.class ) );

	final Document numericDocument = buildNumericDocument();
	final Map<String, String> fieldToAnalyzer = new HashMap<String, String>();
	fieldToAnalyzer.put( "godo", "ngram" );
	result.add( new AddLuceneWork( 123, "123", RemoteEntity.class, numericDocument, fieldToAnalyzer ) );

	result.add( new UpdateLuceneWork( 1234, "1234", RemoteEntity.class, buildMixedFieldDocument() ) );
	result.add( new AddLuceneWork( 125, "125", RemoteEntity.class, new Document() ) );
	return result;
}

/**
 * Builds a boosted document holding one numeric field per numeric type
 * (double, int, float, long), in that order.
 */
private Document buildNumericDocument() {
	final Document document = new Document();
	document.setBoost( 2.3f );

	final NumericField doubleField = new NumericField( "double", 23, Field.Store.NO, true );
	doubleField.setDoubleValue( 23d );
	doubleField.setOmitNorms( true );
	doubleField.setOmitTermFreqAndPositions( true );
	doubleField.setBoost( 3f );
	document.add( doubleField );

	final NumericField intField = new NumericField( "int", 23, Field.Store.NO, true );
	intField.setIntValue( 23 );
	document.add( intField );

	final NumericField floatField = new NumericField( "float", 23, Field.Store.NO, true );
	floatField.setFloatValue( 2.3f );
	document.add( floatField );

	final NumericField longField = new NumericField( "long", 23, Field.Store.NO, true );
	longField.setLongValue( 23L );
	document.add( longField );

	return document;
}

/**
 * Builds a boosted document holding, in order: two analyzed string fields, a binary field,
 * a reader backed field and a token stream backed field.
 */
private Document buildMixedFieldDocument() {
	final Document document = new Document();
	document.setBoost( 2.3f );

	final Field tunedStringField = new Field(
			"StringF",
			"String field",
			Field.Store.YES,
			Field.Index.ANALYZED,
			Field.TermVector.WITH_OFFSETS
	);
	tunedStringField.setOmitNorms( true );
	tunedStringField.setOmitTermFreqAndPositions( true );
	tunedStringField.setBoost( 3f );
	document.add( tunedStringField );

	final Field plainStringField = new Field(
			"StringF2",
			"String field 2",
			Field.Store.YES,
			Field.Index.ANALYZED,
			Field.TermVector.WITH_OFFSETS
	);
	document.add( plainStringField );

	final byte[] payload = new byte[] { 2, 5, 5, 8 };
	document.add( new Field( "binary", payload, 0, payload.length ) );

	final SerializableStringReader content = new SerializableStringReader();
	document.add( new Field( "ReaderField", content, Field.TermVector.WITH_OFFSETS ) );

	final CopyTokenStream stream = new CopyTokenStream( buildTokenSteamWithAttributes() );
	final Field streamField = new Field( "tokenstream", stream, Field.TermVector.WITH_POSITIONS_OFFSETS );
	streamField.setOmitNorms( true );
	streamField.setOmitTermFreqAndPositions( true );
	streamField.setBoost( 3f );
	document.add( streamField );

	return document;
}
/**
 * Builds a token stream description made of a single token that carries one instance of
 * each attribute implementation exercised by the test, every one set to a non-default value.
 */
private List<List<AttributeImpl>> buildTokenSteamWithAttributes() {
	final List<AttributeImpl> token = new ArrayList<AttributeImpl>();

	final AnalysisRequestHandlerBase.TokenTrackingAttributeImpl tracking =
			new AnalysisRequestHandlerBase.TokenTrackingAttributeImpl();
	tracking.reset( new int[] { 1, 2, 3 }, 4 );
	token.add( tracking );

	final CharTermAttributeImpl term = new CharTermAttributeImpl();
	term.append( "Wazzza" );
	token.add( term );

	final PayloadAttributeImpl payload = new PayloadAttributeImpl();
	payload.setPayload( new Payload( new byte[] { 0, 1, 2, 3 } ) );
	token.add( payload );

	final KeywordAttributeImpl keyword = new KeywordAttributeImpl();
	keyword.setKeyword( true );
	token.add( keyword );

	final PositionIncrementAttributeImpl positionIncrement = new PositionIncrementAttributeImpl();
	positionIncrement.setPositionIncrement( 3 );
	token.add( positionIncrement );

	final FlagsAttributeImpl flags = new FlagsAttributeImpl();
	flags.setFlags( 435 );
	token.add( flags );

	final TypeAttributeImpl type = new TypeAttributeImpl();
	type.setType( "acronym" );
	token.add( type );

	final OffsetAttributeImpl offset = new OffsetAttributeImpl();
	offset.setOffset( 4, 7 );
	token.add( offset );

	final List<List<AttributeImpl>> stream = new ArrayList<List<AttributeImpl>>();
	stream.add( token );
	return stream;
}
/**
 * Checks that {@code copy} is an instance of the class of {@code work} and then delegates
 * to the comparison matching the kind of work; fails on a kind of work that is not handled.
 *
 * @param work the work that was serialized
 * @param copy the work obtained after deserialization
 */
private void assertLuceneWork(LuceneWork work, LuceneWork copy) {
	assertThat( copy ).isInstanceOf( work.getClass() );

	if ( work instanceof OptimizeLuceneWork ) {
		// an optimize work is only checked for presence and type
		assertNotNull( copy );
		assertTrue( copy instanceof OptimizeLuceneWork );
		return;
	}
	if ( work instanceof PurgeAllLuceneWork ) {
		assertPurgeAll( (PurgeAllLuceneWork) work, (PurgeAllLuceneWork) copy );
		return;
	}
	if ( work instanceof DeleteLuceneWork ) {
		assertDelete( (DeleteLuceneWork) work, (DeleteLuceneWork) copy );
		return;
	}
	if ( work instanceof AddLuceneWork ) {
		assertAdd( (AddLuceneWork) work, (AddLuceneWork) copy );
		return;
	}
	if ( work instanceof UpdateLuceneWork ) {
		assertUpdate( (UpdateLuceneWork) work, (UpdateLuceneWork) copy );
		return;
	}
	fail( "unexpected type" );
}
/**
 * Checks that an add work and its deserialized copy agree on entity class, id, string id,
 * field-to-analyzer map and document content.
 *
 * @param work the work that was serialized
 * @param copy the work obtained after deserialization
 */
private void assertAdd(AddLuceneWork work, AddLuceneWork copy) {
	assertThat( work.getEntityClass() )
			.as( "Add.getEntityClass is not copied" )
			.isEqualTo( copy.getEntityClass() );
	assertThat( work.getId() )
			.as( "Add.getId is not copied" )
			.isEqualTo( copy.getId() );
	assertThat( work.getIdInString() )
			.as( "Add.getIdInString is not the same" )
			.isEqualTo( copy.getIdInString() );
	assertThat( work.getFieldToAnalyzerMap() )
			.as( "Add.getFieldToAnalyzerMap is not the same" )
			.isEqualTo( copy.getFieldToAnalyzerMap() );

	assertDocument( work.getDocument(), copy.getDocument() );
}
/**
 * Checks that an update work and its deserialized copy agree on entity class, id, string id,
 * field-to-analyzer map and document content.
 *
 * @param work the work that was serialized
 * @param copy the work obtained after deserialization
 */
private void assertUpdate(UpdateLuceneWork work, UpdateLuceneWork copy) {
	// Failure descriptions name "Update" so a broken update is not reported as a broken add.
	assertThat( work.getEntityClass() )
			.as( "Update.getEntityClass is not copied" )
			.isEqualTo( copy.getEntityClass() );
	assertThat( work.getId() )
			.as( "Update.getId is not copied" )
			.isEqualTo( copy.getId() );
	assertThat( work.getIdInString() )
			.as( "Update.getIdInString is not the same" )
			.isEqualTo( copy.getIdInString() );
	assertThat( work.getFieldToAnalyzerMap() )
			.as( "Update.getFieldToAnalyzerMap is not the same" )
			.isEqualTo( copy.getFieldToAnalyzerMap() );

	assertDocument( work.getDocument(), copy.getDocument() );
}
/**
 * Checks that a document and its deserialized copy have the same boost, the same number of
 * fields, and that fields at the same position are of the same class and carry the same
 * properties.
 *
 * @param document the document that was serialized
 * @param copy the document obtained after deserialization
 */
private void assertDocument(Document document, Document copy) {
	assertThat( document.getBoost() ).isEqualTo( copy.getBoost() );

	final List<Fieldable> fields = document.getFields();
	final List<Fieldable> copiedFields = copy.getFields();
	// Without this check a copy with extra fields would pass unnoticed, and a copy with
	// missing fields would fail with an IndexOutOfBoundsException instead of an assertion.
	assertThat( copiedFields ).as( "Document field count is not the same" ).hasSize( fields.size() );

	for ( int index = 0; index < fields.size(); index++ ) {
		Fieldable field = fields.get( index );
		Fieldable fieldCopy = copiedFields.get( index );
		assertThat( field ).isInstanceOf( fieldCopy.getClass() );
		if ( field instanceof NumericField ) {
			assertNumericField( (NumericField) field, (NumericField) fieldCopy );
		}
		else if ( field instanceof Field ) {
			assertNormalField( (Field) field, (Field) fieldCopy );
		}
	}
}
/**
 * Checks that every inspected property of a regular {@link Field} has the same value on the
 * deserialized copy as on the original field.
 * <p>
 * Reader values are compared through {@code compareReaders} and token stream values through
 * {@code compareTokenStreams}; both helpers read from the values they are given.
 *
 * @param field the field that was serialized
 * @param copy the field obtained after deserialization
 */
private void assertNormalField(Field field, Field copy) {
assertThat( copy.name() ).isEqualTo( field.name() );
assertThat( copy.getBinaryLength() ).isEqualTo( field.getBinaryLength() );
assertThat( copy.getBinaryOffset() ).isEqualTo( field.getBinaryOffset() );
assertThat( copy.getBinaryValue() ).isEqualTo( field.getBinaryValue() );
assertThat( copy.getBoost() ).isEqualTo( field.getBoost() );
assertThat( copy.getOmitNorms() ).isEqualTo( field.getOmitNorms() );
assertThat( copy.getOmitTermFreqAndPositions() ).isEqualTo( field.getOmitTermFreqAndPositions() );
assertThat( copy.isBinary() ).isEqualTo( field.isBinary() );
assertThat( copy.isIndexed() ).isEqualTo( field.isIndexed() );
assertThat( copy.isLazy() ).isEqualTo( field.isLazy() );
assertThat( copy.isStoreOffsetWithTermVector() ).isEqualTo( field.isStoreOffsetWithTermVector() );
assertThat( copy.isStorePositionWithTermVector() ).isEqualTo( field.isStorePositionWithTermVector() );
assertThat( copy.isStored() ).isEqualTo( field.isStored() );
assertThat( copy.isTokenized() ).isEqualTo( field.isTokenized() );
// compareReaders takes (copy, original) whereas compareTokenStreams takes (original, copy)
assertThat( compareReaders( copy.readerValue(), field.readerValue() ) ).isTrue();
assertThat( compareTokenStreams( field.tokenStreamValue(), copy.tokenStreamValue() ) ).isTrue();
assertThat( copy.stringValue() ).isEqualTo( field.stringValue() );
assertThat( copy.isTermVectorStored() ).isEqualTo( field.isTermVectorStored() );
}
/**
 * Compares two token streams token by token and attribute by attribute.
 * <p>
 * Two {@code null} streams are considered equal; a {@code null} stream never equals a
 * non-null one. The original stream is reset before being read. Attribute values are checked
 * through {@code testAttributeTypes}, which raises an assertion failure on a value mismatch
 * rather than making this method return {@code false}.
 *
 * @param original the token stream that was serialized, may be {@code null}
 * @param copy the token stream obtained after deserialization, may be {@code null}
 * @return {@code false} when exactly one stream is {@code null}, or when the streams differ
 * in token count, attribute count per token, or attribute class; {@code true} otherwise
 */
private boolean compareTokenStreams(TokenStream original, TokenStream copy) {
	if ( original == null ) {
		return copy == null;
	}
	if ( copy == null ) {
		// a missing copy is a mismatch to report, not something to hand to the stream reader
		return false;
	}
	try {
		original.reset();
	}
	catch ( IOException e ) {
		throw new RuntimeException( e );
	}
	SerializableTokenStream serOriginal = CopyTokenStream.buildSerializabletokenStream( original );
	SerializableTokenStream serCopy = CopyTokenStream.buildSerializabletokenStream( copy );
	if ( serOriginal.getStream().size() != serCopy.getStream().size() ) {
		return false;
	}
	for ( int i = 0; i < serOriginal.getStream().size(); i++ ) {
		List<AttributeImpl> origToken = serOriginal.getStream().get( i );
		List<AttributeImpl> copyToken = serCopy.getStream().get( i );
		if ( origToken.size() != copyToken.size() ) {
			return false;
		}
		for ( int j = 0; j < origToken.size(); j++ ) {
			AttributeImpl origAttr = origToken.get( j );
			AttributeImpl copyAttr = copyToken.get( j );
			if ( origAttr.getClass() != copyAttr.getClass() ) {
				return false;
			}
			testAttributeTypes( origAttr, copyAttr );
		}
	}
	return true;
}
/**
 * Asserts that two attributes hold the same value, picking the comparison from the first
 * attribute type that {@code origAttr} matches. Attributes matching none of the handled
 * types are not compared.
 * <p>
 * {@code copyAttr} is cast to the type matched by {@code origAttr}.
 *
 * @param origAttr the attribute from the original token stream
 * @param copyAttr the attribute from the deserialized token stream
 */
private void testAttributeTypes(AttributeImpl origAttr, AttributeImpl copyAttr) {
	if ( origAttr instanceof AnalysisRequestHandlerBase.TokenTrackingAttributeImpl ) {
		final AnalysisRequestHandlerBase.TokenTrackingAttributeImpl source =
				(AnalysisRequestHandlerBase.TokenTrackingAttributeImpl) origAttr;
		final AnalysisRequestHandlerBase.TokenTrackingAttributeImpl target =
				(AnalysisRequestHandlerBase.TokenTrackingAttributeImpl) copyAttr;
		assertThat( source.getPositions() ).isEqualTo( target.getPositions() );
		return;
	}
	if ( origAttr instanceof CharTermAttribute ) {
		// the term text is compared through its string form
		assertThat( origAttr.toString() ).isEqualTo( copyAttr.toString() );
		return;
	}
	if ( origAttr instanceof PayloadAttribute ) {
		final PayloadAttribute source = (PayloadAttribute) origAttr;
		final PayloadAttribute target = (PayloadAttribute) copyAttr;
		assertThat( source.getPayload() ).isEqualTo( target.getPayload() );
		return;
	}
	if ( origAttr instanceof KeywordAttribute ) {
		final KeywordAttribute source = (KeywordAttribute) origAttr;
		final KeywordAttribute target = (KeywordAttribute) copyAttr;
		assertThat( source.isKeyword() ).isEqualTo( target.isKeyword() );
		return;
	}
	if ( origAttr instanceof PositionIncrementAttribute ) {
		final PositionIncrementAttribute source = (PositionIncrementAttribute) origAttr;
		final PositionIncrementAttribute target = (PositionIncrementAttribute) copyAttr;
		assertThat( source.getPositionIncrement() ).isEqualTo( target.getPositionIncrement() );
		return;
	}
	if ( origAttr instanceof FlagsAttribute ) {
		final FlagsAttribute source = (FlagsAttribute) origAttr;
		final FlagsAttribute target = (FlagsAttribute) copyAttr;
		assertThat( source.getFlags() ).isEqualTo( target.getFlags() );
		return;
	}
	if ( origAttr instanceof TypeAttribute ) {
		final TypeAttribute source = (TypeAttribute) origAttr;
		final TypeAttribute target = (TypeAttribute) copyAttr;
		assertThat( source.type() ).isEqualTo( target.type() );
		return;
	}
	if ( origAttr instanceof OffsetAttribute ) {
		final OffsetAttribute source = (OffsetAttribute) origAttr;
		final OffsetAttribute target = (OffsetAttribute) copyAttr;
		assertThat( source.startOffset() ).isEqualTo( target.startOffset() );
		assertThat( source.endOffset() ).isEqualTo( target.endOffset() );
	}
}
/**
 * Compares the remaining content of two readers character by character. Both readers are
 * read from, so their content is consumed by the comparison.
 * <p>
 * Two {@code null} readers are considered equal; a {@code null} reader never equals a
 * non-null one.
 *
 * @param copy the reader obtained after deserialization, may be {@code null}
 * @param original the reader that was serialized, may be {@code null}
 * @return {@code true} when both readers are {@code null} or deliver the same characters
 * and end at the same point, {@code false} otherwise
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 */
private boolean compareReaders(Reader copy, Reader original) {
	if ( original == null ) {
		return copy == null;
	}
	if ( copy == null ) {
		// report the mismatch instead of failing with a NullPointerException below
		return false;
	}
	try {
		int expected;
		while ( ( expected = original.read() ) != -1 ) {
			if ( copy.read() != expected ) {
				return false;
			}
		}
		// the copy must be exhausted as well, otherwise it is longer than the original
		return copy.read() == -1;
	}
	catch ( IOException e ) {
		throw new RuntimeException( e );
	}
}
/**
 * Checks that every inspected property of a {@link NumericField} has the same value on the
 * deserialized copy as on the original field. Unlike the regular field comparison, reader
 * and token stream values are compared directly with {@code isEqualTo}.
 *
 * @param field the numeric field that was serialized
 * @param copy the numeric field obtained after deserialization
 */
private void assertNumericField(NumericField field, NumericField copy) {
assertThat( copy.name() ).isEqualTo( field.name() );
assertThat( copy.getBinaryLength() ).isEqualTo( field.getBinaryLength() );
assertThat( copy.getBinaryOffset() ).isEqualTo( field.getBinaryOffset() );
assertThat( copy.getBinaryValue() ).isEqualTo( field.getBinaryValue() );
assertThat( copy.getBoost() ).isEqualTo( field.getBoost() );
// numeric specific properties: data type, value and precision step
assertThat( copy.getDataType() ).isEqualTo( field.getDataType() );
assertThat( copy.getNumericValue() ).isEqualTo( field.getNumericValue() );
assertThat( copy.getOmitNorms() ).isEqualTo( field.getOmitNorms() );
assertThat( copy.getOmitTermFreqAndPositions() ).isEqualTo( field.getOmitTermFreqAndPositions() );
assertThat( copy.getPrecisionStep() ).isEqualTo( field.getPrecisionStep() );
assertThat( copy.isBinary() ).isEqualTo( field.isBinary() );
assertThat( copy.isIndexed() ).isEqualTo( field.isIndexed() );
assertThat( copy.isLazy() ).isEqualTo( field.isLazy() );
assertThat( copy.isStoreOffsetWithTermVector() ).isEqualTo( field.isStoreOffsetWithTermVector() );
assertThat( copy.isStorePositionWithTermVector() ).isEqualTo( field.isStorePositionWithTermVector() );
assertThat( copy.isStored() ).isEqualTo( field.isStored() );
assertThat( copy.isTokenized() ).isEqualTo( field.isTokenized() );
assertThat( copy.readerValue() ).isEqualTo( field.readerValue() );
assertThat( copy.tokenStreamValue() ).isEqualTo( field.tokenStreamValue() );
assertThat( copy.stringValue() ).isEqualTo( field.stringValue() );
}
/**
 * Checks that a delete work and its deserialized copy agree on entity class, id, document,
 * string id and field-to-analyzer map.
 *
 * @param work the work that was serialized
 * @param copy the work obtained after deserialization
 */
private void assertDelete(DeleteLuceneWork work, DeleteLuceneWork copy) {
	assertThat( work.getEntityClass() )
			.as( "Delete.getEntityClass is not copied" )
			.isEqualTo( copy.getEntityClass() );
	assertThat( work.getId() )
			.as( "Delete.getId is not copied" )
			.isEqualTo( copy.getId() );
	assertThat( work.getDocument() )
			.as( "Delete.getDocument is not the same" )
			.isEqualTo( copy.getDocument() );
	assertThat( work.getIdInString() )
			.as( "Delete.getIdInString is not the same" )
			.isEqualTo( copy.getIdInString() );
	assertThat( work.getFieldToAnalyzerMap() )
			.as( "Delete.getFieldToAnalyzerMap is not the same" )
			.isEqualTo( copy.getFieldToAnalyzerMap() );
}
/**
 * Checks that a purge-all work and its deserialized copy target the same entity class.
 *
 * @param work the work that was serialized
 * @param copy the work obtained after deserialization
 */
private void assertPurgeAll(PurgeAllLuceneWork work, PurgeAllLuceneWork copy) {
	assertThat( work.getEntityClass() )
			.as( "PurgeAllLuceneWork.getEntityClass is not copied" )
			.isEqualTo( copy.getEntityClass() );
}
/**
 * Returns the annotated classes for this test: only {@code RemoteEntity}.
 */
@Override
protected Class<?>[] getAnnotatedClasses() {
	final Class<?>[] annotated = { RemoteEntity.class };
	return annotated;
}
/**
 * Minimal serializable {@link Reader}: delivers a single character (code point 2) on the
 * first non-empty read and signals end of stream on every read after that.
 */
private static class SerializableStringReader extends Reader implements Serializable {
	private static final long serialVersionUID = 1L;

	/** Whether the single character has already been delivered. */
	private boolean read = false;

	/**
	 * Stores the single character at {@code cbuf[off]} on the first non-empty read.
	 *
	 * @return {@code 0} when {@code len} is zero (nothing is written to {@code cbuf}),
	 * {@code 1} on the first non-empty read, {@code -1} afterwards
	 */
	@Override
	public int read(char[] cbuf, int off, int len) throws IOException {
		if ( len == 0 ) {
			// nothing was requested: do not touch the buffer, which may have no room at off
			return 0;
		}
		if ( read ) {
			return -1;
		}
		read = true;
		cbuf[off] = 2;
		return 1;
	}

	/** Nothing to release. */
	@Override
	public void close() throws IOException {
	}
}
}