/*
* Hibernate, Relational Persistence for Idiomatic Java
*
* Copyright (c) 2012, Red Hat, Inc. and/or its affiliates or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors. All third-party contributions are
* distributed under license by Red Hat, Inc.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.hibernate.search.test.bridge.tika;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.sql.Blob;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.hibernate.CacheMode;
import org.hibernate.FlushMode;
import org.hibernate.ScrollMode;
import org.hibernate.ScrollableResults;
import org.hibernate.Session;
import org.hibernate.Transaction;
import org.hibernate.cfg.Configuration;
import org.hibernate.search.Environment;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.hibernate.search.test.SearchTestCase;
import org.hibernate.search.test.TestConstants;
/**
* @author Hardy Ferentschik
*/
public class TikaBridgeBlobSupportTest extends SearchTestCase {
private static final String TEST_DOCUMENT_PDF = "/org/hibernate/search/test/bridge/tika/test-document-1.pdf";
private static final String PATH_TO_TEST_DOCUMENT_PDF;
static {
try {
File pdfFile = new File( TikaBridgeBlobSupportTest.class.getResource( TEST_DOCUMENT_PDF ).toURI() );
PATH_TO_TEST_DOCUMENT_PDF = pdfFile.getAbsolutePath();
}
catch ( URISyntaxException e ) {
throw new RuntimeException( "Unable to determine file path for test document" );
}
}
public void testDefaultTikaBridgeWithBlobData() throws Exception {
Session session = openSession();
persistBook( session );
// we have to index manually. Using the Blob (streaming approach) the indexing would try to re-read the
// input stream of the blob after it was persisted into the database
indexBook( session );
searchBook( session );
session.close();
}
@SuppressWarnings("unchecked")
private void searchBook(Session session) throws ParseException {
FullTextSession fullTextSession = Search.getFullTextSession( session );
Transaction tx = session.beginTransaction();
QueryParser parser = new QueryParser(
TestConstants.getTargetLuceneVersion(),
"content",
TestConstants.standardAnalyzer
);
Query query = parser.parse( "foo" );
List<Book> result = fullTextSession.createFullTextQuery( query ).list();
assertEquals( "there should be no match", 0, result.size() );
query = parser.parse( "Lucene" );
result = fullTextSession.createFullTextQuery( query ).list();
assertEquals( "there should be match", 1, result.size() );
tx.commit();
}
private void persistBook(Session session) throws IOException {
Transaction tx = session.beginTransaction();
Book book = new Book();
Blob data = getBlobData( PATH_TO_TEST_DOCUMENT_PDF, session );
book.setContent( data );
session.save( book );
session.flush();
tx.commit();
session.clear();
}
void indexBook(Session session) {
FullTextSession fullTextSession = org.hibernate.search.Search.getFullTextSession( session );
fullTextSession.setFlushMode( FlushMode.MANUAL );
fullTextSession.setCacheMode( CacheMode.IGNORE );
Transaction transaction = fullTextSession.beginTransaction();
int BATCH_SIZE = 10;
ScrollableResults results = fullTextSession.createCriteria( Book.class )
.setFetchSize( BATCH_SIZE )
.scroll( ScrollMode.FORWARD_ONLY );
int index = 0;
while ( results.next() ) {
index++;
fullTextSession.index( results.get( 0 ) );
if ( index % BATCH_SIZE == 0 ) {
fullTextSession.flushToIndexes();
fullTextSession.clear();
}
}
fullTextSession.flush();
transaction.commit();
fullTextSession.clear();
}
protected Class<?>[] getAnnotatedClasses() {
return new Class[] {
Book.class
};
}
protected void configure(Configuration cfg) {
super.configure( cfg );
cfg.setProperty( Environment.INDEXING_STRATEGY, "manual" );
}
private Blob getBlobData(String fileName, Session session) throws IOException {
File file = new File( fileName );
FileInputStream in = FileUtils.openInputStream( file );
return session.getLobHelper().createBlob( in, file.length() );
}
}