// QueryRun.java example
//
// Explorer
// openbd-core-master
// - src
/* 
 *  Copyright (C) 2000 - 2013 TagServlet Ltd
 *
 *  This file is part of Open BlueDragon (OpenBD) CFML Server Engine.
 *  
 *  OpenBD is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  Free Software Foundation,version 3.
 *  
 *  OpenBD is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License
 *  along with OpenBD.  If not, see http://www.gnu.org/licenses/
 *  
 *  Additional permission under GNU GPL version 3 section 7
 *  
 *  If you modify this Program, or any covered work, by linking or combining 
 *  it with any of the JARS listed in the README.txt (or a modified version of 
 *  (that library), containing parts covered by the terms of that JAR, the 
 *  licensors of this Program grant you additional permission to convey the 
 *  resulting work. 
 *  README.txt @ http://www.openbluedragon.org/license/README.txt
 *  
 *  http://www.openbluedragon.org/
 *  
 *  $Id: QueryRun.java 2404 2013-09-22 21:51:40Z alan $
 */

package com.bluedragon.search.search;

import java.io.StringReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

import com.bluedragon.search.AnalyzerFactory;
import com.bluedragon.search.DocumentWrap;
import com.bluedragon.search.collection.Collection;
import com.naryx.tagfusion.cfm.engine.cfArrayData;
import com.naryx.tagfusion.cfm.engine.cfNumberData;
import com.naryx.tagfusion.cfm.engine.cfQueryResultData;
import com.naryx.tagfusion.cfm.engine.cfStringData;


/**
 * Runs the query held by a {@link QueryAttributes} against each of its collections and
 * gathers the hits into a {@link cfQueryResultData} named "SEARCH".
 *
 * <p>Every row carries the six standard columns listed in {@link #QUERY_COLUMNS}. Any other
 * field found on a matching document is added as an extra column the first time it is seen,
 * and a CONTEXT column is added when context passages are requested.
 */
public class QueryRun {
	private static final String[] QUERY_COLUMNS = new String[] { "KEY", "TITLE", "SCORE", "SEARCHCOUNT", "RECORDSSEARCHED", "RANK" };

	private final QueryAttributes queryAttributes;
	private final cfQueryResultData	queryResultData;

	/** Lower-cased column name -> 1-based column index in {@link #queryResultData}. */
	private final Map<String, Integer>	activeColumns;

	/** Values already seen for the unique column; {@code null} when no unique column was requested. */
	private final Set<String>	uniqueSet;

	/**
	 * Prepares an empty result set and the column lookup table.
	 *
	 * @param _queryAttributes the query, collections and paging/formatting options to run with
	 */
	public QueryRun( QueryAttributes _queryAttributes ){
		queryAttributes	= _queryAttributes;
		queryResultData	= new cfQueryResultData( QUERY_COLUMNS, "SEARCH" );
		uniqueSet				= ( queryAttributes.getUniqueColumn() != null ) ? new HashSet<String>() : null;

		activeColumns	= new HashMap<String,Integer>();
		for ( int x = 0; x < QUERY_COLUMNS.length; x++ ){
			// Locale.ROOT keeps the keys stable regardless of the JVM default locale
			activeColumns.put( QUERY_COLUMNS[x].toLowerCase( Locale.ROOT ), x + 1 );
		}

		// The document fields "id" and "name" are already reported through KEY and TITLE
		activeColumns.put( "id", 		1 );
		activeColumns.put( "name", 	2 );
	}

	/**
	 * @return the result set populated by {@link #run()}
	 */
	public cfQueryResultData getQueryResultData(){
		return queryResultData;
	}

	/**
	 * Searches each collection in turn and appends qualifying hits to the result set.
	 *
	 * <p>A hit qualifies when its score is strictly greater than the minimum score. Qualifying
	 * hits are counted across all collections; rows are only emitted once that count reaches
	 * the start row, and searching stops as soon as the result set holds the maximum number of
	 * rows. A maximum of zero or less therefore produces no rows.
	 *
	 * @throws CorruptIndexException if Lucene reports a corrupt index
	 * @throws Exception on any other failure raised while searching or building rows
	 */
	public void run() throws CorruptIndexException, Exception{
		int potentialRows = 0;

		Iterator<Collection> it =	queryAttributes.getCollectionIterator();

		// Strictly-less-than: once the limit is hit no further collection may be searched,
		// otherwise the next collection would push the size past the limit for good.
		while ( it.hasNext() && queryResultData.getSize() < queryAttributes.getMaxRows() ){
			Collection collection	= it.next();

			IndexSearcher	searcher	= collection.getIndexSearcher();
			int totalDocs						= collection.getTotalDocs();

			// Lucene rejects a hit limit of zero, and an empty collection cannot match anyway
			if ( totalDocs <= 0 )
				continue;

			TopDocs hits = searcher.search( queryAttributes.getQuery(), totalDocs );

			ScoreDoc[] scorehits	= hits.scoreDocs;
			for ( int x = 0; x < scorehits.length; x++ ){
				if ( scorehits[x].score > queryAttributes.getMinScore() ){
					potentialRows++;

					if ( potentialRows >= queryAttributes.getStartRow() ){
						addRow( searcher, scorehits[x].doc, scorehits[x].score, x, scorehits.length, totalDocs );
						if ( queryResultData.getSize() >= queryAttributes.getMaxRows() )
							break;
					}
				}
			}
		}

		queryResultData.reset();

		if ( uniqueSet != null )
			uniqueSet.clear();
	}


	/**
	 * Appends one row for the given hit: standard columns first, then the document's own
	 * fields, then the highlighted context if requested. The row is removed again when the
	 * document repeats a value already seen in the unique column.
	 *
	 * @param searcher        searcher used to load the stored document
	 * @param docid           Lucene document id of the hit
	 * @param score           score of the hit
	 * @param rank            zero-based position of the hit within its collection's results
	 * @param searchCount     number of hits returned for the collection
	 * @param recordsSearched total number of documents in the collection
	 */
	private void addRow(IndexSearcher	searcher, int docid, float score, int rank, int searchCount, int recordsSearched ) throws CorruptIndexException, Exception {
		DocumentWrap document = new DocumentWrap( searcher.doc(docid) );

		queryResultData.addRow(1);
		queryResultData.setCurrentRow( queryResultData.getSize() );

		// Add in the standard columns that we know we have for every search
		queryResultData.setCell( 1, new cfStringData(document.getId()) );
		queryResultData.setCell( 2, new cfStringData(document.getName()) );
		queryResultData.setCell( 3, new cfNumberData(score) );
		queryResultData.setCell( 4, new cfNumberData(searchCount) );
		queryResultData.setCell( 5, new cfNumberData(recordsSearched) );
		queryResultData.setCell( 6, new cfNumberData(rank+1) );

		if ( !addCustomFields( document ) )
			return;	// duplicate on the unique column; the row has already been removed

		if ( queryAttributes.getContextPassages() > 0 )
			addContext( document );
	}


	/**
	 * Copies the document's non-standard fields into the current row, creating columns on demand.
	 *
	 * @return {@code false} if the row was deleted because its unique-column value was already
	 *         seen, {@code true} otherwise
	 */
	private boolean addCustomFields( DocumentWrap document ) throws Exception {
		String uniqueColumn	= queryAttributes.getUniqueColumn();

		for ( IndexableField field : document.getDocument().getFields() ){
			String fieldName	= field.name().toLowerCase( Locale.ROOT );

			// Check for the unique; the match ignores case because fieldName has been
			// lower-cased while the requested column name may not have been
			if ( uniqueSet != null && fieldName.equalsIgnoreCase( uniqueColumn ) ){
				if ( !uniqueSet.add( field.stringValue() ) ){
					queryResultData.deleteRow( queryResultData.getSize() );
					return false;
				}
			}

			// The contents field is only returned when explicitly asked for
			if ( fieldName.equals("contents") && !queryAttributes.getContentFlag() )
				continue;

			if ( !activeColumns.containsKey( fieldName ) ){
				int newcolumn = queryResultData.addColumnData( field.name().toUpperCase( Locale.ROOT ), cfArrayData.createArray(1), null );
				activeColumns.put( fieldName, newcolumn );
			}

			// Fields that map onto a standard column were already filled in by addRow
			int column	= activeColumns.get( fieldName );
			if ( column <= QUERY_COLUMNS.length )
				continue;

			queryResultData.setCell( column, new cfStringData( field.stringValue() ) );
		}

		return true;
	}


	/**
	 * Builds the highlighted context passages for the document's contents and stores them in
	 * the CONTEXT column of the current row. Does nothing when the document has no contents.
	 */
	private void addContext( DocumentWrap document ) throws Exception {
		String contents = document.getAttribute( DocumentWrap.CONTENTS );
		if ( contents == null )
			return;

		Scorer scorer									= new QueryScorer( queryAttributes.getQuery() );
		SimpleHTMLFormatter formatter = new SimpleHTMLFormatter( queryAttributes.getContextHighlightStart(), queryAttributes.getContextHighlightEnd() );
		Highlighter highlighter 			= new Highlighter( formatter, scorer );
		Fragmenter fragmenter 				= new SimpleFragmenter( queryAttributes.getContextBytes() );
		highlighter.setTextFragmenter( fragmenter );

		TokenStream tokenStream = AnalyzerFactory.get("simple").tokenStream( DocumentWrap.CONTENTS, new StringReader( contents ) );

		String nextContext = "";
		try {
			String[] fragments = highlighter.getBestFragments( tokenStream, contents, queryAttributes.getContextPassages() );
			if ( fragments.length == 1 ){
				nextContext = fragments[0] + "...";
			}else{
				// Zero fragments leaves just the "..." marker; several are each prefixed with it
				StringBuilder context = new StringBuilder();
				for ( String fragment : fragments ){
					context.append( "..." );
					context.append( fragment );
				}
				context.append( "..." );
				nextContext = context.toString();
			}
		} catch (Exception ignored) {
			// Highlighting is best-effort: a failure here must not lose the hit itself,
			// so the row is kept with an empty context.
		}

		// Add in the context
		if ( !activeColumns.containsKey( "context" ) ){
			int newcolumn = queryResultData.addColumnData( "CONTEXT", cfArrayData.createArray(1), null );
			activeColumns.put( "context", newcolumn );
		}

		queryResultData.setCell( activeColumns.get( "context" ), new cfStringData( nextContext ) );
	}
}