/* * Copyright (C) 2000 - 2013 TagServlet Ltd * * This file is part of Open BlueDragon (OpenBD) CFML Server Engine. * * OpenBD is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * Free Software Foundation,version 3. * * OpenBD is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with OpenBD. If not, see http://www.gnu.org/licenses/ * * Additional permission under GNU GPL version 3 section 7 * * If you modify this Program, or any covered work, by linking or combining * it with any of the JARS listed in the README.txt (or a modified version of * (that library), containing parts covered by the terms of that JAR, the * licensors of this Program grant you additional permission to convey the * resulting work. * README.txt @ http://www.openbluedragon.org/license/README.txt * * http://www.openbluedragon.org/ * * $Id: QueryRun.java 2404 2013-09-22 21:51:40Z alan $ */ package com.bluedragon.search.search; import java.io.StringReader; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.Scorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import com.bluedragon.search.AnalyzerFactory; import com.bluedragon.search.DocumentWrap; import com.bluedragon.search.collection.Collection; import com.naryx.tagfusion.cfm.engine.cfArrayData; import com.naryx.tagfusion.cfm.engine.cfNumberData; import com.naryx.tagfusion.cfm.engine.cfQueryResultData; import com.naryx.tagfusion.cfm.engine.cfStringData; public class QueryRun extends Object { private static final String[] QUERY_COLUMNS = new String[] { "KEY", "TITLE", "SCORE", "SEARCHCOUNT", "RECORDSSEARCHED", "RANK" }; private QueryAttributes queryAttributes; private cfQueryResultData queryResultData; private Map<String, Integer> activeColumns; private Set<String> uniqueSet = null; public QueryRun( QueryAttributes _queryAttributes ){ queryAttributes = _queryAttributes; queryResultData = new cfQueryResultData( QUERY_COLUMNS, "SEARCH" ); if ( queryAttributes.getUniqueColumn() != null ) uniqueSet = new HashSet<String>(); activeColumns = new HashMap<String,Integer>(); for ( int x=0; x<QUERY_COLUMNS.length; x++ ) activeColumns.put(QUERY_COLUMNS[x].toLowerCase(), x+1 ); activeColumns.put( "id", 1 ); activeColumns.put( "name", 2 ); } public cfQueryResultData getQueryResultData(){ return queryResultData; } public void run() throws CorruptIndexException, Exception{ int potentialRows = 0; Iterator<Collection> it = queryAttributes.getCollectionIterator(); while ( it.hasNext() && queryResultData.getSize() <= queryAttributes.getMaxRows() ){ Collection collection = it.next(); IndexSearcher searcher = collection.getIndexSearcher(); TopDocs hits = searcher.search( queryAttributes.getQuery(), collection.getTotalDocs() ); ScoreDoc[] scorehits = hits.scoreDocs; for ( int x=0; x < scorehits.length; x++ ){ if ( scorehits[x].score > queryAttributes.getMinScore() ){ potentialRows++; if ( potentialRows >= queryAttributes.getStartRow() ){ addRow( searcher, scorehits[x].doc, scorehits[x].score, x, scorehits.length, collection.getTotalDocs() ); if ( queryResultData.getSize() == queryAttributes.getMaxRows() ) break; } } } } queryResultData.reset(); if ( uniqueSet != null ) uniqueSet.clear(); } private void addRow(IndexSearcher searcher, int docid, float score, int rank, int searchCount, int recordsSearched ) throws CorruptIndexException, Exception { DocumentWrap document = new DocumentWrap( searcher.doc(docid) ); queryResultData.addRow(1); queryResultData.setCurrentRow( queryResultData.getSize() ); // Add in the standard columns that we know we have for every search queryResultData.setCell( 1, new cfStringData(document.getId()) ); queryResultData.setCell( 2, new cfStringData(document.getName()) ); queryResultData.setCell( 3, new cfNumberData(score) ); queryResultData.setCell( 4, new cfNumberData(searchCount) ); queryResultData.setCell( 5, new cfNumberData(recordsSearched) ); queryResultData.setCell( 6, new cfNumberData(rank+1) ); String uC = queryAttributes.getUniqueColumn(); // Now we do the custom ones List<IndexableField> fields = document.getDocument().getFields(); Iterator<IndexableField> it = fields.iterator(); while ( it.hasNext() ){ IndexableField fieldable = it.next(); String fieldName = fieldable.name().toLowerCase(); // Check for the unique if ( uniqueSet != null && fieldName.equals( uC ) ){ if ( uniqueSet.contains(fieldable.stringValue()) ){ queryResultData.deleteRow( queryResultData.getSize() ); return; }else uniqueSet.add(fieldable.stringValue()); } // Check to see if we have this column if ( fieldName.equals("contents") && !queryAttributes.getContentFlag() ) continue; if ( !activeColumns.containsKey( fieldName ) ){ int newcolumn = queryResultData.addColumnData( fieldable.name().toUpperCase(), cfArrayData.createArray(1), null ); activeColumns.put( fieldName, newcolumn ); } int column = activeColumns.get( fieldName ); if ( column <= 6 ) continue; queryResultData.setCell( column, new cfStringData( fieldable.stringValue() ) ); } // Do the context stuff if enable if ( queryAttributes.getContextPassages() > 0 ){ Scorer scorer = new QueryScorer( queryAttributes.getQuery() ); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter( queryAttributes.getContextHighlightStart(), queryAttributes.getContextHighlightEnd() ); Highlighter highlighter = new Highlighter( formatter, scorer ); Fragmenter fragmenter = new SimpleFragmenter( queryAttributes.getContextBytes() ); highlighter.setTextFragmenter( fragmenter ); String nextContext = ""; String contents = document.getAttribute( DocumentWrap.CONTENTS ); if ( contents != null ){ TokenStream tokenStream = AnalyzerFactory.get("simple").tokenStream( DocumentWrap.CONTENTS, new StringReader( contents ) ); String [] fragments = null; try { fragments = highlighter.getBestFragments( tokenStream, contents, queryAttributes.getContextPassages() ); if ( fragments.length == 1 ){ nextContext = fragments[0] + "..."; }else{ StringBuilder context = new StringBuilder(); for ( int f = 0; f < fragments.length; f++ ){ context.append( "..." ); context.append( fragments[f] ); } context.append( "..." ); nextContext = context.toString(); } } catch (Exception e) { } // Add in the context if ( !activeColumns.containsKey( "context" ) ){ int newcolumn = queryResultData.addColumnData( "CONTEXT", cfArrayData.createArray(1), null ); activeColumns.put( "context", newcolumn ); } queryResultData.setCell( activeColumns.get( "context" ), new cfStringData( nextContext ) ); } } } }