/*
* Created on May 2, 2004
*/
package org.openedit.data.lucene;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.CountFacetRequest;
import org.apache.lucene.facet.search.DrillDownQuery;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.FieldCacheTermsFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.openedit.Data;
import org.openedit.data.PropertyDetail;
import org.openedit.data.Searcher;
import org.openedit.util.DateStorageUtil;
import com.openedit.OpenEditException;
import com.openedit.hittracker.FilterNode;
import com.openedit.hittracker.HitTracker;
import com.openedit.hittracker.Term;
/**
* @author cburkey
*
*/
public class LuceneHitTracker extends HitTracker
{
private static final Log log = LogFactory.getLog(LuceneHitTracker.class);
// protected transient TopDocs fieldHits;
protected transient LuceneConnectionManager fieldLuceneConnectionManager;
protected transient Query fieldLuceneQuery;
protected transient Sort fieldLuceneSort;
// protected Map fieldPages;
// protected Map<Integer,ScoreDoc> fieldCursors;
protected Integer fieldSize;
protected String fieldSearchType;
protected ScoreDoc[] fieldDocs;
protected int fieldOpenDocsSearcherHash;
protected DrillDownQuery fieldDrillDownQuery;
public DrillDownQuery getDrillDownQuery()
{
return fieldDrillDownQuery;
}
public void setdDrillDownQuery(DrillDownQuery fieldDrillDownQuery)
{
this.fieldDrillDownQuery = fieldDrillDownQuery;
}
/**
* Data type
*/
public String getSearchType()
{
return fieldSearchType;
}
public void setSearchType(String inSearchType)
{
fieldSearchType = inSearchType;
}
public LuceneHitTracker()
{
}
public LuceneHitTracker(LuceneConnectionManager inManager, Query inQuery, Sort inSort, Searcher inSearcher)
{
super(inSearcher);
setLuceneConnectionManager(inManager);
setLuceneQuery(inQuery);
setLuceneSort(inSort);
}
public int size()
{
if( !isAllSelected() && isShowOnlySelected() && ( fieldSelections == null || fieldSelections.size() == 0 ) )
{
return 0;
}
if (fieldSize == null)
{
getPage(0); // this will never happen because we set the size
// already
}
return fieldSize;
}
/*
* protected void setCursorForPage(ScoreDoc inDoc, int inPageZeroBased) { if
* (fieldCursors == null) { fieldCursors = new HashMap(); } if( inDoc !=
* null) //may be zero results { //log.info( getSearchType() + " Page " +
* inPageZeroBased + " ended with " + inDoc.doc + " " + fieldSize );
* fieldCursors.put(inPageZeroBased, inDoc); } }
*
* protected ScoreDoc getCursorForPage(int inPageZeroBased) { if
* (fieldCursors == null) { return null; } return fieldCursors.get(inPa int
* docid = lastDoc.doc; final SearchResultStoredFieldVisitor visitor = new
* SearchResultStoredFieldVisitor(columns); searcher.doc(docid, visitor);
* page.add( visitor.createSearchResult() );geZeroBased); }
*/
public void setPage(int inPageOneBased)
{
if (inPageOneBased == 0)
{
inPageOneBased = 1;
}
fieldPage = inPageOneBased;
fieldCurrentPage = getPage(inPageOneBased - 1);
}
/**
* @deprecated use getPage(int)
*/
public Data get(int inCount)
{
int page = inCount / getHitsPerPage();
// Make sure we are on the current page?
// get the chunk 1
List<Data> row = getPage(page);
// 50 - (1 * 40) = 10 relative
int indexlocation = inCount - (page * getHitsPerPage());
return row.get(indexlocation);
}
// protected Map<Integer,List<Data>> getPages()
// {
// if (fieldPages == null)
// {
// //fieldPages = new HashMap<Integer,List<Data>>(); //this will leak for
// really large resultsets
// fieldPages = new ReferenceMap(ReferenceMap.HARD,ReferenceMap.SOFT);
// }
// return fieldPages;
// }
protected List<Data> getPage(int inPageNumberZeroBased)
{
// List<Data> page = getPages().get(inPageNumberZeroBased);
// if( page == null )
// {
IndexSearcher searcher = null;
LuceneConnection refs = null;
try
{
refs = getLuceneConnectionManager().acquire();
searcher = refs.getIndexSearcher();
// searcher = getLuceneSearcherManager().acquire();
if (fieldOpenDocsSearcherHash != searcher.hashCode())
{
TopDocs docs = null;
// do the search and save the reuslts
int max = Integer.MAX_VALUE;
if (getHitsPerPage() == 1)
{
max = 1; // This causes our array to not have the right
// number of hits in it
}
Filter filter = null;
//Add security filter first. Then commbine it with selections
Collection<String> allowedids = getSearchQuery().getSecurityIds();
if( fieldSelections != null && isShowOnlySelected() )
{
allowedids = fieldSelections;
}
if (allowedids != null && allowedids.size() > 0)
{
filter = new FieldCacheTermsFilter("id", allowedids.toArray(new String[allowedids.size()]));
}
// List<String> terms = new ArrayList();
// terms.add("5");
// results = searcher.search(q, ), numDocs).scoreDocs;
// assertEquals("Must match nothing", 0, results.length);
if (getLuceneSort() != null)
{
docs = searcher.search(getLuceneQuery(), filter, max, getLuceneSort(), false, false);
}
else
{
docs = searcher.search(getLuceneQuery(), filter, max);
}
if (max > 1)
{
if( docs.totalHits > 1)
{
log.info(getSearchType() + " " + docs.totalHits + " hits " + getLuceneQuery() + " page " + inPageNumberZeroBased + " sort by: " + getLuceneSort() + " " + getSessionId());
}
else
{
log.debug(getSearchType() + " " + docs.totalHits + " hits " + getLuceneQuery() + " page " + inPageNumberZeroBased + " sort by: " + getLuceneSort() + " " + getSessionId());
}
}
fieldSize = docs.totalHits;
fieldDocs = docs.scoreDocs;
// do we need to reset the selections?
// Use selected doc ids to reload all the selection data
}
fieldOpenDocsSearcherHash = searcher.hashCode();
List<Data> page = populatePageData(inPageNumberZeroBased, searcher);
return page;
}
catch (Exception ex)
{
throw new OpenEditException(ex);
}
finally
{
getLuceneConnectionManager().release(refs);
}
}
protected List<Data> populatePageData(int inPageNumberZeroBased, IndexSearcher searcher) throws IOException
{
int start = getHitsPerPage() * inPageNumberZeroBased;
int max = start + getHitsPerPage();
max = Math.min(max, fieldSize);
List<Data> page = new ArrayList<Data>(getHitsPerPage());
readPageOfData(searcher, start, max, page);
return page;
}
/*
* protected List<Data> cursorSearch(IndexSearcher searcher,int
* inPageNumberZeroBased,ScoreDoc after ) throws IOException { TopDocs docs
* = null; int start = getHitsPerPage() * inPageNumberZeroBased; int max =
* start + getHitsPerPage();
*
* if( getLuceneSort() != null ) { docs = searcher.searchAfter( after,
* getLuceneQuery(), getHitsPerPage() ,getLuceneSort() ); } else { docs =
* searcher.searchAfter( after, getLuceneQuery(),getHitsPerPage()); }
*
* List<Data> page = new ArrayList<Data>(getHitsPerPage());
*
* ScoreDoc lastDoc = null; Map<String,Integer> columns = new
* TreeMap<String,Integer>(); int returned = docs.scoreDocs.length; for (int
* i = 0; i < returned; i++) { lastDoc = docs.scoreDocs[i]; int docid =
* lastDoc.doc; final SearchResultStoredFieldVisitor visitor = new
* SearchResultStoredFieldVisitor(columns); searcher.doc(docid, visitor);
* Data lastRecord =visitor.createSearchResult(); page.add( lastRecord ); }
* // return lastDoc;
*
* //log.info( getSearchType() + " Page " + inPageNumberZeroBased +
* " ended with " + lastDoc.doc + " = " + lastRecord.getId()); //ScoreDoc
* lastone = readPageOfData(searcher, 0, docs, page);
* setCursorForPage(lastDoc,inPageNumberZeroBased); if( log.isDebugEnabled()
* ) { log.debug(getSearchType() + " page " + inPageNumberZeroBased ); }
*
* return page; }
*/
/*
* protected List<Data> fullSearch(IndexSearcher searcher, int
* inPageNumberZeroBased) throws IOException { // int start =
* getHitsPerPage() * inPageNumberZeroBased; // int max = start +
* getHitsPerPage(); TopDocs docs = null;
*
* if( getLuceneSort() != null ) { docs = searcher.search(
* getLuceneQuery(),Integer.MAX_VALUE ,getLuceneSort() ); } else { docs =
* searcher.search( getLuceneQuery(),Integer.MAX_VALUE); } fieldSize =
* docs.totalHits; List<Data> page = new ArrayList<Data>(getHitsPerPage());
*
* ScoreDoc lastone = readPageOfData(searcher, start, docs, page);
* setCursorForPage(lastone,inPageNumberZeroBased); log.info(getSearchType()
* + " " + size() + " hits " + getLuceneQuery() + " page " +
* inPageNumberZeroBased + " sort by: " + getLuceneSort() + " " +
* getCatalogId()); return page;
*
* }
*/
protected ScoreDoc readPageOfData(IndexSearcher searcher, int start, int max, List<Data> page) throws IOException
{
/**
* This is optimized to only store string versions of the data we have.
* Normally the Document class has FieldType that use a bunch of memory.
* Guess Most people do not loop over their entire database as often as
* we do. TODO: Find a way to cache more generically instead of one page
* at a time?
*/
ScoreDoc lastDoc = null;
Map<String, Integer> columns = new TreeMap<String, Integer>(); //TODO: Test performance vs HashMap
for (int i = 0; start + i < max; i++)
{
int offset = start + i;
lastDoc = fieldDocs[offset];
int docid = lastDoc.doc;
// final SearchResultStoredFieldVisitor visitor = new
// SearchResultStoredFieldVisitor(columns);
final SearchResultStoredFieldVisitor visitor = new SearchResultStoredFieldVisitor(columns);
searcher.doc(docid, visitor);
Data data = visitor.createSearchResult();
// if data.getId()
page.add(data);
}
// log.info( getSearchType() + " ended with " + lastDoc.doc + " = " +
// page.get(page.size() - 1).getId());
return lastDoc;
}
/**
* @deprecated not really needed
*/
//TODO: Remove this API
public Collection<String> getSourcePaths()
{
List sourcepaths = new ArrayList();
IndexSearcher searcher = null;
LuceneConnection refs = null;
try
{
refs = getLuceneConnectionManager().acquire();
searcher = refs.getIndexSearcher();
int max = Integer.MAX_VALUE;
TopDocs docs = null;
if (getLuceneSort() != null)
{
docs = searcher.search(getLuceneQuery(), max, getLuceneSort());
}
else
{
docs = searcher.search(getLuceneQuery(), max);
}
fieldSize = docs.totalHits;
for (int i = 0; i < fieldSize; i++)
{
Document doc = searcher.doc(docs.scoreDocs[i].doc);
sourcepaths.add(doc.get("sourcepath"));
}
log.info(size() + " total query:" + getLuceneQuery() + " session:" + getSessionId());
return sourcepaths;
}
catch (Exception ex)
{
throw new OpenEditException(ex);
}
finally
{
getLuceneConnectionManager().release(refs);
}
}
public Iterator iterator()
{
return new HitIterator(this);
}
public String toDate(String inValue)
{
if (inValue == null)
{
return null;
}
Date date = toDateObject(inValue);
return DateStorageUtil.getStorageUtil().formatForStorage(date);
}
// This is main date API
public Date getDateValue(Data inHit, String inField)
{
String value = inHit.get(inField);
if (value == null)
{
return null;
}
return toDateObject(value);
}
public Date toDateObject(String inValue)
{
Date date = null;
try
{
date = DateTools.stringToDate(inValue);
}
catch (ParseException ex)
{
log.error(ex);
return null;
}
return date;
}
/**
* @deprecated removed for $context.getDateTime
* @param inValue
* @return
*/
public String toDateTime(String inValue)
{
return toDate(inValue);
}
// Only look for data within the current page
public Integer findSelf(String inId) throws Exception
{
if (inId == null)
{
return null;
}
int i = getPage() * getHitsPerPage();
for (Iterator iterator = getPageOfHits().iterator(); iterator.hasNext();)
{
i++;
Data type = (Data) iterator.next();
if (inId.equals(type.getId()))
{
return new Integer(i);
}
}
return null;
}
public String previousId(String inId) throws Exception
{
Data previous = previous(inId);
if (previous != null)
{
return previous.get("id");
}
return null;
}
public String nextId(String inId) throws Exception
{
Data next = next(inId);
if (next != null)
{
return next.get("id");
}
return null;
}
public Data previous(String inId) throws Exception
{
Integer row = findSelf(inId);
if (row != null && row.intValue() - 1 >= 0)
{
Data hit = (Data) get(row.intValue() - 1);
return hit;
}
return null;
}
public Data next(String inId) throws Exception
{
Integer row = findSelf(inId);
if (row != null && row.intValue() + 1 < getTotal())
{
Data hit = get(row.intValue() + 1);
return hit;
}
return null;
}
// Never call this!!!
public boolean contains(Object inHit)
{
Data contains = (Data) inHit;
for (Iterator iterator = iterator(); iterator.hasNext();)
{
Data type = (Data) iterator.next();
String id = type.getId();
if (id != null && id.equals(contains.getId()))
{
return true;
}
}
return false;
}
public String highlight(Object inDoc, String inField)
{
Data doc = (Data) inDoc;
String value = doc.get(inField);
if (value != null)
{
for (Iterator iterator = getSearchQuery().getTerms().iterator(); iterator.hasNext();)
{
Term term = (Term) iterator.next();
if (term.getValue() != null)
{
value = replaceAll(value, term.getValue(), "<span class='hit'>", "</span>");
}
}
}
value = trim(value, 300);
return value;
// String FIELD_NAME = "text";
// Highlighter highlighter = new Highlighter(new MyBolder(),
// new QueryScorer(query));
// highlighter.setTextFragmenter(new SimpleFragmenter(20));
// for (int i = 0; i < hits.length(); i++) {
// System.out.println("URL " + (i + 1) + ": " +
// hits.doc(i).getField("URL").stringValue());
// String text = hits.doc(i).get(FIELD_NAME);
// int maxNumFragmentsRequired = 2;
// String fragmentSeparator = "...";
// TokenStream tokenStream =
// analyzer.tokenStream(FIELD_NAME, new StringReader(text));
//
// String result =
// highlighter.getBestFragments(
// tokenStream,
// text,
// maxNumFragmentsRequired,
// fragmentSeparator);
// System.out.println("\t" + result);
}
private String replaceAll(String inSource, String inFind, String inPreReplace, String inPostReplace)
{
String lowercase = inSource.toLowerCase();
String findlower = inFind.toLowerCase();
StringBuffer buffer = new StringBuffer();
int start = 0;
while (true)
{
int hit = lowercase.indexOf(findlower, start);
if (hit == -1)
{
buffer.append(inSource.substring(start, inSource.length()));
break;
}
String before = inSource.substring(start, hit);
buffer.append(before);
buffer.append(inPreReplace);
String existing = inSource.substring(hit, hit + findlower.length());
buffer.append(existing);
buffer.append(inPostReplace);
start = hit + findlower.length();
}
return buffer.toString();
}
public String trim(String value, int inMax)
{
if (value.length() > inMax)
{
// trim the begining
int start = value.indexOf("<span class='hit'>");
if (start > -1 && start > 10 && start + 150 < value.length())
{
int before = value.indexOf(" ", start - 10);
if (before < start)
{
value = value.substring(before, value.length());
}
else
{
}
// TODO: Look for needed <span class='res'>
}
// if still too long trim the end
if (value.length() > inMax)
{
int end = inMax;
// Near max distance.
for (; end > 0; end--)
{
if (value.charAt(end) == ' ')
{
break;
}
}
value = value.substring(0, end);
if (value.endsWith("<span"))
{
value = value.substring(0, value.length() - 5);
}
}
// TODO: check for the need for </span>
}
return value;
}
public String getValue(Object inHit, String inKey)
{
if (inHit instanceof Data)
{
Data hit = (Data) inHit;
return hit.get(inKey);
}
if (inHit instanceof Document)
{
Document doc = (Document) inHit;
return doc.get(inKey);
}
else
{
log.error("Invalid data type " + inHit);
}
return null;
}
public Data toData(Object inHit)
{
if (inHit instanceof Data)
{
return (Data) inHit;
}
DocumentData data = new DocumentData((Document) inHit);
return data;
}
public Object[] toArray()
{
List list = new ArrayList(size());
for (Iterator iterator = iterator(); iterator.hasNext();)
{
Object hit = iterator.next();
list.add(hit);
}
return list.toArray();
}
// public Object getById(String inId)
// {
// if(inId == null)
// {
// return null;
// }
// int size = size();
// for (int i = 0; i < size; i++)
// {
// Document doc = getDoc(i);
// String id = doc.get("id");
// if( inId.equals(id))
// {
// return toData(doc);
// }
// }
// return null;
// }
public LuceneConnectionManager getLuceneConnectionManager()
{
return fieldLuceneConnectionManager;
}
public void setLuceneConnectionManager(LuceneConnectionManager inLuceneSearcherManager)
{
fieldLuceneConnectionManager = inLuceneSearcherManager;
}
public Query getLuceneQuery()
{
return fieldLuceneQuery;
}
public void setLuceneQuery(Query inLuceneQuery)
{
fieldLuceneQuery = inLuceneQuery;
}
public Sort getLuceneSort()
{
return fieldLuceneSort;
}
public void setLuceneSort(Sort inLuceneSort)
{
fieldLuceneSort = inLuceneSort;
}
protected List<FilterNode> getFacetedResults() throws Exception
{
IndexSearcher searcher = null;
LuceneConnection refs = null;
try
{
refs = getLuceneConnectionManager().acquire();
List<FilterNode> facetNodes = new ArrayList<FilterNode>();
if( refs.getTaxonomyReader() == null)
{
log.error("Tried to get facets on a non-facet index. restart?");
return facetNodes;
}
searcher = refs.getIndexSearcher();
BaseLuceneSearcher lsearcher = (BaseLuceneSearcher) getSearcher();
// lsearcher.getTaxonomyWriter());
// TaxonomyReader newReader = TaxonomyReader.openIfChanged(
// this.taxoReader );
ArrayList params = new ArrayList();
List propertydetails = lsearcher.getPropertyDetails().getDetailsByProperty("filter", "true");
if (propertydetails.size() > 0)
{
for (Iterator iterator = propertydetails.iterator(); iterator.hasNext();)
{
PropertyDetail detail = (PropertyDetail) iterator.next();
if( detail.getId().equals("viewasset") || detail.getId().equals("editasset") )
{
continue;
}
if( !getSearchQuery().hasFilter(detail.getId()))
{
String count = detail.get("facetcount");
int defaultcount = 20;
if( count != null)
{
defaultcount = Integer.parseInt(count);
}
params.add(new CountFacetRequest(new CategoryPath(detail.getId()), defaultcount)); //need to have a show more button on UI
}
}
if( params.isEmpty() )
{
return facetNodes;
}
FacetSearchParams fsp = new FacetSearchParams(params);
FacetsCollector facetsCollector = FacetsCollector.create(fsp, searcher.getIndexReader(), refs.getTaxonomyReader() );
if (isShowOnlySelected() && fieldSelections != null && fieldSelections.size() > 0)
{
Filter filterids = new FieldCacheTermsFilter("id", fieldSelections.toArray(new String[fieldSelections.size()]));
searcher.search(getLuceneQuery(),filterids, facetsCollector);
}
else
{
searcher.search(getLuceneQuery(), facetsCollector);
}
//copy the collected results to our data structure
List<FacetResult> facetResults = facetsCollector.getFacetResults();
//a tree of options
for (FacetResult fres : facetResults)
{
FacetResultNode root = fres.getFacetResultNode();
if( root.subResults.isEmpty() )
{
continue;
}
FilterNode filterNode = new FilterNode();
String type = root.label.toString();
filterNode.setId(type);
//filterNode.setProperty("path", type);
PropertyDetail parent = getSearcher().getDetail(type);
if (parent != null)
{
filterNode.setName(parent.getText());
}
filterNode.setProperty("count", String.valueOf( Math.round( root.value)) ) ;
facetNodes.add(filterNode);
for (FacetResultNode cat : root.subResults)
{
FilterNode childnode = new FilterNode();
String childlabel = cat.label.toString();
String[] splits = childlabel.split("/");
String id = splits[1];
childnode.setId(id);
String label = null;
if( parent != null && (parent.isList() || "category".equals( parent.getId() ) ) )
{
Data data = getSearcher().getSearcherManager().getData(getCatalogId(), parent.getListId(), id);
if (data == null)
{
continue;
}
label = data.getName();
}
if( label == null)
{
label = id;
}
childnode.setName(label);
childnode.setProperty("path", childlabel);
childnode.setProperty("count", String.valueOf( Math.round( cat.value)) ) ;
//log.info("Found " + root.label + " " + id);
filterNode.addChild(childnode);
}
//filterNode.sortChildren();
}
return facetNodes;
}
}
finally
{
getLuceneConnectionManager().release(refs);
}
return null;
}
}