package org.openedit.entermedia.autocomplete; import java.io.IOException; import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.util.Version; import org.openedit.data.lucene.BaseLuceneSearcher; import org.openedit.data.lucene.FullTextAnalyzer; import org.openedit.data.lucene.RecordLookUpAnalyzer; import com.openedit.OpenEditException; import com.openedit.WebPageRequest; import com.openedit.hittracker.HitTracker; import com.openedit.hittracker.SearchQuery; /** * Thesaurus searcher for a Lucene index. * * @author jvalencia * */ public class AutoCompleteLuceneSearcher extends BaseLuceneSearcher implements AutoCompleteSearcher { private static Log log = LogFactory.getLog(AutoCompleteLuceneSearcher.class); protected Set fieldCache; protected Date fieldCacheDate; public Date getCacheDate() { if (fieldCacheDate == null) { GregorianCalendar tomorrow = new GregorianCalendar(); tomorrow.set(Calendar.HOUR_OF_DAY, 0); tomorrow.set(Calendar.MINUTE, 0); tomorrow.add(Calendar.DATE, 1); fieldCacheDate = tomorrow.getTime(); } return fieldCacheDate; } public void setCacheDate(Date inCacheDate) { fieldCacheDate = inCacheDate; } protected Set getCache() { if( fieldCache == null) { fieldCache = new HashSet(500); } return fieldCache; } public Analyzer getAnalyzer() { if (fieldAnalyzer == null) { Map map = new HashMap(); map.put("synonymsenc", new FullTextAnalyzer(Version.LUCENE_41)); PerFieldAnalyzerWrapper composite = new PerFieldAnalyzerWrapper( new RecordLookUpAnalyzer() , map); fieldAnalyzer = composite; } return fieldAnalyzer; } public HitTracker getAllHits(WebPageRequest inReq) { return null; } public String getIndexPath() { if (getSearchType().startsWith("asset")) { return "/WEB-INF/data/" + getCatalogId() + "/assets/search/autocomplete/index"; } else { return "/" + getCatalogId() + "/temp/autocomplete/" + getSearchType(); } } public synchronized void reIndexAll(IndexWriter inWriter, TaxonomyWriter inTaxonomyWriter) { //do nothing } public void updateHits(HitTracker tracker, String word) { if( new Date().after(getCacheDate()) ) { //clear the cache once a day getCache().clear(); setCacheDate(null); } if( getCache().contains(word)) { return; } int size = getCache().size(); if( size > 2000) //TODO: Move to a normal 1000 cache system { getCache().clear(); } getCache().add(word); //word could be "hot dog" int hits = tracker.size(); if (word == null || hits == 0) { return; } SearchQuery suggestionsQuery = createSearchQuery(); //String nospace = word.replace(' ', '_'); //hot_dog suggestionsQuery.addExact("synonyms", word); //Todo: Do a local mem cache with a max of 1000 entries. To keep us from search for the same things HitTracker wordsHits = search(suggestionsQuery); Field id = new Field("synonyms", word, Store.YES, Index.NOT_ANALYZED_NO_NORMS); try { if (wordsHits.size() == 0) { saveHitCount(word, hits, id); } else if (wordsHits.size() > 0) { Object row = wordsHits.get(0); String hitstring = wordsHits.getValue(row, "hitcount"); int currentcount = Integer.parseInt(hitstring); if (currentcount == hits) { return; } /* Check for timestamp */ String stamp = wordsHits.getValue(row, "timestamp"); GregorianCalendar timestamp = new GregorianCalendar(); timestamp.setTime(DateTools.stringToDate(stamp)); GregorianCalendar yesterday = new GregorianCalendar(); yesterday.add(Calendar.DATE, -1); if (timestamp.before(yesterday)) { saveHitCountWithSynonyms(word, hits, id); } } } catch(Exception ex) { throw new OpenEditException(ex); } } protected void saveHitCountWithSynonyms(String word, int hits, Field id) throws IOException { Document doc = new Document(); doc.add(id); doc.add(new Field("synonymsenc", word, Store.NO, Index.ANALYZED)); doc.add(new Field("hits", getNumberUtils().int2sortableStr(hits), Store.NO, Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field("hitcount", Integer.toString(hits), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); /* Timestamp */ String newstamp = DateTools.dateToString(new Date(), Resolution.SECOND); doc.add(new Field("timestamp", newstamp, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); Term term = new Term("synonyms", word); getIndexWriter().updateDocument(term, doc, getAnalyzer()); clearIndex(); } protected void saveHitCount(String word, int hits, Field id) throws IOException { Document doc = new Document(); doc.add(id); doc.add(new Field("synonymsenc", word, Store.NO, Index.ANALYZED)); doc.add(new Field("hits", getNumberUtils().int2sortableStr(hits), Store.NO, Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field("hitcount", Integer.toString(hits), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); /* Timestamp */ String newstamp = DateTools.dateToString(new Date(), Resolution.SECOND); doc.add(new Field("timestamp", newstamp, Field.Store.YES, Field.Index.NOT_ANALYZED)); getIndexWriter().addDocument(doc, getAnalyzer()); clearIndex(); } }