package uc.files.filelist;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import logger.LoggerFactory;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import helpers.IFilter;
import helpers.ISearchMap;
import helpers.ISubstringMapping2;

/**
 * Search map backed by an in-memory Lucene index.
 * <p>
 * Every item handed to {@link #put(Object)} is appended to an internal list and
 * indexed as one Lucene document with two fields: {@code "title"} (the analyzed,
 * non-stored mapping string of the item) and {@code "pos"} (the stored,
 * non-indexed list position of the item). A search runs one query per search
 * term against {@code "title"} and resolves the hits back to items through
 * {@code "pos"}.
 * <p>
 * {@link #put(Object)} and {@link #search(Set, Set, IFilter)} are synchronized
 * on this instance.
 *
 * @param <V> type of the items stored in the index
 */
public class InvertedIndex<V> implements ISearchMap<V> {

    private static final Logger logger = LoggerFactory.make();

    /** Name of the analyzed field holding the mapping string of an item. */
    private static final String FIELD_TITLE = "title";

    /** Name of the stored field holding the position of an item in {@link #itemByIndex}. */
    private static final String FIELD_POS = "pos";

    /** Search terms shorter than this are dropped when building queries. */
    private static final int MIN_TERM_LENGTH = 3;

    private final ISubstringMapping2<V> mapping;

    /** All items ever put; the list position is what the "pos" field of a document stores. */
    private final List<V> itemByIndex = new ArrayList<V>();

    private final SimpleAnalyzer analyzer = new SimpleAnalyzer(Version.LUCENE_34);

    private final Directory index = new RAMDirectory();

    /** Currently open writer, or {@code null} while no writer is open. */
    private IndexWriter w;

    /**
     * @param mapping supplies the string under which an item is indexed
     */
    public InvertedIndex(ISubstringMapping2<V> mapping) {
        this.mapping = mapping;
    }

    /**
     * Adds an item to the index, opening an index writer first if none is open.
     * <p>
     * If the writer cannot be opened, the failure is logged and the item is not
     * added. If writing the document fails, the failure is logged; the item then
     * still occupies its position in the internal list.
     *
     * @param toMap the item to index
     */
    public synchronized void put(V toMap) {
        if (!ensureWriter()) {
            // Without a writer the document cannot be stored; previously this
            // path ended in a NullPointerException on w.addDocument(..).
            return;
        }

        int pos = itemByIndex.size();
        itemByIndex.add(toMap);

        Document doc = new Document();
        doc.add(new Field(FIELD_TITLE, mapping.getMappingString(toMap),
                Field.Store.NO, Field.Index.ANALYZED));
        doc.add(new Field(FIELD_POS, "" + pos, Field.Store.YES, Field.Index.NO));

        try {
            w.addDocument(doc);
        } catch (CorruptIndexException e) {
            logger.warn(e, e);
        } catch (IOException e) {
            logger.warn(e, e);
        }
    }

    /**
     * Searches for a single search string.
     *
     * @param s the search string
     * @return the matching items, see {@link #search(Set, Set, IFilter)}
     */
    public Set<V> search(String s) {
        return search(Collections.singleton(s));
    }

    /**
     * Searches for items matching all given search strings, without excludes
     * and with a filter that accepts every item.
     *
     * @param searchStrings the search strings
     * @return the matching items, see {@link #search(Set, Set, IFilter)}
     */
    public Set<V> search(Set<String> searchStrings) {
        return search(searchStrings, Collections.<String>emptySet(), new IFilter<V>() {
            // pass-through filter: accepts everything and maps items to themselves
            public boolean filter(V item) {
                return true;
            }

            public Set<V> mapItems(Set<V> nodeItems) {
                return nodeItems;
            }
        });
    }

    /**
     * Searches for items that match every term derived from
     * {@code searchStrings} and none of the terms derived from {@code excludes}.
     * <p>
     * An open index writer is closed before searching. The hits of every query
     * are passed through {@link IFilter#mapItems(Set)}; items of the first
     * query that {@link IFilter#filter(Object)} rejects are removed, and the
     * hits of each later query are intersected with the result so far.
     *
     * @param searchStrings strings whose terms must all match
     * @param excludes strings whose terms must not match
     * @param filter maps and filters the raw hits
     * @return the matching items; an immutable empty set if nothing has been
     *         indexed or no usable search term was given
     */
    public synchronized Set<V> search(Set<String> searchStrings, Set<String> excludes,
            IFilter<V> filter) {
        if (itemByIndex.isEmpty()) {
            // nothing indexed -> no results
            return Collections.<V>emptySet();
        }
        closeWriter();

        Set<V> current = null;
        for (String queryString : searchToQueryStrings(searchStrings)) {
            Set<V> found = filter.mapItems(getMatching(queryString));
            if (current == null) {
                // Only the first hit set needs filtering; later sets are
                // intersected with an already filtered set.
                removeRejected(found, filter);
            } else {
                found.retainAll(current);
            }
            current = found;
            if (current.isEmpty()) {
                break;
            }
        }

        if (current == null) {
            return Collections.<V>emptySet();
        }
        if (current.isEmpty()) {
            // nothing left to exclude from, so skip the exclude queries
            return current;
        }

        for (String exclude : searchToQueryStrings(excludes)) {
            current.removeAll(filter.mapItems(getMatching(exclude)));
        }
        return current;
    }

    /**
     * Opens the index writer if none is open.
     *
     * @return {@code true} if a writer is available afterwards
     */
    private boolean ensureWriter() {
        if (w != null) {
            return true;
        }
        try {
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_34, analyzer);
            w = new IndexWriter(index, iwc);
            return true;
        } catch (Exception e) {
            logger.error(e, e);
            return false;
        }
    }

    /**
     * Closes the index writer if one is open. On failure the writer reference
     * is kept, so the next search attempts to close it again.
     */
    private void closeWriter() {
        if (w == null) {
            return;
        }
        try {
            w.close();
            w = null;
        } catch (Exception e) {
            logger.error("Problem creating the FileListindex: " + e, e);
        }
    }

    /** Removes every item from {@code items} that the filter does not accept. */
    private void removeRejected(Set<V> items, IFilter<V> filter) {
        for (Iterator<V> it = items.iterator(); it.hasNext();) {
            if (!filter.filter(it.next())) {
                it.remove();
            }
        }
    }

    /**
     * Runs one query against the "title" field and resolves the hits to items.
     * Failures are logged; whatever was collected up to that point is returned.
     *
     * @param querystr query in Lucene query parser syntax
     * @return a new mutable set with the items of all hits
     */
    private Set<V> getMatching(String querystr) {
        Set<V> res = new HashSet<V>();
        IndexSearcher searcher = null;
        try {
            Query q = new QueryParser(Version.LUCENE_34, FIELD_TITLE, analyzer).parse(querystr);
            searcher = new IndexSearcher(index, true);
            // collect up to as many hits as there are items, i.e. all of them
            TopScoreDocCollector collector =
                    TopScoreDocCollector.create(itemByIndex.size(), false);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            for (ScoreDoc sd : hits) {
                Document d = searcher.doc(sd.doc);
                int pos = Integer.parseInt(d.get(FIELD_POS));
                res.add(itemByIndex.get(pos));
            }
        } catch (Exception e) {
            logger.warn(e + ": " + querystr, e);
        } finally {
            // close in finally so a failing query does not leak the searcher
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    logger.warn(e + ": " + querystr, e);
                }
            }
        }
        return res;
    }

    /**
     * Splits the search strings at non-word characters and turns every part of
     * at least {@link #MIN_TERM_LENGTH} characters into a query string by
     * appending {@code *}.
     *
     * @param searchstrings raw search strings
     * @return one query string per usable term, in encounter order
     */
    private static List<String> searchToQueryStrings(Collection<String> searchstrings) {
        List<String> queryStrings = new ArrayList<String>();
        for (String searchstr : searchstrings) {
            for (String s : searchstr.split("\\W")) {
                if (s.length() >= MIN_TERM_LENGTH) {
                    queryStrings.add(s + "*");
                }
            }
        }
        return queryStrings;
    }
}