package org.apache.lucene.ClusterBasedPsuedoRelevanceFeedback; /** * Created by IntelliJ IDEA. * User: Antonio * Date: 4/28/11 * Time: 2:45 PM * To change this template use File | Settings | File Templates. */ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; public class ClusterPsuedoRelevanceFeedbackDriver { public static void main(String args[]) throws IOException { SimilarityProvider sp = new LanguageModelSimilarityProvider(); IndexReader reader; reader = IndexReader.open(FSDirectory.open(new File("../LMSMIndex/")), true);// only searching, so read-only=true IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarityProvider(sp); TopDocs top = searcher.search(new TermQuery(new Term("text", "politician")),100); ScoreDoc[] allDocs = top.scoreDocs; for(int i : reader.getTermFreqVector(allDocs[0].doc,"text").getTermFrequencies()) System.out.println(i); for(BytesRef i : reader.getTermFreqVector(allDocs[0].doc,"text").getTerms()) System.out.println(i.bytes[0]); System.out.println(reader.getTermFreqVector(allDocs[0].doc,"text").getTerms().length); ArrayList<DocumentCluster> clusters = new ArrayList<DocumentCluster>(100); for(ScoreDoc sd : allDocs) { clusters.add(new DocumentCluster(sd,reader.getTermFreqVector(sd.doc,"text"))); } for(DocumentCluster c : clusters) { for(ScoreDoc sd : allDocs) { c.addPoint(sd,reader.getTermFreqVector(sd.doc,"text")); } } DocumentCluster first= clusters.get(0),second= clusters.get(0),third = clusters.get(0); for(DocumentCluster c : clusters) { if(c.getScore() > first.getScore()) { first = c; } else if(c.getScore() > second.getScore()) { second = c; } else if(c.getScore() > third.getScore()) { third = c; } } List<ScoreDoc> tops = first.Docs; tops.addAll(second.Docs); tops.addAll(third.Docs); Map<ScoreDoc,Integer> rankings = new HashMap<ScoreDoc,Integer>(); for(ScoreDoc sd : tops) { if(rankings.containsKey(sd)) rankings.put(sd,rankings.get(sd) + 1); else rankings.put(sd,1); } } }