package org.apache.lucene.demo;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.benchmark.quality.*;
import org.apache.lucene.benchmark.quality.trec.TrecJudge;
import org.apache.lucene.benchmark.quality.trec.TrecTopicsReader;
import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LanguageModelQuery;
import org.apache.lucene.search.LanguageModelSimilarityProvider;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.*;
/**
* Created by IntelliJ IDEA.
* User: Antonio
* Date: 4/10/11
* Time: 11:03 AM
* To change this template use File | Settings | File Templates.
*/
public class LanguageModelBenchmark {
/**
* Testing Lucene Base against Benchmark.
*/
public static void main(String[] args) throws Exception {
String indexST = IndexFiles.INDEX_DIR_ST;
String indexSM = IndexFiles.INDEX_DIR_SM;
String indexSTLM = IndexFiles.INDEX_DIR_ST_LM;
String indexSMLM = IndexFiles.INDEX_DIR_SM_LM;
System.out.println("SMA - ST");
benchmark(indexSM, false, 0.0f);
System.out.println("SMA - LM 500");
benchmark(indexSMLM, true, 500.0f);
System.out.println("SMA - LM 750");
benchmark(indexSMLM, true, 750f);
System.out.println("SMA - LM 1000");
benchmark(indexSMLM, true, 1000f);
System.out.println("SMA - LM 1500");
benchmark(indexSMLM, true, 1500f);
System.out.println("SMA - LM 2000");
benchmark(indexSMLM, true, 2000f);
System.out.println("SMA - LM 2500");
benchmark(indexSMLM, true, 2500f);
System.out.println("SMA - LM 3000");
benchmark(indexSMLM, true, 3000f);
System.out.println("SMA - LM 4000");
benchmark(indexSMLM, true, 4000f);/*
//STD Analyzer
System.out.println("STD - ST");
benchmark(indexST, false, 0.0f);
System.out.println("STD - LM 0");
benchmark(indexST, true, 0.0f);
System.out.println("STD - LM .015");
benchmark(indexST, true, .015f);
System.out.println("STD - LM .03");
benchmark(indexST, true, .03f);
System.out.println("STD - LM .06");
benchmark(indexST, true, .06f);
System.out.println("STD - LM .12");
benchmark(indexST, true, .12f);
System.out.println("STD - LM .25");
benchmark(indexST, true, .25f);
System.out.println("STD - LM .5");
benchmark(indexST, true, .5f);
System.out.println("STD - LM .75");
benchmark(indexST, true, .75f);*/
}
public static void benchmark(String indexDir, boolean lm, float smooth) {
try {
SimilarityProvider sp = new LanguageModelSimilarityProvider();
IndexReader reader;
reader = IndexReader.open(FSDirectory.open(new File(indexDir)), true);// only searching, so read-only=true
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarityProvider(sp);
PrintWriter logger = new PrintWriter(new FileWriter(new File("../stat" + (lm ? "lm" + smooth : "st") + ".txt")), true);
TrecTopicsReader qReader = new TrecTopicsReader();
QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(new File("../topicsFile.txt"))));
Judge judge = new TrecJudge(new BufferedReader(new FileReader(new File("../qrelsFile.txt"))));
judge.validateData(qqs, logger);
QualityQueryParser qqParser = new SimpleQQParser("title", "text");
QualityStats stats[];
SubmissionReport submitLog = null;
if (lm) {
LanguageQualityBenchmark qrun = new LanguageQualityBenchmark(qqs, qqParser, searcher, "DOCNO", smooth);
stats = qrun.execute(judge, submitLog, logger);
} else {
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, "DOCNO");
stats = qrun.execute(judge, submitLog, logger);
}
QualityStats avg = QualityStats.average(stats);
double minMAP = 0, maxMAP = 0, MAP = 0, minR = 0, maxR = 0, RP = 0;
boolean first = true;
for (int i = 0; i < stats.length; i++) {
double TMAP, TR = 0;
TMAP = stats[i].getAvp();
TR = stats[i].getNumGoodPoints() / stats[i].getMaxGoodPoints();
if (first) {
minMAP = TMAP;
minR = TR;
maxMAP = TMAP;
maxR = TR;
first = false;
}
if (TMAP < minMAP) {
minMAP = TMAP;
}
if (TMAP > maxMAP) {
maxMAP = TMAP;
}
if (TR < minR) {
minR = TR;
}
if (TR > maxR) {
maxR = TR;
}
RP += TR;
MAP += TMAP;
}
RP = RP / stats.length;
MAP = MAP / stats.length;
System.out.println("Min MAP :" + minMAP + " Max MAP : " + maxMAP + " AVG MAP : " + MAP + "\n"
+ "Min R :" + minR + " Max R : " + maxR + " AVG R : " + RP + "\n");
logger.close();
reader.close();
} catch (CorruptIndexException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
} catch (IOException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
} catch (Exception e1) {
}
}
}