/*
* This file is part of Caliph & Emir.
*
* Caliph & Emir is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Caliph & Emir is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Caliph & Emir; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright statement:
* --------------------
* (c) 2002-2005 by Mathias Lux (mathias@juggle.at)
* http://www.juggle.at, http://caliph-emir.sourceforge.net
*/
package at.lux.fotoretrieval.retrievalengines;
import at.lux.fotoretrieval.ResultListEntry;
import at.lux.fotoretrieval.lucene.Graph;
import at.lux.fotoretrieval.lucene.Node;
import at.lux.fotoretrieval.lucene.similarity.TermFrequencySimilarity;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import java.io.IOException;
import java.util.*;
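/**
* JUnit tests for {@link LucenePathIndexRetrievalEngine}: index creation,
* searching the path index, and precision/recall comparisons of search
* rankings against rankings by MCS similarity.
* <p>
* Date: 26.03.2005
* Time: 00:14:14
*
* @author Mathias Lux, mathias@juggle.at
*/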
public class LucenePathIndexRetrievalEngineTest extends TestCase {
private LucenePathIndexRetrievalEngine engine;
private final String pathToIndex = "testdata";
// private final String pathToIndex = "C:\\Dokumente und Einstellungen\\Mathias\\Eigene Dateien\\JavaProjects\\Caliph\\testdata";
/**
 * Prepares the fixture: runs the superclass set-up and then creates a fresh
 * {@link LucenePathIndexRetrievalEngine} for the test that is about to run.
 *
 * @throws Exception if the superclass set-up fails
 */
protected void setUp() throws Exception {
    super.setUp();
    this.engine = new LucenePathIndexRetrievalEngine(50);
}
/**
 * Asks the engine to index the test repository semantically, then opens the
 * index at {@code pathToIndex + "/idx_paths"} and prints the "graph" field of
 * every document. The test fails if the index cannot be read.
 */
public void testCreateIndex() {
    engine.indexFilesSemantically(pathToIndex, null);
    IndexReader reader = null;
    try {
        reader = IndexReader.open(pathToIndex + "/idx_paths");
        int numDocs = reader.numDocs();
        for (int i = 0; i < numDocs; i++) {
            System.out.println(reader.document(i).get("graph"));
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    } finally {
        // Release the index files even when reading failed.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                // A failing close must not hide the actual outcome of the test.
                e.printStackTrace();
            }
        }
    }
}
/**
 * Runs the wildcard query {@code _*_0_1} against the "graph" field of the
 * path index and prints score and graph of every hit. The test fails if the
 * index cannot be read or the query cannot be parsed.
 */
public void testSearch() {
    IndexSearcher search = null;
    try {
        QueryParser qParser = new QueryParser("graph", new WhitespaceAnalyzer());
        // Use the shared test repository instead of a machine-specific absolute path,
        // so this test reads the same index that testCreateIndex works on.
        search = new IndexSearcher(pathToIndex + "/idx_paths");
        Hits h = search.search(qParser.parse("_*_0_1"));
        for (int i = 0; i < h.length(); i++) {
            System.out.println(h.score(i) + ": " + h.doc(i).get("graph"));
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    } catch (ParseException e) {
        e.printStackTrace();
        fail(e.toString());
    } finally {
        // Release the index files even when the search failed.
        if (search != null) {
            try {
                search.close();
            } catch (IOException e) {
                // A failing close must not hide the actual outcome of the test.
                e.printStackTrace();
            }
        }
    }
}
/**
 * Sends a fixed semantic query to the engine and prints relevance and
 * description path of every returned entry.
 */
public void testSemanticSearch() {
    String semanticQuery = "[\"Mathias Lux\"] [Talking] [\"Michael Granitzer\"] patientOf 1 2 agent 2 3";
    List<ResultListEntry> hits = engine.getImagesBySemantics(semanticQuery, null, pathToIndex, true, null);
    for (ResultListEntry hit : hits) {
        System.out.println(hit.getRelevance() + ": " + hit.getDescriptionPath());
    }
}
/**
 * Uses the first "graph" value of every document in the path index as a
 * query in turn and hands it to {@code testQuery}, which prints one precision
 * line per query. The test fails if the index cannot be read or a generated
 * query cannot be parsed.
 */
public void testPrecisionAndRecall() {
    IndexSearcher is = null;
    IndexReader ir = null;
    try {
        // Shared test repository instead of a machine-specific absolute Windows path.
        String indexPath = pathToIndex + "/idx_paths";
        is = new IndexSearcher(indexPath);
        ir = IndexReader.open(indexPath);
        int numDocs = ir.numDocs();
        for (int i = 0; i < numDocs; i++) {
            testQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    } catch (ParseException e) {
        // An unparsable query is a test failure, not something to ignore silently.
        e.printStackTrace();
        fail(e.toString());
    } finally {
        // Release both handles on the index; close errors must not hide the test outcome.
        if (ir != null) {
            try {
                ir.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Uses the first "graph" value of every document in the path index as a
 * query in turn and hands it to {@code testDirectQuery}, which prints one
 * precision line per query. The test fails if the index cannot be read or a
 * generated query cannot be parsed.
 */
public void testPrecisionAndRecallFullText() {
    IndexSearcher is = null;
    IndexReader ir = null;
    try {
        // Shared test repository instead of a machine-specific absolute Windows path.
        String indexPath = pathToIndex + "/idx_paths";
        is = new IndexSearcher(indexPath);
        ir = IndexReader.open(indexPath);
        int numDocs = ir.numDocs();
        for (int i = 0; i < numDocs; i++) {
            testDirectQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    } catch (ParseException e) {
        // An unparsable query is a test failure, not something to ignore silently.
        e.printStackTrace();
        fail(e.toString());
    } finally {
        // Release both handles on the index; close errors must not hide the test outcome.
        if (ir != null) {
            try {
                ir.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Compares two rankings for one query graph and prints the resulting
 * precision values.
 * <p>
 * The reference ranking scores every graph in {@code ir} by
 * {@code query.getMcsSimilarity(...)}. The ranking under test comes from
 * searching the "paths" field with the Lucene query generated from the graph.
 *
 * @param ir    reader over the index whose documents carry a "graph" field
 * @param query graph used as the query
 * @param is    searcher used for the Lucene search; its similarity is replaced
 *              by a {@link TermFrequencySimilarity}
 * @throws IOException    if the index cannot be read
 * @throws ParseException if the generated query cannot be parsed
 */
private void testQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException {
    // create results from mcs:
    LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>();
    // Remember the document number of every graph string while we are iterating anyway,
    // so hits can be mapped back to document numbers without rescanning the index.
    // Later entries overwrite earlier ones: the highest document number wins for
    // duplicate graph strings.
    Map<String, Integer> docIdByGraph = new HashMap<String, Integer>();
    int numDocs = ir.numDocs();
    for (int j = 0; j < numDocs; j++) {
        Graph model = new Graph(ir.document(j).getValues("graph")[0]);
        String modelString = model.toString();
        float mcsSimilarity = query.getMcsSimilarity(model);
        resultsMcs.add(new ResultHolder(j, modelString, mcsSimilarity));
        docIdByGraph.put(modelString, j);
    }
    Collections.sort(resultsMcs);

    // create results from search:
    // set to another similarity if necessary:
    is.setSimilarity(new TermFrequencySimilarity());
    String gQuery = LucenePathIndexRetrievalEngine.createLucenePathQuery(query);
    QueryParser qParse = new QueryParser("paths", new WhitespaceAnalyzer());
    Query q = qParse.parse(gQuery);
    Hits hits = is.search(q);
    LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>();
    for (int i = 0; i < hits.length(); i++) {
        String graph = hits.doc(i).getValues("graph")[0];
        Integer knownId = docIdByGraph.get(graph);
        // -1 marks a hit whose graph string matches no document of the reader.
        int docID = (knownId == null) ? -1 : knownId.intValue();
        resultsSearch.add(new ResultHolder(docID, graph, hits.score(i)));
    }
    Collections.sort(resultsSearch);
    printPrecisionRecallPlot(resultsMcs, resultsSearch);
}
/**
 * Compares two file-based rankings for one query graph and prints the
 * resulting precision values.
 * <p>
 * The reference ranking scores every graph in {@code ir} by
 * {@code query.getMcsSimilarity(...)} and adds one entry per "file" value of
 * the document. The ranking under test comes from a search of the "all" field
 * of the {@code idx_fulltext} index for the labels of the query's nodes;
 * labels are looked up in the {@code idx_semantic} index.
 *
 * @param ir    reader over the index whose documents carry "graph" and "file" fields
 * @param query graph used as the query
 * @param is    not used by this method
 * @throws IOException    if one of the indexes cannot be read or closed
 * @throws ParseException if the label query cannot be parsed
 */
private void testDirectQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException {
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        // Shared test repository instead of machine-specific absolute Windows paths.
        reader = IndexReader.open(pathToIndex + "/idx_semantic");
        searcher = new IndexSearcher(pathToIndex + "/idx_fulltext");

        // Lookup table from node id to node label, read from the semantic index.
        HashMap<Integer, String> node2label = new HashMap<Integer, String>();
        int numNodes = reader.numDocs();
        for (int j = 0; j < numNodes; j++) {
            String id = reader.document(j).getValues("id")[0];
            String label = reader.document(j).getValues("label")[0];
            node2label.put(Integer.parseInt(id), label);
        }

        // create results from mcs:
        LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>();
        int numDocs = ir.numDocs();
        for (int j = 0; j < numDocs; j++) {
            Graph model = new Graph(ir.document(j).getValues("graph")[0]);
            float mcsSimilarity = query.getMcsSimilarity(model);
            String[] files = ir.document(j).getValues("file");
            for (String file : files) {
                resultsMcs.add(new ResultHolder(mcsSimilarity, file));
            }
        }
        Collections.sort(resultsMcs);

        // create results from search: the query is the space separated list of node labels.
        StringBuilder qBuilder = new StringBuilder(64);
        for (Iterator<Node> iterator = query.getNodes().iterator(); iterator.hasNext();) {
            Node node = iterator.next();
            qBuilder.append(node2label.get(node.getNodeID()));
            qBuilder.append(" ");
        }
        QueryParser qParse = new QueryParser("all", new WhitespaceAnalyzer());
        Query q = qParse.parse(qBuilder.toString());
        Hits hits = searcher.search(q);
        LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>();
        for (int i = 0; i < hits.length(); i++) {
            String file = hits.doc(i).getValues("file")[0];
            resultsSearch.add(new ResultHolder(hits.score(i), file));
        }
        Collections.sort(resultsSearch);
        printPrecisionRecallPlotFileBased(resultsMcs, resultsSearch);
    } finally {
        // This method runs once per indexed document, so the handles opened above
        // must be released on every call.
        try {
            if (searcher != null) {
                searcher.close();
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
}
/**
 * Prints one line of precision values for a search ranking, measured against
 * the top entries of a reference ranking. Entries are matched by document
 * number.
 * <p>
 * The first (up to) ten entries of {@code mcs} form the set of relevant
 * documents. For every entry of {@code search} that belongs to this set, the
 * precision at its rank is appended to the output line. Decimal points are
 * printed as commas. Recall values are computed but currently not printed.
 *
 * @param mcs    reference ranking, best result first; may hold fewer than ten entries
 * @param search ranking to evaluate, best result first
 * @return always the empty string
 */
public static String printPrecisionRecallPlot(LinkedList<ResultHolder> mcs, LinkedList<ResultHolder> search) {
    // Never ask for more reference results than exist; subList would throw otherwise.
    int numLevels = Math.min(10, mcs.size());
    HashSet<Integer> relevantDocIDs = new HashSet<Integer>(numLevels);
    for (ResultHolder reference : mcs.subList(0, numLevels)) {
        relevantDocIDs.add(reference.getDocumentNumber());
    }

    // 1-based ranks of the search results that are relevant.
    LinkedList<Integer> ranksOfRelevant = new LinkedList<Integer>();
    int rank = 1;
    for (ResultHolder candidate : search) {
        if (relevantDocIDs.contains(candidate.getDocumentNumber())) {
            ranksOfRelevant.add(rank);
        }
        rank++;
    }

    StringBuilder precisionLine = new StringBuilder(256);
    StringBuilder recallLine = new StringBuilder(256);
    precisionLine.append("precision 1 ");
    recallLine.append("recall 0 ");
    int found = 1;
    for (Integer rankOfHit : ranksOfRelevant) {
        float recall = (1f / (float) numLevels) * ((float) found);
        // precision = relevant results seen so far / results seen so far
        float precision = ((float) found) / ((float) rankOfHit.intValue());
        precisionLine.append(precision);
        precisionLine.append(" ");
        recallLine.append(recall);
        recallLine.append(" ");
        found++;
    }
    // Only the precision line is printed; print recallLine the same way to get recall.
    System.out.println(precisionLine.toString().replace('.', ','));
    return "";
}
/**
 * Prints one line of precision values for a search ranking, measured against
 * the top entries of a reference ranking. Entries are matched by their file
 * value.
 * <p>
 * The first (up to) ten entries of {@code mcs} form the set of relevant
 * files. For every entry of {@code search} whose file belongs to this set,
 * the precision at its rank is appended to the output line. Decimal points
 * are printed as commas. Recall values are computed but currently not printed.
 *
 * @param mcs    reference ranking, best result first; may hold fewer than ten entries
 * @param search ranking to evaluate, best result first
 * @return always the empty string
 */
public static String printPrecisionRecallPlotFileBased(LinkedList<ResultHolder> mcs, LinkedList<ResultHolder> search) {
    // Never ask for more reference results than exist; subList would throw otherwise.
    int numLevels = Math.min(10, mcs.size());
    HashSet<String> relevantFiles = new HashSet<String>(numLevels);
    for (ResultHolder reference : mcs.subList(0, numLevels)) {
        relevantFiles.add(reference.getFile());
    }

    // 1-based ranks of the search results that are relevant.
    LinkedList<Integer> ranksOfRelevant = new LinkedList<Integer>();
    int rank = 1;
    for (ResultHolder candidate : search) {
        if (relevantFiles.contains(candidate.getFile())) {
            ranksOfRelevant.add(rank);
        }
        rank++;
    }

    StringBuilder precisionLine = new StringBuilder(256);
    StringBuilder recallLine = new StringBuilder(256);
    precisionLine.append("precision 1 ");
    recallLine.append("recall 0 ");
    int found = 1;
    for (Integer rankOfHit : ranksOfRelevant) {
        float recall = (1f / (float) numLevels) * ((float) found);
        // precision = relevant results seen so far / results seen so far
        float precision = ((float) found) / ((float) rankOfHit.intValue());
        precisionLine.append(precision);
        precisionLine.append(" ");
        recallLine.append(recall);
        recallLine.append(" ");
        found++;
    }
    // Only the precision line is printed; print recallLine the same way to get recall.
    System.out.println(precisionLine.toString().replace('.', ','));
    return "";
}
}
/**
 * One entry of a ranked result list: a similarity score together with either
 * a document number and graph string, or a file name, depending on the
 * constructor used. The natural ordering sorts by descending similarity.
 */
class ResultHolder implements Comparable {
    private float similarity;
    private int documentNumber;
    private String graph;
    private String file;

    /**
     * Creates an entry identified by document number and graph string.
     * The file stays {@code null}.
     *
     * @param documentNumber number of the document
     * @param graph          string form of the document's graph
     * @param similarity     score of the entry
     */
    public ResultHolder(int documentNumber, String graph, float similarity) {
        this.documentNumber = documentNumber;
        this.graph = graph;
        this.similarity = similarity;
    }

    /**
     * Creates an entry identified by a file. The document number stays 0 and
     * the graph stays {@code null}.
     *
     * @param similarity score of the entry
     * @param file       file the entry refers to
     */
    public ResultHolder(float similarity, String file) {
        this.file = file;
        this.similarity = similarity;
    }

    /** @return the file of this entry, or {@code null} if it was created without one */
    public String getFile() {
        return file;
    }

    /** @return the document number of this entry */
    public int getDocumentNumber() {
        return documentNumber;
    }

    /** @param documentNumber the new document number */
    public void setDocumentNumber(int documentNumber) {
        this.documentNumber = documentNumber;
    }

    /** @return the graph string of this entry, or {@code null} if it was created without one */
    public String getGraph() {
        return graph;
    }

    /** @param graph the new graph string */
    public void setGraph(String graph) {
        this.graph = graph;
    }

    /** @return the similarity score of this entry */
    public float getSimilarity() {
        return similarity;
    }

    /** @param similarity the new similarity score */
    public void setSimilarity(float similarity) {
        this.similarity = similarity;
    }

    /**
     * Orders entries by descending similarity, so sorting puts the best
     * result first. A NaN similarity sorts after every other value; this keeps
     * the ordering consistent for sorting instead of treating NaN as equal to
     * everything.
     *
     * @param o the object to compare with
     * @return a negative value if this entry ranks before {@code o}, a positive
     *         value if it ranks after, 0 if both rank equally or {@code o} is
     *         not a {@code ResultHolder}
     */
    public int compareTo(Object o) {
        if (!(o instanceof ResultHolder)) {
            return 0;
        }
        ResultHolder other = (ResultHolder) o;
        boolean thisIsNaN = Float.isNaN(similarity);
        boolean otherIsNaN = Float.isNaN(other.similarity);
        if (thisIsNaN || otherIsNaN) {
            if (thisIsNaN && otherIsNaN) {
                return 0;
            }
            // Entries without a usable score go to the end of the ranking.
            return thisIsNaN ? 1 : -1;
        }
        if (other.similarity > similarity) {
            return 1;
        }
        if (other.similarity < similarity) {
            return -1;
        }
        return 0;
    }
}