LucenePathIndexRetrievalEngineTest.java example

Explorer
CaliphEmir-master
- caliphemir
  - src
    - at
  - test
    - at
      - lux
        fotoannotation
        ComponentsTest.java
        FileTreeTest.java
        fotoretrieval
        FastMapGraphTest.java
        FastMapTest.java
        GraphConstructionTest.java
        PointPanel.java
        SimilarityMatrixTest.java
        lucene
        GraphAnalyzerTest.java
        LabeledGraphTest.java
        panels
        TestConfigurationDialog.java
        retrievalengines
        GraphTest.java
        LucenePathIndexRetrievalEngineTest.java
        LuceneRetrievalEngineTest.java
        graphviz
        SpringEmbedderTest.java
        SpringEmbedderVis.java
        imageanalysis
        ColorLayoutTest.java
        ColorStructureTest.java
        ColorTest.java
        DominantColorTest.java
        EdgeHistogramTest.java
        ScalableColorTest.java
        db
        DerbyTest.java
        imaging
        BmpReaderTest.java
        PpmReaderTest.java
        retrieval
        StcTest.java
        evaluation
        SuffixTreeEvaluation.java
        graphisomorphism
        FastSubgraphIsomorphismTest.java
        SubgraphIsomorphismTest.java
        metrics
        BooleanNodeDistanceFunctionTest.java
        SimpleEdgeDistanceFunctionTest.java
        TermVectorNodeDistanceFunctionTest.java
        suffixtreemodel
        SuffixTreeTest.java
        vectorspace
        ElementTextVectorSimilarityTest.java
        GraphVectorSimilarityTest.java
/*
 * This file is part of Caliph & Emir.
 *
 * Caliph & Emir is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Caliph & Emir is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Caliph & Emir; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2005 by Mathias Lux (mathias@juggle.at)
 * http://www.juggle.at, http://caliph-emir.sourceforge.net
 */
package at.lux.fotoretrieval.retrievalengines;

import at.lux.fotoretrieval.ResultListEntry;
import at.lux.fotoretrieval.lucene.Graph;
import at.lux.fotoretrieval.lucene.Node;
import at.lux.fotoretrieval.lucene.similarity.TermFrequencySimilarity;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

import java.io.IOException;
import java.util.*;

/**
 * JUnit 3 tests and precision/recall experiments for the
 * LucenePathIndexRetrievalEngine.
 * <p/>
 * Every index used here is looked up below <code>pathToIndex</code>, which is a
 * relative path and therefore resolved against the working directory of the
 * test run. All Lucene readers and searchers opened by this class are closed
 * again before the respective method returns.
 * <p/>
 * Date: 26.03.2005
 * Time: 00:14:14
 *
 * @author Mathias Lux, mathias@juggle.at
 */
public class LucenePathIndexRetrievalEngineTest extends TestCase {
    /**
     * Number of top ranked MCS results that are treated as the "relevant" set
     * when precision values are computed.
     */
    private static final int NUM_LEVELS = 10;

    private LucenePathIndexRetrievalEngine engine;
    private final String pathToIndex = "testdata";

    /**
     * Creates a fresh retrieval engine before each test.
     */
    protected void setUp() throws Exception {
        super.setUp();
        engine = new LucenePathIndexRetrievalEngine(50);
    }

    /**
     * Builds the location of one of the indexes below <code>pathToIndex</code>.
     *
     * @param indexName directory name of the index, e.g. "idx_paths"
     * @return the path handed to the Lucene reader / searcher
     */
    private String indexPath(String indexName) {
        return pathToIndex + "/" + indexName;
    }

    /**
     * Runs the semantic indexing on the test data and prints the "graph" field
     * of every document found in the "idx_paths" index afterwards.
     */
    public void testCreateIndex() {
        engine.indexFilesSemantically(pathToIndex, null);
        try {
            IndexReader reader = IndexReader.open(indexPath("idx_paths"));
            try {
                // NOTE(review): iterating up to numDocs() assumes the index has no deleted documents.
                for (int i = 0; i < reader.numDocs(); i++) {
                    System.out.println(reader.document(i).get("graph"));
                }
            } finally {
                reader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
            fail(e.toString());
        }
    }

    /**
     * Sends a fixed wildcard query against the "graph" field of the path index
     * and prints score and graph of every hit.
     */
    public void testSearch() {
        try {
            QueryParser qParser = new QueryParser("graph", new WhitespaceAnalyzer());
            // Search the same index testCreateIndex() writes instead of a machine specific absolute path.
            IndexSearcher search = new IndexSearcher(indexPath("idx_paths"));
            try {
                Hits h = search.search(qParser.parse("_*_0_1"));
                for (int i = 0; i < h.length(); i++) {
                    System.out.println(h.score(i) + ": " + h.doc(i).get("graph"));
                }
            } finally {
                search.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
            fail(e.toString());
        } catch (ParseException e) {
            e.printStackTrace();
            fail(e.toString());
        }
    }

    /**
     * Runs one semantic graph query through the engine and prints relevance
     * and description path of each returned entry.
     */
    public void testSemanticSearch() {
        List<ResultListEntry> result = engine.getImagesBySemantics("[\"Mathias Lux\"] [Talking] [\"Michael Granitzer\"] patientOf 1 2 agent 2 3", null, pathToIndex, true, null);
        for (ResultListEntry entry : result) {
            System.out.println(entry.getRelevance() + ": " + entry.getDescriptionPath());
        }
    }

    /**
     * Uses every graph stored in the path index as a query and prints one
     * precision line per query, comparing the path index search against the
     * MCS similarity ranking.
     */
    public void testPrecisionAndRecall() {
        try {
            IndexSearcher is = new IndexSearcher(indexPath("idx_paths"));
            try {
                IndexReader ir = IndexReader.open(indexPath("idx_paths"));
                try {
                    for (int i = 0; i < ir.numDocs(); i++) {
                        testQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
                    }
                } finally {
                    ir.close();
                }
            } finally {
                is.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
            fail(e.toString());
        } catch (ParseException e) {
            // A query that cannot be parsed is a test failure, not something to ignore.
            e.printStackTrace();
            fail(e.toString());
        }
    }

    /**
     * Same experiment as {@link #testPrecisionAndRecall()}, but the ranking
     * under test comes from a full text search on the node labels.
     */
    public void testPrecisionAndRecallFullText() {
        try {
            IndexSearcher is = new IndexSearcher(indexPath("idx_paths"));
            try {
                IndexReader ir = IndexReader.open(indexPath("idx_paths"));
                try {
                    for (int i = 0; i < ir.numDocs(); i++) {
                        testDirectQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
                    }
                } finally {
                    ir.close();
                }
            } finally {
                is.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
            fail(e.toString());
        } catch (ParseException e) {
            // A query that cannot be parsed is a test failure, not something to ignore.
            e.printStackTrace();
            fail(e.toString());
        }
    }

    /**
     * Ranks all indexed graphs once by MCS similarity to the query and once by
     * a search on the "paths" field, then prints the precision line for both
     * rankings.
     *
     * @param ir    reader on the path index, source of the candidate graphs
     * @param query graph used as query
     * @param is    searcher on the path index; its similarity is replaced by a
     *              TermFrequencySimilarity
     * @throws IOException    if the index cannot be read
     * @throws ParseException if the generated path query cannot be parsed
     */
    private void testQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException {
        // create results from mcs and remember which document each graph string belongs to:
        LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>();
        Map<String, Integer> docIdByGraph = new HashMap<String, Integer>();
        for (int j = 0; j < ir.numDocs(); j++) {
            Graph model = new Graph(ir.document(j).getValues("graph")[0]);
            // Later documents overwrite earlier ones, i.e. the last document with an equal graph wins.
            docIdByGraph.put(model.toString(), j);
            float mcsSimilarity = query.getMcsSimilarity(model);
            resultsMcs.add(new ResultHolder(j, model.toString(), mcsSimilarity));
        }
        Collections.sort(resultsMcs);

        // create results from search:

        // set to another similarity if necessary:
        is.setSimilarity(new TermFrequencySimilarity());

        String gQuery = LucenePathIndexRetrievalEngine.createLucenePathQuery(query);
        QueryParser qParse = new QueryParser("paths", new WhitespaceAnalyzer());
        Query q = qParse.parse(gQuery);
        Hits hits = is.search(q);
        LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>();
        for (int i = 0; i < hits.length(); i++) {
            String graph = hits.doc(i).getValues("graph")[0];
            Integer known = docIdByGraph.get(graph);
            // -1 marks a hit whose graph string matches none of the indexed graphs.
            int docID = (known == null) ? -1 : known.intValue();
            resultsSearch.add(new ResultHolder(docID, graph, hits.score(i)));
        }
        Collections.sort(resultsSearch);
        printPrecisionRecallPlot(resultsMcs, resultsSearch);
    }

    /**
     * Ranks all indexed files once by MCS similarity of their graph to the
     * query and once by a full text search for the labels of the query nodes,
     * then prints the precision line for both rankings.
     *
     * @param ir    reader on the path index, source of the candidate graphs and their files
     * @param query graph used as query
     * @param is    not used by this method; the full text index is searched instead
     * @throws IOException    if one of the indexes cannot be read
     * @throws ParseException if the generated full text query cannot be parsed
     */
    private void testDirectQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException {
        Map<Integer, String> node2label = readNodeLabels();

        // create results from mcs, one entry per file of each document:
        LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>();
        for (int j = 0; j < ir.numDocs(); j++) {
            Graph model = new Graph(ir.document(j).getValues("graph")[0]);
            float mcsSimilarity = query.getMcsSimilarity(model);
            String[] file = ir.document(j).getValues("file");
            for (int i = 0; i < file.length; i++) {
                resultsMcs.add(new ResultHolder(mcsSimilarity, file[i]));
            }
        }
        Collections.sort(resultsMcs);

        // create results from search: the query is the blank separated list of node labels.
        StringBuilder qBuilder = new StringBuilder(64);
        for (Iterator<Node> iterator = query.getNodes().iterator(); iterator.hasNext();) {
            Node node = iterator.next();
            qBuilder.append(node2label.get(node.getNodeID()));
            qBuilder.append(" ");
        }
        QueryParser qParse = new QueryParser("all", new WhitespaceAnalyzer());
        Query q = qParse.parse(qBuilder.toString());

        LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>();
        IndexSearcher searcher = new IndexSearcher(indexPath("idx_fulltext"));
        try {
            Hits hits = searcher.search(q);
            // Hits must be read completely before the searcher is closed.
            for (int i = 0; i < hits.length(); i++) {
                String file = hits.doc(i).getValues("file")[0];
                resultsSearch.add(new ResultHolder(hits.score(i), file));
            }
        } finally {
            searcher.close();
        }
        Collections.sort(resultsSearch);
        printPrecisionRecallPlotFileBased(resultsMcs, resultsSearch);
    }

    /**
     * Reads the "id" and "label" fields of every document in the
     * "idx_semantic" index.
     *
     * @return map from the parsed integer id to the label
     * @throws IOException if the index cannot be read
     */
    private Map<Integer, String> readNodeLabels() throws IOException {
        IndexReader reader = IndexReader.open(indexPath("idx_semantic"));
        try {
            Map<Integer, String> labels = new HashMap<Integer, String>();
            for (int j = 0; j < reader.numDocs(); j++) {
                String id = reader.document(j).getValues("id")[0];
                String label = reader.document(j).getValues("label")[0];
                labels.put(Integer.valueOf(id), label);
            }
            return labels;
        } finally {
            reader.close();
        }
    }

    /**
     * Prints the precision values of a search ranking, taking the document
     * numbers of the first {@value #NUM_LEVELS} entries of the MCS ranking as
     * the relevant set.
     *
     * @param mcs    reference ranking, best result first; may hold fewer than {@value #NUM_LEVELS} entries
     * @param search ranking under test, best result first
     * @return always the empty string; the plot line is written to System.out
     */
    public static String printPrecisionRecallPlot(LinkedList<ResultHolder> mcs, LinkedList<ResultHolder> search) {
        Set<Integer> relevant = new HashSet<Integer>(NUM_LEVELS);
        for (ResultHolder r : topResults(mcs)) {
            relevant.add(r.getDocumentNumber());
        }

        List<Integer> hitRanks = new ArrayList<Integer>();
        int rank = 1;
        for (ResultHolder r : search) {
            if (relevant.contains(r.getDocumentNumber())) {
                hitRanks.add(rank);
            }
            rank++;
        }
        printPrecisionLine(hitRanks);
        return "";
    }

    /**
     * Prints the precision values of a search ranking, taking the files of the
     * first {@value #NUM_LEVELS} entries of the MCS ranking as the relevant set.
     *
     * @param mcs    reference ranking, best result first; may hold fewer than {@value #NUM_LEVELS} entries
     * @param search ranking under test, best result first
     * @return always the empty string; the plot line is written to System.out
     */
    public static String printPrecisionRecallPlotFileBased(LinkedList<ResultHolder> mcs, LinkedList<ResultHolder> search) {
        Set<String> relevant = new HashSet<String>(NUM_LEVELS);
        for (ResultHolder r : topResults(mcs)) {
            relevant.add(r.getFile());
        }

        List<Integer> hitRanks = new ArrayList<Integer>();
        int rank = 1;
        for (ResultHolder r : search) {
            if (relevant.contains(r.getFile())) {
                hitRanks.add(rank);
            }
            rank++;
        }
        printPrecisionLine(hitRanks);
        return "";
    }

    /**
     * Returns the leading entries of a ranking that make up the relevant set.
     *
     * @param ranked ranking, best result first
     * @return view on at most {@value #NUM_LEVELS} leading entries; shorter if the ranking is shorter
     */
    private static List<ResultHolder> topResults(LinkedList<ResultHolder> ranked) {
        // Clamp the upper bound: a small index yields fewer than NUM_LEVELS results.
        return ranked.subList(0, Math.min(NUM_LEVELS, ranked.size()));
    }

    /**
     * Writes one line "precision 1 p1 p2 ..." to System.out, where the k-th
     * value is k divided by the 1-based rank at which the k-th relevant result
     * was found. Decimal points are replaced by commas.
     *
     * @param hitRanks 1-based ranks of the relevant results, in ascending order
     */
    private static void printPrecisionLine(List<Integer> hitRanks) {
        StringBuilder line = new StringBuilder(256);
        line.append("precision 1 ");
        int found = 1;
        for (Integer rank : hitRanks) {
            float precision = ((float) found) / ((float) rank.intValue());
            line.append(precision);
            line.append(" ");
            found++;
        }
        System.out.println(line.toString().replace('.', ','));
    }

}

/**
 * One entry of a ranked result list: a similarity score together with either
 * a document number and graph string, or a file name, depending on the
 * constructor used.
 * <p/>
 * The natural order sorts entries by descending similarity, so that
 * Collections.sort() puts the best result first.
 */
class ResultHolder implements Comparable<ResultHolder> {
    private float similarity;
    private int documentNumber;
    private String graph;
    private String file;

    /**
     * Creates an entry identified by document number and graph; the file stays null.
     *
     * @param documentNumber number of the document within the index
     * @param graph          string form of the document's graph
     * @param similarity     score used for ranking
     */
    public ResultHolder(int documentNumber, String graph, float similarity) {
        this.documentNumber = documentNumber;
        this.graph = graph;
        this.similarity = similarity;
    }

    /**
     * Creates an entry identified by file; document number stays 0 and graph stays null.
     *
     * @param similarity score used for ranking
     * @param file       file the result refers to
     */
    public ResultHolder(float similarity, String file) {
        this.file = file;
        this.similarity = similarity;
    }

    public String getFile() {
        return file;
    }

    public int getDocumentNumber() {
        return documentNumber;
    }

    public void setDocumentNumber(int documentNumber) {
        this.documentNumber = documentNumber;
    }

    public String getGraph() {
        return graph;
    }

    public void setGraph(String graph) {
        this.graph = graph;
    }

    public float getSimilarity() {
        return similarity;
    }

    public void setSimilarity(float similarity) {
        this.similarity = similarity;
    }

    /**
     * Orders entries by descending similarity.
     * <p/>
     * Float.compare is used instead of the sign of a subtraction because the
     * subtraction yields NaN (and thus 0, "equal") for NaN scores and for two
     * equal infinities, which is not a consistent ordering. With Float.compare
     * a NaN similarity counts as larger than every other value and therefore
     * sorts to the front.
     *
     * @param other entry to compare with, must not be null
     * @return negative if this entry has the higher similarity, positive if
     *         the other one has, 0 if both are equal
     */
    public int compareTo(ResultHolder other) {
        // Arguments are swapped on purpose: higher similarity comes first.
        return Float.compare(other.similarity, similarity);
    }
}