/*
 * This file is part of the LIRE project: http://www.semanticmetadata.net/lire
 * LIRE is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * LIRE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LIRE; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * We kindly ask you to refer the any or one of the following publications in
 * any publication mentioning or employing Lire:
 *
 * Lux Mathias, Savvas A. Chatzichristofis. Lire: Lucene Image Retrieval –
 * An Extensible Java CBIR Library. In proceedings of the 16th ACM International
 * Conference on Multimedia, pp. 1085-1088, Vancouver, Canada, 2008
 * URL: http://doi.acm.org/10.1145/1459359.1459577
 *
 * Lux Mathias. Content Based Image Retrieval with LIRE. In proceedings of the
 * 19th ACM International Conference on Multimedia, pp. 735-738, Scottsdale,
 * Arizona, USA, 2011
 * URL: http://dl.acm.org/citation.cfm?id=2072432
 *
 * Mathias Lux, Oge Marques. Visual Information Retrieval using Java and LIRE
 * Morgan & Claypool, 2013
 * URL: http://www.morganclaypool.com/doi/abs/10.2200/S00468ED1V01Y201301ICR025
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2013 by Mathias Lux (mathias@juggle.at)
 *     http://www.semanticmetadata.net/lire, http://www.lire-project.net
 */
package net.semanticmetadata.lire.lucene;

import junit.framework.TestCase;
import net.semanticmetadata.lire.DocumentBuilder;
import net.semanticmetadata.lire.DocumentBuilderFactory;
import net.semanticmetadata.lire.ImageSearchHits;
import net.semanticmetadata.lire.filter.RerankFilter;
import net.semanticmetadata.lire.imageanalysis.FCTH;
import net.semanticmetadata.lire.utils.FileUtils;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * A test class for image based re-ranking of results from a text search with Lucene.
 * It's based on JUnit (it's either included in your IDE or just google for the jar).
 * The test data employed is in the LIRE SVN at
 * https://code.google.com/p/lire/source/checkout
 * <p>
 * {@link #testSearch()} opens the index directory written by {@link #testIndexing()},
 * so the index has to exist before the search test is run.
 *
 * @author Mathias Lux mathias@juggle.at
 */
public class TestRerankTextSearch extends TestCase {
    /** Name of the Lucene field the tag text of an image is stored in. */
    private static final String FIELD_TAGS = "tags";

    /**
     * Id of the indexed document used as query (text and image).
     * Lucene document ids are zero-based, so this is the second document of the index.
     */
    private static final int QUERY_DOC_ID = 1;

    /**
     * Number of hits requested from the text search.
     * NOTE: This number is critical. The fewer documents are returned by the text search, the
     * less the image re-ranking can mess up. However, the recall (the absolute number of relevant
     * documents returned) is also influenced by this. Best to try several values like
     * 10, 100, 200, 500, ...
     */
    private static final int NUM_TEXT_RESULTS = 10;

    // that's where we put the index for testing:
    private final File testIndex = new File("textindextest");

    /**
     * Indexes all images below {@code testdata/ferrari}: each document gets the FCTH feature
     * from the LIRE {@link DocumentBuilder} plus the tags read from the XML file that sits
     * next to the image (same path, extension replaced by {@code .xml}).
     *
     * @throws IOException                  if an image, an XML file or the index cannot be accessed.
     * @throws ParserConfigurationException if no SAX parser can be created.
     * @throws SAXException                 if one of the XML files cannot be parsed.
     */
    public void testIndexing() throws IOException, ParserConfigurationException, SAXException {
        IndexWriterConfig iwConf =
                new IndexWriterConfig(Version.LUCENE_42, new SimpleAnalyzer(Version.LUCENE_42));
        // Always start with a fresh index. Lucene's default open mode is CREATE_OR_APPEND, which
        // would add every image again on each run of this test. The open mode has to be set
        // before the IndexWriter is created. If you want to append to a pre-existing index use
        // IndexWriterConfig.OpenMode.APPEND instead.
        iwConf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // create a LIRE DocumentBuilder for extracting FCTH
        // (just an example, every other feature will do).
        DocumentBuilder builder = DocumentBuilderFactory.getFCTHDocumentBuilder();
        ArrayList<File> files = FileUtils.getAllImageFiles(new File("testdata/ferrari"), true);
        // fail with a readable message instead of a NullPointerException in the loop below,
        // in case no file list is returned.
        assertNotNull("No test images found in testdata/ferrari", files);

        // for handling the XML of the test data set
        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setNamespaceAware(true);
        SAXParser saxParser = spf.newSAXParser();
        XMLReader xmlReader = saxParser.getXMLReader();

        IndexWriter iw = new IndexWriter(FSDirectory.open(testIndex), iwConf);
        try {
            for (File img : files) {
                // don't forget to add the document to the index.
                iw.addDocument(createTaggedDocument(builder, xmlReader, img));
            }
        } finally {
            // close in any case, otherwise a failed run leaves the index write lock behind.
            iw.close();
        }
    }

    /**
     * Creates the Lucene document for a single image: image features from the given
     * builder plus the tags from the image's XML side-car file.
     *
     * @param builder   the LIRE DocumentBuilder extracting the image features.
     * @param xmlReader the reader used for parsing the XML file of the image.
     * @param img       the image file to index.
     * @return the document, ready to be added to the index.
     * @throws IOException  if the image or its XML file cannot be read.
     * @throws SAXException if the XML file cannot be parsed.
     */
    private Document createTaggedDocument(DocumentBuilder builder, XMLReader xmlReader, File img)
            throws IOException, SAXException {
        String path = img.getCanonicalPath();

        // create the document with the LIRE DocumentBuilder,
        // this adds the image features to the document.
        Document d;
        FileInputStream in = new FileInputStream(img);
        try {
            d = builder.createDocument(in, path);
        } finally {
            in.close();
        }

        // handling the XML of the test data set
        String xmlPath = path.substring(0, path.lastIndexOf('.')) + ".xml";
        TagHandler handler = new TagHandler();
        xmlReader.setContentHandler(handler);
        xmlReader.parse(new InputSource(new File(xmlPath).toURI().toString()));

        // add the text to the document ...
        d.add(new TextField(FIELD_TAGS, handler.getTags(), Field.Store.YES));
        return d;
    }

    /**
     * Runs a text search with the tags of document {@link #QUERY_DOC_ID}, prints the hits,
     * re-ranks them with the FCTH feature of the same document and prints the re-ranked
     * hits. Both result lists are also written to HTML and opened in a browser.
     *
     * @throws IOException    if the index cannot be read.
     * @throws ParseException if the tags cannot be turned into a query.
     */
    public void testSearch() throws IOException, ParseException {
        // create a Lucene IndexReader and the according IndexSearcher:
        IndexReader reader = DirectoryReader.open(FSDirectory.open(testIndex));
        try {
            assertTrue("Index needs more than " + QUERY_DOC_ID + " document(s), run testIndexing first.",
                    reader.maxDoc() > QUERY_DOC_ID);
            IndexSearcher searcher = new IndexSearcher(reader);

            // the query document is needed several times, so it is loaded just once.
            // DocumentBuilder.FIELD_NAME_IDENTIFIER gets you the actual image file path.
            Document queryDoc = reader.document(QUERY_DOC_ID);
            String queryImage = queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];

            // The QueryParser takes a String and creates a query out of it. Make sure you use
            // the same field as for indexing, in this case "tags".
            QueryParser q = new QueryParser(Version.LUCENE_42, FIELD_TAGS,
                    new SimpleAnalyzer(Version.LUCENE_42));
            // let's just take the tags of the query document. They are plain text and not a
            // query, so characters with a meaning in the query syntax need to be escaped.
            Query query = q.parse(QueryParser.escape(queryDoc.getValues(FIELD_TAGS)[0]));

            // now that's the actual search:
            TopDocs results = searcher.search(query, NUM_TEXT_RESULTS);

            // here we print the results of the text search.
            System.out.println("-----------> SEARCH RESULTS ...");
            for (int i = 0; i < results.scoreDocs.length; i++) {
                ScoreDoc scoreDoc = results.scoreDocs[i];
                Document hit = reader.document(scoreDoc.doc);
                System.out.print(scoreDoc.score + "\t: ");
                // LIRE manages all needed field names as static Strings in DocumentBuilder ...
                System.out.print(hit.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " -> ");
                System.out.println(hit.getValues(FIELD_TAGS)[0]);
            }
            // just for a visual example ... this will pop up a browser window
            FileUtils.browseUri(FileUtils.saveImageResultsToHtml("text", results, reader, queryImage));

            // and now for the re-ranking:
            // make sure to use a low level feature that has been indexed -- check the
            // DocumentBuilder used in testIndexing().
            RerankFilter rerank = new RerankFilter(FCTH.class, DocumentBuilder.FIELD_NAME_FCTH);
            // note that you need the document here, it contains the low level feature ...
            // if you don't have it but just the image you need to create a new one with the
            // appropriate DocumentBuilder -- check the DocumentBuilder used in testIndexing().
            ImageSearchHits hitsReranked = rerank.filter(results, reader, queryDoc);

            // and here we print the re-ranked hits:
            System.out.println("-----------> RERANKED ...");
            for (int i = 0; i < hitsReranked.length(); i++) {
                Document hit = hitsReranked.doc(i);
                System.out.print(hitsReranked.score(i) + "\t: ");
                System.out.print(hit.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " -> ");
                System.out.println(hit.getValues(FIELD_TAGS)[0]);
            }
            // just for a visual example ... this will pop up a browser window.
            FileUtils.browseUri(FileUtils.saveImageResultsToHtml("reranked", hitsReranked, queryImage));
        } finally {
            reader.close();
        }
    }

    /**
     * SAX handler for the XML of the test data set, we just want the tags: it collects the
     * character content of all elements whose local name starts with "tag" and joins it
     * with single spaces. No need for that if you get the text elsewhere.
     */
    static class TagHandler extends DefaultHandler {
        private final StringBuilder sb = new StringBuilder(1024);
        // true while the parser is inside an element whose local name starts with "tag".
        private boolean inTag = false;

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if (localName.startsWith("tag")) {
                inTag = true;
            }
            super.startElement(uri, localName, qName, attributes);
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (localName.startsWith("tag")) {
                inTag = false;
                // separator, so the text of consecutive elements does not run together.
                sb.append(' ');
            }
            super.endElement(uri, localName, qName);
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inTag) {
                sb.append(ch, start, length);
            }
        }

        /**
         * @return the text collected so far, without leading and trailing whitespace.
         */
        public String getTags() {
            return sb.toString().trim();
        }
    }
}