/*
* This file is part of the LIRE project: http://www.semanticmetadata.net/lire
* LIRE is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* LIRE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with LIRE; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
 * We kindly ask you to refer to any one of the following publications in
 * any publication mentioning or employing Lire:
*
* Lux Mathias, Savvas A. Chatzichristofis. Lire: Lucene Image Retrieval –
* An Extensible Java CBIR Library. In proceedings of the 16th ACM International
* Conference on Multimedia, pp. 1085-1088, Vancouver, Canada, 2008
* URL: http://doi.acm.org/10.1145/1459359.1459577
*
* Lux Mathias. Content Based Image Retrieval with LIRE. In proceedings of the
* 19th ACM International Conference on Multimedia, pp. 735-738, Scottsdale,
* Arizona, USA, 2011
* URL: http://dl.acm.org/citation.cfm?id=2072432
*
* Mathias Lux, Oge Marques. Visual Information Retrieval using Java and LIRE
* Morgan & Claypool, 2013
* URL: http://www.morganclaypool.com/doi/abs/10.2200/S00468ED1V01Y201301ICR025
*
* Copyright statement:
* --------------------
* (c) 2002-2013 by Mathias Lux (mathias@juggle.at)
* http://www.semanticmetadata.net/lire, http://www.lire-project.net
*/
package net.semanticmetadata.lire.invertedlist;
import junit.framework.TestCase;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import java.util.StringTokenizer;
//import org.apache.lucene.index.Norm;
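/**
 * Test of an inverted-list style approximate image search based on reference
 * objects: each image is represented by the ordering of its nearest reference
 * objects, stored as a space-separated string in the "ro-order" field, and
 * retrieval is done with a Boolean OR query over that ordering. Most of the
 * original experiments below are commented out; they were written against an
 * older Lucene API.
 */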
public class GeneralInvertedListTest extends TestCase {
// path of the test index, holding CEDD features extracted from Flickr images:
private String indexPath = "./test-index-cedd-flickr";
// number of nearest reference objects stored per image in the "ro-order" field:
private int numRefObjsReferenced = 50;
// total number of reference objects sampled from the index:
private int numRefObjs = 500;
/*
public void testIndexing() throws IOException {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
int numDocs = reader.numDocs();
System.out.println("numDocs = " + numDocs);
int docs = reader.numDocs();
boolean hasDeletions = reader.hasDeletions();
numRefObjs = 500;
// int numRefObjs = (int) Math.sqrt(docs);
System.out.println("numRefObjs = " + numRefObjs);
// init reference objects:
IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
HashSet<Integer> referenceObjectIds = new HashSet<Integer>(numRefObjs);
double numDocsDouble = (double) numDocs;
while (referenceObjectIds.size() < numRefObjs) {
referenceObjectIds.add((int) (numDocsDouble * Math.random()));
}
int count = 0;
for (int i : referenceObjectIds) {
count++;
// todo: check if deleted ...
Document document = reader.document(i);
document.add(new Field("ro-id", count + "", Field.Store.YES, Field.Index.NOT_ANALYZED));
iw.addDocument(document);
}
iw.commit();
iw.close();
// now find the reference objects for each entry ;)
IndexReader readerRo = IndexReader.open(FSDirectory.open(new File(indexPath + "-ro")));
ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(numRefObjsReferenced);
PerFieldAnalyzerWrapper wrapper =
new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION));
wrapper.addAnalyzer("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
iw = LuceneUtils.createIndexWriter(indexPath + "-new", true);
// iw = new IndexWriter(FSDirectory.open(new File(indexPath + "-new")), wrapper, true, IndexWriter.MaxFieldLength.UNLIMITED);
StringBuilder sb = new StringBuilder(256);
for (int i = 0; i < docs; i++) {
if (hasDeletions && reader.isDeleted(i)) {
continue;
}
Document document = reader.document(i);
ImageSearchHits hits = searcher.search(document, readerRo);
sb.delete(0, sb.length());
for (int j = 0; j < numRefObjsReferenced; j++) {
sb.append(hits.doc(j).getValues("ro-id")[0]);
sb.append(' ');
}
// System.out.println(sb.toString());
document.add(new Field("ro-order", sb.toString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
iw.addDocument(document);
}
iw.commit();
iw.close();
}
public void testExplicitSearch() throws IOException {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath + "-new")));
int numSearches = 50;
String query = reader.document(2).getValues("ro-order")[0];
TopDocs docs;
long ms = System.currentTimeMillis();
for (int i = 0; i < numSearches; i++) {
query = reader.document(i).getValues("ro-order")[0];
docs = scoreDocs(query, reader);
}
ms = System.currentTimeMillis() - ms;
System.out.println("ms = " + ms);
// for (int i = 0; i < docs.scoreDocs.length; i++) {
// ScoreDoc scoreDoc = docs.scoreDocs[i];
// System.out.println("<img title=\"Score: "+scoreDoc.score+"\" src=\"file:///"+reader.document(scoreDoc.doc).getValues("descriptorImageIdentifier")[0]+"\"><p>");
// }
ImageSearcher ceddSearcher = ImageSearcherFactory.createCEDDImageSearcher(100);
ms = System.currentTimeMillis();
for (int i = 0; i < numSearches; i++) {
ceddSearcher.search(reader.document(i), reader);
}
ms = System.currentTimeMillis() - ms;
System.out.println("ms = " + ms);
}
public TopDocs scoreDocs(String queryString, IndexReader reader) throws IOException {
StringTokenizer st = new StringTokenizer(queryString);
int position = 0;
HashMap<Integer, Integer> doc2score = new HashMap<Integer, Integer>();
HashMap<Integer, Integer> doc2count = new HashMap<Integer, Integer>();
int currDoc = 0;
while (st.hasMoreTokens()) {
TermPositions tp = reader.termPositions(new Term("ro-order", st.nextToken()));
while (tp.next()) {
currDoc = tp.doc();
// System.out.println(tp.doc() + ": " + tp.nextPosition());
if (doc2score.get(currDoc) == null) {
doc2score.put(currDoc, Math.abs(tp.nextPosition() - position));
doc2count.put(currDoc, 1);
} else {
doc2score.put(currDoc, doc2score.get(currDoc) + Math.abs(tp.nextPosition() - position));
doc2count.put(currDoc, doc2count.get(currDoc) + 1);
}
}
position++;
}
// fill up all the remaining doc scores,
throw new UnsupportedOperationException("Not implemented");
}*/
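// Illustrative sketch (an addition, not part of the original experiments):
// the commented-out scoreDocs(...) above ranks documents by the sum of
// absolute differences between each reference object's position in the query
// ordering and its position in the document's "ro-order" field, i.e. a
// footrule-style distance between two orderings. The hypothetical helper
// below shows that computation on plain strings, without any Lucene
// dependency; lower values mean more similar orderings.
static int positionDistance(String queryOrder, String docOrder) {
    // remember the position of each reference object id in the document ordering
    java.util.HashMap<String, Integer> docPos = new java.util.HashMap<String, Integer>();
    StringTokenizer st = new StringTokenizer(docOrder);
    int p = 0;
    while (st.hasMoreTokens()) {
        docPos.put(st.nextToken(), p++);
    }
    // accumulate |query position - document position| over the query ordering
    int distance = 0;
    st = new StringTokenizer(queryOrder);
    p = 0;
    while (st.hasMoreTokens()) {
        Integer dp = docPos.get(st.nextToken());
        if (dp != null) { // ids missing from the document ordering are skipped here
            distance += Math.abs(dp - p);
        }
        p++;
    }
    return distance;
}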
/**
 * Builds a Boolean OR query from a space-separated list of reference object
 * ids: one optional (SHOULD) TermQuery on the "ro-order" field per id.
 */
public static Query getQuery(String queryString) {
    BooleanQuery b = new BooleanQuery();
    StringTokenizer st = new StringTokenizer(queryString);
    while (st.hasMoreTokens()) {
        b.add(new BooleanClause(new TermQuery(new Term("ro-order", st.nextToken())),
                BooleanClause.Occur.SHOULD));
    }
    return b;
}
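// Minimal usage sketch (an addition, not part of the original test suite):
// getQuery should yield one optional SHOULD clause per whitespace-separated
// reference object id.
public void testGetQuery() {
    BooleanQuery q = (BooleanQuery) getQuery("17 4 23");
    assertEquals(3, q.clauses().size());
    for (BooleanClause clause : q.clauses()) {
        assertEquals(BooleanClause.Occur.SHOULD, clause.getOccur());
    }
}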
}
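// The commented-out PositionScorer below appears to be adapted from Lucene's
// TermScorer: it still references TermScorer state (termDocs, norms, weight,
// weightValue, scoreCache) that is never declared here, so it would not
// compile even if uncommented.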
/*class PositionScorer extends Scorer {
private int doc;
private final int[] docs = new int[32]; // buffered doc numbers
private final int[] freqs = new int[32]; // buffered term freqs
private int pointer;
private int pointerMax;
private TermPositions tp;
*//**
* Constructs a Scorer.
*
* @param similarity The <code>Similarity</code> implementation used by this scorer.
*//*
public PositionScorer(Similarity similarity, TermPositions tp) {
super(similarity);
this.tp = tp;
}
public void score(HitCollector hc) throws IOException {
next();
score(hc, Integer.MAX_VALUE);
}
protected boolean score(HitCollector c, int end) throws IOException {
Similarity similarity = getSimilarity(); // cache sim in local
float[] normDecoder = Similarity.getNormDecoder();
while (doc < end) { // for docs in window
int f = freqs[pointer];
float score = similarity.tf(f);
score *= normDecoder[norms[doc] & 0xFF]; // normalize for field
c.collect(doc, score); // collect score
if (++pointer >= pointerMax) {
pointerMax = termDocs.read(docs, freqs); // refill buffers
if (pointerMax != 0) {
pointer = 0;
} else {
termDocs.close(); // close stream
doc = Integer.MAX_VALUE; // set to sentinel value
return false;
}
}
doc = docs[pointer];
}
return true;
}
*//** Returns the current document number matching the query.
* Initially invalid, until {@link #next()} is called the first time.
*//*
public int doc() { return doc; }
*//** Advances to the next document matching the query.
* <br>The iterator over the matching documents is buffered using
* {@link TermDocs#read(int[], int[])}.
* @return true iff there is another document matching the query.
*//*
public boolean next() throws IOException {
pointer++;
if (pointer >= pointerMax) {
pointerMax = termDocs.read(docs, freqs); // refill buffer
if (pointerMax != 0) {
pointer = 0;
} else {
termDocs.close(); // close stream
doc = Integer.MAX_VALUE; // set to sentinel value
return false;
}
}
doc = docs[pointer];
return true;
}
public float score() {
int f = freqs[pointer];
float raw = // compute tf(f)*weight
f < SCORE_CACHE_SIZE // check cache
? scoreCache[f] // cache hit
: getSimilarity().tf(f)*weightValue; // cache miss
return raw * Similarity.decodeNorm(norms[doc]); // normalize for field
}
*//** Skips to the first match beyond the current whose document number is
* greater than or equal to a given target.
* <br>The implementation uses {@link TermDocs#skipTo(int)}.
* @param target The target document number.
* @return true iff there is such a match.
*//*
public boolean skipTo(int target) throws IOException {
// first scan in cache
for (pointer++; pointer < pointerMax; pointer++) {
if (docs[pointer] >= target) {
doc = docs[pointer];
return true;
}
}
// not found in cache, seek underlying stream
boolean result = termDocs.skipTo(target);
if (result) {
pointerMax = 1;
pointer = 0;
docs[pointer] = doc = termDocs.doc();
freqs[pointer] = termDocs.freq();
} else {
doc = Integer.MAX_VALUE;
}
return result;
}
*//** Returns an explanation of the score for a document.
* <br>When this method is used, the {@link #next()} method
* and the {@link #score(HitCollector)} method should not be used.
* @param doc The document number for the explanation.
*//*
public Explanation explain(int doc) throws IOException {
TermQuery query = (TermQuery)weight.getQuery();
Explanation tfExplanation = new Explanation();
int tf = 0;
while (pointer < pointerMax) {
if (docs[pointer] == doc)
tf = freqs[pointer];
pointer++;
}
if (tf == 0) {
if (termDocs.skipTo(doc))
{
if (termDocs.doc() == doc)
{
tf = termDocs.freq();
}
}
}
termDocs.close();
tfExplanation.setValue(getSimilarity().tf(tf));
tfExplanation.setDescription("tf(termFreq("+query.getTerm()+")="+tf+")");
return tfExplanation;
}
*/
/**
* Returns a string representation of this <code>PositionScorer</code>.
*//*
public String toString() { return "scorer(" + weight + ")"; }
}*/
//class PlainSimilarity extends Similarity {
//    // unimplemented stub for a custom Lucene 4.x Similarity
//    @Override
//    public void computeNorm(FieldInvertState fieldInvertState, Norm norm) {
//    }
//
//    @Override
//    public SimWeight computeWeight(float v, CollectionStatistics collectionStatistics, TermStatistics... termStatisticses) {
//        return null;
//    }
//
//    @Override
//    public ExactSimScorer exactSimScorer(SimWeight simWeight, AtomicReaderContext atomicReaderContext) throws IOException {
//        return null;
//    }
//
//    @Override
//    public SloppySimScorer sloppySimScorer(SimWeight simWeight, AtomicReaderContext atomicReaderContext) throws IOException {
//        return null;
//    }
//}