/*
 * This file is part of the LIRE project: http://www.semanticmetadata.net/lire
 * LIRE is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * LIRE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LIRE; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * We kindly ask you to refer to any one of the following publications in
 * any publication mentioning or employing Lire:
 *
 * Lux Mathias, Savvas A. Chatzichristofis. Lire: Lucene Image Retrieval –
 * An Extensible Java CBIR Library. In Proceedings of the 16th ACM International
 * Conference on Multimedia, pp. 1085-1088, Vancouver, Canada, 2008
 * URL: http://doi.acm.org/10.1145/1459359.1459577
 *
 * Lux Mathias. Content Based Image Retrieval with LIRE. In Proceedings of the
 * 19th ACM International Conference on Multimedia, pp. 735-738, Scottsdale,
 * Arizona, USA, 2011
 * URL: http://dl.acm.org/citation.cfm?id=2072432
 *
 * Mathias Lux, Oge Marques. Visual Information Retrieval using Java and LIRE.
 * Morgan & Claypool, 2013
 * URL: http://www.morganclaypool.com/doi/abs/10.2200/S00468ED1V01Y201301ICR025
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2013 by Mathias Lux (mathias@juggle.at)
 *     http://www.semanticmetadata.net/lire, http://www.lire-project.net
 */
package net.semanticmetadata.lire.invertedlist;

import junit.framework.TestCase;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

import java.util.StringTokenizer;

//import org.apache.lucene.index.Norm;

public class GeneralInvertedListTest extends TestCase {
    // Path to a CEDD-based test index of Flickr images; the tests below assume it exists.
    private String indexPath = "./test-index-cedd-flickr";
    // Number of nearest reference objects stored per image in the "ro-order" field.
    private int numRefObjsReferenced = 50;
    // Total number of reference objects sampled from the index.
    private int numRefObjs = 500;
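    // The commented-out methods below sketch a permutation-based (reference object)
    // index: testIndexing() samples numRefObjs random documents as reference objects,
    // writes them to a "-ro" index, and then, for every image, stores the IDs of its
    // numRefObjsReferenced nearest reference objects -- ordered by CEDD distance -- as
    // a space-separated string in the "ro-order" field of a "-new" index. Approximate
    // search then becomes plain Lucene text retrieval over "ro-order" instead of a
    // linear scan of image features; testExplicitSearch() compares the runtime of the
    // two approaches. The methods are kept commented out because they were written
    // against an older Lucene API and require the test index at indexPath.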
/*
    public void testIndexing() throws IOException {
        IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        int docs = reader.numDocs();
        boolean hasDeletions = reader.hasDeletions();
        numRefObjs = 500;
        // int numRefObjs = (int) Math.sqrt(docs);
        System.out.println("numRefObjs = " + numRefObjs);

        // init reference objects: sample numRefObjs distinct random documents and
        // store them in a separate "-ro" index, numbered via the "ro-id" field.
        IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
        HashSet<Integer> referenceObjectIds = new HashSet<Integer>(numRefObjs);
        double numDocsDouble = (double) numDocs;
        while (referenceObjectIds.size() < numRefObjs) {
            referenceObjectIds.add((int) (numDocsDouble * Math.random()));
        }
        int count = 0;
        for (int i : referenceObjectIds) {
            count++;
            // todo: check if deleted ...
            Document document = reader.document(i);
            document.add(new Field("ro-id", count + "", Field.Store.YES, Field.Index.NOT_ANALYZED));
            iw.addDocument(document);
        }
        iw.commit();
        iw.close();

        // now find the reference objects for each entry ;)
        IndexReader readerRo = IndexReader.open(FSDirectory.open(new File(indexPath + "-ro")));
        ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(numRefObjsReferenced);
        PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION));
        wrapper.addAnalyzer("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        iw = LuceneUtils.createIndexWriter(indexPath + "-new", true);
//        iw = new IndexWriter(FSDirectory.open(new File(indexPath + "-new")), wrapper, true, IndexWriter.MaxFieldLength.UNLIMITED);
        StringBuilder sb = new StringBuilder(256);
        for (int i = 0; i < docs; i++) {
            if (hasDeletions && reader.isDeleted(i)) {
                continue;
            }
            Document document = reader.document(i);
            ImageSearchHits hits = searcher.search(document, readerRo);
            sb.delete(0, sb.length());
            // serialize the ranked list of reference object IDs into one string ...
            for (int j = 0; j < numRefObjsReferenced; j++) {
                sb.append(hits.doc(j).getValues("ro-id")[0]);
                sb.append(' ');
            }
//            System.out.println(sb.toString());
            // ... and index it as a whitespace-tokenized text field with positions.
            document.add(new Field("ro-order", sb.toString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            iw.addDocument(document);
        }
        iw.commit();
        iw.close();
    }

    public void testExplicitSearch() throws IOException {
        IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath + "-new")));
        int numSearches = 50;
        String query = reader.document(2).getValues("ro-order")[0];
        TopDocs docs;
        // time the position-based scoring over the "ro-order" field ...
        long ms = System.currentTimeMillis();
        for (int i = 0; i < numSearches; i++) {
            query = reader.document(i).getValues("ro-order")[0];
            docs = scoreDocs(query, reader);
        }
        ms = System.currentTimeMillis() - ms;
        System.out.println("ms = " + ms);
//        for (int i = 0; i < docs.scoreDocs.length; i++) {
//            ScoreDoc scoreDoc = docs.scoreDocs[i];
//            System.out.println("<img title=\"Score: " + scoreDoc.score + "\" src=\"file:///" + reader.document(scoreDoc.doc).getValues("descriptorImageIdentifier")[0] + "\"><p>");
//        }
        // ... and compare against a linear CEDD search over the same documents.
        ImageSearcher ceddSearcher = ImageSearcherFactory.createCEDDImageSearcher(100);
        ms = System.currentTimeMillis();
        for (int i = 0; i < numSearches; i++) {
            ceddSearcher.search(reader.document(i), reader);
        }
        ms = System.currentTimeMillis() - ms;
        System.out.println("ms = " + ms);
    }

    // Scores documents by summing, over all shared reference objects, the absolute
    // difference between the object's position in the query permutation and its
    // position in the document's "ro-order" field (a footrule-style distance).
    public TopDocs scoreDocs(String queryString, IndexReader reader) throws IOException {
        StringTokenizer st = new StringTokenizer(queryString);
        int position = 0;
        HashMap<Integer, Integer> doc2score = new HashMap<Integer, Integer>();
        HashMap<Integer, Integer> doc2count = new HashMap<Integer, Integer>();
        int currDoc = 0;
        while (st.hasMoreTokens()) {
            TermPositions tp = reader.termPositions(new Term("ro-order", st.nextToken()));
            while (tp.next()) {
                currDoc = tp.doc();
//                System.out.println(tp.doc() + ": " + tp.nextPosition());
                if (doc2score.get(currDoc) == null) {
                    doc2score.put(currDoc, Math.abs(tp.nextPosition() - position));
                    doc2count.put(currDoc, 1);
                } else {
                    doc2score.put(currDoc, doc2score.get(currDoc) + Math.abs(tp.nextPosition() - position));
                    doc2count.put(currDoc, doc2count.get(currDoc) + 1);
                }
            }
            position++;
        }
        // fill up all the remaining doc scores,
        throw new UnsupportedOperationException("Not implemented");
    }
*/

    // Builds a disjunctive query from a space-separated list of reference object
    // IDs: each ID becomes an optional (SHOULD) clause on the "ro-order" field,
    // so documents sharing more reference objects with the query score higher.
    public static Query getQuery(String queryString) {
        BooleanQuery b = new BooleanQuery();
        StringTokenizer st = new StringTokenizer(queryString);
        while (st.hasMoreTokens())
            b.add(new BooleanClause(new TermQuery(new Term("ro-order", st.nextToken())), BooleanClause.Occur.SHOULD));
        return b;
    }
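    // A minimal usage sketch for getQuery(), kept commented out like the tests
    // above since it needs the "-new" index built by testIndexing() and the same
    // Lucene 3.x-era classes used there (IndexReader, FSDirectory, TopDocs, plus
    // IndexSearcher). The index path and the "ro-order" field come from this file;
    // picking document 0 as the example query is an arbitrary assumption.
/*
    public void testGetQuery() throws IOException {
        IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath + "-new")));
        IndexSearcher searcher = new IndexSearcher(reader);
        // use the stored reference object permutation of some document as the query:
        String queryString = reader.document(0).getValues("ro-order")[0];
        TopDocs hits = searcher.search(getQuery(queryString), 10);
        for (ScoreDoc sd : hits.scoreDocs) {
            // documents sharing many reference objects with the query rank first
            System.out.println(sd.doc + ": " + sd.score);
        }
        searcher.close();
        reader.close();
    }
*/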
}

/*
// Apparently adapted from Lucene's TermScorer: it still references TermScorer
// members that are not declared here (norms, termDocs, weight, weightValue,
// scoreCache, SCORE_CACHE_SIZE), and the TermPositions passed to the constructor
// is never used for scoring, so those members would need to be filled in before
// this class could compile.
class PositionScorer extends Scorer {
    private int doc;
    private final int[] docs = new int[32];   // buffered doc numbers
    private final int[] freqs = new int[32];  // buffered term freqs
    private int pointer;
    private int pointerMax;
    private TermPositions tp;

    *//**
     * Constructs a Scorer.
     *
     * @param similarity The <code>Similarity</code> implementation used by this scorer.
     *//*
    public PositionScorer(Similarity similarity, TermPositions tp) {
        super(similarity);
        this.tp = tp;
    }

    public void score(HitCollector hc) throws IOException {
        next();
        score(hc, Integer.MAX_VALUE);
    }

    protected boolean score(HitCollector c, int end) throws IOException {
        Similarity similarity = getSimilarity();  // cache sim in local
        float[] normDecoder = Similarity.getNormDecoder();
        while (doc < end) {  // for docs in window
            int f = freqs[pointer];
            float score = similarity.tf(f);
            score *= normDecoder[norms[doc] & 0xFF];  // normalize for field

            c.collect(doc, score);  // collect score

            if (++pointer >= pointerMax) {
                pointerMax = termDocs.read(docs, freqs);  // refill buffers
                if (pointerMax != 0) {
                    pointer = 0;
                } else {
                    termDocs.close();         // close stream
                    doc = Integer.MAX_VALUE;  // set to sentinel value
                    return false;
                }
            }
            doc = docs[pointer];
        }
        return true;
    }

    *//** Returns the current document number matching the query.
     * Initially invalid, until {@link #next()} is called the first time.
     *//*
    public int doc() {
        return doc;
    }

    *//** Advances to the next document matching the query.
     * <br>The iterator over the matching documents is buffered using
     * {@link TermDocs#read(int[], int[])}.
     * @return true iff there is another document matching the query.
     *//*
    public boolean next() throws IOException {
        pointer++;
        if (pointer >= pointerMax) {
            pointerMax = termDocs.read(docs, freqs);  // refill buffer
            if (pointerMax != 0) {
                pointer = 0;
            } else {
                termDocs.close();         // close stream
                doc = Integer.MAX_VALUE;  // set to sentinel value
                return false;
            }
        }
        doc = docs[pointer];
        return true;
    }

    public float score() {
        int f = freqs[pointer];
        float raw =                          // compute tf(f)*weight
                f < SCORE_CACHE_SIZE         // check cache
                        ? scoreCache[f]      // cache hit
                        : getSimilarity().tf(f) * weightValue;  // cache miss

        return raw * Similarity.decodeNorm(norms[doc]);  // normalize for field
    }

    *//** Skips to the first match beyond the current whose document number is
     * greater than or equal to a given target.
     * <br>The implementation uses {@link TermDocs#skipTo(int)}.
     * @param target The target document number.
     * @return true iff there is such a match.
     *//*
    public boolean skipTo(int target) throws IOException {
        // first scan in cache
        for (pointer++; pointer < pointerMax; pointer++) {
            if (docs[pointer] >= target) {
                doc = docs[pointer];
                return true;
            }
        }

        // not found in cache, seek underlying stream
        boolean result = termDocs.skipTo(target);
        if (result) {
            pointerMax = 1;
            pointer = 0;
            docs[pointer] = doc = termDocs.doc();
            freqs[pointer] = termDocs.freq();
        } else {
            doc = Integer.MAX_VALUE;
        }
        return result;
    }

    *//** Returns an explanation of the score for a document.
     * <br>When this method is used, the {@link #next()} method
     * and the {@link #score(HitCollector)} method should not be used.
     * @param doc The document number for the explanation.
     *//*
    public Explanation explain(int doc) throws IOException {
        TermQuery query = (TermQuery) weight.getQuery();
        Explanation tfExplanation = new Explanation();
        int tf = 0;
        while (pointer < pointerMax) {
            if (docs[pointer] == doc)
                tf = freqs[pointer];
            pointer++;
        }
        if (tf == 0) {
            if (termDocs.skipTo(doc)) {
                if (termDocs.doc() == doc) {
                    tf = termDocs.freq();
                }
            }
        }
        termDocs.close();
        tfExplanation.setValue(getSimilarity().tf(tf));
        tfExplanation.setDescription("tf(termFreq(" + query.getTerm() + ")=" + tf + ")");
        return tfExplanation;
    }
*/
    /**
     * Returns a string representation of this <code>PositionScorer</code>.
     *//*
    public String toString() {
        return "scorer(" + weight + ")";
    }
}*/

//class PlainSimilarity extends Similarity {
//    @Override
//    public void computeNorm(FieldInvertState fieldInvertState, Norm norm) {
//        //To change body of implemented methods use File | Settings | File Templates.
//    }
//
//    @Override
//    public SimWeight computeWeight(float v, CollectionStatistics collectionStatistics, TermStatistics... termStatisticses) {
//        return null;  //To change body of implemented methods use File | Settings | File Templates.
//    }
//
//    @Override
//    public ExactSimScorer exactSimScorer(SimWeight simWeight, AtomicReaderContext atomicReaderContext) throws IOException {
//        return null;  //To change body of implemented methods use File | Settings | File Templates.
//    }
//
//    @Override
//    public SloppySimScorer sloppySimScorer(SimWeight simWeight, AtomicReaderContext atomicReaderContext) throws IOException {
//        return null;  //To change body of implemented methods use File | Settings | File Templates.
//    }
//}
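// A minimal sketch of how a completed PlainSimilarity would be plugged in,
// assuming the Lucene 4.x API the stub above targets; "analyzer" stands for
// whatever Analyzer the index uses, and LuceneUtils.LUCENE_VERSION is the
// version constant already referenced elsewhere in this file.
//
//    Similarity sim = new PlainSimilarity();
//    IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, analyzer);
//    config.setSimilarity(sim);    // applied at indexing time (norm computation)
//    IndexSearcher searcher = new IndexSearcher(reader);
//    searcher.setSimilarity(sim);  // applied at query time (scoring)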