GenericDocValuesImageSearcher.java example

Explorer

lire-master
- samples
  - classifier
    - src
      - net
        semanticmetadata
        lire
        classifiers
        ClassifierTest.java
        HashingSearchBasedClassifierMod.java
  - liredemo
    - src
      - main
        java
        edu
        uniklu
        itec
        mosaix
        ImageFunctions.java
        engine
        Engine.java
        EngineObserver.java
        Experimental.java
        LeastUsedWeightingStrategy.java
        Logging.java
        ProportionWeightingStrategy.java
        RandomWeightingStrategy.java
        SimpleWeightingData.java
        SimpleWeightingDataFactory.java
        WeightingData.java
        WeightingDataFactory.java
        WeightingStrategy.java
        liredemo
        ImagePanel.java
        IndexingThread.java
        LireDemoFrame.java
        Main.java
        ProgressMonitor.java
        SearchResultsTableModel.java
        flickr
        FlickrDownloadThread.java
        FlickrIndexingThread.java
        FlickrPhoto.java
        FlickrPhotoGrabber.java
        indexing
        MetadataBuilder.java
        ParallelIndexer.java
      - test
        java
        liredemo
        flickr
        FlickrPhotoGrabberTest.java
  - simpleapplication
    - src
      - main
        java
        net
        semanticmetadata
        lire
        sampleapp
        CreateARFFFile.java
        ExtractFeatures.java
        ExtractMultipleFeatures.java
        ExtractSingleFeature.java
        Indexer.java
        IndexingAndSearchWithLocalFeatures.java
        ParallelIndexing.java
        Searcher.java
  - teaching
    - src
      - main
        java
        samples
        Indexing.java
        Search.java
- src

/*
 * This file is part of the LIRE project: http://lire-project.net
 * LIRE is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * LIRE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LIRE; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * We kindly ask you to refer the any or one of the following publications in
 * any publication mentioning or employing Lire:
 *
 * Lux Mathias, Savvas A. Chatzichristofis. Lire: Lucene Image Retrieval –
 * An Extensible Java CBIR Library. In proceedings of the 16th ACM International
 * Conference on Multimedia, pp. 1085-1088, Vancouver, Canada, 2008
 * URL: http://doi.acm.org/10.1145/1459359.1459577
 *
 * Lux Mathias. Content Based Image Retrieval with LIRE. In proceedings of the
 * 19th ACM International Conference on Multimedia, pp. 735-738, Scottsdale,
 * Arizona, USA, 2011
 * URL: http://dl.acm.org/citation.cfm?id=2072432
 *
 * Mathias Lux, Oge Marques. Visual Information Retrieval using Java and LIRE
 * Morgan & Claypool, 2013
 * URL: http://www.morganclaypool.com/doi/abs/10.2200/S00468ED1V01Y201301ICR025
 *
 * Copyright statement:
 * ====================
 * (c) 2002-2013 by Mathias Lux (mathias@juggle.at)
 *  http://www.semanticmetadata.net/lire, http://www.lire-project.net
 *
 * Updated: 18.01.15 07:31
 */
package net.semanticmetadata.lire.searchers;

import net.semanticmetadata.lire.builders.DocumentBuilder;
import net.semanticmetadata.lire.builders.GlobalDocumentBuilder;
import net.semanticmetadata.lire.imageanalysis.features.GlobalFeature;
import net.semanticmetadata.lire.imageanalysis.features.LireFeature;
import net.semanticmetadata.lire.indexers.parallel.ExtractorItem;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;

import java.awt.image.BufferedImage;
import java.io.IOException;
import java.util.TreeSet;
import java.util.logging.Logger;

/**
 * A generic image searcher for global features that uses DocValues instead of Lucene text fields. Please make sure you
 * created the index using th useDocValues option in
 *
 * @author Mathias Lux, mathias@juggle.at
 */
public class GenericDocValuesImageSearcher extends AbstractImageSearcher {
    protected Logger logger = Logger.getLogger(getClass().getName());
    protected String fieldName;
    protected LireFeature cachedInstance = null;
    protected ExtractorItem extractorItem;

    protected IndexReader reader = null;

    protected int maxHits = 50;
    protected TreeSet<SimpleResult> docs = new TreeSet<SimpleResult>();
    protected double maxDistance;
    protected boolean useSimilarityScore = false;

    private BinaryDocValues docValues = null;


    public GenericDocValuesImageSearcher(int maxHits, Class<? extends GlobalFeature> globalFeature, IndexReader reader) {
        this.maxHits = maxHits;
        this.extractorItem = new ExtractorItem(globalFeature);
        this.fieldName = extractorItem.getFieldName();
        try {
            this.cachedInstance = (GlobalFeature) extractorItem.getExtractorInstance().getClass().newInstance();
        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        }
        this.reader = reader;
        init();
    }

    protected void init() {
        // put all respective features into an in-memory cache ...
        if (reader != null) {
            try {
                docValues = MultiDocValues.getBinaryValues(reader, cachedInstance.getFieldName());
            } catch (IOException e) {
                e.printStackTrace();
            }

        }
    }


    /**
     * @param lireFeature
     * @return the maximum distance found for normalizing.
     * @throws IOException
     */
    protected double findSimilar(LireFeature lireFeature) throws IOException {
        maxDistance = -1d;

        // clear result set ...
        docs.clear();
        // Needed for check whether the document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        Document d;
        double tmpDistance;
        int docs = reader.numDocs();
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.

            cachedInstance.setByteArrayRepresentation(docValues.get(i).bytes, docValues.get(i).offset, docValues.get(i).length);

            tmpDistance = cachedInstance.getDistance(lireFeature);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, i));
                if (tmpDistance > maxDistance) maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least on of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, i));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
        return maxDistance;
    }


    // This is an approach based on DocValues. It's extremely fast, even without caching, but I don't know if it's in
    // RAM or not, ie. if I can fill up RAM with all documents at once.
    public ImageSearchHits search(int doc) throws IOException {
        SimpleImageSearchHits searchHits = null;
        LireFeature lireFeature = extractorItem.getFeatureInstance();
//        BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(reader, lireFeature.getFieldName());
        lireFeature.setByteArrayRepresentation(docValues.get(doc).bytes, docValues.get(doc).offset, docValues.get(doc).length);
        double maxDistance = findSimilar(lireFeature);

        if (!useSimilarityScore) {
            searchHits = new SimpleImageSearchHits(this.docs, maxDistance);
        } else {
            searchHits = new SimpleImageSearchHits(this.docs, maxDistance, useSimilarityScore);
        }
        return searchHits;
    }


    public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
        IndexSearcher is = new IndexSearcher(reader);
        TermQuery tq = new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, doc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
        TopDocs topDocs = is.search(tq, 1);
        if (topDocs.totalHits > 0) {
            return search(topDocs.scoreDocs[0].doc);
        } else return null;
    }

    public ImageSearchHits search(BufferedImage image, IndexReader reader) throws IOException {
        logger.finer("Starting extraction.");
        SimpleImageSearchHits searchHits = null;

        GlobalDocumentBuilder globalDocumentBuilder = new GlobalDocumentBuilder();
        GlobalFeature globalFeature = globalDocumentBuilder.extractGlobalFeature(image, (GlobalFeature) extractorItem.getExtractorInstance());

        double maxDistance = findSimilar(globalFeature);
        if (!useSimilarityScore) {
            searchHits = new SimpleImageSearchHits(this.docs, maxDistance);
        } else {
            searchHits = new SimpleImageSearchHits(this.docs, maxDistance, useSimilarityScore);
        }

        return searchHits;

    }

    public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
        throw new UnsupportedOperationException("not implemented yet.");

    }

    public String toString() {
        return "GenericDocValuesImageSearcher using " + extractorItem.getExtractorClass().getName();
    }

}