LuceneUtils.java example

Explorer

lire-master
- samples
  - classifier
    - src
      - net
        semanticmetadata
        lire
        classifiers
        ClassifierTest.java
        HashingSearchBasedClassifierMod.java
  - liredemo
    - src
      - main
        java
        edu
        uniklu
        itec
        mosaix
        ImageFunctions.java
        engine
        Engine.java
        EngineObserver.java
        Experimental.java
        LeastUsedWeightingStrategy.java
        Logging.java
        ProportionWeightingStrategy.java
        RandomWeightingStrategy.java
        SimpleWeightingData.java
        SimpleWeightingDataFactory.java
        WeightingData.java
        WeightingDataFactory.java
        WeightingStrategy.java
        liredemo
        ImagePanel.java
        IndexingThread.java
        LireDemoFrame.java
        Main.java
        ProgressMonitor.java
        SearchResultsTableModel.java
        flickr
        FlickrDownloadThread.java
        FlickrIndexingThread.java
        FlickrPhoto.java
        FlickrPhotoGrabber.java
        indexing
        MetadataBuilder.java
        ParallelIndexer.java
      - test
        java
        liredemo
        flickr
        FlickrPhotoGrabberTest.java
  - simpleapplication
    - src
      - main
        java
        net
        semanticmetadata
        lire
        sampleapp
        CreateARFFFile.java
        ExtractFeatures.java
        ExtractMultipleFeatures.java
        ExtractSingleFeature.java
        Indexer.java
        IndexingAndSearchWithLocalFeatures.java
        ParallelIndexing.java
        Searcher.java
  - teaching
    - src
      - main
        java
        samples
        Indexing.java
        Search.java
- src

/*
 * This file is part of the LIRE project: http://lire-project.net
 * LIRE is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * LIRE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LIRE; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * We kindly ask you to refer the any or one of the following publications in
 * any publication mentioning or employing Lire:
 *
 * Lux Mathias, Savvas A. Chatzichristofis. Lire: Lucene Image Retrieval –
 * An Extensible Java CBIR Library. In proceedings of the 16th ACM International
 * Conference on Multimedia, pp. 1085-1088, Vancouver, Canada, 2008
 * URL: http://doi.acm.org/10.1145/1459359.1459577
 *
 * Lux Mathias. Content Based Image Retrieval with LIRE. In proceedings of the
 * 19th ACM International Conference on Multimedia, pp. 735-738, Scottsdale,
 * Arizona, USA, 2011
 * URL: http://dl.acm.org/citation.cfm?id=2072432
 *
 * Mathias Lux, Oge Marques. Visual Information Retrieval using Java and LIRE
 * Morgan & Claypool, 2013
 * URL: http://www.morganclaypool.com/doi/abs/10.2200/S00468ED1V01Y201301ICR025
 *
 * Copyright statement:
 * ====================
 * (c) 2002-2013 by Mathias Lux (mathias@juggle.at)
 *  http://www.semanticmetadata.net/lire, http://www.lire-project.net
 *
 * Updated: 18.01.15 07:40
 */

package net.semanticmetadata.lire.utils;

import net.semanticmetadata.lire.builders.DocumentBuilder;
import net.semanticmetadata.lire.imageanalysis.features.GenericDoubleLireFeature;
import net.semanticmetadata.lire.indexers.LireCustomCodec;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.file.Paths;

/**
 * This class provides some common functions for Lucene. As there are many changes to
 * the API of Lucene over time this class is the central place to change common variable
 * like the employed Lucene version.
 * User: Mathias
 * Date: 03.08.11
 * Time: 09:33
 *
 * @author Mathias Lux, mathias@juggle.at
 * @author Nektarios Anagnostopoulos, nek.anag@gmail.com
 */
public class LuceneUtils {
    /**
     * Currently employed version of Lucene
     */
    public static final Version LUCENE_VERSION = Version.LATEST;

    /**
     * Different types of analyzers
     */
    public enum AnalyzerType {
        SimpleAnalyzer, WhitespaceAnalyzer, KeywordAnalyzer, StandardAnalyzer
    }

    /**
     * Creates an IndexWriter for given index path, with a SimpleAnalyzer.
     *
     * @param indexPath the path to the index directory
     * @param create    set to true if you want to create a new index
     * @return the IndexWriter
     * @throws IOException
     */
    public static IndexWriter createIndexWriter(String indexPath, boolean create) throws IOException {
        return createIndexWriter(indexPath, create, AnalyzerType.SimpleAnalyzer);                       //TODO: Simple or Standard ??
    }

    /**
     * Creates an IndexWriter for given index path, with given analyzer.
     *
     * @param indexPath the path to the index directory
     * @param create    set to true if you want to create a new index
     * @param analyzer  gives the analyzer used for the Indexwriter.
     * @return an IndexWriter
     * @throws IOException
     */
    public static IndexWriter createIndexWriter(String indexPath, boolean create, AnalyzerType analyzer) throws IOException {
        return createIndexWriter(FSDirectory.open(Paths.get(indexPath)), create, analyzer);
    }

    /**
     * Creates an IndexWriter for given index path, with given analyzer.
     *
     * @param directory the path to the index directory
     * @param create    set to true if you want to create a new index
     * @param analyzer  gives the analyzer used for the Indexwriter.
     * @return an IndexWriter
     * @throws IOException
     */
    public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer) throws IOException {
        // set the analyzer according to the method params
        Analyzer tmpAnalyzer = null;
        if (analyzer == AnalyzerType.SimpleAnalyzer)
            tmpAnalyzer = new SimpleAnalyzer();    // LetterTokenizer with LowerCaseFilter
        else if (analyzer == AnalyzerType.WhitespaceAnalyzer)
            tmpAnalyzer = new WhitespaceAnalyzer();  // WhitespaceTokenizer
        else if (analyzer == AnalyzerType.KeywordAnalyzer)
            tmpAnalyzer = new KeywordAnalyzer(); // entire string as one token.
        else if (analyzer == AnalyzerType.StandardAnalyzer)
            tmpAnalyzer = new StandardAnalyzer();

        // The config
        IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer);
        config.setRAMBufferSizeMB(512);
        config.setCommitOnClose(true);
        if (create)
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists.
        else
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise.

        config.setCodec(new LireCustomCodec());
        return new IndexWriter(directory, config);
    }

    public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer, double RAMBufferSize) throws IOException {
        // set the analyzer according to the method params
        Analyzer tmpAnalyzer = null;
        if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer();
        else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer();

        // The config
        IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer);
        if (create)
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists.
        else
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise.
        config.setRAMBufferSizeMB(RAMBufferSize);
        config.setCodec(new LireCustomCodec());
        return new IndexWriter(directory, config);
    }

    /**
     * Optimizes an index.
     *
     * @param iw
     * @throws IOException
     */
    public static void optimizeWriter(IndexWriter iw) throws IOException {
        iw.forceMerge(1);
    }

    public static void commitWriter(IndexWriter iw) throws IOException {
        iw.commit();
    }

    public static void closeWriter(IndexWriter iw) throws IOException {
        iw.close();
    }


    public static IndexReader openIndexReader(String indexPath) throws IOException {
        return openIndexReader(FSDirectory.open(Paths.get(indexPath)), false);
    }

    public static IndexReader openIndexReader(String indexPath, boolean RAMDirectory) throws IOException {
        return openIndexReader(FSDirectory.open(Paths.get(indexPath)), RAMDirectory);
    }

    public static IndexReader openIndexReader(FSDirectory directory) throws IOException {
        return openIndexReader(directory, false);
    }

    public static IndexReader openIndexReader(FSDirectory directory, boolean RAMDirectory) throws IOException {
        if (RAMDirectory)
            return DirectoryReader.open(new RAMDirectory(directory, IOContext.READONCE));
        else
            return DirectoryReader.open(directory);
    }

    public static IndexReader openIndexReader(IndexWriter writer, boolean applyDeletes) throws IOException {
        return DirectoryReader.open(writer, applyDeletes, applyDeletes);
    }

    public static void closeReader(IndexReader reader) throws IOException {
        reader.close();
    }


    public static IndexSearcher openIndexSearcher(IndexReader reader) {
        return new IndexSearcher(reader);
    }


    public static int writeFeaturesToIndex(InputStream in, IndexWriter iw) throws IOException {
        int count = 0;
        GenericDoubleLireFeature f = new GenericDoubleLireFeature();
        BufferedReader br = new BufferedReader(new InputStreamReader(in));
        String line;
        while ((line = br.readLine()) != null) {
            Document d = new Document();
            if (line.startsWith("#"))
                continue;
            String[] split = line.split("\\s"); // split at white space ...
            String filename = split[0];
            double[] data = new double[split.length-1];
            for (int i = 1; i < split.length; i++) {
                data[i-1] = Double.parseDouble(split[i]);
            }
            f.setData(data);
            d.add(new StoredField(f.getFieldName(), new BytesRef(f.getByteArrayRepresentation())));
            d.add(new StringField(DocumentBuilder.FIELD_NAME_IDENTIFIER, filename, Field.Store.YES));
            iw.addDocument(d);
            count++;
        }
        iw.close();
        return count;
    }


    /**
     * Method for 'converting' ByteRefs to bytes. This is a horrible way to do it. Main goal is to make it work. Later
     * we'll fix the performance implications of that.
     *
     * @param byteRef
     * @return
     */
    public static byte[] getBytes(BytesRef byteRef) {
        byte[] result = new byte[byteRef.length];
        System.arraycopy(byteRef.bytes, byteRef.offset, result, 0, byteRef.length);
        return result;
    }

}