/* * This file is part of the LIRE project: http://lire-project.net * LIRE is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * LIRE is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with LIRE; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * We kindly ask you to refer the any or one of the following publications in * any publication mentioning or employing Lire: * * Lux Mathias, Savvas A. Chatzichristofis. Lire: Lucene Image Retrieval – * An Extensible Java CBIR Library. In proceedings of the 16th ACM International * Conference on Multimedia, pp. 1085-1088, Vancouver, Canada, 2008 * URL: http://doi.acm.org/10.1145/1459359.1459577 * * Lux Mathias. Content Based Image Retrieval with LIRE. In proceedings of the * 19th ACM International Conference on Multimedia, pp. 735-738, Scottsdale, * Arizona, USA, 2011 * URL: http://dl.acm.org/citation.cfm?id=2072432 * * Mathias Lux, Oge Marques. 
Visual Information Retrieval using Java and LIRE * Morgan & Claypool, 2013 * URL: http://www.morganclaypool.com/doi/abs/10.2200/S00468ED1V01Y201301ICR025 * * Copyright statement: * ==================== * (c) 2002-2013 by Mathias Lux (mathias@juggle.at) * http://www.semanticmetadata.net/lire, http://www.lire-project.net * * Updated: 01.07.13 16:15 */ package net.semanticmetadata.lire.indexers.tools.binary; import net.semanticmetadata.lire.builders.DocumentBuilder; import net.semanticmetadata.lire.imageanalysis.features.GlobalFeature; import net.semanticmetadata.lire.indexers.parallel.WorkItem; import net.semanticmetadata.lire.utils.ImageUtils; import net.semanticmetadata.lire.utils.SerializationUtils; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.*; import java.util.*; import java.util.concurrent.LinkedBlockingQueue; /** * The Extractor is a configurable class that extracts multiple features from multiple images * and puts them into a data file. Main purpose is to run multiple extractors at multiple machines * and put the data files into one single index. Images are referenced relative to the data file, * so it should work fine for network file systems. * <p/> * File format is specified as: (12(345)+('-1'))+ with 1-5 being ... * <p/> * 1. Length of the file name [4 bytes], an int n giving the number of bytes for the file name * 2. File name, relative to the outfile [n bytes, see above] * 3. Feature index [1 byte], see static members * 4. Feature value length [4 bytes], an int k giving the number of bytes encoding the value * 5. Feature value [k bytes, see above] * <p/> * The file is written through a plain BufferedOutputStream; no compression is applied here. * <p/> * Note that the outfile has to be in a folder parent to all images! * <p/> * // TODO: Change to LinkedBlockingQueue and Files.readAllBytes. 
* * @author Mathias Lux, mathias@juggle.at, 08.03.13 */ public class ParallelExtractor implements Runnable { public static final String[] features = new String[]{ "CEDD", // 0 "FCTH", // 1 "OpponentHistogram", // 2 "JointHistogram", // 3 "AutoColorCorrelogram", // 4 "ColorLayout", // 5 "EdgeHistogram", // 6 "Gabor", // 7 "JCD", // 8 "JpegCoefficientHistogram", "ScalableColor", // 10 "SimpleColorHistogram", // 11 "Tamura", // 12 "LuminanceLayout", // 13 "PHOG", // 14 "LocalBinaryPatterns", // 15 }; public static final String[] featureFieldNames = new String[]{ DocumentBuilder.FIELD_NAME_CEDD, // 0 DocumentBuilder.FIELD_NAME_FCTH, // 1 DocumentBuilder.FIELD_NAME_OPPONENT_HISTOGRAM, // 2 DocumentBuilder.FIELD_NAME_JOINT_HISTOGRAM, // 3 DocumentBuilder.FIELD_NAME_AUTOCOLORCORRELOGRAM, // 4 DocumentBuilder.FIELD_NAME_COLORLAYOUT, // 5 DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM, // 6 DocumentBuilder.FIELD_NAME_GABOR, // 7 DocumentBuilder.FIELD_NAME_JCD, // 8 DocumentBuilder.FIELD_NAME_JPEGCOEFFS, DocumentBuilder.FIELD_NAME_SCALABLECOLOR, DocumentBuilder.FIELD_NAME_COLORHISTOGRAM, DocumentBuilder.FIELD_NAME_TAMURA, // 12 DocumentBuilder.FIELD_NAME_LUMINANCE_LAYOUT, // 13 DocumentBuilder.FIELD_NAME_PHOG, // 14 DocumentBuilder.FIELD_NAME_LOCAL_BINARY_PATTERNS, // 15 }; static HashMap<String, Integer> feature2index; static { feature2index = new HashMap<String, Integer>(features.length); for (int i = 0; i < features.length; i++) { feature2index.put(features[i], i); } } private static boolean force = false; private static int numberOfThreads = DocumentBuilder.NUM_OF_THREADS; LinkedBlockingQueue<WorkItem> images = new LinkedBlockingQueue<WorkItem>(200); boolean ended = false; int overallCount = 0; OutputStream dos = null; LinkedList<GlobalFeature> listOfFeatures; File fileList = null; File outFile = null; private int monitoringInterval = 10; private int maxSideLength = -1; public ParallelExtractor() { // default constructor. 
listOfFeatures = new LinkedList<GlobalFeature>(); } /** * Sets the number of consumer threads that are employed for extraction * * @param numberOfThreads */ public static void setNumberOfThreads(int numberOfThreads) { ParallelExtractor.numberOfThreads = numberOfThreads; } public static void main(String[] args) throws IOException { ParallelExtractor e = new ParallelExtractor(); // parse programs args ... for (int i = 0; i < args.length; i++) { String arg = args[i]; if (arg.startsWith("-i")) { // infile ... if ((i + 1) < args.length) e.setFileList(new File(args[i + 1])); else { System.err.println("Please give a input file after the -i option."); printHelp(); } } else if (arg.startsWith("-o")) { // out file if ((i + 1) < args.length) e.setOutFile(new File(args[i + 1])); else { System.err.println("Please name an outfile after the -o option."); printHelp(); } } else if (arg.startsWith("-m")) { // out file if ((i + 1) < args.length) { try { int s = Integer.parseInt(args[i + 1]); if (s > 10) e.setMaxSideLength(s); } catch (NumberFormatException e1) { e1.printStackTrace(); printHelp(); } } else printHelp(); } else if (arg.startsWith("-f")) { force = true; } else if (arg.startsWith("-h")) { // help printHelp(); } else if (arg.startsWith("-n")) { if ((i + 1) < args.length) try { ParallelExtractor.numberOfThreads = Integer.parseInt(args[i + 1]); } catch (Exception e1) { System.err.println("Could not set number of threads to \"" + args[i + 1] + "\"."); e1.printStackTrace(); } else printHelp(); } else if (arg.startsWith("-c")) { // config file ... 
Properties p = new Properties(); p.load(new FileInputStream(new File(args[i + 1]))); Enumeration<?> enumeration = p.propertyNames(); while (enumeration.hasMoreElements()) { String key = (String) enumeration.nextElement(); if (key.toLowerCase().startsWith("feature.")) { try { e.addFeature((GlobalFeature) Class.forName(p.getProperty(key)).newInstance()); } catch (Exception e1) { System.err.println("Could not add feature named " + p.getProperty(key)); e1.printStackTrace(); } } } } } // check if there is an infile, an outfile and some features to extract. if (!e.isConfigured()) { System.err.println("There is an error in the configuration."); printHelp(); } else { e.run(); } } private static void printHelp() { System.out.println("Help for the ParallelExtractor class.\n" + "=============================\n" + "This help text is shown if you start the ParallelExtractor with the '-h' option.\n" + "\n" + "1. Usage\n" + "========\n" + "$> ParallelExtractor -i <infile> [-o <outfile>] -c <configfile> [-n <threads>] [-m <max_side_length>]\n" + "\n" + "Note: if you don't specify an outfile just \".data\" is appended to the infile for output.\n" + "\n" + "2. Config File\n" + "==============\n" + "The config file is a simple java Properties file. It basically gives the \n" + "employed features as a list of properties, just like:\n" + "\n" + "feature.1=CEDD\n" + "feature.2=FCTH\n" + "\n" + "... and so on. "); } /** * Adds a feature to the extractor chain. All those features are extracted from images. * * @param feature */ public void addFeature(GlobalFeature feature) { listOfFeatures.add(feature); } /** * Sets the file list for processing. One image file per line is fine. * * @param fileList */ public void setFileList(File fileList) { this.fileList = fileList; } /** * Sets the outfile. The outfile has to be in a folder parent to all input images. 
* * @param outFile */ public void setOutFile(File outFile) { this.outFile = outFile; } public int getMaxSideLength() { return maxSideLength; } public void setMaxSideLength(int maxSideLength) { this.maxSideLength = maxSideLength; } private boolean isConfigured() { boolean configured = true; if (fileList == null || !fileList.exists()) { System.err.println("Input file is either not given or does not exist."); configured = false; } else if (outFile == null) { // create an outfile ... try { outFile = new File(fileList.getCanonicalPath() + ".data"); System.out.println("Setting out file to " + outFile.getCanonicalFile()); } catch (IOException e) { configured = false; } } else if (outFile.exists() && !force) { System.err.println(outFile.getName() + " already exists. Please delete or choose another outfile."); configured = false; } if (listOfFeatures.size() < 1) configured = false; return configured; } @Override public void run() { // check: if (fileList == null || !fileList.exists()) { System.err.println("No text file with a list of images given."); return; } if (listOfFeatures.size() == 0) { System.err.println("No features to extract given."); return; } try { dos = new BufferedOutputStream(new FileOutputStream(outFile)); Thread p = new Thread(new Producer()); p.start(); LinkedList<Thread> threads = new LinkedList<Thread>(); long l = System.currentTimeMillis(); for (int i = 0; i < numberOfThreads; i++) { Thread c = new Thread(new Consumer()); c.start(); threads.add(c); } Thread m = new Thread(new Monitoring()); m.start(); for (Iterator<Thread> iterator = threads.iterator(); iterator.hasNext(); ) { iterator.next().join(); } long l1 = System.currentTimeMillis() - l; System.out.println("Analyzed " + overallCount + " images in " + l1 / 1000 + " seconds, ~" + (overallCount > 0 ? 
(l1 / overallCount) : "inf.") + " ms each."); dos.close(); // writer.commit(); // writer.close(); // threadFinished = true; } catch (Exception e) { e.printStackTrace(); } } private void addFeatures(List features) { for (Iterator<GlobalFeature> iterator = listOfFeatures.iterator(); iterator.hasNext(); ) { GlobalFeature next = iterator.next(); try { features.add(next.getClass().newInstance()); } catch (Exception e) { e.printStackTrace(); } } } class Monitoring implements Runnable { public void run() { long ms = System.currentTimeMillis(); try { Thread.sleep(1000 * monitoringInterval); // wait xx seconds } catch (InterruptedException e) { e.printStackTrace(); } while (!ended) { try { // print the current status: long time = System.currentTimeMillis() - ms; // System.out.println("Analyzed " + overallCount + " images in " + time / 1000 + " seconds, " + ((overallCount > 0) ? (time / overallCount) : "n.a.") + " ms each (" + images.size() + " images currently in queue)."); System.out.printf("Analyzed %,d images in %,d seconds, %s ms each (%,d images currently in queue).\n", overallCount, time / 1000, ((overallCount > 0) ? 
(time / overallCount) : "n.a."), images.size()); Thread.sleep(1000 * monitoringInterval); // wait xx seconds } catch (InterruptedException e) { e.printStackTrace(); } } } } class Producer implements Runnable { public void run() { int tmpSize = 0; try { BufferedReader br = new BufferedReader(new FileReader(fileList)); String file = null; File next = null; while ((file = br.readLine()) != null) { next = new File(file); BufferedImage img = null; try { int fileSize = (int) next.length(); byte[] buffer = new byte[fileSize]; FileInputStream fis = new FileInputStream(next); fis.read(buffer); String path = next.getCanonicalPath(); images.put(new WorkItem(path, buffer)); } catch (Exception e) { System.err.println("Could not read image " + file + ": " + e.getMessage()); } } for (int i = 0; i < numberOfThreads * 2; i++) { String tmpString = null; byte[] tmpBuffer = null; try { images.put(new WorkItem(tmpString, tmpBuffer)); } catch (InterruptedException e) { e.printStackTrace(); } } } catch (IOException e) { e.printStackTrace(); } synchronized (images) { ended = true; images.notifyAll(); } } } class Consumer implements Runnable { WorkItem tmp = null; LinkedList<GlobalFeature> features = new LinkedList<GlobalFeature>(); int count = 0; boolean locallyEnded = false; Consumer() { addFeatures(features); } public void run() { byte[] myBuffer = new byte[1024 * 1024 * 10]; int bufferCount = 0; while (!locallyEnded) { try { // we wait for the stack to be either filled or empty & not being filled any more. 
if (!locallyEnded) { tmp = images.take(); if (tmp.getBuffer() == null) locallyEnded = true; else { count++; overallCount++; } } bufferCount = 0; if (!locallyEnded) { ByteArrayInputStream b = new ByteArrayInputStream(tmp.getBuffer()); BufferedImage img = ImageIO.read(b); if (maxSideLength > 50) img = ImageUtils.scaleImage(img, maxSideLength); byte[] tmpBytes = tmp.getFileName().getBytes(); // everything is written to a buffer and only if no exception is thrown, the image goes to index. System.arraycopy(SerializationUtils.toBytes(tmpBytes.length), 0, myBuffer, 0, 4); bufferCount += 4; // dos.write(SerializationUtils.toBytes(tmpBytes.length)); System.arraycopy(tmpBytes, 0, myBuffer, bufferCount, tmpBytes.length); bufferCount += tmpBytes.length; // dos.write(tmpBytes); for (GlobalFeature feature : features) { feature.extract(img); myBuffer[bufferCount] = (byte) feature2index.get(feature.getClass().getSimpleName()).intValue(); bufferCount++; // dos.write(feature2index.get(feature.getClass().getName())); tmpBytes = feature.getByteArrayRepresentation(); System.arraycopy(SerializationUtils.toBytes(tmpBytes.length), 0, myBuffer, bufferCount, 4); bufferCount += 4; // dos.write(SerializationUtils.toBytes(tmpBytes.length)); System.arraycopy(tmpBytes, 0, myBuffer, bufferCount, tmpBytes.length); bufferCount += tmpBytes.length; // dos.write(tmpBytes); } // finally write everything to the stream - in case no exception was thrown.. synchronized (dos) { dos.write(myBuffer, 0, bufferCount); dos.write(-1); // that's the separator dos.flush(); } } } catch (Exception e) { System.err.println("Error processing file " + tmp.getFileName()); e.printStackTrace(); } } } } }