/* * This file is part of the LIRE project: http://lire-project.net * LIRE is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * LIRE is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with LIRE; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * We kindly ask you to refer the any or one of the following publications in * any publication mentioning or employing Lire: * * Lux Mathias, Savvas A. Chatzichristofis. Lire: Lucene Image Retrieval – * An Extensible Java CBIR Library. In proceedings of the 16th ACM International * Conference on Multimedia, pp. 1085-1088, Vancouver, Canada, 2008 * URL: http://doi.acm.org/10.1145/1459359.1459577 * * Lux Mathias. Content Based Image Retrieval with LIRE. In proceedings of the * 19th ACM International Conference on Multimedia, pp. 735-738, Scottsdale, * Arizona, USA, 2011 * URL: http://dl.acm.org/citation.cfm?id=2072432 * * Mathias Lux, Oge Marques. 
Visual Information Retrieval using Java and LIRE * Morgan & Claypool, 2013 * URL: http://www.morganclaypool.com/doi/abs/10.2200/S00468ED1V01Y201301ICR025 * * Copyright statement: * ==================== * (c) 2002-2013 by Mathias Lux (mathias@juggle.at) * http://www.semanticmetadata.net/lire, http://www.lire-project.net * * Updated: 01.07.13 16:15 */ package net.semanticmetadata.lire.indexers.tools.binary; import net.semanticmetadata.lire.builders.DocumentBuilder; import net.semanticmetadata.lire.imageanalysis.features.GlobalFeature; import net.semanticmetadata.lire.indexers.parallel.WorkItem; import net.semanticmetadata.lire.utils.ImageUtils; import net.semanticmetadata.lire.utils.SerializationUtils; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.*; import java.util.*; import java.util.concurrent.LinkedBlockingQueue; /** * The Extractor is a configurable class that extracts multiple features from multiple images * and puts them into a data file. Main purpose is to run multiple extractors at multiple machines * and put the data files into one single index. Images are referenced relative to the data file, * so it should work fine for network file systems. * <p/> * File format is specified as: (12(345)+('-1'))+ with 1-5 being ... * <p/> * 1. Length of the file name [4 bytes], an int n giving the number of bytes for the file name * 2. File name, relative to the outfile [n bytes, see above] * 3. Feature index [1 byte], see static members * 4. Feature value length [4 bytes], an int k giving the number of bytes encoding the value * 5. Feature value [k bytes, see above] * <p/> * The file is written through a plain BufferedOutputStream; no compression is applied here. * <p/> * Note that the outfile has to be in a folder parent to all images! * <p/> * // TODO: Change to LinkedBlockingQueue and Files.readAllBytes. 
* * @author Mathias Lux, mathias@juggle.at, 08.03.13 */ public class ParallelExtractor implements Runnable { public static final String[] features = new String[]{ "CEDD", // 0 "FCTH", // 1 "OpponentHistogram", // 2 "JointHistogram", // 3 "AutoColorCorrelogram", // 4 "ColorLayout", // 5 "EdgeHistogram", // 6 "Gabor", // 7 "JCD", // 8 "JpegCoefficientHistogram", "ScalableColor", // 10 "SimpleColorHistogram", // 11 "Tamura", // 12 "LuminanceLayout", // 13 "PHOG", // 14 "LocalBinaryPatterns", // 15 }; public static final String[] featureFieldNames = new String[]{ DocumentBuilder.FIELD_NAME_CEDD, // 0 DocumentBuilder.FIELD_NAME_FCTH, // 1 DocumentBuilder.FIELD_NAME_OPPONENT_HISTOGRAM, // 2 DocumentBuilder.FIELD_NAME_JOINT_HISTOGRAM, // 3 DocumentBuilder.FIELD_NAME_AUTOCOLORCORRELOGRAM, // 4 DocumentBuilder.FIELD_NAME_COLORLAYOUT, // 5 DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM, // 6 DocumentBuilder.FIELD_NAME_GABOR, // 7 DocumentBuilder.FIELD_NAME_JCD, // 8 DocumentBuilder.FIELD_NAME_JPEGCOEFFS, DocumentBuilder.FIELD_NAME_SCALABLECOLOR, DocumentBuilder.FIELD_NAME_COLORHISTOGRAM, DocumentBuilder.FIELD_NAME_TAMURA, // 12 DocumentBuilder.FIELD_NAME_LUMINANCE_LAYOUT, // 13 DocumentBuilder.FIELD_NAME_PHOG, // 14 DocumentBuilder.FIELD_NAME_LOCAL_BINARY_PATTERNS, // 15 }; static HashMap<String, Integer> feature2index; static { feature2index = new HashMap<String, Integer>(features.length); for (int i = 0; i < features.length; i++) { feature2index.put(features[i], i); } } private static boolean force = false; private static int numberOfThreads = DocumentBuilder.NUM_OF_THREADS; LinkedBlockingQueue<WorkItem> images = new LinkedBlockingQueue<WorkItem>(200); boolean ended = false; int overallCount = 0; OutputStream dos = null; LinkedList<GlobalFeature> listOfFeatures; File fileList = null; File outFile = null; private int monitoringInterval = 10; private int maxSideLength = -1; public ParallelExtractor() { // default constructor. 
listOfFeatures = new LinkedList<GlobalFeature>(); } /** * Sets the number of consumer threads that are employed for extraction * * @param numberOfThreads */ public static void setNumberOfThreads(int numberOfThreads) { ParallelExtractor.numberOfThreads = numberOfThreads; } public static void main(String[] args) throws IOException { ParallelExtractor e = new ParallelExtractor(); // parse programs args ... for (int i = 0; i < args.length; i++) { String arg = args[i]; if (arg.startsWith("-i")) { // infile ... if ((i + 1) < args.length) e.setFileList(new File(args[i + 1])); else { System.err.println("Please give a input file after the -i option."); printHelp(); } } else if (arg.startsWith("-o")) { // out file if ((i + 1) < args.length) e.setOutFile(new File(args[i + 1])); else { System.err.println("Please name an outfile after the -o option."); printHelp(); } } else if (arg.startsWith("-m")) { // out file if ((i + 1) < args.length) { try { int s = Integer.parseInt(args[i + 1]); if (s > 10) e.setMaxSideLength(s); } catch (NumberFormatException e1) { e1.printStackTrace(); printHelp(); } } else printHelp(); } else if (arg.startsWith("-f")) { force = true; } else if (arg.startsWith("-h")) { // help printHelp(); } else if (arg.startsWith("-n")) { if ((i + 1) < args.length) try { ParallelExtractor.numberOfThreads = Integer.parseInt(args[i + 1]); } catch (Exception e1) { System.err.println("Could not set number of threads to \"" + args[i + 1] + "\"."); e1.printStackTrace(); } else printHelp(); } else if (arg.startsWith("-c")) { // config file ... 
Properties p = new Properties(); p.load(new FileInputStream(new File(args[i + 1]))); Enumeration<?> enumeration = p.propertyNames(); while (enumeration.hasMoreElements()) { String key = (String) enumeration.nextElement(); if (key.toLowerCase().startsWith("feature.")) { try { e.addFeature((GlobalFeature) Class.forName(p.getProperty(key)).newInstance()); } catch (Exception e1) { System.err.println("Could not add feature named " + p.getProperty(key)); e1.printStackTrace(); } } } } } // check if there is an infile, an outfile and some features to extract. if (!e.isConfigured()) { System.err.println("There is an error in the configuration."); printHelp(); } else { e.run(); } } private static void printHelp() { System.out.println("Help for the ParallelExtractor class.\n" + "=============================\n" + "This help text is shown if you start the ParallelExtractor with the '-h' option.\n" + "\n" + "1. Usage\n" + "========\n" + "$> ParallelExtractor -i <infile> [-o <outfile>] -c <configfile> [-n <threads>] [-m <max_side_length>]\n" + "\n" + "Note: if you don't specify an outfile just \".data\" is appended to the infile for output.\n" + "\n" + "2. Config File\n" + "==============\n" + "The config file is a simple java Properties file. It basically gives the \n" + "employed features as a list of properties, just like:\n" + "\n" + "feature.1=CEDD\n" + "feature.2=FCTH\n" + "\n" + "... and so on. "); } /** * Adds a feature to the extractor chain. All those features are extracted from images. * * @param feature */ public void addFeature(GlobalFeature feature) { listOfFeatures.add(feature); } /** * Sets the file list for processing. One image file per line is fine. * * @param fileList */ public void setFileList(File fileList) { this.fileList = fileList; } /** * Sets the outfile. The outfile has to be in a folder parent to all input images. 
* * @param outFile */ public void setOutFile(File outFile) { this.outFile = outFile; } public int getMaxSideLength() { return maxSideLength; } public void setMaxSideLength(int maxSideLength) { this.maxSideLength = maxSideLength; } private boolean isConfigured() { boolean configured = true; if (fileList == null || !fileList.exists()) { System.err.println("Input file is either not given or does not exist."); configured = false; } else if (outFile == null) { // create an outfile ... try { outFile = new File(fileList.getCanonicalPath() + ".data"); System.out.println("Setting out file to " + outFile.getCanonicalFile()); } catch (IOException e) { configured = false; } } else if (outFile.exists() && !force) { System.err.println(outFile.getName() + " already exists. Please delete or choose another outfile."); configured = false; } if (listOfFeatures.size() < 1) configured = false; return configured; } @Override public void run() { // check: if (fileList == null || !fileList.exists()) { System.err.println("No text file with a list of images given."); return; } if (listOfFeatures.size() == 0) { System.err.println("No features to extract given."); return; } try { dos = new BufferedOutputStream(new FileOutputStream(outFile)); Thread p = new Thread(new Producer()); p.start(); LinkedList<Thread> threads = new LinkedList<Thread>(); long l = System.currentTimeMillis(); for (int i = 0; i < numberOfThreads; i++) { Thread c = new Thread(new Consumer()); c.start(); threads.add(c); } Thread m = new Thread(new Monitoring()); m.start(); for (Iterator<Thread> iterator = threads.iterator(); iterator.hasNext(); ) { iterator.next().join(); } long l1 = System.currentTimeMillis() - l; System.out.println("Analyzed " + overallCount + " images in " + l1 / 1000 + " seconds, ~" + (overallCount > 0 ? 
(l1 / overallCount) : "inf.") + " ms each."); dos.close(); // writer.commit(); // writer.close(); // threadFinished = true; } catch (Exception e) { e.printStackTrace(); } } private void addFeatures(List features) { for (Iterator<GlobalFeature> iterator = listOfFeatures.iterator(); iterator.hasNext(); ) { GlobalFeature next = iterator.next(); try { features.add(next.getClass().newInstance()); } catch (Exception e) { e.printStackTrace(); } } } class Monitoring implements Runnable { public void run() { long ms = System.currentTimeMillis(); try { Thread.sleep(1000 * monitoringInterval); // wait xx seconds } catch (InterruptedException e) { e.printStackTrace(); } while (!ended) { try { // print the current status: long time = System.currentTimeMillis() - ms; // System.out.println("Analyzed " + overallCount + " images in " + time / 1000 + " seconds, " + ((overallCount > 0) ? (time / overallCount) : "n.a.") + " ms each (" + images.size() + " images currently in queue)."); System.out.printf("Analyzed %,d images in %,d seconds, %s ms each (%,d images currently in queue).\n", overallCount, time / 1000, ((overallCount > 0) ? 
(time / overallCount) : "n.a."), images.size()); Thread.sleep(1000 * monitoringInterval); // wait xx seconds } catch (InterruptedException e) { e.printStackTrace(); } } } } class Producer implements Runnable { public void run() { int tmpSize = 0; try { BufferedReader br = new BufferedReader(new FileReader(fileList)); String file = null; File next = null; while ((file = br.readLine()) != null) { next = new File(file); BufferedImage img = null; try { int fileSize = (int) next.length(); byte[] buffer = new byte[fileSize]; FileInputStream fis = new FileInputStream(next); fis.read(buffer); String path = next.getCanonicalPath(); images.put(new WorkItem(path, buffer)); } catch (Exception e) { System.err.println("Could not read image " + file + ": " + e.getMessage()); } } for (int i = 0; i < numberOfThreads * 2; i++) { String tmpString = null; byte[] tmpBuffer = null; try { images.put(new WorkItem(tmpString, tmpBuffer)); } catch (InterruptedException e) { e.printStackTrace(); } } } catch (IOException e) { e.printStackTrace(); } synchronized (images) { ended = true; images.notifyAll(); } } } class Consumer implements Runnable { WorkItem tmp = null; LinkedList<GlobalFeature> features = new LinkedList<GlobalFeature>(); int count = 0; boolean locallyEnded = false; Consumer() { addFeatures(features); } public void run() { byte[] myBuffer = new byte[1024 * 1024 * 10]; int bufferCount = 0; while (!locallyEnded) { try { // we wait for the stack to be either filled or empty & not being filled any more. 
if (!locallyEnded) { tmp = images.take(); if (tmp.getBuffer() == null) locallyEnded = true; else { count++; overallCount++; } } bufferCount = 0; if (!locallyEnded) { ByteArrayInputStream b = new ByteArrayInputStream(tmp.getBuffer()); BufferedImage img = ImageIO.read(b); if (maxSideLength > 50) img = ImageUtils.scaleImage(img, maxSideLength); byte[] tmpBytes = tmp.getFileName().getBytes(); // everything is written to a buffer and only if no exception is thrown, the image goes to index. System.arraycopy(SerializationUtils.toBytes(tmpBytes.length), 0, myBuffer, 0, 4); bufferCount += 4; // dos.write(SerializationUtils.toBytes(tmpBytes.length)); System.arraycopy(tmpBytes, 0, myBuffer, bufferCount, tmpBytes.length); bufferCount += tmpBytes.length; // dos.write(tmpBytes); for (GlobalFeature feature : features) { feature.extract(img); myBuffer[bufferCount] = (byte) feature2index.get(feature.getClass().getSimpleName()).intValue(); bufferCount++; // dos.write(feature2index.get(feature.getClass().getName())); tmpBytes = feature.getByteArrayRepresentation(); System.arraycopy(SerializationUtils.toBytes(tmpBytes.length), 0, myBuffer, bufferCount, 4); bufferCount += 4; // dos.write(SerializationUtils.toBytes(tmpBytes.length)); System.arraycopy(tmpBytes, 0, myBuffer, bufferCount, tmpBytes.length); bufferCount += tmpBytes.length; // dos.write(tmpBytes); } // finally write everything to the stream - in case no exception was thrown.. synchronized (dos) { dos.write(myBuffer, 0, bufferCount); dos.write(-1); // that's the separator dos.flush(); } } } catch (Exception e) { System.err.println("Error processing file " + tmp.getFileName()); e.printStackTrace(); } } } } }