ImportCandidates.java example

Explorer
openmicroscopy-master
/*
 *   Copyright 2009 Glencoe Software, Inc. All rights reserved.
 *   Use is subject to license terms supplied in LICENSE.txt
 */

package ome.formats.importer;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static omero.rtypes.rint;
import static omero.rtypes.rstring;

import loci.formats.FileInfo;
import loci.formats.FormatTools;
import loci.formats.IFormatReader;
import loci.formats.MissingLibraryException;
import loci.formats.UnknownFormatException;
import loci.formats.UnsupportedCompressionException;
import loci.formats.in.DefaultMetadataOptions;
import loci.formats.in.MetadataLevel;
import ome.formats.ImageNameMetadataStore;
import ome.formats.importer.util.ErrorHandler;

import org.apache.commons.io.DirectoryWalker;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility class which given any {@link File} object will determine the correct
 * number and members of a given import. This facility permits iterating over a
 * directory.
 *
 * This class is NOT thread-safe.
 *
 * @since Beta4.1
 */
public class ImportCandidates extends DirectoryWalker
{

    /**
     * Event raised during a pass through the directory structure given to
     * {@link ImportCandidates}. A {@link SCANNING} event will not necessarily
     * be raised for every file or directory, but the values will be valid for
     * each event.
     *
     * If {@link #totalFiles} is less than 0, then the directory is currently being
     * scanned and the count is unknown. Once {@link #totalFiles} is positive,
     * it will remain constant.
     *
     * If {@link #cancel()} is called, then directory searching will cease. The
     * {@link ImportCandidates} instance will be left with <em>no</em>
     * {@link ImportContainer}s.
     */
    public static class SCANNING extends ImportEvent
    {
        public final File file;
        public final int depth;
        public final int numFiles;
        public final int totalFiles;
        private boolean cancel = false;

        public SCANNING(File file, int depth, int numFiles, int totalFiles)
        {
            this.file = file;
            this.depth = depth;
            this.numFiles = numFiles;
            this.totalFiles = totalFiles;
        }

        /**
         * Can be called to cancel the current action.
         */
        public void cancel()
        {
            this.cancel = true;
        }

        public String toLog()
        {
            int l = file.toString().length() - 16;
            if (l < 0)
            {
                l = 0;
            }
            String f = file.toString().substring(l);
            return super.toLog() + String.format(": Depth:%s Num: %4s Tot: %4s File: %s",
                    depth, numFiles, (totalFiles < 0 ? "n/a" : totalFiles), f);
        }
    }

    /**
     * Marker exception thrown once an {@link IObserver} has called
     * {@link SCANNING#cancel()} on a published event. It carries neither a
     * message nor a cause.
     */
    public static class CANCEL extends RuntimeException {

        private static final long serialVersionUID = 1L;
    }

    /** Class-wide logger. */
    private static final Logger log = LoggerFactory.getLogger(ImportCandidates.class);

    /**
     * Default number of directory levels to descend, read from the
     * <code>omero.import.depth</code> system property (4 when unset).
     */
    public static final int DEPTH = Integer.parseInt(
            System.getProperty("omero.import.depth", "4"));

    /**
     * Metadata level handed to the reader before each
     * {@link IFormatReader#setId(String)} call, read from the
     * <code>omero.import.metadata.level</code> system property
     * (<code>MINIMUM</code> when unset).
     */
    public static final MetadataLevel METADATA_LEVEL = MetadataLevel.valueOf(
            System.getProperty("omero.import.metadata.level", "MINIMUM"));

    /** Receiver of all scanning and error events. */
    private final IObserver observer;

    /** Reader used to parse each candidate file. */
    private final OMEROWrapper reader;

    /** Every used file reported by an accepted container so far. */
    private final Set<String> allFiles = new HashSet<String>();

    /**
     * For each used file, the absolute paths of the files whose container
     * reported it, in insertion order.
     */
    private final Map<String, List<String>> usedBy =
            new LinkedHashMap<String, List<String>>();

    /** Containers found so far. */
    private final List<ImportContainer> containers = new ArrayList<ImportContainer>();

    /** Creation time of this instance, in milliseconds. */
    private final long start = System.currentTimeMillis();

    /**
     * Accumulated milliseconds spent in the reader section of
     * {@link #singleFile(File, ImportConfig)}, which surrounds
     * {@link IFormatReader#setId(String)}.
     */
    long readerTime;

    /**
     * Current count of attempts to call {@link IFormatReader#setId(String)}.
     */
    int setids;

    /**
     * Number of times an UNKNOWN_FORMAT event was raised.
     */
    int unknown;

    /**
     * Current count of files processed. This is incremented in two phases:
     * once during directory counting, and once during parsing.
     */
    int count;

    /**
     * Total number of files which have been / will be examined. During the
     * first pass, this value is negative.
     */
    int total = -1;

    /**
     * Whether or not one of the {@link SCANNING} events had
     * {@link SCANNING#cancel()} called.
     */
    boolean cancelled;

    /**
     * Convenience constructor: delegates to
     * {@link #ImportCandidates(int, OMEROWrapper, String[], IObserver)} using
     * {@link #DEPTH} as the search depth.
     *
     * @param reader
     *            instance used for parsing each of the paths. Not used once
     *            the constructor completes.
     * @param paths
     *            file paths which are searched. May be directories.
     * @param observer
     *            {@link IObserver} which is told about any exceptions during
     *            {@link OMEROWrapper#setId(String)}. Otherwise no error
     *            reporting takes place.
     */
    public ImportCandidates(OMEROWrapper reader, String[] paths, IObserver observer) {
        this(DEPTH, reader, paths, observer);
    }

    /**
     * Main constructor. Runs two passes over the given paths, a counting
     * pass and then a parsing pass, and finally groups the containers that
     * were found. Directories are descended to the given depth.
     *
     * If an observer cancels the scan, all containers are discarded,
     * {@link #wasCancelled()} returns true and the counters are reset to -1.
     *
     * @param depth
     *            number of directory levels to search down.
     * @param reader
     *            instance used for parsing each of the paths. Not used once
     *            the constructor completes.
     * @param paths
     *            file paths which are searched. May be directories. Nothing
     *            is scanned when this is null or empty.
     * @param observer
     *            {@link IObserver} which is told about any exceptions during
     *            {@link OMEROWrapper#setId(String)}. Otherwise no error
     *            reporting takes place.
     */
    public ImportCandidates(int depth, OMEROWrapper reader, String[] paths,
            IObserver observer) {
        super(TrueFileFilter.INSTANCE, depth);
        this.reader = reader;
        this.observer = observer;
        log.info(String.format("Depth: %s Metadata Level: %s", depth,
                METADATA_LEVEL));

        final int pathCount = (paths == null) ? 0 : paths.length;

        if (pathCount == 2 && "".equals(paths[0]) && "".equals(paths[1])) {
            // Easter egg for testing: two empty-string paths terminate the
            // JVM with status 0 instead of scanning anything.
            System.exit(0);
            return;
        }

        if (pathCount == 0) {
            return;
        }

        try {
            // Pass 1: total is still negative, so files are only counted.
            execute(paths);
            total = count;
            count = 0;

            // Pass 2: the total is known, so every file is parsed.
            execute(paths);

            new Groups(usedBy).parse(containers);

            final long totalElapsed = System.currentTimeMillis() - start;
            log.info(String.format(
                    "%s file(s) parsed into %s group(s) with %s call(s) to setId in "
                    + "%sms. (%sms total) [%s unknowns]",
                    this.total, size(), this.setids, readerTime,
                    totalElapsed, unknown));
        } catch (CANCEL c) {
            final long elapsed = System.currentTimeMillis() - start;
            log.info(String.format(
                    "Cancelling search after %sms with %s containers found "
                    + "(%sms in %s calls to setIds)",
                    elapsed, containers.size(), readerTime, setids));
            containers.clear();
            cancelled = true;
            total = -1;
            count = -1;
        }
    }

    /**
     * Writes the "standard" representation of the containers to standard
     * output; this text is parsed by other software layers. Format: 1) empty
     * lines are ignored, 2) a block of comment lines separates groups,
     * 3) the first file of a group is its "key", 4) every other file listed
     * in a group is imported with it.
     *
     * The used files are printed in the order the container reports them.
     */
    /*
     * Similar logic is contained in Groups.print() below but that does not
     * take the ordering of the used files into account.
     */
    public void print() {
        if (containers != null) {
            for (ImportContainer ic : containers) {
                System.out.println("#======================================");
                final String header = String.format(
                        "# Group: %s SPW: %s Reader: %s",
                        ic.getFile(), ic.getIsSPW(), ic.getReader());
                System.out.println(header);
                final String[] used = ic.getUsedFiles();
                for (int i = 0; i < used.length; i++) {
                    System.out.println(used[i]);
                }
            }
        }
    }

    /**
     * @return the number of containers currently held
     */
    public int size() {
        return this.containers.size();
    }

    /**
     * @return true if an observer cancelled the scan
     */
    public boolean wasCancelled() {
        return this.cancelled;
    }

    /**
     * @return a new list holding the absolute path of each container's file,
     *         in container order
     */
    public List<String> getPaths() {
        final List<String> result = new ArrayList<String>(containers.size());
        for (ImportContainer container : containers) {
            final File target = container.getFile();
            result.add(target.getAbsolutePath());
        }
        return result;
    }

    /**
     * Looks up the reader type of the first container whose file has the
     * given absolute path.
     *
     * @param path - absolute path for container
     * @return reader type
     * @throws RuntimeException if no container matches the path
     */
    public String getReaderType(String path) {
        for (ImportContainer candidate : containers) {
            final String candidatePath = candidate.getFile().getAbsolutePath();
            if (!candidatePath.equals(path)) {
                continue;
            }
            return candidate.getReader();
        }
        throw new RuntimeException("Unfound reader for: " + path);
    }

    /**
     * Looks up the used files of the first container whose file has the
     * given absolute path.
     *
     * @param path - absolute path for container
     * @return string array of used files
     * @throws RuntimeException if no container matches the path
     */
    public String[] getUsedFiles(String path) {
        for (ImportContainer candidate : containers) {
            final String candidatePath = candidate.getFile().getAbsolutePath();
            if (!candidatePath.equals(path)) {
                continue;
            }
            return candidate.getUsedFiles();
        }
        throw new RuntimeException("Unfound reader for: " + path);
    }

    /**
     * @return a new list holding all containers; changes to the returned
     *         list do not affect this instance
     */
    public List<ImportContainer> getContainers() {
        final List<ImportContainer> snapshot =
                new ArrayList<ImportContainer>(containers.size());
        snapshot.addAll(containers);
        return snapshot;
    }

    /**
     * Visits every given path: directories are walked, anything else is
     * passed straight to {@link #handleFile(File, int, Collection)}. The
     * main constructor calls this twice: once while the total is unknown
     * (files are only counted) and once with the known total, using
     * {@link #reader}.
     *
     * A {@link SCANNING} event is published after each path, so at least one
     * event is raised per path regardless of the count in handleFile.
     *
     * @param paths paths to visit
     * @throws RuntimeException wrapping any {@link IOException} raised
     *             while walking a directory
     */
    protected void execute(String[] paths) {
        for (int i = 0; i < paths.length; i++) {
            final File target = new File(paths[i]);
            try {
                if (!target.isDirectory()) {
                    handleFile(target, 0, null);
                } else {
                    walk(target, null);
                }
                scanWithCancel(target, 0);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Builds an import container for a single file by initializing
     * {@link #reader} on it. Failures are not thrown; they are reported to
     * the observer as {@link ErrorHandler} events and null is returned.
     *
     * @param file - single file; null yields null without any event
     * @param config - configuration copied into the new container
     * @return the import container, or null if the file is null, missing,
     *         unreadable, or could not be handled by the reader
     */
    protected ImportContainer singleFile(File file, ImportConfig config) {

        if (file == null) {
            // Can't do anything about it.
            return null;
        }

        final String path = file.getAbsolutePath();
        final boolean readable = file.exists() && file.canRead();
        if (!readable) {
            safeUpdate(new ErrorHandler.UNREADABLE_FILE(path,
                new java.io.FileNotFoundException(path), this));
            return null;
        }

        // Defaults reported with an error event if the reader fails before
        // the real values are known.
        String format = null;
        String[] usedFiles = new String[] { path };
        final long began = System.currentTimeMillis();

        try {
            try {
                setids++;
                reader.close();
                reader.setMetadataStore(new ImageNameMetadataStore());
                reader.setMetadataOptions(
                        new DefaultMetadataOptions(METADATA_LEVEL));
                reader.setId(path);
                format = reader.getFormat();
                usedFiles = getOrderedFiles();

                final boolean isSPW = Arrays
                        .asList(reader.getReader().getDomains())
                        .contains(FormatTools.HCS_DOMAIN);
                final String readerClassName =
                        reader.unwrap().getClass().getCanonicalName();

                final ImportContainer ic = new ImportContainer(config,
                        file, null, null,
                        readerClassName, usedFiles, isSPW);
                ic.setDoThumbnails(config.doThumbnails.get());
                ic.setNoStatsInfo(config.noStatsInfo.get());

                // Fall back to the file name when no name was configured.
                final String requestedName = config.userSpecifiedName.get();
                ic.setUserSpecifiedName(
                        requestedName == null ? file.getName() : requestedName);

                ic.setUserSpecifiedDescription(config.userSpecifiedDescription.get());
                ic.setCustomAnnotationList(config.annotations.get());
                return ic;
            } finally {
                readerTime += (System.currentTimeMillis() - began);
                reader.close();
            }
        } catch (UnsupportedCompressionException uce) {
            unknown++;
            // Handling as UNKNOWN_FORMAT for 4.3.0
            safeUpdate(new ErrorHandler.UNKNOWN_FORMAT(path, uce, this));
        } catch (UnknownFormatException ufe) {
            unknown++;
            safeUpdate(new ErrorHandler.UNKNOWN_FORMAT(path, ufe, this));
        } catch (MissingLibraryException mle) {
            safeUpdate(new ErrorHandler.MISSING_LIBRARY(path, mle, usedFiles, format));
        } catch (Throwable t) {
            // The event takes an Exception, so anything else is wrapped.
            final Exception wrapped =
                    (t instanceof Exception) ? (Exception) t : new Exception(t);
            safeUpdate(new ErrorHandler.FILE_EXCEPTION(path, wrapped, usedFiles, format));
        }

        return null;
    }

    /**
     * Collects the Image name recorded for each series of the current
     * reader. Expects the reader's metadata store to be an
     * {@link ImageNameMetadataStore}.
     *
     * @return a list of Image names, in the order of <i>series</i>; an entry
     *         is null when no name was recorded for that series.
     */
    private List<String> getImageNames() {
        final ImageNameMetadataStore store =
                (ImageNameMetadataStore) reader.getMetadataStore();
        final Map<Integer, String> namesBySeries = store.getImageNames();
        final List<String> names = new ArrayList<String>();
        for (int series = 0; series < reader.getSeriesCount(); series++) {
            names.add(namesBySeries.get(series));
        }
        return names;
    }

    /**
     * Returns the file names reported by the reader, re-ordered by the
     * {@link FileInfo#usedToInitialize} flag: files which can be used to
     * initialize a fileset come first, all remaining files follow. The
     * reader's relative order is kept within each of the two parts.
     */
    private String[] getOrderedFiles() {
        final FileInfo[] infos = reader.getAdvancedUsedFiles(false);

        final List<String> initializers = new ArrayList<String>(infos.length);
        final List<String> others = new ArrayList<String>(infos.length);
        for (FileInfo info : infos) {
            if (info.usedToInitialize) {
                initializers.add(info.filename);
            } else {
                others.add(info.filename);
            }
        }

        initializers.addAll(others);
        return initializers.toArray(new String[initializers.size()]);
    }

    /**
     * Publishes a {@link SCANNING} event carrying the current counters and
     * aborts the scan if the observer cancelled the event.
     *
     * @param f file or directory being reported
     * @param d depth being reported
     * @throws CANCEL if the event was cancelled while being published
     */
    private void scanWithCancel(File f, int d) throws CANCEL {
        final SCANNING event = new SCANNING(f, d, count, total);
        safeUpdate(event);
        if (!event.cancel) {
            return;
        }
        throw new CANCEL();
    }

    /**
     * Passes the event to the observer. Any {@link Exception} raised by the
     * observer is logged and not propagated.
     *
     * @param event event to publish
     */
    private void safeUpdate(ImportEvent event) {
        try {
            observer.update(null, event);
        } catch (Exception ex) {
            final String message =
                    String.format("Error on %s with %s", observer, event);
            log.error(message, ex);
        }
    }

    /**
     * Handles one file during either pass. Every file is counted. Dot files
     * are then skipped, and every 100th counted file publishes a
     * {@link SCANNING} event. While the total is still unknown nothing else
     * happens. Once it is known, the file is parsed, unless an earlier
     * container already reported it as a used file, and the resulting
     * container and its used files are recorded.
     *
     * @param file the selected file
     * @param depth the depth of the scan
     * @param collection the result objects, ignored
     */
    @Override
    public void handleFile(File file, int depth, Collection collection) {

        count++;

        // Our own filtering: dot files are omitted.
        final boolean dotFile = file.getName().startsWith(".");
        if (dotFile) {
            return;
        }

        // If this is the 100th file, publish an event
        if (count % 100 == 0) {
            scanWithCancel(file, depth);
        }

        // If this is just a count, return
        final boolean countingOnly = total < 0;
        if (countingOnly) {
            return;
        }

        // Optimization: skip files already reported by an earlier container.
        final String absolutePath = file.getAbsolutePath();
        if (allFiles.contains(absolutePath)) {
            return;
        }

        final ImportContainer info = singleFile(file, reader.getConfig());
        if (info == null) {
            return;
        }

        containers.add(info);

        final String[] used = info.getUsedFiles();
        allFiles.addAll(Arrays.asList(used));
        for (String usedFile : used) {
            List<String> users = usedBy.get(usedFile);
            if (users == null) {
                users = new ArrayList<String>();
                usedBy.put(usedFile, users);
            }
            users.add(absolutePath);
        }
    }

    /**
     * The {@link Groups} class serves as an algorithm for sorting the usedBy
     * map from the {@link ImportCandidates#walk(File, Collection)} method.
     * These objects should never leave the outer class.
     *
     * It is important that the Groups keep their used files ordered.
     * @see ImportCandidates#getOrderedFiles()
     */
    private static class Groups {

        /**
         * View of a single key of the enclosing {@code usedBy} map: the
         * entries listed under the key and the keys whose entry lists
         * mention it.
         */
        private class Group {

            /** Key of the {@code usedBy} map this group was built for. */
            final String key;

            /**
             * Copy of the list stored under {@link #key}, with the first
             * occurrence of the key itself removed.
             */
            final List<String> theyUseMe;

            /**
             * Keys of the {@code usedBy} map whose list contains
             * {@link #key}, in map order, with the key itself removed.
             */
            final List<String> iUseThem;

            public Group(String key) {
                this.key = key;

                final List<String> users = new ArrayList<String>(usedBy.get(key));
                users.remove(key);
                this.theyUseMe = users;

                final List<String> used = new ArrayList<String>();
                for (String candidate : usedBy.keySet()) {
                    if (usedBy.get(candidate).contains(key)) {
                        used.add(candidate);
                    }
                }
                used.remove(key);
                this.iUseThem = used;
            }

            /**
             * Drops this group from the enclosing {@code groups} map when it
             * lists at most one other key in {@link #iUseThem} and has at
             * least one entry in {@link #theyUseMe}.
             */
            public void removeSelfIfSingular() {
                final boolean hasUsers = !theyUseMe.isEmpty();
                final boolean usesAtMostOne = iUseThem.size() <= 1;
                if (usesAtMostOne && hasUsers) {
                    groups.remove(key);
                }
            }

            /**
             * @return the key followed by each {@link #iUseThem} entry, one
             *         per line
             */
            public String toShortString() {
                final StringBuilder out = new StringBuilder();
                out.append(key).append("\n");
                for (String used : iUseThem) {
                    out.append(used).append("\n");
                }
                return out.toString();
            }

            /**
             * @return a separator line and a "# Group:" header, followed by
             *         the same lines as {@link #toShortString()}
             */
            @Override
            public String toString() {
                final StringBuilder out = new StringBuilder();
                out.append("#======================================\n");
                out.append("# Group: " + key);
                out.append("\n");
                out.append(toShortString());
                return out.toString();
            }

        }

        /** Map the groups are derived from; shared with the caller, not copied. */
        private final Map<String, List<String>> usedBy;

        /** One {@link Group} per key of {@link #usedBy}, in map order. */
        private final Map<String, Group> groups = new LinkedHashMap<String, Group>();

        /** Keys of the surviving groups; null until {@link #parse(List)} has run. */
        private List<String> ordering;

        /**
         * Builds one {@link Group} for every key of the given map.
         *
         * @param usedBy map from a key to the list of entries recorded for it
         */
        Groups(Map<String, List<String>> usedBy) {
            this.usedBy = usedBy;
            for (Map.Entry<String, List<String>> entry : usedBy.entrySet()) {
                final String key = entry.getKey();
                groups.put(key, new Group(key));
            }
        }

        /**
         * @return number of surviving groups; fails with a
         *         {@link NullPointerException} if {@link #parse(List)} has
         *         not run yet, because the ordering is still null
         */
        public int size() {
            return this.ordering.size();
        }

        /**
         * @return the keys of the surviving groups (the internal list itself,
         *         not a copy)
         */
        @SuppressWarnings("unused")
        public List<String> getPaths() {
            // Check: size() dereferences the ordering, so this fails before
            // parse(List) has run.
            size();
            return this.ordering;
        }

        /**
         * Reduces the groups and rewrites the given container list in place.
         * First, every group is asked to remove itself if it is singular.
         * Then the list is replaced by those containers whose file's
         * absolute path equals a surviving group key, in group-key order.
         * Finally, each kept container's file is set to the first of its
         * used files and its used-files total size is refreshed.
         *
         * Containers are indexed by absolute path once, so the filtering
         * step is linear rather than a nested scan of all containers for
         * every key.
         *
         * @param containers list to filter and rewrite; modified in place
         * @return this instance
         * @throws RuntimeException if this method has already been called
         */
        Groups parse(List<ImportContainer> containers) {
            if (ordering != null) {
                throw new RuntimeException("Already ordered");
            }

            // Iterate over a copy: groups remove themselves from the map.
            for (Group g : new ArrayList<Group>(groups.values())) {
                g.removeSelfIfSingular();
            }
            ordering = new ArrayList<String>(groups.keySet());

            // Index the containers by absolute path, keeping their original
            // relative order inside each bucket.
            final Map<String, List<ImportContainer>> byPath =
                    new LinkedHashMap<String, List<ImportContainer>>();
            for (ImportContainer importContainer : containers) {
                final String path = importContainer.getFile().getAbsolutePath();
                List<ImportContainer> bucket = byPath.get(path);
                if (bucket == null) {
                    bucket = new ArrayList<ImportContainer>();
                    byPath.put(path, bucket);
                }
                bucket.add(importContainer);
            }

            // Here we remove all the superfluous import containers: only
            // those matching a surviving key are put back, in key order.
            containers.clear();
            for (String key : ordering) {
                final List<ImportContainer> bucket = byPath.get(key);
                if (bucket != null) {
                    containers.addAll(bucket);
                }
            }

            // Now rewrite the filename chosen based on the first file in the
            // getUsedFiles.
            for (ImportContainer c : containers) {
                c.setFile(new File(c.getUsedFiles()[0]));
                c.updateUsedFilesTotalSize();
            }
            return this;
        }

        /**
         * Writes the groups to standard output. A lone group is printed in
         * its short form; otherwise each group is printed with its header.
         */
        @SuppressWarnings("unused")
        void print() {
            final Collection<Group> all = groups.values();
            if (all.size() != 1) {
                for (Group g : all) {
                    System.out.println(g);
                }
                return;
            }
            final Group only = all.iterator().next();
            System.out.println(only.toShortString());
        }

        /**
         * @return the text of every group, each followed by a newline
         */
        @Override
        public String toString() {
            final StringBuilder out = new StringBuilder();
            for (Group g : groups.values()) {
                out.append(g.toString()).append("\n");
            }
            return out.toString();
        }

        /**
         * Prints the given text as a banner line, surrounded by blank lines,
         * to standard output.
         *
         * @param s banner text
         */
        static void line(String s) {
            final String banner = "\n# ************ " + s + " ************ \n";
            System.out.println(banner);
        }

        /**
         * Runs one numbered scenario: prints the groups built from the given
         * map, parses them against an empty container list, then prints the
         * groups again.
         *
         * @param count scenario number shown in the banners
         * @param t map to build the groups from
         * @return the parsed groups
         */
        static Groups test(int count, Map<String, List<String>> t) {
            System.out.println("\n\n");
            line("TEST " + count);

            final Groups result = new Groups(t);
            System.out.println(result);

            final List<ImportContainer> none = new ArrayList<ImportContainer>();
            result.parse(none);

            line("RESULT " + count);
            System.out.println(result);
            return result;
        }

        /**
         * Prints a notice and then runs four built-in scenarios through
         * {@link #test(int, Map)}.
         *
         * @return the groups produced by the fourth scenario
         */
        @SuppressWarnings("unused")
        static Groups test() {
            System.out.println("\n");
            line("NOTICE");
            System.out.println("#  You have entered \"\" \"\" as the path to import.");
            System.out.println("#  This runs the test suite. If you would like to");
            System.out.println("#  import the current directory use \"\".");

            final Map<String, List<String>> scenario1 =
                    new LinkedHashMap<String, List<String>>();
            scenario1.put("a.dv.log", Arrays.asList("b.dv"));
            scenario1.put("b.dv", Arrays.asList("b.dv"));
            test(1, scenario1);

            final Map<String, List<String>> scenario2 =
                    new LinkedHashMap<String, List<String>>();
            scenario2.put("a.png", Arrays.asList("a.png"));
            test(2, scenario2);

            final Map<String, List<String>> scenario3 =
                    new LinkedHashMap<String, List<String>>();
            scenario3.put("a.tiff", Arrays.asList("a.tiff", "c.lei"));
            scenario3.put("b.tiff", Arrays.asList("b.tiff", "c.lei"));
            scenario3.put("c.lei", Arrays.asList("c.lei"));
            test(3, scenario3);

            final Map<String, List<String>> scenario4 =
                    new LinkedHashMap<String, List<String>>();
            scenario4.put("overlay.tiff", Arrays.asList("overlay.tiff"));
            scenario4.put("b.tiff", Arrays.asList("b.tiff", "overlay.tiff"));
            return test(4, scenario4);
        }

    }

}