HadoopMultiFileStore.java example

Explorer

knowledgestore-master
- ks-client
  - src
    - main
      - java
        eu
        fbk
        knowledgestore
        client
        Client.java
        ProxyConfig.java
        package-info.java
    - test
      - java
        eu
        fbk
        knowledgestore
        client
        ClientExample.java
        ClientSparqlTest.java
        ClientStressTest.java
        KnowledgeStoreClientTest.java
- ks-core
  - src
    - main
      - java
        eu
        fbk
        knowledgestore
        AbstractKnowledgeStore.java
        AbstractSession.java
        ForwardingKnowledgeStore.java
        ForwardingSession.java
        KnowledgeStore.java
        Operation.java
        OperationException.java
        Outcome.java
        Session.java
        data
        Criteria.java
        Data.java
        Dictionary.java
        Handler.java
        ParseException.java
        Record.java
        Representation.java
        Serializer.java
        Smaz.java
        Stream.java
        XPath.java
        XPathFunction.java
        XPathNavigator.java
        package-info.java
        internal
        CommandLine.java
        Compression.java
        Logging.java
        Util.java
        jaxrs
        Protocol.java
        Serializer.java
        rdf
        CompactBindingSet.java
        CompactValueFactory.java
        HtmlRDF.java
        HtmlSparql.java
        RDFUtil.java
        package-info.java
        package-info.java
        vocabulary
        CKR.java
        KS.java
        KSR.java
        NFO.java
        NIE.java
        NIF.java
        NWR.java
        SEM.java
        TIME.java
        package-info.java
        org
        jaxen
        expr
        DefaultRelationalExpr.java
    - test
      - java
        eu
        fbk
        knowledgestore
        RecordTest.java
        XPathTest.java
        data
        SerializerTest.java
- ks-distribution
  - src
    - test
      - java
        eu
        fbk
        knowledgestore
        distribution
        StoreTester.java
- ks-populator-naf
  - src
    - main
      - java
        eu
        fbk
        knowledgestore
        populator
        naf
        Consumer.java
        KSPresentation.java
        NAFRunner.java
        Producer.java
        connection
        KnowledgestoreServer.java
        ObjectFactory.java
        finalizeThread.java
        getToBeProcessed.java
        groupTxtF.java
        model
        Attribution.java
        CausalRelations.java
        Chunk.java
        Chunks.java
        Clink.java
        Component.java
        Constituency.java
        Coref.java
        Coreferences.java
        Dates.java
        Dep.java
        Deps.java
        Edge.java
        Entities.java
        Entity.java
        ExternalRef.java
        ExternalReferences.java
        FactVal.java
        Factualities.java
        Factuality.java
        Factualitylayer.java
        Factvalue.java
        FileDesc.java
        LinguisticProcessors.java
        Locations.java
        Lp.java
        Mark.java
        Markables.java
        NAF.java
        NafHeader.java
        Nt.java
        ObjectFactory.java
        Opinion.java
        OpinionExpression.java
        OpinionHolder.java
        OpinionTarget.java
        Opinions.java
        Predicate.java
        PredicateAnchor.java
        Public.java
        Raw.java
        References.java
        Role.java
        Sentiment.java
        Span.java
        Srl.java
        Statement.java
        StatementCue.java
        StatementSource.java
        StatementTarget.java
        T.java
        Target.java
        TemporalRelations.java
        Term.java
        Terms.java
        Text.java
        TimeExpressions.java
        Timex3.java
        Tlink.java
        Topic.java
        Topics.java
        Tree.java
        Tunits.java
        Wf.java
        nafPopulator.java
        package-info.java
        processNAF.java
        processNAFVariables.java
        runPopoulator.java
        statistics.java
        submitKS.java
        sumStatistics.java
    - test
      - java
        eu
        fbk
        knowledgestore
        populator
        naf
        runPopulatorTest.java
        testMain.java
- ks-populator-rdf
  - src
    - main
      - java
        eu
        fbk
        knowledgestore
        populator
        rdf
        Decoder.java
        RDFPopulator.java
        package-info.java
- ks-server
  - src
    - main
      - java
        eu
        fbk
        knowledgestore
        datastore
        CachingDataStore.java
        DataStore.java
        DataTransaction.java
        ForwardingDataStore.java
        ForwardingDataTransaction.java
        LoggingDataStore.java
        LuceneDataStore.java
        MemoryDataStore.java
        MySQLDataStore.java
        SolrDataStore.java
        SynchronizedDataStore.java
        TripleDataStore.java
        package-info.java
        utils
        LuceneIndexReader.java
        filestore
        FileExistsException.java
        FileMissingException.java
        FileStore.java
        ForwardingFileStore.java
        GzippedFileStore.java
        HadoopFileStore.java
        HadoopMultiFileStore.java
        LoggingFileStore.java
        SynchronizedFileStore.java
        package-info.java
        runtime
        Component.java
        DataCorruptedException.java
        Factory.java
        Files.java
        Launcher.java
        SerializerAvro.java
        Synchronizer.java
        package-info.java
        server
        Server.java
        SparqlHelper.java
        package-info.java
        triplestore
        ForwardingTripleStore.java
        ForwardingTripleTransaction.java
        LoggingTripleStore.java
        RepositoryTripleStore.java
        RewritingTripleStore.java
        SPARQLRenderer.java
        SelectQuery.java
        SynchronizedTripleStore.java
        TripleStore.java
        TripleTransaction.java
        package-info.java
    - test
      - java
        eu
        fbk
        knowledgestore
        datastore
        AbstractDataStoreTest.java
        LuceneDataStoreTest.java
        MySQLDataStoreTest.java
        filestore
        AbstractFileStoreTest.java
        HadoopFileStoreTest.java
        HadoopMultiFileStoreTest.java
        runtime
        FactoryTest.java
        triplestore
        SPARQLRendererComplexTest.java
        SPARQLRendererDAWGTest.java
        SPARQLRendererSail.java
        SPARQLRendererW3CTest.java
- ks-server-hbase
  - src
    - main
      - java
        eu
        fbk
        knowledgestore
        datastore
        hbase
        HBaseDataStore.java
        HBaseDataTransaction.java
        HBaseIterator.java
        HBaseScanIterator.java
        exception
        DataTransactionBlockingException.java
        package-info.java
        package-info.java
        utils
        AbstractHBaseUtils.java
        AvroSchemas.java
        AvroSerializer.java
        HBaseConstants.java
        HBaseFilter.java
        HBaseUtils.java
        OmidHBaseUtils.java
        TephraHBaseUtils.java
        package-info.java
    - test
      - java
        eu
        fbk
        knowledgestore
        datastore
        hbase
        HBaseDataStoreTest.java
        HBaseDataTransactionTester.java
        HBaseLowlevelUtilities.java
        utils
        AvroSchemasTest.java
        AvroSerializerTest.java
        HBaseTestUtils.java
- ks-server-http
  - src
    - main
      - java
        eu
        fbk
        knowledgestore
        server
        http
        CustomConfig.java
        HttpServer.java
        KeystoreConfig.java
        SecurityConfig.java
        UIConfig.java
        jaxrs
        Application.java
        Axioms.java
        Crud.java
        Custom.java
        Entities.java
        Files.java
        Match.java
        Mentions.java
        RenderUtils.java
        Resource.java
        Resources.java
        Root.java
        Sparql.java
        SparqlDelete.java
        SparqlUpdate.java
        TableQuery.java
        package-info.java
        package-info.java
    - test
      - java
        eu
        fbk
        knowledgestore
        server
        http
        RepresentationResourceTest.java
- ks-server-virtuoso
  - src
    - main
      - java
        eu
        fbk
        knowledgestore
        triplestore
        virtuoso
        VirtuosoJdbcTripleStore.java
        VirtuosoTripleStore.java
        VirtuosoTripleTransaction.java
        package-info.java
    - test
      - java
        eu
        fbk
        knowledgestore
        triplestore
        virtuoso
        VirtuosoTripleStoreTest.java
- ks-tool
  - src
    - main
      - java
        eu
        fbk
        knowledgestore
        tool
        Dumper.java
        TestDriver.java
        TestGenerator.java
        TestUtil.java

package eu.fbk.knowledgestore.filestore;

import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Multiset;
import com.google.common.io.ByteStreams;
import eu.fbk.knowledgestore.data.Data;
import eu.fbk.knowledgestore.data.Stream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.io.*;
import java.util.*;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

/**
 * A {@code FileStore} implementation based on the Hadoop API optimized for huge number of files.
 * <p>
 * An {@code HadoopFileStore} stores its files in an Hadoop
 * {@link org.apache.hadoop.fs.FileSystem}, under a certain, configurable root path; the
 * filesystem can be any of the filesystems supported by the Hadoop API, including the local (raw)
 * filesystem and the distributed HDFS filesystem.
 * </p>
 * <p>
 * Files are stored in a a two-level directory structure, where first level directories reflect
 * the MIME types of stored files, and second level directories are buckets of files whose name is
 * obtained by hashing the filename; buckets are used in order to equally split a large number of
 * files in several subdirectories, overcoming possible filesystem limitations in terms of maximum
 * number of files storable in a directory.
 * </p>
 */
public final class HadoopMultiFileStore implements FileStore {

    private static final Logger LOGGER = LoggerFactory.getLogger(HadoopMultiFileStore.class);

    private static final String DEFAULT_ROOT_PATH = "files";

    private static final String DEFAULT_LUCENE_PATH = "./lucene-index";

    private static final int DEFAULT_NUM_SMALL_FILES = 10;

    private static final long DEFAULT_CLEANUP_PERIOD = 10000L; // 5s

    private static final String SMALL_FILES_PATH = "_small";

    // private static final int MAX_LUCENE_SEGMENTS = 100;

    private final int MAX_LUCENE_ATTEMPTS = 3;

    private static final String KEY_FIELD = "filename";

    private static final String VALUE_FIELD = "zipfilename";

    private static final String DELETED = "__deleted";

    private final FileSystem fileSystem;

    private final Path rootPath;

    private final Path smallFilesPath;

    private final File luceneFolder;

    private final int numSmallFiles;

    private final long cleanupPeriod;

    private final Multiset<String> openedFiles;

    private final ReadWriteLock lock;

    private final AtomicBoolean active;

    private IndexReader luceneReader;

    private IndexWriter luceneWriter;

    private Future<?> cleanupFuture;

    private long zipNameCounter;

    /**
     * Creates a new {@code HadoopFileStore} storing files in the {@code FileSystem} and under the
     * {@code rootPath} specified.
     *
     * @param fileSystem
     *            the file system, not null
     * @param path
     *            the root path where to store files, possibly relative to the filesystem working
     *            directory; if null, the default root path {@code files} will be used
     * @param numSmallFile
     *            the number of files to put in each zip file
     * @param cleanupPeriod
     *            the amount of time in milliseconds between cleanup operations
     */
    public HadoopMultiFileStore(final FileSystem fileSystem, @Nullable final String lucenePath,
            @Nullable final String path, @Nullable final Integer numSmallFile,
            @Nullable final Long cleanupPeriod) {

        this.fileSystem = Preconditions.checkNotNull(fileSystem);
        this.luceneFolder = new File(MoreObjects.firstNonNull(lucenePath, DEFAULT_LUCENE_PATH));
        this.rootPath = new Path(MoreObjects.firstNonNull(path, DEFAULT_ROOT_PATH))
                .makeQualified(this.fileSystem); // resolve wrt workdir
        this.smallFilesPath = new Path(this.rootPath.toString() + File.separator
                + SMALL_FILES_PATH).makeQualified(this.fileSystem);
        this.numSmallFiles = numSmallFile != null ? numSmallFile : DEFAULT_NUM_SMALL_FILES;
        this.cleanupPeriod = cleanupPeriod != null ? cleanupPeriod : DEFAULT_CLEANUP_PERIOD;
        this.openedFiles = HashMultiset.create();
        this.lock = new ReentrantReadWriteLock(true);
        this.active = new AtomicBoolean(false);
        this.zipNameCounter = System.currentTimeMillis();
        LOGGER.info("{} configured, paths={};{}", getClass().getSimpleName(), this.rootPath,
                this.luceneFolder);
    }

    @Override
    public void init() throws IOException {

        // Create root folder if missing
        if (!this.fileSystem.exists(this.rootPath)) {
            LOGGER.debug("Creating root folder {}", this.rootPath);
            if (!this.fileSystem.mkdirs(this.rootPath)) {
                throw new IOException("Cannot create root folder " + this.luceneFolder);
            }
        }

        // Create sub-folder for small files, if missing
        if (!this.fileSystem.exists(this.smallFilesPath)) {
            LOGGER.debug("Creating small files folder {}", this.smallFilesPath);
            if (!this.fileSystem.mkdirs(this.smallFilesPath)) {
                throw new IOException("Cannot create small files folder " + this.smallFilesPath);
            }
        }

        // Create folder for lucene index, if missing
        if (!this.luceneFolder.exists()) {
            LOGGER.debug("Created lucene folder {}", this.luceneFolder);
            if (!this.luceneFolder.mkdirs()) {
                throw new IOException("Cannot create lucene folder " + this.luceneFolder);
            }
        }

        // Initialize Lucene writer and reader
        this.luceneWriter = new IndexWriter(FSDirectory.open(this.luceneFolder),
                new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
        this.luceneReader = this.luceneWriter.getReader();

        // Mark the component as active
        this.active.set(true);

        // Schedule periodic cleanup
        this.cleanupFuture = Data.getExecutor().scheduleWithFixedDelay(new Runnable() {

            @Override
            public void run() {
                try {
                    merge();
                    purge();
                    indexOptimize();
                } catch (final Throwable ex) {
                    LOGGER.warn("Periodic cleanup failed", ex);
                }
            }

        }, this.cleanupPeriod, this.cleanupPeriod, TimeUnit.MILLISECONDS);
    }

    @Override
    public InputStream read(final String fileName) throws IOException {

        // This prevents concurrent write/delete/merge/purge operations to occur
        this.lock.readLock().lock();

        try {
            // Check active flag
            Preconditions.checkState(this.active.get());

            // Lookup the current zip file / deleted status for the file name supplied
            final String zipName = indexGet(fileName);

            // Proceed only if file is not marked as deleted
            if (!DELETED.equals(zipName)) {
                if (zipName != null) {
                    // Search in zipped file
                    final Path zipPath = pathForZipFile(zipName);
                    try {
                        final ZipInputStream stream = new ZipInputStream(openForRead(zipPath));
                        ZipEntry entry;
                        while ((entry = stream.getNextEntry()) != null) {
                            if (entry.getName().equals(fileName)) {
                                LOGGER.debug("Reading {} from ZIP file {}", fileName, zipPath);
                                return stream;
                            }
                        }
                    } catch (final IOException ex) {
                        throw new IOException("Cannot read " + fileName + " from ZIP file"
                                + zipPath, ex);
                    }

                } else {
                    // Search in small files
                    final Path smallPath = pathForSmallFile(fileName);
                    if (this.fileSystem.exists(smallPath)) {
                        LOGGER.debug("Reading small file {}", smallPath);
                        return openForRead(smallPath);
                    }
                }
            }

            // Report missing file
            throw new FileMissingException(fileName, "The file does not exist");

        } finally {
            // Always release the lock
            this.lock.readLock().unlock();
        }
    }

    @Override
    public OutputStream write(final String fileName) throws IOException {

        // This prevents any other read/write/delete/merge/purge operation to occur
        this.lock.writeLock().lock();

        try {
            // Check active flag
            Preconditions.checkState(this.active.get());

            // Throw an exception in case a file with the same name already exists
            final String zipName = indexGet(fileName);
            final Path smallPath = pathForSmallFile(fileName);
            final boolean fileExists = this.fileSystem.exists(smallPath);
            if (!DELETED.equals(zipName) && (zipName != null || fileExists)) {
                throw new FileExistsException(fileName, "Cannot overwrite file");
            }

            // Write small file
            LOGGER.debug("Creating small file {}", smallPath);
            final OutputStream stream = openForWrite(smallPath); // may overwrite file

            // Update the index, marking the file as no more deleted if necessary
            if (fileExists) {
                try {
                    final Map<String, String> entries = new HashMap<>();
                    entries.put(fileName, null);
                    indexPut(entries);
                } catch (final Throwable ex) {
                    stream.close();
                    Throwables.propagateIfPossible(ex, IOException.class);
                    Throwables.propagate(ex);
                }
            }

            // Return opened stream
            return stream;

        } finally {
            // Always release the lock
            this.lock.writeLock().unlock();
        }
    }

    // Synchronization serves to (1) avoid the same ZIP file to be exploded multiple times and
    // (2) to avoid deleting files while merge is occurring

    @Override
    public void delete(final String fileName) throws FileMissingException, IOException {

        // This prevents other read/write/delete/merge/purge operations to occur
        this.lock.writeLock().lock();

        try {
            // Check active flag
            Preconditions.checkState(this.active.get());

            // Lookup the zip file / deleted status for the supplied file
            final String zipName = indexGet(fileName);

            // Proceed only if file is not marked as deleted
            if (!DELETED.equals(zipName)) {
                if (zipName != null) {

                    // Explode the ZIP file (except for the deleted file)
                    final Map<String, String> entries = new HashMap<>();
                    final Path zipPath = pathForZipFile(zipName);
                    LOGGER.debug("Exploding zip file {}", zipPath);
                    try (final ZipInputStream zipStream = new ZipInputStream(openForRead(zipPath))) {
                        ZipEntry entry;
                        while ((entry = zipStream.getNextEntry()) != null) {
                            final String smallName = entry.getName();
                            if (!smallName.equals(fileName)) {
                                entries.put(smallName, null);
                                final Path smallPath = pathForSmallFile(smallName);
                                // note: small file may already exist (as previously packed in ZIP
                                // file but not yet purged): in this case it is silently
                                // overwritten
                                try (OutputStream stream = openForWrite(smallPath)) {
                                    ByteStreams.copy(zipStream, stream);
                                }
                            }
                        }
                    } catch (final IOException ex) {
                        // Perform clean up and propagate exception
                        for (final String smallName : entries.keySet()) {
                            final Path smallPath = pathForSmallFile(smallName);
                            try {
                                this.fileSystem.delete(smallPath, false);
                            } catch (final Throwable ex2) {
                                LOGGER.warn("Could not delete extracted file " + smallPath
                                        + " after failed to explod ZIP file " + zipPath, ex2);
                            }
                        }
                        throw new IOException("Cannot explode ZIP file " + zipPath, ex);
                    }

                    // Update index, marking small file and ZIP as deleted and de-associating
                    // other small files previously in the ZIP from the ZIP file
                    entries.put(fileName, DELETED); // because the file may still exist
                    entries.put(zipName, DELETED);
                    indexPut(entries);
                    return;

                } else {
                    // Mark a small file with the name supplied as deleted
                    final Path smallPath = pathForSmallFile(fileName);
                    if (this.fileSystem.exists(smallPath)) {
                        LOGGER.debug("Marking small file {} as deleted", smallPath);
                        indexPut(ImmutableMap.of(fileName, DELETED));
                        return;
                    }
                }
            }

            // Report file does not exist
            throw new FileMissingException(fileName, "The file does not exist");

        } finally {
            // Always release the lock
            this.lock.writeLock().unlock();
        }
    }

    @Override
    public Stream<String> list() throws IOException {

        // This prevents write/delete/merge/purge operations to occur ONLY for the time needed to
        // retrieve (non-merged/non-deleted) small files and retrieve an iterator over Lucene
        // index. This DOES NOT PROTECT the iteration over the Lucene index, thus changes to the
        // file store during the iteration may be reflected in the iteration results
        this.lock.readLock().lock();

        try {
            // Check active flag
            Preconditions.checkState(this.active.get());

            // Retrieve small files
            final List<String> smallNames = new ArrayList<>();
            for (final FileStatus fs : this.fileSystem.listStatus(this.smallFilesPath)) {
                final String smallName = fs.getPath().getName();
                if (indexGet(smallName) != null) {
                    smallNames.add(smallName);
                }
            }

            // Retrieve an iterator over zipped files
            final Iterator<String> zippedNames = indexList(false);

            // Return the concatenation of the two
            return Stream.concat(Stream.create(smallNames), Stream.create(zippedNames));

        } finally {
            // Always release the lock
            this.lock.readLock().unlock();
        }
    }

    @Override
    public void close() {

        // This ensures no other read/write/delete/merge/delete operation is running
        this.lock.writeLock().lock();

        try {
            try {
                // Stop periodic cleanup
                this.cleanupFuture.cancel(false);
            } catch (final Throwable ex) {
                LOGGER.warn("Unable to stop periodic cleanup task", ex);
            }

            try {
                // Close Lucene reader
                this.luceneReader.close();
            } catch (final Throwable ex) {
                LOGGER.warn("Unable to close Lucene reader", ex);
            }

            try {
                // Optimize Lucene writer before closing
                this.luceneWriter.optimize();
            } catch (final Exception ex) {
                LOGGER.warn("Unable to optimize Lucene writer", ex);
            }

            try {
                // Close Lucene writer
                this.luceneWriter.close();
            } catch (final Exception ex) {
                LOGGER.warn("Unable to close Lucene writer", ex);
            }

        } finally {
            // Always mark component as inactive and release the lock
            this.active.set(false);
            this.lock.writeLock().unlock();
        }
    }

    @Override
    public String toString() {
        return getClass().getSimpleName();
    }

    private void purge() throws IOException {

        // This prevents other read/write/delete/merge/purge operations from running
        this.lock.writeLock().lock();

        try {
            // Identify deleted files that can be safely purged (i.e., not opened)
            final Set<String> zippedFiles = new HashSet<>();
            final Set<String> purgableFiles = new HashSet<>();
            for (final Iterator<String> i = indexList(true); i.hasNext();) {
                purgableFiles.add(i.next());
            }
            final FileStatus[] files = this.fileSystem.listStatus(this.smallFilesPath);
            if (files != null) {
                for (final FileStatus fs : files) {
                    final String fileName = fs.getPath().getName();
                    final String zipName = indexGet(fileName);
                    if (zipName != null && !zipName.equals(DELETED)) {
                        purgableFiles.add(fileName);
                        zippedFiles.add(fileName);
                    }
                }
            }
            synchronized (this.openedFiles) {
                purgableFiles.removeAll(this.openedFiles.elementSet());
            }

            // Abort if there is nothing to do
            if (purgableFiles.isEmpty()) {
                return;
            }

            // Delete purgable files
            LOGGER.debug("Purging {} files", purgableFiles.size());
            final Map<String, String> entries = new HashMap<>();
            for (final String file : purgableFiles) {
                try {
                    final Path smallPath = pathForSmallFile(file);
                    if (this.fileSystem.exists(smallPath)) {
                        this.fileSystem.delete(smallPath, false);
                        LOGGER.debug("Deleted small file {}", smallPath);
                    } else {
                        final Path zipPath = pathForZipFile(file);
                        if (this.fileSystem.exists(zipPath)) {
                            this.fileSystem.delete(zipPath, false);
                            LOGGER.debug("Deleted ZIP file {}", zipPath);
                        } else {
                            LOGGER.warn("Cannot find file " + file);
                        }
                    }
                    if (!zippedFiles.contains(file)) {
                        entries.put(file, null);
                    }
                } catch (final Throwable ex) {
                    LOGGER.warn("Cannot purge file " + file, ex);
                }
            }

            // Update index
            indexPut(entries);

        } finally {
            // Always release the lock
            this.lock.writeLock().unlock();
        }
    }

    private void merge() throws IOException {

        // This ensures no other read/write/delete/merge/purge operation is running
        this.lock.writeLock().lock();

        try {
            // Retrieve the list of small files that can be packed in a zip file
            final List<String> mergeableNames = new LinkedList<>();
            final FileStatus[] files = this.fileSystem.listStatus(this.smallFilesPath);
            if (files != null) {
                for (final FileStatus fs : files) {
                    final String name = fs.getPath().getName();
                    if (!fs.isDir() && indexGet(name) == null) {
                        mergeableNames.add(name);
                    }
                }
            }

            // Pack files in batches of 'numSmallFiles' size
            while (mergeableNames.size() > this.numSmallFiles) {

                // Determine the name of the zip file
                final String zipName = Data.hash(this.zipNameCounter++) + ".zip";
                final Path zipPath = pathForZipFile(zipName);

                // Status variable necessary for cleanup in case the operation fails
                boolean opened = false;
                final Map<String, String> entries = new HashMap<>();
                try {
                    // Try to build the zip file
                    try (final ZipOutputStream out = new ZipOutputStream(openForWrite(zipPath))) {
                        opened = true;
                        for (int i = 0; i < this.numSmallFiles; ++i) {
                            final String fileName = mergeableNames.remove(0);
                            out.putNextEntry(new ZipEntry(fileName));
                            try (final InputStream in = openForRead(pathForSmallFile(fileName))) {
                                ByteStreams.copy(in, out);
                            }
                            entries.put(fileName, zipName);
                        }
                    }

                    // Update index
                    indexPut(entries);

                } catch (final Throwable ex) {
                    // On failure, delete and unindex the zip file
                    try {
                        for (final Map.Entry<String, String> entry : entries.entrySet()) {
                            entry.setValue(null);
                        }
                        indexPut(entries);
                    } catch (final Throwable ex2) {
                        LOGGER.warn("Cannot unindex zip file after failure to generate it", ex2);
                    }
                    try {
                        if (opened) {
                            this.fileSystem.delete(zipPath, false);
                        }
                    } catch (final Throwable ex2) {
                        LOGGER.warn("Cannot delete zip file after failure to generate it", ex2);
                    }
                    throw new IOException("Cannot build and index zip file " + zipPath, ex);
                }

                // Log batch completion
                LOGGER.debug("Merged {}/{} small files in ZIP file {}", this.numSmallFiles,
                        this.numSmallFiles + mergeableNames.size(), zipPath);
            }

        } finally {
            // Always release the lock
            this.lock.writeLock().unlock();
        }
    }

    private void indexOptimize() throws IOException {
        synchronized (this.luceneWriter) {
            if (!this.luceneReader.isOptimized()) {
                // following command causes the index to always be detected as not optimized
                // this.luceneWriter.optimize(MAX_LUCENE_SEGMENTS);
                this.luceneWriter.optimize();
                this.luceneWriter.commit();
                this.luceneReader.close();
                this.luceneReader = this.luceneWriter.getReader();
                LOGGER.debug("Index optimized");
            }
        }
    }

    private String indexGet(final String key) throws IOException {
        final Term s = new Term(KEY_FIELD, key);
        synchronized (this.luceneWriter) {
            for (int attempt = 0;; ++attempt) {
                try {
                    final TermDocs termDocs = this.luceneReader.termDocs(s);
                    if (termDocs.next()) {
                        final Document doc = this.luceneReader.document(termDocs.doc());
                        return doc.get(VALUE_FIELD);
                    }
                    return null;
                } catch (final Throwable ex) {
                    indexError(ex, attempt);
                }
            }
        }
    }

    private void indexPut(final Map<String, String> entries) throws IOException {

        try {
            int numDeleted = 0;
            int numUpdated = 0;
            synchronized (this.luceneWriter) {
                for (final Map.Entry<String, String> entry : entries.entrySet()) {
                    if (entry.getValue() == null) {
                        this.luceneWriter.deleteDocuments(new Term(KEY_FIELD, entry.getKey()));
                        ++numDeleted;
                    } else {
                        final Document doc = new Document();
                        doc.add(new Field(KEY_FIELD, entry.getKey(), Field.Store.YES,
                                Field.Index.NOT_ANALYZED));
                        doc.add(new Field(VALUE_FIELD, entry.getValue(), Field.Store.YES,
                                Field.Index.NOT_ANALYZED));
                        LOGGER.debug("Document added: {}", doc.toString());
                        this.luceneWriter.updateDocument(new Term(KEY_FIELD, entry.getKey()), doc);
                        ++numUpdated;
                    }
                }
                this.luceneWriter.commit();
                this.luceneReader.close();
                this.luceneReader = this.luceneWriter.getReader();
            }
            LOGGER.debug("Updated Lucene index: {} documents updated, {} documents deleted",
                    numUpdated, numDeleted);
        } catch (final Throwable ex) {
            throw new IOException("Failed to update Lucene index with entries " + entries, ex);
        }
    }

    private Iterator<String> indexList(final boolean deleted) throws IOException {

        if (deleted) {
            // Perform a direct lookup
            final Term s = new Term(VALUE_FIELD, DELETED);
            final List<String> deletedNames = new ArrayList<>();
            synchronized (this.luceneWriter) {
                for (int attempt = 0;; ++attempt) {
                    try {
                        final TermDocs termDocs = this.luceneReader.termDocs(s);
                        while (termDocs.next()) {
                            deletedNames.add(this.luceneReader.document(termDocs.doc()).get(
                                    KEY_FIELD));
                        }
                        return deletedNames.iterator();
                    } catch (final Throwable ex) {
                        indexError(ex, attempt);
                    }
                }
            }

        } else {
            // Iterate over the whole index
            return new AbstractIterator<String>() {

                private int maxIndex = -1;

                private int currentIndex = 0;

                @Override
                protected String computeNext() {
                    try {
                        @SuppressWarnings("resource")
                        final HadoopMultiFileStore store = HadoopMultiFileStore.this;
                        synchronized (store.luceneWriter) {
                            for (int attempt = 0;; ++attempt) {
                                try {
                                    if (this.maxIndex < 0) {
                                        this.maxIndex = store.luceneReader.maxDoc();
                                    }
                                    while (this.currentIndex <= this.maxIndex) {
                                        final Document document = store.luceneReader
                                                .document(this.currentIndex++);
                                        if (document != null
                                                && !DELETED.equals(document.get(VALUE_FIELD))) {
                                            return document.get(HadoopMultiFileStore.KEY_FIELD);
                                        }
                                    }
                                    return endOfData();
                                } catch (final Throwable ex) {
                                    indexError(ex, attempt);
                                }
                            }
                        }
                    } catch (final Throwable ex) {
                        throw new RuntimeException("Error iterating over Lucene index", ex);
                    }
                }
            };
        }
    }

    private void indexError(final Throwable ex, final int numAttempt) throws IOException {
        if (numAttempt >= this.MAX_LUCENE_ATTEMPTS) {
            Throwables.propagateIfPossible(ex, IOException.class);
            Throwables.propagate(ex);
        }
        LOGGER.error("Error accessing Lucene index, will retry", ex);
        synchronized (this.luceneWriter) {
            try {
                this.luceneReader.close();
            } catch (final Throwable ex2) {
                LOGGER.warn("Cannot close lucene reader after failure", ex2);
            }
            this.luceneReader = this.luceneWriter.getReader();
        }
    }

    private InputStream openForRead(final Path filePath) throws IOException {
        final String fileName = filePath.getName();
        final InputStream stream = this.fileSystem.open(filePath);
        synchronized (this.openedFiles) {
            this.openedFiles.add(fileName);
        }
        LOGGER.trace("Opening {} for read", fileName);
        return new FilterInputStream(stream) {

            @Override
            public void close() throws IOException {
                try {
                    LOGGER.trace("Closing {}", fileName);
                    super.close();
                } finally {
                    synchronized (HadoopMultiFileStore.this.openedFiles) {
                        HadoopMultiFileStore.this.openedFiles.remove(fileName);
                    }
                }
            }

        };
    }

    private OutputStream openForWrite(final Path filePath) throws IOException {
        final String fileName = filePath.getName();
        final OutputStream stream = this.fileSystem.create(filePath, true);
        synchronized (this.openedFiles) {
            this.openedFiles.add(fileName);
        }
        LOGGER.trace("Opening {} for write", fileName);
        return new FilterOutputStream(stream) {

            @Override
            public void close() throws IOException {
                try {
                    LOGGER.trace("Closing {}", fileName);
                    super.close();
                } finally {
                    synchronized (HadoopMultiFileStore.this.openedFiles) {
                        HadoopMultiFileStore.this.openedFiles.remove(fileName);
                    }
                }
            }

        };
    }

    @Nullable
    private Path pathForSmallFile(@Nullable final String smallFile) {
        return smallFile == null ? null : new Path(this.smallFilesPath, smallFile);
    }

    @Nullable
    private Path pathForZipFile(@Nullable final String zipFile) {
        if (zipFile == null) {
            return null;
        }
        final String bucketDirectory = zipFile.substring(0, 2);
        return new Path(this.rootPath, bucketDirectory + File.separator + zipFile);
    }

}