package eu.fbk.knowledgestore.filestore;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Multiset;
import com.google.common.io.ByteStreams;
import eu.fbk.knowledgestore.data.Data;
import eu.fbk.knowledgestore.data.Stream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.*;
import java.util.*;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
/**
* A {@code FileStore} implementation based on the Hadoop API, optimized for a huge number of
* files.
* <p>
* A {@code HadoopMultiFileStore} stores its files in a Hadoop
* {@link org.apache.hadoop.fs.FileSystem}, under a certain, configurable root path; the
* filesystem can be any of the filesystems supported by the Hadoop API, including the local
* (raw) filesystem and the distributed HDFS filesystem.
* </p>
* <p>
* Newly written files are kept in a {@code _small} sub-directory; a periodic cleanup task
* packs them in batches into ZIP archives, which are stored in bucket sub-directories named
* after the first two characters of the archive name, overcoming possible filesystem
* limitations in terms of maximum number of files storable in a directory. A local Lucene
* index maps each file name to the ZIP archive containing it (or marks it as deleted), so
* that a read requires a single index lookup.
* </p>
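* <p>
* A minimal usage sketch (illustrative only: the local filesystem and the paths below are
* assumptions, not requirements of the class):
* </p>
* <pre>{@code
* FileSystem fs = FileSystem.getLocal(new Configuration());
* HadoopMultiFileStore store = new HadoopMultiFileStore(fs, "/tmp/lucene-index",
*         "/tmp/filestore", 10, 10000L);
* store.init();
* try (OutputStream out = store.write("example.txt")) {
*     out.write("hello".getBytes("UTF-8"));
* }
* try (InputStream in = store.read("example.txt")) {
*     System.out.println(new String(ByteStreams.toByteArray(in), "UTF-8"));
* }
* store.close();
* }</pre>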
*/
public final class HadoopMultiFileStore implements FileStore {
private static final Logger LOGGER = LoggerFactory.getLogger(HadoopMultiFileStore.class);
private static final String DEFAULT_ROOT_PATH = "files";
private static final String DEFAULT_LUCENE_PATH = "./lucene-index";
private static final int DEFAULT_NUM_SMALL_FILES = 10;
private static final long DEFAULT_CLEANUP_PERIOD = 10000L; // 10 s
private static final String SMALL_FILES_PATH = "_small";
// private static final int MAX_LUCENE_SEGMENTS = 100;
private static final int MAX_LUCENE_ATTEMPTS = 3;
private static final String KEY_FIELD = "filename";
private static final String VALUE_FIELD = "zipfilename";
private static final String DELETED = "__deleted";
private final FileSystem fileSystem;
private final Path rootPath;
private final Path smallFilesPath;
private final File luceneFolder;
private final int numSmallFiles;
private final long cleanupPeriod;
private final Multiset<String> openedFiles;
private final ReadWriteLock lock;
private final AtomicBoolean active;
private IndexReader luceneReader;
private IndexWriter luceneWriter;
private Future<?> cleanupFuture;
private long zipNameCounter;
/**
* Creates a new {@code HadoopMultiFileStore} storing files in the {@code FileSystem} and
* under the {@code rootPath} specified.
*
* @param fileSystem
* the file system, not null
* @param lucenePath
* the path of the local directory holding the Lucene index that maps file names to
* ZIP files; if null, the default path {@code ./lucene-index} will be used
* @param path
* the root path where to store files, possibly relative to the filesystem working
* directory; if null, the default root path {@code files} will be used
* @param numSmallFile
* the number of files to pack in each ZIP file; if null, defaults to 10
* @param cleanupPeriod
* the amount of time in milliseconds between periodic cleanup (merge/purge)
* operations; if null, defaults to 10000 ms
*/
public HadoopMultiFileStore(final FileSystem fileSystem, @Nullable final String lucenePath,
@Nullable final String path, @Nullable final Integer numSmallFile,
@Nullable final Long cleanupPeriod) {
this.fileSystem = Preconditions.checkNotNull(fileSystem);
this.luceneFolder = new File(MoreObjects.firstNonNull(lucenePath, DEFAULT_LUCENE_PATH));
this.rootPath = new Path(MoreObjects.firstNonNull(path, DEFAULT_ROOT_PATH))
.makeQualified(this.fileSystem); // resolve wrt workdir
this.smallFilesPath = new Path(this.rootPath, SMALL_FILES_PATH)
.makeQualified(this.fileSystem); // Hadoop paths always use '/', not File.separator
this.numSmallFiles = numSmallFile != null ? numSmallFile : DEFAULT_NUM_SMALL_FILES;
this.cleanupPeriod = cleanupPeriod != null ? cleanupPeriod : DEFAULT_CLEANUP_PERIOD;
this.openedFiles = HashMultiset.create();
this.lock = new ReentrantReadWriteLock(true);
this.active = new AtomicBoolean(false);
this.zipNameCounter = System.currentTimeMillis();
LOGGER.info("{} configured, paths={};{}", getClass().getSimpleName(), this.rootPath,
this.luceneFolder);
}
@Override
public void init() throws IOException {
// Create root folder if missing
if (!this.fileSystem.exists(this.rootPath)) {
LOGGER.debug("Creating root folder {}", this.rootPath);
if (!this.fileSystem.mkdirs(this.rootPath)) {
throw new IOException("Cannot create root folder " + this.luceneFolder);
}
}
// Create sub-folder for small files, if missing
if (!this.fileSystem.exists(this.smallFilesPath)) {
LOGGER.debug("Creating small files folder {}", this.smallFilesPath);
if (!this.fileSystem.mkdirs(this.smallFilesPath)) {
throw new IOException("Cannot create small files folder " + this.smallFilesPath);
}
}
// Create folder for lucene index, if missing
if (!this.luceneFolder.exists()) {
LOGGER.debug("Created lucene folder {}", this.luceneFolder);
if (!this.luceneFolder.mkdirs()) {
throw new IOException("Cannot create lucene folder " + this.luceneFolder);
}
}
// Initialize Lucene writer and reader
this.luceneWriter = new IndexWriter(FSDirectory.open(this.luceneFolder),
new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
this.luceneReader = this.luceneWriter.getReader();
// Mark the component as active
this.active.set(true);
// Schedule periodic cleanup
this.cleanupFuture = Data.getExecutor().scheduleWithFixedDelay(new Runnable() {
@Override
public void run() {
try {
merge();
purge();
indexOptimize();
} catch (final Throwable ex) {
LOGGER.warn("Periodic cleanup failed", ex);
}
}
}, this.cleanupPeriod, this.cleanupPeriod, TimeUnit.MILLISECONDS);
}
@Override
public InputStream read(final String fileName) throws IOException {
// This prevents concurrent write/delete/merge/purge operations from occurring
this.lock.readLock().lock();
try {
// Check active flag
Preconditions.checkState(this.active.get());
// Lookup the current zip file / deleted status for the file name supplied
final String zipName = indexGet(fileName);
// Proceed only if file is not marked as deleted
if (!DELETED.equals(zipName)) {
if (zipName != null) {
// Search in zipped file
final Path zipPath = pathForZipFile(zipName);
ZipInputStream stream = null;
try {
stream = new ZipInputStream(openForRead(zipPath));
ZipEntry entry;
while ((entry = stream.getNextEntry()) != null) {
if (entry.getName().equals(fileName)) {
LOGGER.debug("Reading {} from ZIP file {}", fileName, zipPath);
return stream;
}
}
// Entry not found: close the stream so the ZIP file is not left marked as open
stream.close();
} catch (final IOException ex) {
if (stream != null) {
try {
stream.close();
} catch (final IOException ex2) {
LOGGER.warn("Cannot close ZIP file after read failure", ex2);
}
}
throw new IOException("Cannot read " + fileName + " from ZIP file " + zipPath, ex);
}
} else {
// Search in small files
final Path smallPath = pathForSmallFile(fileName);
if (this.fileSystem.exists(smallPath)) {
LOGGER.debug("Reading small file {}", smallPath);
return openForRead(smallPath);
}
}
}
// Report missing file
throw new FileMissingException(fileName, "The file does not exist");
} finally {
// Always release the lock
this.lock.readLock().unlock();
}
}
@Override
public OutputStream write(final String fileName) throws IOException {
// This prevents any other read/write/delete/merge/purge operation from occurring
this.lock.writeLock().lock();
try {
// Check active flag
Preconditions.checkState(this.active.get());
// Throw an exception in case a file with the same name already exists
final String zipName = indexGet(fileName);
final Path smallPath = pathForSmallFile(fileName);
final boolean fileExists = this.fileSystem.exists(smallPath);
if (!DELETED.equals(zipName) && (zipName != null || fileExists)) {
throw new FileExistsException(fileName, "Cannot overwrite file");
}
// Write small file
LOGGER.debug("Creating small file {}", smallPath);
final OutputStream stream = openForWrite(smallPath); // may overwrite file
// Update the index, marking the file as no longer deleted if necessary
if (fileExists) {
try {
final Map<String, String> entries = new HashMap<>();
entries.put(fileName, null);
indexPut(entries);
} catch (final Throwable ex) {
stream.close();
Throwables.propagateIfPossible(ex, IOException.class);
throw Throwables.propagate(ex);
}
}
// Return opened stream
return stream;
} finally {
// Always release the lock
this.lock.writeLock().unlock();
}
}
// Synchronization serves (1) to prevent the same ZIP file from being exploded multiple
// times and (2) to avoid deleting files while a merge is in progress
@Override
public void delete(final String fileName) throws FileMissingException, IOException {
// This prevents other read/write/delete/merge/purge operations from occurring
this.lock.writeLock().lock();
try {
// Check active flag
Preconditions.checkState(this.active.get());
// Lookup the zip file / deleted status for the supplied file
final String zipName = indexGet(fileName);
// Proceed only if file is not marked as deleted
if (!DELETED.equals(zipName)) {
if (zipName != null) {
// Explode the ZIP file (except for the deleted file)
final Map<String, String> entries = new HashMap<>();
final Path zipPath = pathForZipFile(zipName);
LOGGER.debug("Exploding zip file {}", zipPath);
try (final ZipInputStream zipStream = new ZipInputStream(openForRead(zipPath))) {
ZipEntry entry;
while ((entry = zipStream.getNextEntry()) != null) {
final String smallName = entry.getName();
if (!smallName.equals(fileName)) {
entries.put(smallName, null);
final Path smallPath = pathForSmallFile(smallName);
// note: small file may already exist (as previously packed in ZIP
// file but not yet purged): in this case it is silently
// overwritten
try (OutputStream stream = openForWrite(smallPath)) {
ByteStreams.copy(zipStream, stream);
}
}
}
} catch (final IOException ex) {
// Perform clean up and propagate exception
for (final String smallName : entries.keySet()) {
final Path smallPath = pathForSmallFile(smallName);
try {
this.fileSystem.delete(smallPath, false);
} catch (final Throwable ex2) {
LOGGER.warn("Could not delete extracted file " + smallPath
+ " after failed to explod ZIP file " + zipPath, ex2);
}
}
throw new IOException("Cannot explode ZIP file " + zipPath, ex);
}
// Update index, marking small file and ZIP as deleted and de-associating
// other small files previously in the ZIP from the ZIP file
entries.put(fileName, DELETED); // because the file may still exist
entries.put(zipName, DELETED);
indexPut(entries);
return;
} else {
// Mark a small file with the name supplied as deleted
final Path smallPath = pathForSmallFile(fileName);
if (this.fileSystem.exists(smallPath)) {
LOGGER.debug("Marking small file {} as deleted", smallPath);
indexPut(ImmutableMap.of(fileName, DELETED));
return;
}
}
}
// Report file does not exist
throw new FileMissingException(fileName, "The file does not exist");
} finally {
// Always release the lock
this.lock.writeLock().unlock();
}
}
@Override
public Stream<String> list() throws IOException {
// This prevents write/delete/merge/purge operations from occurring ONLY for the time
// needed to retrieve the (non-merged/non-deleted) small files and to obtain an iterator
// over the Lucene index. This DOES NOT PROTECT the iteration over the Lucene index, thus
// changes to the file store during the iteration may be reflected in the iteration results
this.lock.readLock().lock();
try {
// Check active flag
Preconditions.checkState(this.active.get());
// Retrieve small files
final List<String> smallNames = new ArrayList<>();
for (final FileStatus fs : this.fileSystem.listStatus(this.smallFilesPath)) {
final String smallName = fs.getPath().getName();
if (indexGet(smallName) == null) { // only small files neither packed nor deleted
smallNames.add(smallName);
}
}
// Retrieve an iterator over zipped files
final Iterator<String> zippedNames = indexList(false);
// Return the concatenation of the two
return Stream.concat(Stream.create(smallNames), Stream.create(zippedNames));
} finally {
// Always release the lock
this.lock.readLock().unlock();
}
}
@Override
public void close() {
// This ensures no other read/write/delete/merge/purge operation is running
this.lock.writeLock().lock();
try {
try {
// Stop periodic cleanup
this.cleanupFuture.cancel(false);
} catch (final Throwable ex) {
LOGGER.warn("Unable to stop periodic cleanup task", ex);
}
try {
// Close Lucene reader
this.luceneReader.close();
} catch (final Throwable ex) {
LOGGER.warn("Unable to close Lucene reader", ex);
}
try {
// Optimize Lucene writer before closing
this.luceneWriter.optimize();
} catch (final Exception ex) {
LOGGER.warn("Unable to optimize Lucene writer", ex);
}
try {
// Close Lucene writer
this.luceneWriter.close();
} catch (final Exception ex) {
LOGGER.warn("Unable to close Lucene writer", ex);
}
} finally {
// Always mark component as inactive and release the lock
this.active.set(false);
this.lock.writeLock().unlock();
}
}
@Override
public String toString() {
return getClass().getSimpleName();
}
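/**
* Physically deletes files that are no longer needed: files marked as deleted in the index
* and small files already packed into a ZIP archive, skipping any file currently open for
* reading or writing. Called periodically by the cleanup task.
*/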
private void purge() throws IOException {
// This prevents other read/write/delete/merge/purge operations from running
this.lock.writeLock().lock();
try {
// Identify deleted files that can be safely purged (i.e., not opened)
final Set<String> zippedFiles = new HashSet<>();
final Set<String> purgableFiles = new HashSet<>();
for (final Iterator<String> i = indexList(true); i.hasNext();) {
purgableFiles.add(i.next());
}
final FileStatus[] files = this.fileSystem.listStatus(this.smallFilesPath);
if (files != null) {
for (final FileStatus fs : files) {
final String fileName = fs.getPath().getName();
final String zipName = indexGet(fileName);
if (zipName != null && !zipName.equals(DELETED)) {
purgableFiles.add(fileName);
zippedFiles.add(fileName);
}
}
}
synchronized (this.openedFiles) {
purgableFiles.removeAll(this.openedFiles.elementSet());
}
// Abort if there is nothing to do
if (purgableFiles.isEmpty()) {
return;
}
// Delete purgable files
LOGGER.debug("Purging {} files", purgableFiles.size());
final Map<String, String> entries = new HashMap<>();
for (final String file : purgableFiles) {
try {
final Path smallPath = pathForSmallFile(file);
if (this.fileSystem.exists(smallPath)) {
this.fileSystem.delete(smallPath, false);
LOGGER.debug("Deleted small file {}", smallPath);
} else {
final Path zipPath = pathForZipFile(file);
if (this.fileSystem.exists(zipPath)) {
this.fileSystem.delete(zipPath, false);
LOGGER.debug("Deleted ZIP file {}", zipPath);
} else {
LOGGER.warn("Cannot find file " + file);
}
}
if (!zippedFiles.contains(file)) {
entries.put(file, null);
}
} catch (final Throwable ex) {
LOGGER.warn("Cannot purge file " + file, ex);
}
}
// Update index
indexPut(entries);
} finally {
// Always release the lock
this.lock.writeLock().unlock();
}
}
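/**
* Packs batches of {@code numSmallFiles} small files into ZIP archives, updating the index
* so that each packed file points to the archive containing it; on failure, the partially
* written archive is deleted and unindexed. Called periodically by the cleanup task.
*/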
private void merge() throws IOException {
// This ensures no other read/write/delete/merge/purge operation is running
this.lock.writeLock().lock();
try {
// Retrieve the list of small files that can be packed in a zip file
final List<String> mergeableNames = new LinkedList<>();
final FileStatus[] files = this.fileSystem.listStatus(this.smallFilesPath);
if (files != null) {
for (final FileStatus fs : files) {
final String name = fs.getPath().getName();
if (!fs.isDir() && indexGet(name) == null) {
mergeableNames.add(name);
}
}
}
// Pack files in batches of 'numSmallFiles' size
while (mergeableNames.size() > this.numSmallFiles) {
// Determine the name of the zip file
final String zipName = Data.hash(this.zipNameCounter++) + ".zip";
final Path zipPath = pathForZipFile(zipName);
// Status variable necessary for cleanup in case the operation fails
boolean opened = false;
final Map<String, String> entries = new HashMap<>();
try {
// Try to build the zip file
try (final ZipOutputStream out = new ZipOutputStream(openForWrite(zipPath))) {
opened = true;
for (int i = 0; i < this.numSmallFiles; ++i) {
final String fileName = mergeableNames.remove(0);
out.putNextEntry(new ZipEntry(fileName));
try (final InputStream in = openForRead(pathForSmallFile(fileName))) {
ByteStreams.copy(in, out);
}
entries.put(fileName, zipName);
}
}
// Update index
indexPut(entries);
} catch (final Throwable ex) {
// On failure, delete and unindex the zip file
try {
for (final Map.Entry<String, String> entry : entries.entrySet()) {
entry.setValue(null);
}
indexPut(entries);
} catch (final Throwable ex2) {
LOGGER.warn("Cannot unindex zip file after failure to generate it", ex2);
}
try {
if (opened) {
this.fileSystem.delete(zipPath, false);
}
} catch (final Throwable ex2) {
LOGGER.warn("Cannot delete zip file after failure to generate it", ex2);
}
throw new IOException("Cannot build and index zip file " + zipPath, ex);
}
// Log batch completion
LOGGER.debug("Merged {}/{} small files in ZIP file {}", this.numSmallFiles,
this.numSmallFiles + mergeableNames.size(), zipPath);
}
} finally {
// Always release the lock
this.lock.writeLock().unlock();
}
}
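/**
* Optimizes the Lucene index if needed, committing pending changes and reopening the reader
* so that subsequent lookups see the optimized index.
*/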
private void indexOptimize() throws IOException {
synchronized (this.luceneWriter) {
if (!this.luceneReader.isOptimized()) {
// following command causes the index to always be detected as not optimized
// this.luceneWriter.optimize(MAX_LUCENE_SEGMENTS);
this.luceneWriter.optimize();
this.luceneWriter.commit();
this.luceneReader.close();
this.luceneReader = this.luceneWriter.getReader();
LOGGER.debug("Index optimized");
}
}
}
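/**
* Returns the index entry for a file name: the name of the ZIP file containing the file,
* {@code DELETED} if the file is marked as deleted, or null if the file is unknown to the
* index (e.g., a small file not packed yet).
*/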
private String indexGet(final String key) throws IOException {
final Term term = new Term(KEY_FIELD, key);
synchronized (this.luceneWriter) {
for (int attempt = 0;; ++attempt) {
try {
final TermDocs termDocs = this.luceneReader.termDocs(term);
try {
if (termDocs.next()) {
final Document doc = this.luceneReader.document(termDocs.doc());
return doc.get(VALUE_FIELD);
}
return null;
} finally {
termDocs.close(); // release the enumeration also on early return
}
} catch (final Throwable ex) {
indexError(ex, attempt);
}
}
}
}
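/**
* Applies the supplied (file name, value) entries to the index: a null value unindexes the
* file name, while any other value (a ZIP name or {@code DELETED}) replaces the previous
* entry. Changes are committed and the reader is reopened so they become visible.
*/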
private void indexPut(final Map<String, String> entries) throws IOException {
try {
int numDeleted = 0;
int numUpdated = 0;
synchronized (this.luceneWriter) {
for (final Map.Entry<String, String> entry : entries.entrySet()) {
if (entry.getValue() == null) {
this.luceneWriter.deleteDocuments(new Term(KEY_FIELD, entry.getKey()));
++numDeleted;
} else {
final Document doc = new Document();
doc.add(new Field(KEY_FIELD, entry.getKey(), Field.Store.YES,
Field.Index.NOT_ANALYZED));
doc.add(new Field(VALUE_FIELD, entry.getValue(), Field.Store.YES,
Field.Index.NOT_ANALYZED));
LOGGER.debug("Document added: {}", doc.toString());
this.luceneWriter.updateDocument(new Term(KEY_FIELD, entry.getKey()), doc);
++numUpdated;
}
}
this.luceneWriter.commit();
this.luceneReader.close();
this.luceneReader = this.luceneWriter.getReader();
}
LOGGER.debug("Updated Lucene index: {} documents updated, {} documents deleted",
numUpdated, numDeleted);
} catch (final Throwable ex) {
throw new IOException("Failed to update Lucene index with entries " + entries, ex);
}
}
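/**
* Lists indexed file names: if {@code deleted} is true, eagerly returns the names marked as
* deleted, otherwise lazily iterates over all non-deleted index entries. The lazy iteration
* is not shielded from concurrent changes to the index.
*/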
private Iterator<String> indexList(final boolean deleted) throws IOException {
if (deleted) {
// Perform a direct lookup
final Term term = new Term(VALUE_FIELD, DELETED);
final List<String> deletedNames = new ArrayList<>();
synchronized (this.luceneWriter) {
for (int attempt = 0;; ++attempt) {
try {
final TermDocs termDocs = this.luceneReader.termDocs(term);
try {
while (termDocs.next()) {
deletedNames.add(this.luceneReader.document(termDocs.doc())
.get(KEY_FIELD));
}
} finally {
termDocs.close(); // release the enumeration
}
return deletedNames.iterator();
} catch (final Throwable ex) {
indexError(ex, attempt);
}
}
}
} else {
// Iterate over the whole index
return new AbstractIterator<String>() {
private int maxIndex = -1;
private int currentIndex = 0;
@Override
protected String computeNext() {
try {
@SuppressWarnings("resource")
final HadoopMultiFileStore store = HadoopMultiFileStore.this;
synchronized (store.luceneWriter) {
for (int attempt = 0;; ++attempt) {
try {
if (this.maxIndex < 0) {
this.maxIndex = store.luceneReader.maxDoc();
}
// Valid document IDs range from 0 inclusive to maxDoc() exclusive
while (this.currentIndex < this.maxIndex) {
final int docId = this.currentIndex++;
if (store.luceneReader.isDeleted(docId)) {
continue; // skip documents removed from the index
}
final Document document = store.luceneReader.document(docId);
if (!DELETED.equals(document.get(VALUE_FIELD))) {
return document.get(KEY_FIELD);
}
}
return endOfData();
} catch (final Throwable ex) {
indexError(ex, attempt);
}
}
}
} catch (final Throwable ex) {
throw new RuntimeException("Error iterating over Lucene index", ex);
}
}
};
}
}
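/**
* Handles a Lucene access error: after {@code MAX_LUCENE_ATTEMPTS} attempts the exception is
* propagated, otherwise the reader is reopened so that the caller may retry.
*/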
private void indexError(final Throwable ex, final int numAttempt) throws IOException {
if (numAttempt >= MAX_LUCENE_ATTEMPTS) {
Throwables.propagateIfPossible(ex, IOException.class);
throw Throwables.propagate(ex);
}
LOGGER.error("Error accessing Lucene index, will retry", ex);
synchronized (this.luceneWriter) {
try {
this.luceneReader.close();
} catch (final Throwable ex2) {
LOGGER.warn("Cannot close lucene reader after failure", ex2);
}
this.luceneReader = this.luceneWriter.getReader();
}
}
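/**
* Opens a file of the underlying filesystem for reading, tracking it among the opened files
* until closed so that the periodic cleanup does not purge files still in use.
*/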
private InputStream openForRead(final Path filePath) throws IOException {
final String fileName = filePath.getName();
final InputStream stream = this.fileSystem.open(filePath);
synchronized (this.openedFiles) {
this.openedFiles.add(fileName);
}
LOGGER.trace("Opening {} for read", fileName);
return new FilterInputStream(stream) {
@Override
public void close() throws IOException {
try {
LOGGER.trace("Closing {}", fileName);
super.close();
} finally {
synchronized (HadoopMultiFileStore.this.openedFiles) {
HadoopMultiFileStore.this.openedFiles.remove(fileName);
}
}
}
};
}
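/**
* Opens (possibly overwriting) a file of the underlying filesystem for writing, tracking it
* among the opened files until closed so that the periodic cleanup does not purge files
* still in use.
*/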
private OutputStream openForWrite(final Path filePath) throws IOException {
final String fileName = filePath.getName();
final OutputStream stream = this.fileSystem.create(filePath, true);
synchronized (this.openedFiles) {
this.openedFiles.add(fileName);
}
LOGGER.trace("Opening {} for write", fileName);
return new FilterOutputStream(stream) {
@Override
public void close() throws IOException {
try {
LOGGER.trace("Closing {}", fileName);
super.close();
} finally {
synchronized (HadoopMultiFileStore.this.openedFiles) {
HadoopMultiFileStore.this.openedFiles.remove(fileName);
}
}
}
};
}
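/** Returns the path of a small file inside the {@code _small} directory, or null if the name is null. */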
@Nullable
private Path pathForSmallFile(@Nullable final String smallFile) {
return smallFile == null ? null : new Path(this.smallFilesPath, smallFile);
}
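/**
* Returns the path of a ZIP file, placed inside a bucket directory named after the first two
* characters of the ZIP file name, or null if the name is null.
*/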
@Nullable
private Path pathForZipFile(@Nullable final String zipFile) {
if (zipFile == null) {
return null;
}
// Hadoop paths always use '/' as the separator, so avoid the platform-dependent
// File.separator here
final String bucketDirectory = zipFile.substring(0, 2);
return new Path(new Path(this.rootPath, bucketDirectory), zipFile);
}
}