package be.bagofwords.db.filedb;

import be.bagofwords.application.BowTaskScheduler;
import be.bagofwords.application.memory.MemoryGobbler;
import be.bagofwords.application.memory.MemoryManager;
import be.bagofwords.application.memory.MemoryStatus;
import be.bagofwords.db.CoreDataInterface;
import be.bagofwords.db.DBUtils;
import be.bagofwords.db.combinator.Combinator;
import be.bagofwords.iterator.CloseableIterator;
import be.bagofwords.iterator.IterableUtils;
import be.bagofwords.iterator.SimpleIterator;
import be.bagofwords.ui.UI;
import be.bagofwords.util.KeyValue;
import be.bagofwords.util.MappedLists;
import be.bagofwords.util.Pair;
import be.bagofwords.util.SerializationUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.mutable.MutableLong;

import java.io.*;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.*;
import java.util.stream.Collectors;

public class FileDataInterface<T> extends CoreDataInterface<T> implements MemoryGobbler {

    private static final long MAX_FILE_SIZE_WRITE = 50 * 1024 * 1024;
    private static final long MAX_FILE_SIZE_READ = 10 * 1024 * 1024;
    private static final long BITS_TO_DISCARD_FOR_FILE_BUCKETS = 58;
    private static final int BATCH_SIZE_PRIMITIVE_VALUES = 100000;
    private static final int BATCH_SIZE_NON_PRIMITIVE_VALUES = 100;
    private static final String META_FILE = "META_FILE";
    private static final String LOCK_FILE = "LOCK";
    private static final int LONG_SIZE = 8;
    private static final int INT_SIZE = 4;

    private MemoryManager memoryManager;
    private File directory;
    private List<FileBucket> fileBuckets;
    private final int sizeOfValues;
    private final long randomId;
    private final Object sizeOfCachedFileContentsLock = new Object(); //dedicated monitor object, safer than locking on a String
    private final long maxSizeOfCachedFileContents;
    private long currentSizeOfCachedFileContents;
    private long timeOfLastWrite;
    private long timeOfLastRead;
    private boolean metaFileOutOfSync;

    public FileDataInterface(MemoryManager memoryManager, Combinator<T> combinator, Class<T> objectClass, String directory, String nameOfSubset, boolean isTemporaryDataInterface, BowTaskScheduler taskScheduler) {
        super(nameOfSubset, objectClass, combinator, isTemporaryDataInterface);
        this.directory = new File(directory, nameOfSubset);
        this.sizeOfValues = SerializationUtils.getWidth(objectClass);
        this.randomId = new Random().nextLong();
        this.memoryManager = memoryManager;
        this.maxSizeOfCachedFileContents = memoryManager.getAvailableMemoryInBytes() / 3;
        timeOfLastRead = 0;
        checkDataDir();
        MetaFile metaFile = readMetaInfo();
        initializeFiles(metaFile);
        writeLockFile(randomId);
        currentSizeOfCachedFileContents = 0;
        //periodically rewrite files that have become too large, and check that we still hold the lock
        taskScheduler.schedulePeriodicTask(() -> ifNotClosed(() -> {
            rewriteAllFiles(false);
            checkLock();
        }), 1000);
    }
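    /*
     * On-disk record layout (see writeValue/readValue below): every record starts with the
     * 8-byte key. For fixed-width value classes (sizeOfValues != -1) the serialized value
     * follows directly; otherwise a 4-byte length precedes the value bytes. For example,
     * a Long value is stored as 8 (key) + 8 (value) = 16 bytes per record.
     */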
    @Override
    public T read(long key) {
        FileBucket bucket = getBucket(key);
        lockForRead(bucket);
        FileInfo file = bucket.getFile(key);
        try {
            int startPos;
            int pos = Arrays.binarySearch(file.getFileLocationsKeys(), key);
            if (pos == -1) {
                //key comes before the first key, so the value can not be in this file
                return null;
            } else {
                if (pos < 0) {
                    pos = -(pos + 1);
                }
                if (pos == file.getFileLocationsKeys().length || file.getFileLocationsKeys()[pos] > key) {
                    pos--;
                }
                startPos = file.getFileLocationsValues()[pos];
            }
            int endPos = pos + 1 < file.getFileLocationsKeys().length ? file.getFileLocationsValues()[pos + 1] : file.getReadSize();
            ReadBuffer readBuffer = getReadBuffer(file, startPos, endPos);
            startPos -= readBuffer.getOffset();
            endPos -= readBuffer.getOffset();
            byte firstByteOfKeyToRead = (byte) (key >> 56);
            byte[] buffer = readBuffer.getBuffer();
            int position = startPos;
            while (position < endPos) {
                byte currentByte = buffer[position];
                if (currentByte == firstByteOfKeyToRead) {
                    long currentKey = SerializationUtils.bytesToLong(buffer, position);
                    position += LONG_SIZE;
                    if (currentKey == key) {
                        ReadValue<T> readValue = readValue(buffer, position);
                        return readValue.getValue();
                    } else if (currentKey > key) {
                        return null;
                    } else {
                        //skip value
                        position += skipValue(buffer, position);
                    }
                } else if (currentByte > firstByteOfKeyToRead) {
                    //key too large, value not in this file
                    return null;
                } else {
                    //key too small, skip key and value
                    position += LONG_SIZE;
                    position += skipValue(buffer, position);
                }
            }
            return null;
        } catch (Exception exp) {
            throw new RuntimeException("Error in file " + toFile(file).getAbsolutePath(), exp);
        } finally {
            dataWasRead();
            bucket.unlockRead();
        }
    }

    @Override
    public void write(long key, T value) {
        FileBucket bucket = getBucket(key);
        bucket.lockWrite();
        FileInfo file = bucket.getFile(key);
        try {
            //try-with-resources makes sure the stream is also closed when writeValue fails
            try (DataOutputStream dos = getAppendingOutputStream(file)) {
                int extraSize = writeValue(dos, key, value);
                file.increaseWriteSize(extraSize);
            }
            dataWasWritten();
        } catch (Exception e) {
            throw new RuntimeException("Failed to write value with key " + key + " to file " + toFile(file).getAbsolutePath(), e);
        } finally {
            bucket.unlockWrite();
        }
    }

    @Override
    public void write(Iterator<KeyValue<T>> entries) {
        long batchSize = getBatchSize();
        while (entries.hasNext()) {
            MappedLists<FileBucket, KeyValue<T>> entriesToFileBuckets = new MappedLists<>();
            int numRead = 0;
            while (numRead < batchSize && entries.hasNext()) {
                KeyValue<T> curr = entries.next();
                FileBucket fileBucket = getBucket(curr.getKey());
                entriesToFileBuckets.get(fileBucket).add(curr);
                numRead++;
            }
            long totalSizeWrittenInBatch = 0;
            for (FileBucket bucket : entriesToFileBuckets.keySet()) {
                List<KeyValue<T>> values = entriesToFileBuckets.get(bucket);
                bucket.lockWrite();
                try {
                    MappedLists<FileInfo, KeyValue<T>> entriesToFiles = new MappedLists<>();
                    for (KeyValue<T> value : values) {
                        FileInfo file = bucket.getFile(value.getKey());
                        entriesToFiles.get(file).add(value);
                    }
                    for (FileInfo file : entriesToFiles.keySet()) {
                        try {
                            List<KeyValue<T>> valuesForFile = entriesToFiles.get(file);
                            try (DataOutputStream dos = getAppendingOutputStream(file)) {
                                for (KeyValue<T> value : valuesForFile) {
                                    int extraSize = writeValue(dos, value.getKey(), value.getValue());
                                    file.increaseWriteSize(extraSize);
                                    totalSizeWrittenInBatch += extraSize;
                                }
                            }
                            dataWasWritten();
                        } catch (Exception exp) {
                            throw new RuntimeException("Failed to write multiple values to file " + toFile(file).getAbsolutePath(), exp);
                        }
                    }
                } finally {
                    bucket.unlockWrite();
                }
            }
            if (totalSizeWrittenInBatch > 0) {
                batchSize = BATCH_SIZE_PRIMITIVE_VALUES * 16 * batchSize / totalSizeWrittenInBatch;
            }
        }
    }
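    /*
     * Note on the batch size adaptation above: totalSizeWrittenInBatch / batchSize is the
     * average number of bytes per entry, so the update keeps each batch at roughly
     * BATCH_SIZE_PRIMITIVE_VALUES * 16 bytes (about 1.6 MB). For long values a record is
     * exactly 16 bytes (8-byte key + 8-byte value), so the batch size then stays at
     * BATCH_SIZE_PRIMITIVE_VALUES entries.
     */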
    @Override
    public CloseableIterator<KeyValue<T>> iterator(final Iterator<Long> keyIterator) {
        return new CloseableIterator<KeyValue<T>>() {

            private Iterator<KeyValue<T>> currBatchIterator;

            {
                readNextBatch(); //initialize the first batch
            }

            private void readNextBatch() {
                long batchSize = getBatchSize();
                List<KeyValue<T>> valuesInBatch = new ArrayList<>();
                //keep reading batches of keys until at least one value is found, otherwise a
                //batch without any matching values would incorrectly end the iteration early
                while (valuesInBatch.isEmpty() && keyIterator.hasNext()) {
                    List<Long> keysInBatch = new ArrayList<>();
                    while (keyIterator.hasNext() && keysInBatch.size() < batchSize) {
                        keysInBatch.add(keyIterator.next());
                    }
                    Collections.sort(keysInBatch);
                    FileInfo currentFile = null;
                    Map<Long, T> valuesInCurrentFile = null;
                    for (Long key : keysInBatch) {
                        FileBucket bucket = getBucket(key);
                        lockForRead(bucket);
                        FileInfo file = bucket.getFile(key);
                        if (file != currentFile) {
                            currentFile = file;
                            valuesInCurrentFile = readMap(file);
                        }
                        bucket.unlockRead();
                        T value = valuesInCurrentFile.get(key);
                        if (value != null) {
                            valuesInBatch.add(new KeyValue<>(key, value));
                        }
                    }
                }
                currBatchIterator = valuesInBatch.iterator();
            }

            @Override
            protected void closeInt() {
                //ok
            }

            @Override
            public boolean hasNext() {
                return currBatchIterator.hasNext();
            }

            @Override
            public KeyValue<T> next() {
                KeyValue<T> next = currBatchIterator.next();
                if (!currBatchIterator.hasNext()) {
                    readNextBatch();
                }
                return next;
            }
        };
    }

    @Override
    public CloseableIterator<KeyValue<T>> iterator() {
        final FileIterator fileIterator = new FileIterator();
        return IterableUtils.iterator(new SimpleIterator<KeyValue<T>>() {

            private Iterator<KeyValue<T>> valuesInFileIt;

            @Override
            public KeyValue<T> next() throws Exception {
                while (valuesInFileIt == null || !valuesInFileIt.hasNext()) {
                    Pair<FileBucket, FileInfo> next = fileIterator.lockCurrentBucketAndGetNextFile();
                    if (next != null) {
                        FileBucket bucket = next.getFirst();
                        FileInfo file = next.getSecond();
                        List<KeyValue<T>> sortedEntries = readCleanValues(file);
                        bucket.unlockRead();
                        valuesInFileIt = sortedEntries.iterator();
                    } else {
                        valuesInFileIt = null;
                        break;
                    }
                }
                if (valuesInFileIt != null && valuesInFileIt.hasNext()) {
                    return valuesInFileIt.next();
                } else {
                    return null;
                }
            }
        });
    }

    @Override
    public CloseableIterator<Long> keyIterator() {
        final FileIterator fileIterator = new FileIterator();
        return IterableUtils.iterator(new SimpleIterator<Long>() {

            private Iterator<Long> keysInFileIt;

            @Override
            public Long next() throws Exception {
                while (keysInFileIt == null || !keysInFileIt.hasNext()) {
                    Pair<FileBucket, FileInfo> next = fileIterator.lockCurrentBucketAndGetNextFile();
                    if (next != null) {
                        FileBucket bucket = next.getFirst();
                        FileInfo file = next.getSecond();
                        List<Long> sortedKeys = readKeys(file);
                        bucket.unlockRead();
                        keysInFileIt = sortedKeys.iterator();
                    } else {
                        keysInFileIt = null;
                        break;
                    }
                }
                if (keysInFileIt != null && keysInFileIt.hasNext()) {
                    return keysInFileIt.next();
                } else {
                    return null;
                }
            }
        });
    }

    @Override
    public long freeMemory() {
        MutableLong totalBytesReleased = new MutableLong(0);
        ifNotClosed(() -> {
            for (FileBucket bucket : fileBuckets) {
                bucket.lockRead();
                for (FileInfo fileInfo : bucket.getFiles()) {
                    long bytesReleased = fileInfo.discardFileContents();
                    updateSizeOfCachedFileContents(-bytesReleased);
                    totalBytesReleased.add(bytesReleased);
                }
                bucket.unlockRead();
            }
        });
        return totalBytesReleased.longValue();
    }

    @Override
    public long getMemoryUsage() {
        return currentSizeOfCachedFileContents;
    }
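    /*
     * apprSize() below estimates the total number of entries by counting the keys in a sample
     * of (at most) 100 files and extrapolating by file size: if the sampled files hold
     * numOfObjects entries in sizeOfSampledFiles bytes, the total is estimated as
     * sizeOfAllFiles * numOfObjects / sizeOfSampledFiles.
     */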
    @Override
    public long apprSize() {
        int numOfFilesToSample = 100;
        long numOfObjects = 0;
        long sizeOfSampledFiles = 0;
        int numOfSampledFiles = 0;
        long sizeOfAllFiles = 0;
        try {
            FileIterator fileIt = new FileIterator();
            Pair<FileBucket, FileInfo> next = fileIt.lockCurrentBucketAndGetNextFile();
            while (next != null) {
                FileBucket bucket = next.getFirst();
                FileInfo file = next.getSecond();
                long fileSize = file.getReadSize();
                if (numOfSampledFiles < numOfFilesToSample) {
                    List<Long> keys = readKeys(file);
                    numOfObjects += keys.size();
                    sizeOfSampledFiles += fileSize;
                    if (fileSize == 0 && !keys.isEmpty()) {
                        UI.writeError("Something is wrong with file " + file.getFirstKey());
                    }
                    numOfSampledFiles++;
                }
                bucket.unlockRead();
                sizeOfAllFiles += fileSize;
                next = fileIt.lockCurrentBucketAndGetNextFile();
            }
            if (numOfObjects == 0) {
                return 0;
            } else {
                return sizeOfAllFiles * numOfObjects / sizeOfSampledFiles;
            }
        } catch (IOException exp) {
            throw new RuntimeException(exp);
        }
    }

    @Override
    public void flush() {
        updateShouldBeCleanedInfo();
    }

    @Override
    public void optimizeForReading() {
        rewriteAllFiles(true);
    }

    @Override
    protected void doClose() {
        updateShouldBeCleanedInfo();
        if (metaFileOutOfSync) {
            writeMetaFile();
        }
        fileBuckets = null;
    }

    @Override
    public void dropAllData() {
        writeLockAllBuckets();
        for (FileBucket bucket : fileBuckets) {
            for (FileInfo file : bucket.getFiles()) {
                deleteFile(file);
            }
            bucket.getFiles().clear();
            bucket.setShouldBeCleanedBeforeRead(false);
        }
        makeSureAllFileBucketsHaveAtLeastOneFile();
        writeUnlockAllBuckets();
        writeMetaFile();
    }

    private void updateShouldBeCleanedInfo() {
        for (FileBucket fileBucket : fileBuckets) {
            fileBucket.lockWrite();
            if (!allFilesClean(fileBucket)) {
                fileBucket.setShouldBeCleanedBeforeRead(true);
            }
            fileBucket.unlockWrite();
        }
    }

    private synchronized void rewriteAllFiles(boolean forceClean) {
        int numOfFilesRewritten = fileBuckets.parallelStream().collect(Collectors.summingInt(bucket -> rewriteBucket(bucket, forceClean)));
        if (metaFileOutOfSync) {
            writeMetaFile();
        }
        if (DBUtils.DEBUG && numOfFilesRewritten > 0) {
            UI.write("Rewrote " + numOfFilesRewritten + " files for " + getName());
        }
    }
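    /*
     * In the write phase, rewriteBucket below rewrites a dirty file with probability
     * writeSize * 4 / MAX_FILE_SIZE_WRITE - 3: the probability is zero until the file reaches
     * 75% of MAX_FILE_SIZE_WRITE (37.5 MB) and then grows linearly to one at the full 50 MB,
     * so files are mostly left alone while writes are ongoing.
     */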
    private int rewriteBucket(FileBucket bucket, boolean forceClean) {
        if (forceClean) {
            bucket.lockWrite();
        } else {
            boolean success = bucket.tryLockWrite();
            if (!success) {
                return 0; //will not clean this bucket now but continue with the other buckets, we'll be back soon
            }
        }
        try {
            int numOfRewrittenFiles = 0;
            for (int fileInd = 0; fileInd < bucket.getFiles().size() && (!closeWasRequested() || forceClean); fileInd++) {
                FileInfo file = bucket.getFiles().get(fileInd);
                boolean needsRewrite;
                long targetSize;
                if (inReadPhase() || forceClean) {
                    //read phase
                    needsRewrite = !file.isClean();
                    targetSize = MAX_FILE_SIZE_READ;
                } else {
                    //write phase
                    double probOfRewriteForSize = file.getWriteSize() * 4.0 / MAX_FILE_SIZE_WRITE - 3.0;
                    needsRewrite = !file.isClean() && Math.random() < probOfRewriteForSize;
                    targetSize = MAX_FILE_SIZE_READ;
                }
                if (needsRewrite) {
                    // UI.write("Will rewrite file " + file.getFirstKey() + " " + getName() + " clean=" + file.isClean() + " force=" + forceClean + " readSize=" + file.getReadSize() + " writeSize=" + file.getWriteSize() + " targetSize=" + targetSize);
                    List<KeyValue<T>> values = readAllValues(file);
                    int filesMergedWithThisFile = inWritePhase() ? 0 : mergeFileIfTooSmall(bucket.getFiles(), fileInd, file.getWriteSize(), targetSize, values);
                    DataOutputStream dos = getOutputStreamToTempFile(file);
                    List<Pair<Long, Integer>> fileLocations = new ArrayList<>();
                    int currentSizeOfFile = 0;
                    for (KeyValue<T> entry : values) {
                        long key = entry.getKey();
                        T value = entry.getValue();
                        ByteArrayOutputStream bos = new ByteArrayOutputStream();
                        DataOutputStream tmpOutputStream = new DataOutputStream(bos);
                        writeValue(tmpOutputStream, key, value);
                        byte[] dataToWrite = bos.toByteArray();
                        if (currentSizeOfFile > 0 && currentSizeOfFile + dataToWrite.length > targetSize) {
                            //start a new file
                            if (filesMergedWithThisFile > 0) {
                                throw new RuntimeException("Something went wrong! Merged files and then created a new file?");
                            }
                            dos.close();
                            swapTempForReal(file);
                            file.fileWasRewritten(sample(fileLocations, 100), currentSizeOfFile, currentSizeOfFile);
                            fileLocations = new ArrayList<>();
                            file = new FileInfo(key, 0, 0);
                            currentSizeOfFile = 0;
                            bucket.getFiles().add(fileInd + 1, file);
                            fileInd++;
                            dos = getOutputStreamToTempFile(file);
                        }
                        fileLocations.add(new Pair<>(key, currentSizeOfFile));
                        dos.write(dataToWrite);
                        currentSizeOfFile += dataToWrite.length;
                    }
                    //close the stream before moving the temp file, otherwise buffered data could be lost
                    dos.close();
                    swapTempForReal(file);
                    file.fileWasRewritten(sample(fileLocations, 100), currentSizeOfFile, currentSizeOfFile);
                    numOfRewrittenFiles++;
                }
            }
            boolean allFilesClean = allFilesClean(bucket);
            if (allFilesClean) {
                bucket.setShouldBeCleanedBeforeRead(false);
            }
            if (numOfRewrittenFiles > 0) {
                metaFileOutOfSync = true;
            }
            return numOfRewrittenFiles;
        } catch (Exception exp) {
            UI.writeError("Unexpected exception while rewriting files", exp);
            throw new RuntimeException("Unexpected exception while rewriting files", exp);
        } finally {
            bucket.unlockWrite();
        }
    }

    private boolean allFilesClean(FileBucket bucket) {
        boolean allFilesClean = true;
        for (FileInfo file : bucket.getFiles()) {
            allFilesClean &= file.isClean();
        }
        return allFilesClean;
    }

    private void deleteFile(FileInfo file) {
        boolean success = toFile(file).delete();
        if (!success) {
            throw new RuntimeException("Failed to delete file " + toFile(file).getAbsolutePath());
        }
    }

    private void dataWasWritten() {
        timeOfLastWrite = System.currentTimeMillis();
        metaFileOutOfSync = true;
    }

    private void dataWasRead() {
        timeOfLastRead = System.currentTimeMillis();
    }

    private boolean inReadPhase() {
        return !inWritePhase();
    }

    private boolean inWritePhase() {
        return timeOfLastWrite > timeOfLastRead && System.currentTimeMillis() - timeOfLastRead > 10 * 1000;
    }

    private void updateSizeOfCachedFileContents(long byteDiff) {
        synchronized (sizeOfCachedFileContentsLock) {
            currentSizeOfCachedFileContents += byteDiff;
        }
    }

    private void writeLockAllBuckets() {
        for (FileBucket fileBucket : fileBuckets) {
            fileBucket.lockWrite();
        }
    }

    private void writeUnlockAllBuckets() {
        for (FileBucket fileBucket : fileBuckets) {
            fileBucket.unlockWrite();
        }
    }

    private void readLockAllBuckets() {
        for (FileBucket fileBucket : fileBuckets) {
            fileBucket.lockRead();
        }
    }

    private void readUnlockAllBuckets() {
        for (FileBucket fileBucket : fileBuckets) {
            fileBucket.unlockRead();
        }
    }

    private void lockForRead(FileBucket bucket) {
        bucket.lockRead();
        while (bucket.shouldBeCleanedBeforeRead()) {
            bucket.unlockRead();
            rewriteBucket(bucket, true);
            bucket.lockRead();
        }
    }

    private void swapTempForReal(FileInfo file) throws IOException {
        synchronized (file) {
            long releasedBytes = file.discardFileContents();
            updateSizeOfCachedFileContents(-releasedBytes);
        }
        Files.move(toTempFile(file).toPath(), toFile(file).toPath(), StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING);
    }

    private int mergeFileIfTooSmall(List<FileInfo> fileList, int currentFileInd, long combinedSize, long maxFileSize, List<KeyValue<T>> values) {
        int nextFileInd = currentFileInd + 1;
        while (nextFileInd < fileList.size() && combinedSize + fileList.get(nextFileInd).getWriteSize() < maxFileSize) {
            //combine the files
            FileInfo nextFile = fileList.remove(nextFileInd);
            values.addAll(readAllValues(nextFile));
            combinedSize += nextFile.getWriteSize();
            deleteFile(nextFile);
        }
        return nextFileInd - currentFileInd - 1;
    }

    private int writeValue(DataOutputStream dos, long key, T value) throws IOException {
        dos.writeLong(key);
        byte[] objectAsBytes = SerializationUtils.objectToBytesCheckForNull(value, getObjectClass());
        if (sizeOfValues == -1) {
            //variable-width values: write the length of the value first
            dos.writeInt(objectAsBytes.length);
            dos.write(objectAsBytes);
            return LONG_SIZE + INT_SIZE + objectAsBytes.length;
        } else {
            dos.write(objectAsBytes);
            return LONG_SIZE + sizeOfValues;
        }
    }

    private ReadValue<T> readValue(byte[] buffer, int position) throws IOException {
        int lengthOfObject;
        int lengthOfLengthValue;
        if (sizeOfValues == -1) {
            lengthOfObject = SerializationUtils.bytesToInt(buffer, position);
            lengthOfLengthValue = INT_SIZE;
        } else {
            lengthOfObject = sizeOfValues;
            lengthOfLengthValue = 0;
        }
        T value = SerializationUtils.bytesToObjectCheckForNull(buffer, position + lengthOfLengthValue, lengthOfObject, getObjectClass());
        return new ReadValue<>(lengthOfObject + lengthOfLengthValue, value);
    }
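    /*
     * Keys are mapped to buckets by their 6 most significant bits (64 - BITS_TO_DISCARD_FOR_FILE_BUCKETS),
     * so there are 2^6 = 64 buckets, each covering a contiguous range of 2^58 keys. getBucket()
     * further down turns a key into a bucket index with (key >> 58) + numberOfBuckets / 2, which
     * shifts the signed range [-32, 31] to the list indices [0, 63].
     */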
    private List<FileBucket> createEmptyFileBuckets() {
        List<FileBucket> buckets = new ArrayList<>(1 << (64 - BITS_TO_DISCARD_FOR_FILE_BUCKETS));
        long start = Long.MIN_VALUE >> BITS_TO_DISCARD_FOR_FILE_BUCKETS;
        long end = Long.MAX_VALUE >> BITS_TO_DISCARD_FOR_FILE_BUCKETS;
        for (long val = start; val <= end; val++) {
            long firstKey = val << BITS_TO_DISCARD_FOR_FILE_BUCKETS;
            long lastKey = ((val + 1) << BITS_TO_DISCARD_FOR_FILE_BUCKETS) - 1;
            if (lastKey < firstKey) {
                //overflow
                lastKey = Long.MAX_VALUE;
            }
            buckets.add(new FileBucket(firstKey, lastKey));
        }
        return buckets;
    }

    private void checkDataDir() {
        if (!directory.exists()) {
            boolean success = directory.mkdirs();
            if (!success) {
                throw new RuntimeException("Failed to create directory " + directory.getAbsolutePath());
            }
        }
        if (directory.isFile()) {
            throw new IllegalArgumentException("Should be a directory but is a file! " + directory.getAbsolutePath());
        }
    }

    private void initializeFiles(MetaFile metaFile) {
        String[] filesInDir = this.directory.list();
        if (metaFile != null && metaFileUpToDate(metaFile, filesInDir)) {
            metaFileOutOfSync = false;
            timeOfLastRead = metaFile.getLastRead();
            timeOfLastWrite = metaFile.getLastWrite();
            fileBuckets = metaFile.getFileBuckets();
        } else {
            metaFileOutOfSync = true;
            timeOfLastRead = timeOfLastWrite = 0;
            fileBuckets = createEmptyFileBuckets();
            if (filesInDir.length > 0) {
                UI.write("Missing (up-to-date) meta information for " + getName() + ", will reconstruct data structures from the files found in the directory.");
                updateBucketsFromFiles(filesInDir);
            }
            makeSureAllFileBucketsHaveAtLeastOneFile();
        }
    }
    private boolean metaFileUpToDate(MetaFile metaFile, String[] filesInDir) {
        for (String file : filesInDir) {
            if (file.matches("-?[0-9]+")) {
                long key = Long.parseLong(file);
                FileBucket bucket = getBucket(metaFile.getFileBuckets(), key);
                long sizeOnDisk = new File(directory, file).length();
                FileInfo fileInfo = bucket.getFile(key);
                if (fileInfo.getFirstKey() != key) {
                    return false; //the name of the file on disk should be equal to the first key
                }
                if (fileInfo.getWriteSize() != sizeOnDisk) {
                    return false; //the file write size should be equal to the size on disk
                }
                if (!fileInfo.isClean() && !bucket.shouldBeCleanedBeforeRead()) {
                    return false; //if the file is dirty, the bucket should be marked as 'shouldBeCleanedBeforeRead'
                }
            }
        }
        for (FileBucket fileBucket : metaFile.getFileBuckets()) {
            if (fileBucket.getFiles().isEmpty()) {
                return false; //every bucket should contain at least one file
            }
            if (fileBucket.getFirstKey() != fileBucket.getFiles().get(0).getFirstKey()) {
                return false; //the first key of the bucket should match the first key of the first file
            }
            for (int i = 0; i < fileBucket.getFiles().size() - 1; i++) {
                if (fileBucket.getFiles().get(i).getFirstKey() >= fileBucket.getFiles().get(i + 1).getFirstKey()) {
                    return false; //files should be sorted by first key
                }
            }
        }
        return true; //all good!
    }

    private void updateBucketsFromFiles(String[] filesInDir) {
        for (String file : filesInDir) {
            if (file.matches("-?[0-9]+")) {
                long key = Long.parseLong(file);
                FileBucket bucket = getBucket(key);
                long sizeOnDisk = new File(directory, file).length();
                FileInfo fileInfo = new FileInfo(key, 0, (int) sizeOnDisk);
                bucket.getFiles().add(fileInfo);
                bucket.setShouldBeCleanedBeforeRead(bucket.shouldBeCleanedBeforeRead() || sizeOnDisk > 0);
            }
        }
    }

    private void makeSureAllFileBucketsHaveAtLeastOneFile() {
        for (FileBucket bucket : fileBuckets) {
            if (bucket.getFiles().isEmpty()) {
                //we need at least one file per bucket
                FileInfo first = new FileInfo(bucket.getFirstKey(), 0, 0);
                try {
                    boolean success = toFile(first).createNewFile();
                    if (!success) {
                        throw new RuntimeException("Failed to create new file " + first + " at " + toFile(first).getAbsolutePath());
                    } else {
                        bucket.getFiles().add(first);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            } else {
                Collections.sort(bucket.getFiles());
                if (bucket.getFirstKey() != bucket.getFiles().get(0).getFirstKey()) {
                    throw new RuntimeException("Missing file in " + getName() + "? Expected file " + new File(directory, Long.toString(bucket.getFirstKey())).getAbsolutePath());
                }
            }
        }
    }

    private MetaFile readMetaInfo() {
        File cleanFilesFile = new File(directory, META_FILE);
        if (cleanFilesFile.exists()) {
            //try-with-resources makes sure the stream is also closed when reading fails
            try (InputStream fis = new BufferedInputStream(new FileInputStream(cleanFilesFile))) {
                return SerializationUtils.readObject(MetaFile.class, fis);
            } catch (Exception exp) {
                UI.writeError("Received exception while reading " + cleanFilesFile.getAbsolutePath(), exp);
            }
        }
        return null;
    }

    private synchronized void writeMetaFile() {
        readLockAllBuckets();
        metaFileOutOfSync = false;
        File outputFile = new File(directory, META_FILE);
        try {
            MetaFile metaFile = new MetaFile(fileBuckets, timeOfLastWrite, timeOfLastRead);
            try (FileOutputStream fos = new FileOutputStream(outputFile)) {
                SerializationUtils.writeObject(metaFile, fos);
            }
        } catch (Exception exp) {
            metaFileOutOfSync = true;
            throw new RuntimeException("Received exception while writing the meta file to " + outputFile.getAbsolutePath(), exp);
        } finally {
            readUnlockAllBuckets();
        }
    }

    private FileBucket getBucket(long key) {
        return getBucket(fileBuckets, key);
    }

    private FileBucket getBucket(List<FileBucket> fileBuckets, long key) {
        int ind = (int) ((key >> BITS_TO_DISCARD_FOR_FILE_BUCKETS) + fileBuckets.size() / 2);
        return fileBuckets.get(ind);
    }
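    /*
     * getReadBuffer() below serves reads in one of two ways: when the memory manager reports
     * free memory and the cache is under maxSizeOfCachedFileContents (a third of the available
     * memory, see the constructor), the complete file is read once and cached on the FileInfo
     * object; otherwise only the requested byte range is read from disk.
     */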
    private ReadBuffer getReadBuffer(FileInfo file, int requestedStartPos, int requestedEndPos) throws IOException {
        byte[] fileContents = file.getCachedFileContents();
        if (fileContents == null) {
            if (memoryManager.getMemoryStatus() == MemoryStatus.FREE && currentSizeOfCachedFileContents < maxSizeOfCachedFileContents) {
                //cache the file contents. Lock on the file object to make sure that we don't read the
                //contents in parallel (that would mess up the currentSizeOfCachedFileContents variable
                //and is not very efficient)
                synchronized (file) {
                    fileContents = file.getCachedFileContents();
                    if (fileContents == null) {
                        fileContents = new byte[file.getReadSize()];
                        try (FileInputStream fis = new FileInputStream(toFile(file))) {
                            int bytesRead = fis.read(fileContents);
                            if (bytesRead != file.getReadSize()) {
                                throw new RuntimeException("Read " + bytesRead + " bytes, while we expected " + file.getReadSize() + " bytes in file " + toFile(file).getAbsolutePath() + " which currently has size " + toFile(file).length());
                            }
                        }
                        updateSizeOfCachedFileContents(fileContents.length);
                    }
                    file.setCachedFileContents(fileContents);
                }
                return new ReadBuffer(fileContents, 0);
            } else {
                try (FileInputStream fis = new FileInputStream(toFile(file))) {
                    long bytesSkipped = fis.skip(requestedStartPos);
                    if (bytesSkipped != requestedStartPos) {
                        throw new RuntimeException("Skipped " + bytesSkipped + " bytes, while we expected to skip " + requestedStartPos + " bytes in file " + toFile(file).getAbsolutePath() + " which currently has size " + toFile(file).length());
                    }
                    byte[] buffer = new byte[requestedEndPos - requestedStartPos];
                    int bytesRead = fis.read(buffer);
                    if (bytesRead != buffer.length) {
                        throw new RuntimeException("Read " + bytesRead + " bytes, while we expected " + buffer.length + " bytes in file " + toFile(file).getAbsolutePath() + " which currently has size " + toFile(file).length());
                    }
                    return new ReadBuffer(buffer, requestedStartPos);
                }
            }
        } else {
            if (fileContents.length != file.getReadSize()) {
                throw new RuntimeException("Buffer and file size don't match!");
            }
            return new ReadBuffer(fileContents, 0);
        }
    }

    private int skipValue(byte[] buffer, int position) throws IOException {
        //returns the number of bytes occupied by the value at the given position
        Class<T> objectClass = getObjectClass();
        if (objectClass == Long.class || objectClass == Double.class) {
            return LONG_SIZE;
        } else if (objectClass == Integer.class || objectClass == Float.class) {
            return INT_SIZE;
        } else {
            int length = SerializationUtils.bytesToInt(buffer, position);
            return INT_SIZE + length;
        }
    }

    private DataOutputStream getAppendingOutputStream(FileInfo fileInfo) throws FileNotFoundException {
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(toFile(fileInfo), true)));
    }

    private DataOutputStream getOutputStreamToTempFile(FileInfo fileInfo) throws FileNotFoundException {
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(toTempFile(fileInfo), false)));
    }

    private File toFile(FileInfo fileInfo) {
        if (directory == null) {
            throw new RuntimeException("Directory is null, the data interface was probably closed already!");
        }
        return new File(directory, Long.toString(fileInfo.getFirstKey()));
    }

    private File toTempFile(FileInfo fileInfo) {
        if (directory == null) {
            throw new RuntimeException("Directory is null, the data interface was probably closed already!");
        }
        return new File(directory, "tmp." + Long.toString(fileInfo.getFirstKey()));
    }
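    /*
     * A note on the read helpers below: readCleanValues() serves reads through getReadBuffer()
     * (possibly from the cache) and does not merge duplicate keys, which is fine for files that
     * have been rewritten. readAllValues() reads the complete file from disk and merges
     * duplicate keys with the combinator, so it is also safe for dirty files that still
     * contain multiple values per key.
     */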
    private Map<Long, T> readMap(FileInfo file) {
        List<KeyValue<T>> values = readCleanValues(file);
        Map<Long, T> result = new HashMap<>(values.size());
        for (KeyValue<T> value : values) {
            result.put(value.getKey(), value.getValue());
        }
        return result;
    }

    private List<KeyValue<T>> readCleanValues(FileInfo file) {
        try {
            byte[] buffer = getReadBuffer(file, 0, file.getReadSize()).getBuffer();
            int expectedNumberOfValues = getLowerBoundOnNumberOfValues(file.getReadSize());
            List<KeyValue<T>> result = new ArrayList<>(expectedNumberOfValues);
            int position = 0;
            while (position < buffer.length) {
                long key = SerializationUtils.bytesToLong(buffer, position);
                position += LONG_SIZE;
                ReadValue<T> readValue = readValue(buffer, position);
                position += readValue.getSize();
                result.add(new KeyValue<>(key, readValue.getValue()));
            }
            dataWasRead();
            return result;
        } catch (Exception ex) {
            throw new RuntimeException("Unexpected exception while reading values from file " + toFile(file).getAbsolutePath(), ex);
        }
    }

    private List<KeyValue<T>> readAllValues(FileInfo file) {
        try {
            byte[] buffer = readCompleteFile(file);
            if (buffer.length > 0) {
                int expectedNumberOfValues = getLowerBoundOnNumberOfValues(file.getWriteSize());
                List<KeyValue<T>> result = new ArrayList<>(expectedNumberOfValues);
                //read the values into a number of buckets, so they can be merged in smaller chunks
                int numberOfBuckets = Math.max(1, expectedNumberOfValues / 1000);
                List[] buckets = new List[numberOfBuckets];
                for (int i = 0; i < buckets.length; i++) {
                    buckets[i] = new ArrayList(expectedNumberOfValues / numberOfBuckets);
                }
                long start = file.getFirstKey();
                long density = (1L << BITS_TO_DISCARD_FOR_FILE_BUCKETS) / numberOfBuckets;
                int position = 0;
                while (position < buffer.length) {
                    long key = SerializationUtils.bytesToLong(buffer, position);
                    position += LONG_SIZE;
                    ReadValue<T> readValue = readValue(buffer, position);
                    position += readValue.getSize();
                    int bucketInd = (int) ((key - start) / density);
                    if (bucketInd == buckets.length) {
                        bucketInd--; //rounding error?
                    }
                    buckets[bucketInd].add(new KeyValue<>(key, readValue.getValue()));
                }
                for (int bucketInd = 0; bucketInd < buckets.length; bucketInd++) {
                    List<KeyValue<T>> currentBucket = buckets[bucketInd];
                    DBUtils.mergeValues(result, currentBucket, getCombinator());
                    buckets[bucketInd] = null; //free some memory
                }
                return result;
            } else {
                return Collections.emptyList();
            }
        } catch (Exception ex) {
            throw new RuntimeException("Unexpected exception while reading values from file " + toFile(file).getAbsolutePath(), ex);
        }
    }

    private byte[] readCompleteFile(FileInfo file) throws IOException {
        try (FileInputStream fis = new FileInputStream(toFile(file))) {
            byte[] buffer = new byte[file.getWriteSize()];
            int bytesRead = fis.read(buffer);
            //read() returns -1 for an empty file, which is fine if we expected 0 bytes
            if (bytesRead != buffer.length && !(buffer.length == 0 && bytesRead == -1)) {
                throw new RuntimeException("Read " + bytesRead + " bytes, while we expected " + buffer.length + " bytes in file " + toFile(file).getAbsolutePath() + " which currently has size " + toFile(file).length());
            }
            return buffer;
        }
    }
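    /*
     * getLowerBoundOnNumberOfValues() below estimates list capacities. Example: for Long values
     * the width is 8, so a record takes 8 + 8 = 16 bytes and a 1 MB file holds 65536 values.
     * For variable-width values (width -1) a 4-byte width is assumed; since real values are
     * usually larger, the estimate is then rather an overestimate of the actual count.
     */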
    private int getLowerBoundOnNumberOfValues(int sizeOfFile) {
        int width = sizeOfValues;
        if (width == -1) {
            width = 4; //the actual values will probably be much larger...
        }
        return sizeOfFile / (8 + width);
    }

    private List<Long> readKeys(FileInfo file) throws IOException {
        List<Long> result = new ArrayList<>();
        byte[] buffer = getReadBuffer(file, 0, file.getReadSize()).getBuffer();
        int position = 0;
        while (position < buffer.length) {
            result.add(SerializationUtils.bytesToLong(buffer, position));
            position += LONG_SIZE;
            position += skipValue(buffer, position);
        }
        dataWasRead();
        return result;
    }

    private List<Pair<Long, Integer>> sample(List<Pair<Long, Integer>> fileLocations, int invSampleRate) {
        List<Pair<Long, Integer>> result = new ArrayList<>(fileLocations.size() / invSampleRate);
        for (int i = 0; i < fileLocations.size(); i++) {
            if (i % invSampleRate == 0) {
                result.add(fileLocations.get(i));
            }
        }
        return result;
    }
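    /*
     * Simple lock protocol: at startup a random id is written to the LOCK file, and the
     * periodic task calls checkLock() to verify that the file still contains our id. If another
     * data interface has overwritten it, we write a fresh random id (to alert that instance in
     * turn) and close this data interface.
     */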
    private void checkLock() {
        File lockFile = new File(directory, LOCK_FILE);
        try {
            DataInputStream dis = new DataInputStream(new FileInputStream(lockFile));
            long id = dis.readLong();
            IOUtils.closeQuietly(dis);
            if (randomId != id) {
                writeLockFile(new Random().nextLong()); //try to notify the other data interface that something is wrong
                UI.writeError("The lock in " + lockFile.getAbsolutePath() + " was obtained by another data interface! Closing this data interface. This will probably cause a lot of other errors...");
                close();
            }
        } catch (Exception exp) {
            throw new RuntimeException("Unexpected exception while trying to read lock file " + lockFile.getAbsolutePath(), exp);
        }
    }

    private void writeLockFile(long id) {
        File lockFile = new File(directory, LOCK_FILE);
        try {
            DataOutputStream dos = new DataOutputStream(new FileOutputStream(lockFile));
            dos.writeLong(id);
            IOUtils.closeQuietly(dos);
        } catch (Exception exp) {
            throw new RuntimeException("Unexpected exception while trying to write lock file to " + lockFile.getAbsolutePath(), exp);
        }
    }

    private long getBatchSize() {
        return SerializationUtils.getWidth(getObjectClass()) == -1 ? BATCH_SIZE_NON_PRIMITIVE_VALUES : BATCH_SIZE_PRIMITIVE_VALUES;
    }

    private static class ReadBuffer {

        private final byte[] buffer;
        private final int offset;

        private ReadBuffer(byte[] buffer, int offset) {
            this.buffer = buffer;
            this.offset = offset;
        }

        public byte[] getBuffer() {
            return buffer;
        }

        public int getOffset() {
            return offset;
        }
    }

    private static class ReadValue<T> {

        private int size;
        private T value;

        private ReadValue(int size, T value) {
            this.size = size;
            this.value = value;
        }

        public int getSize() {
            return size;
        }

        public T getValue() {
            return value;
        }
    }

    private class FileIterator {

        private int currentBucketInd = 0;
        private int fileInd = 0;

        public Pair<FileBucket, FileInfo> lockCurrentBucketAndGetNextFile() {
            if (currentBucketInd < fileBuckets.size()) {
                FileBucket bucket = fileBuckets.get(currentBucketInd);
                lockForRead(bucket);
                while (currentBucketInd < fileBuckets.size() && fileInd >= bucket.getFiles().size()) {
                    fileInd = 0;
                    bucket.unlockRead();
                    currentBucketInd++;
                    if (currentBucketInd < fileBuckets.size()) {
                        bucket = fileBuckets.get(currentBucketInd);
                        lockForRead(bucket);
                    }
                }
                if (currentBucketInd < fileBuckets.size()) {
                    return new Pair<>(bucket, bucket.getFiles().get(fileInd++));
                }
            }
            return null;
        }
    }

    public static class MetaFile {

        private List<FileBucket> fileBuckets;
        private long lastWrite;
        private long lastRead;

        public MetaFile(List<FileBucket> fileBuckets, long lastWrite, long lastRead) {
            this.fileBuckets = fileBuckets;
            this.lastRead = lastRead;
            this.lastWrite = lastWrite;
        }

        //constructor used in serialization
        public MetaFile() {
        }

        public List<FileBucket> getFileBuckets() {
            return fileBuckets;
        }

        public void setFileBuckets(List<FileBucket> fileBuckets) {
            this.fileBuckets = fileBuckets;
        }

        public long getLastWrite() {
            return lastWrite;
        }

        public void setLastWrite(long lastWrite) {
            this.lastWrite = lastWrite;
        }

        public long getLastRead() {
            return lastRead;
        }

        public void setLastRead(long lastRead) {
            this.lastRead = lastRead;
        }
    }
}
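/*
 * Minimal usage sketch (the MemoryManager, Combinator and BowTaskScheduler instances are
 * assumed to come from the surrounding application context; the names below are illustrative):
 *
 *   FileDataInterface<Long> counts = new FileDataInterface<>(memoryManager, longCombinator,
 *           Long.class, "/tmp/data", "wordCounts", false, taskScheduler);
 *   counts.write(12345L, 1L);
 *   Long count = counts.read(12345L);
 *   counts.close();
 */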