package com.neverwinterdp.scribengin.nizarS3.sink;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Collection;
import java.util.LinkedList;

import org.apache.commons.io.FileUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import com.neverwinterdp.scribengin.Record;
import com.neverwinterdp.scribengin.nizarS3.sink.S3SinkConfig;
import com.neverwinterdp.scribengin.storage.sink.partitioner.SinkPartitioner;

/**
 * Buffers {@link Record}s in memory and spills them to local files in chunks
 * of {@code chunkSize} records; the resulting file paths are queued so they
 * can be retrieved with {@link #pollFromDisk()}.
 *
 * <p>
 * Threading: the constructor starts a background thread that purges the
 * in-memory buffer to disk whenever a purge has been requested. In-memory
 * state is guarded by {@code memoryLock}; on-disk state (the pending chunk and
 * the file queue) is guarded by {@code diskLock}. The two locks are never held
 * at the same time.
 */
public final class S3SinkBuffer {

  /** Chunk files of this size (5 GiB) or larger are rejected. */
  private static final long FIVE_GB = 5368709120L;

  /** Pause between two checks of the purge flag by the background thread. */
  private static final long PROCESS_LOOP_INTERVAL_MS = 1000L;

  /** Byte written after every record in a chunk file. */
  private static final byte RECORD_SEPARATOR = (byte) '\n';

  /** The logger. */
  private static final Logger logger = LogManager.getLogger(S3SinkBuffer.class);

  /** Guards {@link #tuples}, {@link #tuplesSizeInMemory} and {@link #startBufferingTimeInMemory}. */
  private final Object memoryLock = new Object();

  /** Guards {@link #tuplesChunk} and {@link #files}. */
  private final Object diskLock = new Object();

  /** Maximum number of records kept in memory before a purge is requested. */
  private final long maxRecordsInMemory;

  /** Maximum number of record bytes kept in memory before a purge is requested. */
  private final long maxRecordsSizeInMemory;

  /** Maximum buffering time in milliseconds before a purge is requested. */
  private final long maxBufferingTimeInMemory;

  /** Start of the current buffering window (epoch millis), 0 when no window is open. */
  private long startBufferingTimeInMemory;

  /** Canonical paths of the chunk files written so far and not yet polled. */
  private final LinkedList<String> files = new LinkedList<String>();

  /** Number of records written to one chunk file. */
  private final int chunkSize;

  /** Maps an offset range to the relative path of the chunk file. */
  private final SinkPartitioner partitioner;

  /** Whether records are buffered in memory before being written to disk. */
  private final boolean memoryBufferingEnabled;

  /** Records currently buffered in memory. */
  private LinkedList<Record> tuples = new LinkedList<Record>();

  /** The local tmp dir under which chunk files are created. */
  private final String localTmpDir;

  /** Background thread running {@link #runProcessLoop()}. */
  private final Thread bufferThread;

  /** Purge request flag; written by producers, read by the background thread. */
  private volatile boolean active = true;

  /** Bytes of record data accounted to the in-memory buffer. */
  private long tuplesSizeInMemory;

  /** Records waiting to complete the next chunk file. */
  private final LinkedList<Record> tuplesChunk = new LinkedList<Record>();

  /**
   * Creates the buffer and starts its background purge thread.
   *
   * @param partitioner
   *          the partitioner
   * @param config
   *          the configuration
   */
  public S3SinkBuffer(SinkPartitioner partitioner, S3SinkConfig config) {
    this.localTmpDir = config.getLocalTmpDir();
    this.maxRecordsSizeInMemory = config.getMemoryMaxBufferSize();
    this.maxBufferingTimeInMemory = config.getMemoryMaxBufferingTime();
    this.maxRecordsInMemory = config.getMemoryMaxRecords();
    this.partitioner = partitioner;
    this.chunkSize = config.getChunkSize();
    this.memoryBufferingEnabled = config.isMemoryBufferingEnabled();

    bufferThread = new Thread("s3-sink-buffer-purge") {
      @Override
      public void run() {
        try {
          runProcessLoop();
        } catch (InterruptedException e) {
          // Interruption is the only way to stop the loop; keep the flag set.
          Thread.currentThread().interrupt();
          logger.info("S3 sink buffer purge thread interrupted, stopping");
        }
      }
    };
    bufferThread.start();
  }

  private void setProcessLoopActive(boolean active) {
    this.active = active;
  }

  /**
   * Adds the Record to the buffer. With memory buffering enabled the record
   * is kept in memory and a purge is requested once a memory limit is
   * reached; otherwise it goes straight to the pending disk chunk.
   *
   * @param tuple
   *          the tuple
   * @return always {@code true}
   */
  public boolean add(Record tuple) {
    if (!memoryBufferingEnabled) {
      addToDisk(tuple);
      return true;
    }
    int recordSize = tuple.getData().length;
    synchronized (memoryLock) {
      if (!checkMemoryAvailability(recordSize)) {
        setProcessLoopActive(true);
        tuplesSizeInMemory = 0;
        // Open a new buffering window, otherwise every later add would see
        // the old window as expired and request a purge again.
        startBufferingTimeInMemory = System.currentTimeMillis();
      }
      tuples.add(tuple);
      tuplesSizeInMemory += recordSize;
    }
    return true;
  }

  /**
   * Appends the record to the pending chunk and writes the chunk to a file
   * once it holds at least {@code chunkSize} records.
   *
   * @param tuple
   *          the tuple
   * @return true, if a chunk file was written and queued by this call
   */
  private boolean addToDisk(Record tuple) {
    synchronized (diskLock) {
      tuplesChunk.add(tuple);
      // ">=" rather than "==": after a failed write the chunk is kept and
      // keeps growing, and must still be retried on the next record.
      if (tuplesChunk.size() < chunkSize) {
        return false;
      }
      return writeChunkToFile();
    }
  }

  /**
   * Writes the pending chunk to one file, one record per line, and queues the
   * file path. The chunk is cleared only on success. Must be called with
   * {@code diskLock} held.
   *
   * @return true, if the file was written and queued
   */
  private boolean writeChunkToFile() {
    RandomAccessFile randomAccessFile = null;
    FileChannel fileChannel = null;
    try {
      // TODO retrieve the offset from the registry
      long startOffset = Long.parseLong(tuplesChunk.getFirst().getKey());
      long endOffset = Long.parseLong(tuplesChunk.getLast().getKey());

      // The partitioner derives the file path from the offset range; the
      // path will be later used to deduce the s3 path.
      String path = localTmpDir + "/" + partitioner.getPartition(startOffset, endOffset);
      File file = new File(path);
      File parent = file.getParentFile();
      if (parent != null && !parent.exists() && !parent.mkdirs()) {
        throw new IllegalStateException("Couldn't create dir: " + parent);
      }

      // Write through memory mapped regions; the position is a long because
      // a chunk file may grow beyond Integer.MAX_VALUE bytes.
      long position = 0;
      randomAccessFile = new RandomAccessFile(file, "rw");
      fileChannel = randomAccessFile.getChannel();
      for (Record record : tuplesChunk) {
        byte[] data = record.getData();
        MappedByteBuffer mem =
            fileChannel.map(FileChannel.MapMode.READ_WRITE, position, data.length + 1);
        mem.put(data);
        mem.put(RECORD_SEPARATOR);
        position += data.length + 1;
      }

      if (file.length() >= FIVE_GB) {
        throw new IllegalArgumentException("File created is bigger than allowed s3 sink file size.");
      }
      boolean queued = files.add(file.getCanonicalPath());
      tuplesChunk.clear();
      return queued;
    } catch (Exception e) {
      logger.error("Failed to write a chunk of " + tuplesChunk.size() + " records to disk", e);
      return false;
    } finally {
      closeQuietly(fileChannel);
      closeQuietly(randomAccessFile);
    }
  }

  /** Closes the resource if it was opened, logging instead of throwing. */
  private static void closeQuietly(Closeable resource) {
    if (resource == null) {
      return;
    }
    try {
      resource.close();
    } catch (IOException e) {
      logger.warn("Failed to close " + resource, e);
    }
  }

  /**
   * Background loop: purges memory to disk whenever a purge was requested,
   * then sleeps. A failing purge is logged and does not end the loop.
   *
   * @throws InterruptedException
   *           if the thread is interrupted while sleeping
   */
  private void runProcessLoop() throws InterruptedException {
    while (true) {
      if (active) {
        try {
          purgeMemoryToDisk();
        } catch (RuntimeException e) {
          logger.error("Purging memory to disk failed", e);
        }
      }
      Thread.sleep(PROCESS_LOOP_INTERVAL_MS);
    }
  }

  /**
   * Writes all tuples to disk creating more in-memory space. A few tuples that
   * couldn't complete a chunk will be left in-memory.
   */
  public void purgeMemoryToDisk() {
    logger.info("purge Memory To Disk");
    // Clear the request before taking the snapshot so that a request raised
    // while this purge is running is not lost.
    setProcessLoopActive(false);

    LinkedList<Record> toFlush;
    synchronized (memoryLock) {
      toFlush = tuples;
      tuples = new LinkedList<Record>();
      int mustRemain = chunkSize > 0 ? toFlush.size() % chunkSize : 0;
      long remainingBytes = 0;
      // Move the newest records that cannot complete a chunk back into
      // memory, preserving their order.
      for (int i = 0; i < mustRemain; i++) {
        Record kept = toFlush.removeLast();
        tuples.addFirst(kept);
        remainingBytes += kept.getData().length;
      }
      tuplesSizeInMemory = remainingBytes;
    }

    // Hold the disk lock for the whole flush so that the records of one
    // purge are not interleaved with another writer's records.
    synchronized (diskLock) {
      Record next;
      while ((next = toFlush.poll()) != null) {
        addToDisk(next);
      }
    }
  }

  /**
   * Check memory availability. Must be called with {@code memoryLock} held.
   *
   * @param newRecordSize
   *          the new tuple size
   * @return true, if none of the record-count, byte-size and buffering-time
   *         limits is reached
   */
  // TODO check space availability on disk as well?
  private boolean checkMemoryAvailability(int newRecordSize) {
    long now = System.currentTimeMillis();
    if (startBufferingTimeInMemory == 0) {
      startBufferingTimeInMemory = now;
    }
    boolean tooManyRecords = tuples.size() >= maxRecordsInMemory;
    boolean tooManyBytes = tuplesSizeInMemory + newRecordSize > maxRecordsSizeInMemory;
    boolean bufferedTooLong = (now - startBufferingTimeInMemory) > maxBufferingTimeInMemory;
    return !(tooManyRecords || tooManyBytes || bufferedTooLong);
  }

  /**
   * Clear tuples in memory and on disk. A failure to delete the directory is
   * logged and does not prevent the in-memory state from being cleared.
   *
   * @throws IOException
   *           declared for compatibility; not thrown by this implementation
   */
  public void clear() throws IOException {
    File directory = new File(localTmpDir + File.separator + partitioner.getPartition());
    synchronized (diskLock) {
      try {
        FileUtils.deleteDirectory(directory);
      } catch (Exception e) {
        logger.error("Failed to delete " + directory, e);
      }
      tuplesChunk.clear();
      files.clear();
    }
    synchronized (memoryLock) {
      tuples.clear();
      tuplesSizeInMemory = 0;
      startBufferingTimeInMemory = 0;
    }
  }

  /**
   * Gets the number of chunk files queued.
   *
   * @return the number of files
   */
  public int getFilesCount() {
    synchronized (diskLock) {
      return files.size();
    }
  }

  /**
   * Removes the oldest queued chunk file path and returns it as a file.
   *
   * @return the file, or {@code null} if no file is queued
   */
  // TODO name suggests that we actually read from disk?
  public File pollFromDisk() {
    String path;
    synchronized (diskLock) {
      path = files.poll();
    }
    return path == null ? null : new File(path);
  }

  /*
   * Note that there are other methods for reading on-File size vs in-Memory
   * sizes
   */
  public int size() {
    return tuplesInMemory() + tuplesOnDisk();
  }

  /** @return the number of records in the in-memory buffer */
  public int tuplesInMemory() {
    synchronized (memoryLock) {
      return tuples.size();
    }
  }

  /** @return the number of queued files multiplied by the chunk size */
  public int tuplesOnDisk() {
    synchronized (diskLock) {
      return files.size() * chunkSize;
    }
  }

  /** @return true, if there is neither a buffered record nor a queued file */
  public boolean isEmpty() {
    boolean noTuples;
    synchronized (memoryLock) {
      noTuples = tuples.isEmpty();
    }
    if (!noTuples) {
      return false;
    }
    synchronized (diskLock) {
      return files.isEmpty();
    }
  }

  /** @return true, if the in-memory buffer contains the object */
  public boolean contains(Object o) {
    synchronized (memoryLock) {
      return tuples.contains(o);
    }
  }

  /**
   * Removes the object from the in-memory buffer.
   *
   * @return true, if the in-memory buffer contained the object
   */
  // TODO also get file having tuple and remove
  public boolean remove(Object o) {
    synchronized (memoryLock) {
      return tuples.remove(o);
    }
  }

  /** @return true, if the in-memory buffer contains every element */
  public boolean containsAll(Collection<?> collection) {
    synchronized (memoryLock) {
      return tuples.containsAll(collection);
    }
  }

  /**
   * Adds every record of the collection via {@link #add(Record)}.
   *
   * @return true, if at least one record was added
   */
  public boolean addAll(Collection<? extends Record> ccollection) {
    boolean changed = false;
    for (Record tuple : ccollection) {
      changed |= add(tuple);
    }
    return changed;
  }

  /**
   * Removes the given elements from the in-memory buffer.
   *
   * @return true, if the in-memory buffer changed
   */
  // TODO and remove from files as well
  public boolean removeAll(Collection<?> collection) {
    synchronized (memoryLock) {
      return tuples.removeAll(collection);
    }
  }

  /*
   * This methods exists solely for testing purposes. Returns the live,
   * unsynchronized list.
   */
  public LinkedList<String> getFiles() {
    return files;
  }
}