package uk.ac.imperial.lsds.seepworker.core; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.nio.channels.Selector; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.StandardCharsets; import java.nio.charset.UnsupportedCharsetException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import uk.ac.imperial.lsds.seep.api.DataStore; import uk.ac.imperial.lsds.seep.api.DataStoreType; import uk.ac.imperial.lsds.seep.api.operator.sources.FileConfig; import uk.ac.imperial.lsds.seep.core.DataStoreSelector; import uk.ac.imperial.lsds.seep.core.IBuffer; import uk.ac.imperial.lsds.seep.util.Utils; import uk.ac.imperial.lsds.seepworker.WorkerConfig; // TODO: can we create an inputBuffer that knows how to read directly from file? public class FileSelector implements DataStoreSelector { final private static Logger LOG = LoggerFactory.getLogger(FileSelector.class); // private int numUpstreamResources; private Reader reader; private Thread readerWorker; // private Writer writer; private Thread writerWorker; private Map<Integer, IBuffer> dataAdapters; private String defaultCharacterSet = Charset.defaultCharset().name(); // private Map<Integer, SelectionKey> writerKeys; public FileSelector(WorkerConfig wc) { // this.writerKeys = new HashMap<>(); } @Override public boolean startSelector() { // Start readers if(readerWorker != null){ LOG.info("Starting reader: {}", readerWorker.getName()); readerWorker.start(); } // Start writers if(writerWorker != null){ LOG.info("Starting writer: {}", writerWorker.getName()); writerWorker.start(); } return true; } @Override public boolean stopSelector() { // Stop readers if(readerWorker != null){ LOG.info("Stopping reader: {}", readerWorker.getName()); reader.stop(); } // // Stop writers // if(writerWorker != null){ // LOG.info("Stopping writer: {}", writerWorker.getName()); // writer.stop(); // } return true; } @Override public DataStoreType type() { return DataStoreType.FILE; } @Override public boolean initSelector() { return true; } public void configureAccept(Map<Integer, DataStore> fileOrigins, Map<Integer, IBuffer> dataAdapters){ this.dataAdapters = dataAdapters; // this.numUpstreamResources = fileOrigins.size(); this.reader = new Reader(); this.readerWorker = new Thread(this.reader); this.readerWorker.setName("File-Reader"); Map<ReadableByteChannel, Integer> channels = new HashMap<>(); for(Entry<Integer, DataStore> e : fileOrigins.entrySet()){ try { FileConfig config = new FileConfig(e.getValue().getConfig()); //String absPath = Utils.absolutePath(config.getString(FileConfig.FILE_PATH)); String absPath = config.getString(FileConfig.FILE_PATH); Boolean isHDFS = config.getBoolean(FileConfig.HDFS_SOURCE); if (isHDFS) { //We have two Path types in this file, and the other is imported, so //fully qualify this one. org.apache.hadoop.fs.Path hdfsPath = new org.apache.hadoop.fs.Path(config.getString(FileConfig.HDFS_URI) + absPath); FileSystem fs = FileSystem.get(hdfsPath.toUri(), new Configuration()); LOG.info("Created URI to HDFS resource: {}", hdfsPath.toUri()); FSDataInputStream hdfsInput = fs.open(hdfsPath); LOG.info("Configuring file channel: {}", hdfsInput.toString()); ReadableByteChannel sbc = Channels.newChannel(hdfsInput); channels.put(sbc, e.getKey()); } else { URI uri = new URI(Utils.FILE_URI_SCHEME + absPath); LOG.info("Created URI to local resource: {}", uri.toString()); Path resource = Paths.get(uri); defaultCharacterSet = config.getString(FileConfig.CHARACTER_SET); LOG.info("Configuring file channel: {}", resource.toString()); ReadableByteChannel sbc = Files.newByteChannel(resource, StandardOpenOption.READ); channels.put(sbc, e.getKey()); } } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); } catch (URISyntaxException use) { use.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } catch (IllegalCharsetNameException icne) { icne.printStackTrace(); } catch (IllegalArgumentException iae) { iae.printStackTrace(); } } this.reader.availableChannels(channels); this.reader.availableDataStores(fileOrigins); } public void addNewAccept(Path resource, int id, Map<Integer, IBuffer> dataAdapters) { //Call the new version of the function for consistency/maintainability. This function exists //to default to binary input for reasons of backwards compatibility. addNewAccept(resource, id, dataAdapters, false); } public void addNewAccept(Path resource, int id, Map<Integer, IBuffer> dataAdapters, boolean textSource) { //Call the more explicit version of the function for consistency/maintainability. //Uses the character set specified in configureAccept as the default. addNewAccept(resource, id, dataAdapters, false, defaultCharacterSet); } public void addNewAccept(Path resource, int id, Map<Integer, IBuffer> dataAdapters, boolean textSource, String characterSet) { this.dataAdapters = dataAdapters; Map<ReadableByteChannel, Integer> channels = new HashMap<>(); //ReadableByteChannel sbc = null; try { LOG.info("Configuring file channel: {}", resource.toString()); ReadableByteChannel sbc = Files.newByteChannel(resource, StandardOpenOption.READ); channels.put(sbc, id); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } this.reader = new Reader(); this.readerWorker = new Thread(this.reader); this.readerWorker.setName("File-Reader"); this.reader.availableChannels(channels); //TODO: set a DataStore so we can grab a Schema } // public void configureDownstreamFiles(Map<Integer, DataStore> fileDest, Set<OBuffer> obufsToStream) { // // Lazily configure the writer // this.writer = new Writer(); // this.writerWorker = new Thread(this.writer); // this.writerWorker.setName("File-Writer"); // // Notify the writer of a new set of downstream files // this.writer.newDownstreamFile(fileDest, obufsToStream); // } // @Override // public void readyForWrite(int id) { // writerKeys.get(id).selector().wakeup(); // } // // @Override // public void readyForWrite(List<Integer> ids) { // for(Integer id : ids){ // readyForWrite(id); // } // } class Reader implements Runnable { private boolean working = true; private Selector readSelector; private Map<ReadableByteChannel, Integer> channels; private Map<Integer, DataStore> channelDataStore; public Reader() { try { this.readSelector = Selector.open(); } catch (IOException e) { e.printStackTrace(); } } public void availableChannels(Map<ReadableByteChannel, Integer> channels) { this.channels = channels; } public void availableDataStores(Map<Integer, DataStore> channelDataStore) { this.channelDataStore = channelDataStore; } public void stop() { this.working = false; } @Override public void run() { LOG.info("Starting File Reader worker: {}, {} channels", Thread.currentThread().getName(), channels.entrySet().size()); while(working && channels.entrySet().size() > 0){ for(Entry<ReadableByteChannel, Integer> e: channels.entrySet()) { int id = e.getValue(); ReadableByteChannel rbc = e.getKey(); IBuffer ib = dataAdapters.get(id); if(rbc.isOpen()) { if (isTextSource(e)) { readFromText(e, ib, rbc); working = false; } else { int totalTuplesRead = ib.readFrom(rbc); if(totalTuplesRead == 0) { // Once the file is finished we can stop working here working = false; } } } else { working = false; } } } LOG.info("Finished text File Reader worker: {}", Thread.currentThread().getName()); this.closeReader(); } private boolean isTextSource(Entry<ReadableByteChannel, Integer> e) { if (channelDataStore.containsKey(e.getValue())) { FileConfig config = new FileConfig(channelDataStore.get(e.getValue()).getConfig()); defaultCharacterSet = config.getString(FileConfig.CHARACTER_SET); return config.getBoolean(FileConfig.TEXT_SOURCE); } return false; } private void readFromText(Entry<ReadableByteChannel, Integer> e, IBuffer ib, ReadableByteChannel rbc) { BufferedReader br = new BufferedReader (Channels.newReader(rbc, channelDataStore.get(e.getValue()).getSchema().getSchemaParser().getCharsetName())); String line; try { while ((line = br.readLine()) != null) { ib.pushData(channelDataStore.get(e.getValue()).getSchema().getSchemaParser().bytesFromString(line)); } } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } } private void closeReader(){ for(ReadableByteChannel sbc : channels.keySet()){ try { sbc.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } } // class Writer implements Runnable { // // private boolean working = true; // private Selector writeSelector; // private Map<SeekableByteChannel, Integer> channels; // // /** // * Close all files and then release any remaining resources // * Keep in mind that this does not wait for potential pending data to be written // */ // public void stop(){ // this.working = false; // stop worker // } // // public void newDownstreamFile(Map<Integer, DataStore> fileDest, Set<OBuffer> obufsToStream) { // // Create fileChannels for each of these files // for(Entry<Integer, DataStore> entry : fileDest.entrySet()) { // int id = entry.getKey(); // streamId is the key here // OBuffer oBuffer = getOBufferWithId(obufsToStream, id); // // DataStore ds = entry.getValue(); // String path = ds.getConfig().getProperty(FileConfig.FILE_PATH); // String pathAndFilename = path + id; // Path p = FileSystems.getDefault().getPath(pathAndFilename); // WritableByteChannel channel = null; // try { // // Create File first // boolean created = p.toFile().createNewFile(); // if(created) { // channel = FileChannel.open(p); // } // else{ // LOG.error("PANIC: could not create the file"); // System.exit(-1); // } // } // catch (IOException e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } // // // Register channel with the selector // SelectionKey key = null; // try { // /** // * this doesnt work, instead create a normal buffered stream // * and hope the os knows how to operate this // */ // key = ((SelectableChannel) channel).register( // writeSelector, // SelectionKey.OP_WRITE); // } // catch (ClosedChannelException e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } // // // Attach the OBuffer with the key, so that writer knows from where to read // key.attach(oBuffer); // LOG.info("Configured new output file OP: {} to {}", oBuffer.id(), p.toString()); // } // } // // private OBuffer getOBufferWithId(Set<OBuffer> bufs, int id) { // for(OBuffer ob : bufs) { // if(ob.id() == id) { // return ob; // } // } // return null; // } // // @Override // public void run() { // LOG.info("Started File Writer worker: {}", Thread.currentThread().getName()); // while(working) { // // Set<SelectionKey> selectedKeys = writeSelector.selectedKeys(); // Iterator<SelectionKey> keyIt = selectedKeys.iterator(); // while(keyIt.hasNext()) { // SelectionKey key = keyIt.next(); // keyIt.remove(); // // writable // if(key.isWritable()) { // OBuffer ob = (OBuffer)key.attachment(); // WritableByteChannel channel = (WritableByteChannel)key.channel(); // if(channel.isOpen()) { // boolean fullyWritten = ob.drainTo(channel); // if(fullyWritten) unsetWritable(key); // } // else { // LOG.error("Closed destiny file"); // } // } // if(! key.isValid()){ // String conn = ((WritableByteChannel)key.channel()).toString(); // LOG.warn("Invalid outgoing data connection to: {}", conn); // } // } // } // LOG.info("Finished File Reader worker: {}", Thread.currentThread().getName()); // this.closeWriter(); // } // // private void unsetWritable(SelectionKey key){ // final int newOps = key.interestOps() & ~SelectionKey.OP_WRITE; // key.interestOps(newOps); // } // // private void closeWriter(){ // try { // // Close all files // for(SelectionKey key : writeSelector.keys()) { // key.channel().close(); // key.cancel(); // } // writeSelector.close(); // } // catch (IOException e) { // e.printStackTrace(); // } // } // } }