package uk.ac.imperial.lsds.seepworker.core.output;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.WritableByteChannel;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.imperial.lsds.seep.api.DataReference;
import uk.ac.imperial.lsds.seep.api.RuntimeEventRegister;
import uk.ac.imperial.lsds.seep.api.data.OTuple;
import uk.ac.imperial.lsds.seep.api.operator.sources.FileConfig;
import uk.ac.imperial.lsds.seep.core.OBuffer;
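/**
 * OBuffer implementation that writes output data to a file. Depending on the
 * {@link FileConfig} of the backing {@link DataReference}, the file lives on
 * the local filesystem or on HDFS; in both cases writes go through a
 * BufferedOutputStream, so data reaches the file when the buffer fills or
 * flush() is called.
 */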
public class FileOutputBuffer implements OBuffer {
private static final Logger LOG = LoggerFactory.getLogger(FileOutputBuffer.class);
private DataReference dr;
private int id;
private OutputStream stream;
public FileOutputBuffer(DataReference dr, int batchSize) {
this.dr = dr;
this.id = dr.getId();
// Create the output file (the data reference id is appended to the path for unique naming) and open the output stream
this.stream = createOutputFile(batchSize);
}
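/**
 * Creates the output file and returns a buffered stream to it. The target path
 * is taken from FileConfig.FILE_PATH with the buffer id appended for unique
 * naming. If FileConfig.HDFS_SOURCE is set, the file is created on HDFS at
 * FileConfig.HDFS_URI (or appended to, if it already exists); otherwise a
 * local file is created and buffered with the given batch size. Returns null
 * if the stream cannot be opened.
 */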
private BufferedOutputStream createOutputFile(int batchSize) {
String path = dr.getDataStore().getConfig().getProperty(FileConfig.FILE_PATH);
String pathAndFilename = path + id;
boolean isHDFS = Boolean.parseBoolean(dr.getDataStore().getConfig().getProperty(FileConfig.HDFS_SOURCE));
if (isHDFS) {
String hdfsUri = dr.getDataStore().getConfig().getProperty(FileConfig.HDFS_URI);
//We have two Path types in this file, and the other is imported, so
//fully qualify this one.
org.apache.hadoop.fs.Path hdfsPath = new org.apache.hadoop.fs.Path(hdfsUri + pathAndFilename);
try {
FileSystem fs = FileSystem.get(hdfsPath.toUri(), new Configuration());
FSDataOutputStream hdfsOutput;
try {
hdfsOutput = fs.create(hdfsPath, false);
} catch (IOException ioe) {
//The file already exists, so open it in append mode.
try {
//Checking whether the file exists and then choosing between create
//and append might read more cleanly, but catching the exception here
//avoids the race between the existence check and the create call.
hdfsOutput = fs.append(hdfsPath);
} catch (IOException e) {
LOG.error("Failed to open HDFS file " + hdfsPath + " for appending", e);
return null;
}
}
return new BufferedOutputStream(hdfsOutput);
} catch (IOException io) {
LOG.error("Failed to create HDFS output file " + hdfsPath, io);
return null;
}
} else {
Path p = FileSystems.getDefault().getPath(pathAndFilename);
File outputFile = p.toFile();
BufferedOutputStream bws = null;
try {
// Configured with the given batch size
bws = new BufferedOutputStream(new FileOutputStream(outputFile), batchSize);
} catch (FileNotFoundException e) {
LOG.error("Failed to open local output file " + pathAndFilename, e);
}
return bws;
}
}
@Override
public int id() {
return id;
}
@Override
public DataReference getDataReference() {
return dr;
}
@Override
public boolean drainTo(WritableByteChannel channel) {
LOG.error("Not implemented for FileOutputBuffer");
return false;
}
@Override
public boolean write(OTuple o, RuntimeEventRegister reg) {
LOG.error("write(OTuple) not implemented for FileOutputBuffer");
return false;
}
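/**
 * Appends the raw bytes to the buffered output stream. The data may remain in
 * the in-memory buffer until the buffer fills or flush() is called.
 */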
@Override
public boolean write(byte[] data, RuntimeEventRegister reg) {
// Write the data to the underlying (buffered) output stream
try {
stream.write(data);
}
catch (IOException e) {
LOG.error("Failed to write data to output file", e);
return false;
}
return true;
}
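/**
 * Pushes any bytes still held in the in-memory buffer down to the underlying
 * file or HDFS stream.
 */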
@Override
public void flush() {
try {
stream.flush();
} catch (IOException e) {
LOG.error("Failed to flush output stream", e);
}
}
@Override
public boolean readyToWrite() {
LOG.error("Not implemented for FileOutputBuffer");
return false;
}
}