package edu.washington.escience.myria;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.UUID;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;

import edu.washington.escience.myria.storage.ReadableTable;
/**
 * CsvTupleWriter is a {@link TupleWriter} that serializes tuples to a delimited file, usually a CSV. It uses a
 * {@link CSVPrinter} to do the underlying serialization. The fields to be output may contain special characters such as
 * newlines, because fields may be quoted (using double quotes '"'). Double quotation marks inside of fields are escaped
 * using the CSV-standard trick of replacing '"' with '""'.
 *
 * CSV files should be compatible with Microsoft Excel.
 *
 * Text is encoded as UTF-8. Values in columns of type {@code Type.BLOB_TYPE} are not inlined: each blob is written to
 * its own temporary file and the absolute path of that file is emitted in the CSV field instead.
 */
public class CsvTupleWriter implements TupleWriter {
  /** Required for Java serialization. */
  static final long serialVersionUID = 1L;

  /** The CSVPrinter used to write the output. Created by {@link #open(OutputStream)}; not serialized. */
  private transient CSVPrinter csvPrinter;

  /** The CSV format used when the printer is created. */
  final CSVFormat csvFormat;

  /** Creates a writer that uses {@link CSVFormat#DEFAULT}. */
  public CsvTupleWriter() {
    this(CSVFormat.DEFAULT);
  }

  /**
   * Creates a writer that uses {@link CSVFormat#DEFAULT} with a custom field delimiter.
   *
   * @param separator the character placed between fields.
   */
  public CsvTupleWriter(final char separator) {
    this(CSVFormat.DEFAULT.withDelimiter(separator));
  }

  /**
   * Creates a writer that uses the given format.
   *
   * @param format the CSV format handed to the {@link CSVPrinter}.
   */
  public CsvTupleWriter(final CSVFormat format) {
    csvFormat = format;
  }

  /**
   * Binds this writer to the given stream. Must be called before any of the write methods.
   *
   * @param out the stream that receives the serialized output.
   * @throws IOException if the printer cannot be created.
   */
  @Override
  public void open(final OutputStream out) throws IOException {
    /* Pin the charset so the output does not depend on the JVM's platform default encoding. */
    csvPrinter =
        new CSVPrinter(
            new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)), csvFormat);
  }

  /**
   * Writes the column names as a single record.
   *
   * @param columnNames the names to write, in column order.
   * @throws IOException if the underlying printer fails.
   */
  @Override
  public void writeColumnHeaders(final List<String> columnNames) throws IOException {
    csvPrinter.printRecord(columnNames);
  }

  /**
   * Writes every tuple of the given table as one record each. Blob columns are written to temporary files and
   * represented in the record by the file's absolute path; all other values are written via {@code toString()}.
   *
   * @param tuples the tuples to serialize.
   * @throws IOException if writing a record or a blob file fails.
   */
  @Override
  public void writeTuples(final ReadableTable tuples) throws IOException {
    final int numColumns = tuples.numColumns();
    final int numTuples = tuples.numTuples();
    final Schema schema = tuples.getSchema();

    /* The column types do not change from row to row, so classify each column once. */
    final boolean[] isBlobColumn = new boolean[numColumns];
    for (int col = 0; col < numColumns; ++col) {
      isBlobColumn[col] = schema.getColumnType(col).equals(Type.BLOB_TYPE);
    }

    /* Serialize every row into the output stream, reusing a single row buffer. */
    final String[] row = new String[numColumns];
    for (int rowIdx = 0; rowIdx < numTuples; ++rowIdx) {
      for (int col = 0; col < numColumns; ++col) {
        if (isBlobColumn[col]) {
          /* Write the blob to its own file and put the file name in the CSV. */
          row[col] = writeToTempFile(tuples.getBlob(col, rowIdx));
        } else {
          row[col] = tuples.getObject(col, rowIdx).toString();
        }
      }
      csvPrinter.printRecord((Object[]) row);
    }
  }

  /**
   * Flushes any buffered output and closes the printer.
   *
   * @throws IOException if flushing or closing fails.
   */
  @Override
  public void done() throws IOException {
    /* try-with-resources guarantees close() runs even if flush() throws. */
    try (CSVPrinter printer = csvPrinter) {
      printer.flush();
    }
  }

  /**
   * Writes an error notice to the output and closes the printer.
   *
   * @throws IOException if writing the notice or closing fails.
   */
  @Override
  public void error() throws IOException {
    /* If both print() and close() fail, the print() failure is reported and the close() failure is suppressed. */
    try (CSVPrinter printer = csvPrinter) {
      printer.print("There was an error. Investigate the query status to see the message");
    }
  }

  /**
   * Writes the readable content of a byte buffer (from its position to its limit) to a newly created temporary file.
   * The file is not deleted by this class.
   *
   * @param bb the buffer to be written to file; its position and limit are left unchanged.
   * @return the absolute path of the temporary file that was written.
   * @throws IOException in case of error.
   */
  private static String writeToTempFile(final ByteBuffer bb) throws IOException {
    /*
     * Copy through a duplicate instead of calling bb.array(): array() throws for direct or read-only buffers and
     * ignores the buffer's position, limit and array offset.
     */
    final ByteBuffer view = bb.duplicate();
    final byte[] bytes = new byte[view.remaining()];
    view.get(bytes);

    final Path path = Files.createTempFile("out", null);
    Files.write(path, bytes);
    return path.toFile().getAbsolutePath();
  }
}