package com.scaleunlimited.cascading.local;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.LineNumberReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.Properties;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import cascading.flow.FlowProcess;
import cascading.scheme.SinkCall;
import cascading.scheme.local.TextLine;
import cascading.tuple.Fields;
@SuppressWarnings("serial")
public class TextLineScheme extends TextLine {
private boolean _compress = false;
public TextLineScheme() {
this(false);
}
public TextLineScheme(boolean compress) {
super();
_compress = compress;
}
public TextLineScheme(Fields sourceFields) {
super(sourceFields);
}
public TextLineScheme(Fields sourceFields, Fields sinkFields) {
this(sourceFields, sinkFields, false);
}
public TextLineScheme(Fields sourceFields, Fields sinkFields, boolean compress) {
super(sourceFields, sinkFields);
_compress = compress;
}
@Override
public LineNumberReader createInput(InputStream inputStream) {
if (!inputStream.markSupported()) {
inputStream = new BufferedInputStream(inputStream, 128);
}
// See if inputStream is gzipped.
inputStream.mark(2);
byte[] magic = new byte[2];
try {
int bytesRead = inputStream.read(magic);
inputStream.reset();
if ((bytesRead == 2) && (magic[0] == (byte)0x1f) && (magic[1] == (byte)0x8b)) {
return super.createInput(new GZIPInputStream(inputStream));
}
} catch (IOException e) {
// Ignore, and just return regular reader
}
return super.createInput(inputStream);
}
@Override
public PrintWriter createOutput(OutputStream outputStream) {
if (_compress) {
try {
outputStream = new GZIPOutputStream(outputStream);
} catch (IOException e) {
// Ignore the error, and we'll return a regular PrintWriter.
}
}
return super.createOutput(outputStream);
}
@Override
public void sinkCleanup(FlowProcess<Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall) throws IOException {
// Currently cascading doesn't close the printwriter, so the underlying stream doesn't get flushed properly.
PrintWriter pw = sinkCall.getContext();
super.sinkCleanup(flowProcess, sinkCall);
try {
pw.close();
} catch (Exception e) {
// Just in case Cascading starts closing it.
}
}
}