package storm.cookbook.tfidf.functions;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import backtype.storm.utils.Time;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.operation.TridentOperationContext;
import storm.trident.tuple.TridentTuple;
public class PersistDocumentFunction extends BaseFunction {
Logger LOG = LoggerFactory.getLogger(PersistDocumentFunction.class);
private static final long serialVersionUID = 1L;
DataFileWriter<GenericRecord> dataFileWriter;
Schema schema;
@Override
public void prepare(Map conf, TridentOperationContext context) {
try {
String path = (String) conf.get("DOCUMENT_PATH");
schema = Schema.parse(PersistDocumentFunction.class.getResourceAsStream("/document.avsc"));
File file = new File(path);
DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
if (file.exists())
dataFileWriter.appendTo(file);
else
dataFileWriter.create(schema, file);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public void cleanup() {
try {
dataFileWriter.close();
} catch (IOException e) {
LOG.error("Error Closing file: " + e);
}
}
public void execute(TridentTuple tuple, TridentCollector collector) {
GenericRecord docEntry = new GenericData.Record(schema);
docEntry.put("docid", tuple.getStringByField("documentId"));
docEntry.put("time", Time.currentTimeMillis());
docEntry.put("line", tuple.getStringByField("document"));
try {
dataFileWriter.append(docEntry);
dataFileWriter.flush();
} catch (IOException e) {
LOG.error("Error writing to document record: " + e);
throw new RuntimeException(e);
}
}
}