package org.atomnuke.examples.sinks;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import org.atomnuke.sink.AtomSink;
import org.atomnuke.sink.AtomSinkResult;
import java.io.IOException;
import java.io.OutputStreamWriter;
import org.atomnuke.atom.model.Entry;
import org.atomnuke.atom.model.Feed;
import org.atomnuke.sink.AtomSinkException;
import org.atomnuke.task.context.AtomTaskContext;
import org.atomnuke.lifecycle.DestructionException;
import org.atomnuke.lifecycle.InitializationException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.atomnuke.atom.io.AtomWriterFactory;
/**
 * Atom sink that stores serialized Atom entries in a Hadoop
 * {@link SequenceFile} of {@link Text} key/value pairs.
 *
 * <p>The sequence file lives at {@code /data/atom/<feedName>} on the file
 * system configured in the constructor. Every record appended to the sequence
 * file is also mirrored to a local "lookaside" text file.
 *
 * <p>Lifecycle: {@link #init(AtomTaskContext)} must be called before
 * {@link #entry(Entry)} or {@link #feedPage(Feed)}; {@link #destroy()}
 * releases the writer and the file system handle.
 *
 * @author zinic
 */
public class HDFSFeedSink implements AtomSink {

    /** Local file that receives a plain-text copy of every appended record. */
    private static final String LOOKASIDE_PATH = "/home/zinic/atom.txt";

    /** {@link Text} holds UTF-8, so the lookaside copy is written as UTF-8 too. */
    private static final String LOOKASIDE_CHARSET = "UTF-8";

    private final Configuration configuration;
    private final Path targetPath;
    private final String feedName;
    private final AtomWriterFactory writerFactory;

    private SequenceFile.Writer fileWriter;
    private boolean writeHeader;
    private FileSystem hdfs;

    /**
     * Creates a sink for the named feed.
     *
     * @param feedName name of the feed; used as the last path element of the
     *        target sequence file and as the prefix of the metadata record key
     * @param writerFactory factory for the writers used to serialize entries
     */
    public HDFSFeedSink(String feedName, AtomWriterFactory writerFactory) {
        this.feedName = feedName;
        this.writerFactory = writerFactory;

        targetPath = new Path("/data/atom/" + feedName);

        configuration = new Configuration();
        configuration.set("fs.default.name", "hdfs://namenode:9000");

        writeHeader = false;
    }

    /**
     * Connects to the file system and opens the sequence file writer.
     *
     * @param tc task context (not used by this sink)
     * @throws InitializationException if the file system cannot be reached or
     *         the writer cannot be created
     */
    @Override
    public void init(AtomTaskContext tc) throws InitializationException {
        try {
            hdfs = FileSystem.get(configuration);

            // A feed metadata record is only written when the target did not exist yet.
            writeHeader = !hdfs.exists(targetPath);

            // NOTE(review): this createWriter overload presumably truncates an
            // existing file rather than appending to it - confirm against the
            // Hadoop version in use, since that would drop the earlier header.
            fileWriter = SequenceFile.createWriter(hdfs, configuration, targetPath, Text.class, Text.class);
        } catch (IOException ioe) {
            throw new InitializationException(ioe);
        }
    }

    /**
     * Closes the sequence file writer and the file system handle.
     *
     * <p>Safe to call when {@link #init(AtomTaskContext)} never ran or failed
     * part-way, and safe to call more than once. The file system is closed
     * even if closing the writer fails.
     *
     * @throws DestructionException if closing either resource fails
     */
    @Override
    public void destroy() {
        final SequenceFile.Writer writer = fileWriter;
        final FileSystem fileSystem = hdfs;

        // Drop the references first so a repeated destroy() is a no-op.
        fileWriter = null;
        hdfs = null;

        try {
            try {
                if (writer != null) {
                    writer.close();
                }
            } finally {
                // NOTE(review): FileSystem.get may hand out a shared cached
                // instance; closing it here could affect other users - confirm.
                if (fileSystem != null) {
                    fileSystem.close();
                }
            }
        } catch (IOException ioe) {
            throw new DestructionException(ioe);
        }
    }

    /**
     * Serializes a single entry and appends it, keyed by the entry id.
     *
     * @param entry the entry to store
     * @return {@link AtomSinkResult#ok()} on success
     * @throws AtomSinkException if serialization or the append fails
     */
    @Override
    public AtomSinkResult entry(Entry entry) throws AtomSinkException {
        try {
            final ByteArrayOutputStream serialized = new ByteArrayOutputStream();
            writerFactory.getInstance().write(serialized, entry);

            append(new Text(entry.id().toString()), new Text(serialized.toByteArray()));
        } catch (Exception failure) {
            throw new AtomSinkException(failure);
        }

        return AtomSinkResult.ok();
    }

    /**
     * Appends every entry of a feed page, each keyed by its own id. The feed
     * metadata record is written first when it is still pending.
     *
     * @param page the feed page whose entries are stored
     * @return {@link AtomSinkResult#ok()} on success
     * @throws AtomSinkException if serialization or any append fails
     */
    @Override
    public AtomSinkResult feedPage(Feed page) throws AtomSinkException {
        try {
            if (writeHeader) {
                writeFeedHeader(page);
            }

            for (Entry e : page.entries()) {
                // Fresh buffer per entry: each record must hold exactly one
                // serialized entry, not the page or previously written entries.
                final ByteArrayOutputStream serialized = new ByteArrayOutputStream();
                writerFactory.getInstance().write(serialized, e);

                append(new Text(e.id().toString()), new Text(serialized.toByteArray()));
            }
        } catch (Exception failure) {
            throw new AtomSinkException(failure);
        }

        return AtomSinkResult.ok();
    }

    /** Convenience overload that wraps both strings in {@link Text}. */
    private void append(String key, String value) throws IOException {
        append(new Text(key), new Text(value));
    }

    /**
     * Appends one record to the sequence file and mirrors it to the lookaside
     * file as {@code key\nvalue\n\n}.
     *
     * @throws IOException if either write fails
     */
    private void append(Text key, Text value) throws IOException {
        fileWriter.append(key, value);

        final BufferedWriter lookaside = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(new File(LOOKASIDE_PATH), true), LOOKASIDE_CHARSET));

        try {
            lookaside.write(key.toString());
            lookaside.write("\n");
            lookaside.write(value.toString());
            lookaside.write("\n\n");
        } finally {
            // Always release the file handle, even when a write fails.
            lookaside.close();
        }
    }

    /**
     * Appends {@code "key":"value"} to the builder with both parts escaped
     * so that the result is a valid JSON member.
     */
    private void writeVariable(String key, String value, StringBuilder builder) {
        builder.append("\"");
        builder.append(escapeJson(key));
        builder.append("\":\"");
        builder.append(escapeJson(value));
        builder.append("\"");
    }

    /**
     * Escapes a string for use inside a JSON string literal. A {@code null}
     * input is rendered as the text {@code null}, which is what appending a
     * null string to a {@link StringBuilder} produces.
     */
    private static String escapeJson(String raw) {
        final String text = String.valueOf(raw);
        final StringBuilder escaped = new StringBuilder(text.length() + 16);

        for (int i = 0; i < text.length(); i++) {
            final char c = text.charAt(i);

            switch (c) {
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                case '\t':
                    escaped.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters must be \\u-escaped in JSON.
                        escaped.append(String.format("\\u%04x", (int) c));
                    } else {
                        escaped.append(c);
                    }
            }
        }

        return escaped.toString();
    }

    /**
     * Writes the feed metadata record: a small JSON object with the feed id
     * and title, stored under the key {@code <feedName>-metadata}.
     *
     * @throws IOException if the append fails; the header then stays pending
     */
    private void writeFeedHeader(Feed page) throws IOException {
        final StringBuilder header = new StringBuilder("{");
        writeVariable("id", page.id().toString(), header);
        header.append(",");
        writeVariable("title", page.title().toString(), header);
        header.append("}");

        append(feedName + "-metadata", header.toString());

        // Only mark the header as done once it has actually been written.
        writeHeader = false;
    }
}