package com.oreilly.springdata.batch.item; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.Collection; import java.util.List; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.springframework.batch.item.WriteFailedException; import org.springframework.batch.item.file.transform.LineAggregator; import org.springframework.beans.factory.InitializingBean; import org.springframework.data.hadoop.fs.FsShell; import org.springframework.integration.Message; import org.springframework.integration.MessageHandlingException; import org.springframework.util.Assert; public class HdfsTextItemWriter<T> extends AbstractHdfsItemWriter<T> implements InitializingBean { private static final String DEFAULT_LINE_SEPARATOR = System.getProperty("line.separator"); private FileSystem fileSystem; private FSDataOutputStream fsDataOutputStream; private LineAggregator<T> lineAggregator; private String lineSeparator = DEFAULT_LINE_SEPARATOR; private volatile String charset = "UTF-8"; public HdfsTextItemWriter(FileSystem fileSystem) { Assert.notNull(fileSystem, "Hadoop FileSystem must not be null."); this.fileSystem = fileSystem; } @Override public void write(List<? extends T> items) throws Exception { initializeCounterIfNecessary(); prepareOutputStream(); copy(getItemsAsBytes(items), this.fsDataOutputStream); } private void prepareOutputStream() throws IOException { boolean found = false; Path name = null; //TODO improve algorithm while (!found) { name = new Path(getFileName()); // If it doesn't exist, create it. If it exists, return false if (getFileSystem().createNewFile(name)) { found = true; this.resetBytesWritten(); this.fsDataOutputStream = this.getFileSystem().append(name); } else { if (this.getBytesWritten() >= getRolloverThresholdInBytes()) { close(); incrementCounter(); } else { found = true; } } } } /** * Simple not optimized copy */ public void copy(byte[] in, FSDataOutputStream out) throws IOException { Assert.notNull(in, "No input byte array specified"); Assert.notNull(out, "No OutputStream specified"); out.write(in); incrementBytesWritten(in.length); } @Override public FileSystem getFileSystem() { return this.fileSystem; } /** * Extracts the payload as a byte array. * @param message * @return */ private byte[] getItemsAsBytes(List<? extends T> items) { StringBuilder lines = new StringBuilder(); for (T item: items) { lines.append(lineAggregator.aggregate(item) + lineSeparator); } try { return lines.toString().getBytes(this.charset); } catch (UnsupportedEncodingException e) { throw new WriteFailedException("Could not write data.", e); } } public void close() { if (fsDataOutputStream != null) { IOUtils.closeStream(fsDataOutputStream); } } /** * Public setter for the {@link LineAggregator}. This will be used to * translate the item into a line for output. * * @param lineAggregator the {@link LineAggregator} to set */ public void setLineAggregator(LineAggregator<T> lineAggregator) { this.lineAggregator = lineAggregator; } @Override public void afterPropertiesSet() throws Exception { Assert.notNull(lineAggregator, "A LineAggregator must be provided."); } }