/*
 * Copyright 2013 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.springframework.data.hadoop.batch.item;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.springframework.batch.item.ExecutionContext;
import org.springframework.batch.item.ItemStreamException;
import org.springframework.batch.item.ItemStreamWriter;
import org.springframework.batch.item.ItemWriter;
import org.springframework.core.serializer.Serializer;
import org.springframework.transaction.support.TransactionSynchronizationAdapter;
import org.springframework.transaction.support.TransactionSynchronizationManager;
import org.springframework.util.Assert;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;

/**
 * An {@link ItemWriter} implementation used to write the incoming items to
 * HDFS. Because HDFS does not allow the contents of an existing file to be
 * modified, a partially written file cannot be rolled back to a previously
 * known state. As a result, restart is not supported by this {@link ItemWriter}.
 * <br>
 * This {@link ItemWriter} is <em>not</em> thread-safe.
 *
 * @author Michael Minella
 */
public class HdfsItemWriter<T> implements ItemStreamWriter<T> {

	private static final String BUFFER_KEY_PREFIX = HdfsItemWriter.class.getName() + ".BUFFER_KEY";

	private final String bufferKey;
	private String fileName;
	private FileSystem fileSystem;
	private FSDataOutputStream fsDataOutputStream;
	private Serializer<T> itemSerializer;

	/**
	 * Constructor
	 *
	 * @param fileSystem - HDFS {@link FileSystem} reference
	 * @param itemSerializer - Strategy for serializing items
	 * @param fileName - Name of the file to be written to
	 */
	public HdfsItemWriter(FileSystem fileSystem, Serializer<T> itemSerializer, String fileName) {
		Assert.notNull(fileSystem, "Hadoop FileSystem is required.");
		Assert.notNull(itemSerializer, "A Serializer implementation is required");
		Assert.isTrue(StringUtils.hasText(fileName), "A non-empty fileName is required.");

		this.fileSystem = fileSystem;
		this.bufferKey = BUFFER_KEY_PREFIX + "." + hashCode();
		this.itemSerializer = itemSerializer;
		this.fileName = fileName;
	}

	/**
	 * Returns the list of items buffered for the current transaction, creating
	 * the buffer and registering a transaction synchronization (which flushes
	 * the buffer to HDFS just before commit) on first access.
	 */
	@SuppressWarnings({"unchecked", "rawtypes"})
	private List<? extends T> getCurrentBuffer() {
		if (!TransactionSynchronizationManager.hasResource(bufferKey)) {
			TransactionSynchronizationManager.bindResource(bufferKey, new ArrayList());

			TransactionSynchronizationManager.registerSynchronization(new TransactionSynchronizationAdapter() {

				@Override
				public void beforeCommit(boolean readOnly) {
					List items = (List) TransactionSynchronizationManager.getResource(bufferKey);

					if (!CollectionUtils.isEmpty(items) && !readOnly) {
						doWrite(items);
					}
				}

				@Override
				public void afterCompletion(int status) {
					if (TransactionSynchronizationManager.hasResource(bufferKey)) {
						TransactionSynchronizationManager.unbindResource(bufferKey);
					}
				}
			});
		}

		return (List) TransactionSynchronizationManager.getResource(bufferKey);
	}

	/**
	 * Performs the actual write to HDFS. This can be overridden by a subclass
	 * if necessary.
	 *
	 * @param items the list of items to be persisted
	 */
	protected void doWrite(List<? extends T> items) {
		if (!CollectionUtils.isEmpty(items)) {
			try {
				fsDataOutputStream.write(getPayloadAsBytes(items));
			}
			catch (IOException ioe) {
				throw new RuntimeException("Error writing to HDFS", ioe);
			}
		}
	}

	@Override
	public void open(ExecutionContext executionContext) throws ItemStreamException {
		try {
			Path path = new Path(fileName);
			fileSystem.createNewFile(path);
			this.fsDataOutputStream = fileSystem.create(path);
		}
		catch (IOException ioe) {
			throw new ItemStreamException("Unable to open the file to write to", ioe);
		}
	}

	@Override
	public void update(ExecutionContext executionContext) throws ItemStreamException {
		// TODO: determine the state to maintain, if any
	}

	@Override
	@SuppressWarnings({"rawtypes", "unchecked"})
	public void write(List<? extends T> items) throws Exception {
		if (!transactionActive()) {
			doWrite(items);
			return;
		}

		// inside a transaction: buffer the items and defer the write to beforeCommit
		List bufferedItems = getCurrentBuffer();
		bufferedItems.addAll(items);
	}

	@Override
	public void close() {
		if (fsDataOutputStream != null) {
			IOUtils.closeStream(fsDataOutputStream);
		}
	}

	/**
	 * Serializes the items into a single byte array.
	 *
	 * @param items the items to serialize
	 * @return the serialized items as a byte array
	 * @throws IOException if serialization fails
	 */
	private byte[] getPayloadAsBytes(List<? extends T> items) throws IOException {
		ByteArrayOutputStream stream = new ByteArrayOutputStream();

		for (T item : items) {
			itemSerializer.serialize(item, stream);
		}

		return stream.toByteArray();
	}

	private boolean transactionActive() {
		return TransactionSynchronizationManager.isActualTransactionActive();
	}
}
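
/*
 * Usage sketch: a minimal example of constructing and driving the writer
 * directly. The target path, the sample items and the line-oriented
 * Serializer below are illustrative assumptions, not part of this class.
 * In a Spring Batch job the step would invoke open(), write() and close();
 * the calls are shown inline here only for illustration.
 *
 *   FileSystem fs = FileSystem.get(new org.apache.hadoop.conf.Configuration());
 *
 *   Serializer<String> lineSerializer = new Serializer<String>() {
 *       public void serialize(String item, java.io.OutputStream out) throws IOException {
 *           out.write((item + "\n").getBytes("UTF-8"));
 *       }
 *   };
 *
 *   HdfsItemWriter<String> writer = new HdfsItemWriter<String>(fs, lineSerializer, "/tmp/items.txt");
 *   writer.open(new ExecutionContext());
 *   writer.write(java.util.Arrays.asList("first", "second"));
 *   writer.close();
 */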