/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.batch.integration.x;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.springframework.batch.core.StepContribution;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.data.hadoop.store.output.OutputStreamWriter;
import org.springframework.integration.file.remote.InputStreamCallback;
import org.springframework.integration.file.remote.RemoteFileTemplate;
import org.springframework.messaging.MessagingException;
import org.springframework.util.Assert;
/**
 * Retrieves a file path (key {@code filePath}) from the step execution context; uses the
 * {@link RemoteFileTemplate} to stream the remote file's contents and writes them to HDFS.
*
* @author Gary Russell
*/
public class RemoteFileToHadoopTasklet implements Tasklet {

	private final Logger logger = LoggerFactory.getLogger(this.getClass());

	private final RemoteFileTemplate<?> template;

	private final Configuration configuration;

	private final String hdfsDirectory;

	/**
	 * Construct a tasklet that copies a remote file to HDFS.
	 * @param template the template used to retrieve the remote file; must not be null.
	 * @param configuration the Hadoop configuration used to open the HDFS writer; must not be null.
	 * @param hdfsDirectory the target HDFS directory; a trailing '/' is appended if not present.
	 */
	public RemoteFileToHadoopTasklet(RemoteFileTemplate<?> template, Configuration configuration,
			String hdfsDirectory) {
		Assert.notNull(template, "'template' cannot be null");
		Assert.notNull(configuration, "'configuration' cannot be null");
		Assert.notNull(hdfsDirectory, "'hdfsDirectory' cannot be null");
		this.template = template;
		this.configuration = configuration;
		// normalize so the file name can be appended directly
		this.hdfsDirectory = hdfsDirectory.endsWith("/") ? hdfsDirectory : (hdfsDirectory + "/");
	}

	/**
	 * Read the {@code filePath} key from the step execution context, retrieve that remote file
	 * via the {@link RemoteFileTemplate} and stream its contents to HDFS.
	 * @throws MessagingException if the transfer did not complete successfully.
	 */
	@Override
	public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception {
		final String filePath = chunkContext.getStepContext().getStepExecution().getExecutionContext()
				.getString("filePath");
		Assert.notNull(filePath, "'filePath' must be present in the step execution context");
		logger.debug("Transferring {} to HDFS", filePath);
		boolean result = this.template.get(filePath, new InputStreamCallback() {

			@Override
			public void doWithInputStream(InputStream stream) throws IOException {
				OutputStreamWriter writer = new OutputStreamWriter(configuration,
						new Path(hdfsDirectory + filePath), null);
				// ensure the HDFS writer is closed even if the copy fails part-way
				try {
					byte[] buffer = new byte[1024];
					int len;
					while ((len = stream.read(buffer)) > 0) {
						// only the filled portion of the buffer must be written on the last read
						writer.write(len == buffer.length ? buffer : Arrays.copyOf(buffer, len));
					}
				}
				finally {
					writer.close();
				}
			}

		});
		if (!result) {
			throw new MessagingException("Error during file transfer");
		}
		logger.debug("Transferred {} to HDFS", filePath);
		return RepeatStatus.FINISHED;
	}

}