package edu.isi.karma.mapreduce.driver;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class CreateSequenceFileFromKeyValuePairs extends Configured implements Tool{
private static Logger logger = LoggerFactory.getLogger(CreateSequenceFileFromKeyValuePairs.class);
public void configure(Properties p) throws Exception {
Configuration conf = getConf();
conf.setIfUnset("fs.default.name", p.getProperty("fs.default.name"));
}
public int run(String[] args) throws Exception {
// Configuration processed by ToolRunner
Properties p = new Properties();
p.load(new FileInputStream(new File(args[0])));
configure(p);
String outputFileName = p.getProperty("output.file");
Path outputPath = new Path(outputFileName);
SequenceFile.Writer writer = SequenceFile.createWriter(getConf(),Writer.keyClass(Text.class),
Writer.valueClass(Text.class), Writer.file(outputPath),Writer.compression(CompressionType.NONE));
if(null != p.getProperty("input.directory"))
{
String inputDirectoryName = p.getProperty("input.directory");
File f = new File(inputDirectoryName);
if(!f.exists() || !f.isDirectory()){
logger.error("Invalid input directory: " + inputDirectoryName);
return -1;
}
for(File document : f.listFiles())
{
addDocumentToSequenceFile(writer, document);
}
}
if(null != p.getProperty("input.file"))
{
String inputFileName = p.getProperty("input.file");
File document = new File(inputFileName);
if(!document.exists()){
logger.error("Invalid input: " + inputFileName);
return -1;
}
addDocumentToSequenceFile(writer, document);
}
writer.close();
return 0;
}
private void addDocumentToSequenceFile(SequenceFile.Writer writer,
File document) throws IOException {
String contents = FileUtils.readFileToString(document);
String[] kvPairs = contents.split("\n");
for(String kvPair : kvPairs){
int splitLocation = kvPair.indexOf(',');
writer.append(new Text(kvPair.substring(1, splitLocation)), new Text(kvPair.substring(splitLocation+1, kvPair.length()-1)));
}
}
public static void main(String[] args) throws Exception {
// Let ToolRunner handle generic command-line options
int res = ToolRunner.run(new Configuration(), new CreateSequenceFileFromKeyValuePairs(), args);
System.exit(res);
}
}