package hip.ch8.localjobrunner;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertTrue;
/**
 * Fluent helper for tests that feed line-oriented text into a job and then
 * check the line-oriented text it produced.
 *
 * <p>Typical usage: add input lines and expected output lines, call
 * {@link #writeInputs()}, run the job using {@link #getConfig()},
 * {@link #getInputPath()} and {@link #getOutputPath()}, and finally call
 * {@link #verifyResults()}.
 */
public class TextIOJobBuilder {

  /** Configuration handed to the constructor; returned by {@link #getConfig()}. */
  protected Configuration config;

  /** Input lines, in the order they were added. */
  protected List<String> inputs = new ArrayList<String>();

  /** Expected output lines, in the order they were added. */
  protected List<String> expectedOutputs = new ArrayList<String>();

  /** Separator placed between key and value by {@link #addInput(String, String)}. */
  protected String inputSeparator = "\t";

  /** Separator placed between key and value by {@link #addExpectedOutput(String, String)}. */
  protected String outputSeparator = "\t";

  /** Directory that {@link #writeInputs()} recreates and writes the input file into. */
  protected Path inputPath = new Path("/tmp/mrtest/input");

  /** Directory that {@link #verifyResults()} reads {@code part*} files from. */
  protected Path outputPath = new Path("/tmp/mrtest/output");

  /** File system obtained from the configuration in the constructor. */
  protected FileSystem fs;

  /**
   * Creates a builder backed by a fresh {@link Configuration}.
   *
   * @throws IOException if the file system cannot be obtained
   */
  public TextIOJobBuilder() throws IOException {
    this(new Configuration());
  }

  /**
   * Creates a builder backed by the supplied configuration.
   *
   * <p>The configuration is modified in place: {@code mapred.job.tracker} is
   * set to {@code local} and {@code fs.default.name} to {@code file:///}.
   *
   * @param config configuration to adjust and keep; it is the instance later
   *               returned by {@link #getConfig()}
   * @throws IOException if the file system cannot be obtained
   */
  public TextIOJobBuilder(Configuration config) throws IOException {
    config.set("mapred.job.tracker", "local");
    config.set("fs.default.name", "file:///");
    // Keep the adjusted instance so getConfig() exposes the settings applied above.
    this.config = config;
    fs = FileSystem.get(config);
  }

  /**
   * @return the configuration this builder was constructed with, including
   *         the properties set by the constructor
   */
  public Configuration getConfig() {
    return config;
  }

  /**
   * Overrides the directory used for job input.
   *
   * @param inputPath new input directory
   * @return this builder
   */
  public TextIOJobBuilder setInputPath(Path inputPath) {
    this.inputPath = inputPath;
    return this;
  }

  /**
   * Overrides the directory used for job output.
   *
   * @param outputPath new output directory
   * @return this builder
   */
  public TextIOJobBuilder setOutputPath(Path outputPath) {
    this.outputPath = outputPath;
    return this;
  }

  /**
   * Sets the separator used by subsequent {@link #addInput(String, String)}
   * calls. Lines that were already added are not rewritten.
   *
   * @param separator text placed between key and value
   * @return this builder
   */
  public TextIOJobBuilder setInputSeparator(String separator) {
    this.inputSeparator = separator;
    return this;
  }

  /**
   * Sets the separator used by subsequent
   * {@link #addExpectedOutput(String, String)} calls. Lines that were already
   * added are not rewritten.
   *
   * @param separator text placed between key and value
   * @return this builder
   */
  public TextIOJobBuilder setOutputSeparator(String separator) {
    this.outputSeparator = separator;
    return this;
  }

  /**
   * Appends one raw input line.
   *
   * @param line line to add, without a line terminator
   * @return this builder
   */
  public TextIOJobBuilder addInput(String line) {
    inputs.add(line);
    return this;
  }

  /**
   * Appends one input line built as key, input separator, value.
   *
   * @param key   text before the separator
   * @param value text after the separator
   * @return this builder
   */
  public TextIOJobBuilder addInput(String key, String value) {
    inputs.add(key + inputSeparator + value);
    return this;
  }

  /**
   * Appends one raw expected output line.
   *
   * @param line line the job is expected to emit, without a line terminator
   * @return this builder
   */
  public TextIOJobBuilder addExpectedOutput(String line) {
    expectedOutputs.add(line);
    return this;
  }

  /**
   * Appends one expected output line built as key, output separator, value.
   *
   * @param key   text before the separator
   * @param value text after the separator
   * @return this builder
   */
  public TextIOJobBuilder addExpectedOutput(String key, String value) {
    // Expected lines describe job output, so they use the output separator.
    expectedOutputs.add(key + outputSeparator + value);
    return this;
  }

  /**
   * Deletes any existing output and input directories, recreates the input
   * directory and writes all added input lines to a file named
   * {@code part-0} inside it, each followed by the platform line separator.
   *
   * @return this builder
   * @throws IOException if a file system operation fails
   */
  public TextIOJobBuilder writeInputs()
      throws IOException {
    if (fs.exists(outputPath)) {
      fs.delete(outputPath, true);
    }
    if (fs.exists(inputPath)) {
      fs.delete(inputPath, true);
    }
    fs.mkdirs(inputPath);

    DataOutputStream stream = fs.create(new Path(inputPath, "part-0"));
    try {
      IOUtils.writeLines(inputs, String.format("%n"), stream);
    } finally {
      // Close even when the write fails so the file handle is not leaked.
      stream.close();
    }
    return this;
  }

  /**
   * Compares the lines of every file in the output directory whose name
   * starts with {@code part} against the expected output lines.
   *
   * <p>Files are processed in the order returned by the file system, and
   * their lines are matched one-to-one, in order, against the expected lines.
   * Verification fails if a line differs, if the job produced more lines than
   * expected, or if it produced fewer.
   *
   * @return this builder
   * @throws IOException if the output cannot be listed or read
   */
  public TextIOJobBuilder verifyResults() throws IOException {
    FileStatus[] outputFiles = fs.listStatus(outputPath, new PathFilter() {
      @Override
      public boolean accept(Path path) {
        return path.getName().startsWith("part");
      }
    });
    // NOTE(review): some file system versions appear to return null rather
    // than throw for a missing directory; treat that as "no output files" so
    // the line-count assertion below reports the problem instead of an NPE.
    if (outputFiles == null) {
      outputFiles = new FileStatus[0];
    }

    int i = 0;
    for (FileStatus file : outputFiles) {
      List<String> actualLines = readLines(fs, file.getPath());
      // Guard the expectedOutputs.get() below against running off the end.
      assertTrue("More output lines than expected in " + file.getPath(),
          actualLines.size() <= expectedOutputs.size() - i);
      for (String actualLine : actualLines) {
        String expectedLine = expectedOutputs.get(i++);
        assertEquals(expectedLine, actualLine);
      }
    }
    // Without this check, missing (or entirely absent) output would pass.
    assertEquals("Number of output lines", expectedOutputs.size(), i);
    return this;
  }

  /**
   * Reads all lines of a file.
   *
   * @param fs file system used to open the file
   * @param p  file to read
   * @return the file's lines, in order
   * @throws IOException if the file cannot be opened or read
   */
  public static List<String> readLines(FileSystem fs, Path p)
      throws IOException {
    InputStream stream = fs.open(p);
    try {
      return IOUtils.readLines(stream);
    } finally {
      // Close even when reading fails so the file handle is not leaked.
      stream.close();
    }
  }

  /**
   * @return the directory used for job input
   */
  public Path getInputPath() {
    return inputPath;
  }

  /**
   * @return the directory used for job output
   */
  public Path getOutputPath() {
    return outputPath;
  }
}