package org.wikipedia.miner.extract.steps;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.util.Tool;
public abstract class Step extends Configured implements Tool {
private Counters counters ;
private Path workingDir ;
private FileSystem hdfs ;
public Step(Path workingDir) throws IOException {
this.workingDir = workingDir ;
Configuration conf = new Configuration();
hdfs = FileSystem.get(conf);
}
public Counters getCounters() {
return counters ;
}
public boolean isFinished() throws IOException {
return hdfs.exists(getFinishPath()) ;
}
public void finish(RunningJob job) throws IOException {
if (job != null)
counters = job.getCounters() ;
FSDataOutputStream out = hdfs.create(getFinishPath());
out.writeUTF("finished") ;
out.close();
}
public void reset() throws IOException {
hdfs.delete(getDir(), true) ;
}
public FileSystem getHdfs() {
return hdfs ;
}
public Path getWorkingDir() {
return workingDir ;
}
public Path getDir() {
return new Path(workingDir.toString() + Path.SEPARATOR + getDirName()) ;
}
private Path getFinishPath() {
return new Path(getDir().toString() + Path.SEPARATOR + "finished") ;
}
public abstract String getDirName() ;
}