package com.cloudera.knittingboar.sgd.iterativereduce;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import com.cloudera.iterativereduce.io.TextRecordParser;
import com.cloudera.iterativereduce.irunit.IRUnitDriver;
import com.cloudera.knittingboar.sgd.iterativereduce.POLRMasterNode;
import com.cloudera.knittingboar.sgd.iterativereduce.POLRWorkerNode;
import com.cloudera.knittingboar.utils.DataUtils;
import com.cloudera.knittingboar.utils.DatasetConverter;
/**
 * Simulates an IterativeReduce run through {@link IRUnitDriver} against a
 * converted copy of the 20 Newsgroups training data and writes the master
 * node's output to a model file on the local file system.
 *
 * <p>The test needs the 20 Newsgroups data to be obtainable through
 * {@link DataUtils} and writes under {@code /tmp}.
 */
public class TestKnittingBoar_IRUnitSim extends TestCase {

  /** Job configuration pinned to the local file system by the static initializer. */
  private static final JobConf defaultConf = new JobConf();

  /** Local file system handle resolved from {@link #defaultConf}. */
  private static FileSystem localFs = null;

  static {
    try {
      defaultConf.set("fs.defaultFS", "file:///");
      localFs = FileSystem.getLocal(defaultConf);
    } catch (IOException e) {
      throw new RuntimeException("init failure", e);
    }
  }

  /** Local working directory for the 20 Newsgroups dataset. */
  private static final Path workDir20NewsLocal = new Path(new Path("/tmp"), "Dataset20Newsgroups");

  /** Directory below the working directory that holds the "20news-bydate" data. */
  private static final File unzipDir = new File( workDir20NewsLocal + "/20news-bydate");

  /** Output directory of the dataset conversion; also used as the driver's input path. */
  private static final String strKBoarTrainDirInput = "" + unzipDir.toString() + "/KBoar-train/";

  /** Location the master node's output is written to at the end of the run. */
  private static final String MODEL_OUTPUT_PATH = "/tmp/IR_Model_0.model";

  /**
   * Prepares the training input consumed by {@link #testIRUnit_POLR()}.
   *
   * @throws IOException if locating or converting the dataset fails
   */
  public void setupResources() throws IOException {
    // The returned directory is not needed here; the call is kept for whatever
    // side effects it has (presumably fetching/unpacking the dataset -- TODO confirm).
    DataUtils.getTwentyNewsGroupDir();

    // NOTE(review): 6000 is passed through unchanged; its meaning (likely a
    // per-output-file record limit) should be confirmed against DatasetConverter.
    DatasetConverter.ConvertNewsgroupsFromSingleFiles(
        DataUtils.get20NewsgroupsLocalDataLocation() + "/20news-bydate-train/",
        strKBoarTrainDirInput,
        6000);
  }

  /**
   * Drives a simulated run over the converted training data and saves the
   * master's result to {@value #MODEL_OUTPUT_PATH}.
   *
   * @throws IOException if resource setup or writing the model file fails
   */
  public void testIRUnit_POLR() throws IOException {
    System.out.println( "Starting: testIRUnit_POLR" );

    setupResources();

    // Property names handed to the driver together with the properties file.
    String[] props = {
        "app.iteration.count",
        "com.cloudera.knittingboar.setup.FeatureVectorSize",
        "com.cloudera.knittingboar.setup.numCategories",
        "com.cloudera.knittingboar.setup.RecordFactoryClassname"
    };

    IRUnitDriver polr_driver = new IRUnitDriver("src/test/resources/app_unit_test.properties", props );
    // Point the driver at the freshly converted training data.
    polr_driver.SetProperty("app.input.path", strKBoarTrainDirInput);
    polr_driver.Setup();
    polr_driver.SimulateRun();

    System.out.println("\n\nComplete...");

    POLRMasterNode IR_Master = (POLRMasterNode)polr_driver.getMaster();

    Path out = new Path(MODEL_OUTPUT_PATH);
    FileSystem fs = out.getFileSystem(defaultConf);
    FSDataOutputStream fos = fs.create(out);
    try {
      IR_Master.complete(fos);
      fos.flush();
    } finally {
      // Release the stream even when writing the model fails.
      fos.close();
    }

    System.out.println("\n\nModel Saved: " + MODEL_OUTPUT_PATH );
  }
}