package water;

import java.io.File;
import java.io.IOException;

import water.fvec.Frame;
import water.util.FileUtils;

import static water.util.FileUtils.*;

/**
 * One entry of the accuracy-testing data sets CSV: a numeric id, the URI of the data file and the
 * index of the response column, plus the {@link Frame} parsed from that file once {@link #load}
 * has been called.
 */
public class DataSet {
  private int id;
  public String uri;
  private int responseColumn;
  private Frame frame;

  /**
   * Looks up the row of {@code AccuracyTestingSuite.dataSetsCSVRows} whose first column equals
   * {@code id} and reads the URI (second column) and response column index (third column) from it.
   * Only the first matching row is used. Fields are trimmed before being interpreted, so padding
   * around the comma separators is tolerated.
   *
   * @param id identifier of the data set to look up
   * @throws Exception if no row has this id, if the matching row has fewer than three columns, or
   *                   if its response column is not an integer
   */
  public DataSet(int id) throws Exception {
    this.id = id;
    final String wantedId = Integer.toString(id);

    boolean foundDataSet = false;
    for (String row : AccuracyTestingSuite.dataSetsCSVRows) {
      // limit -1 keeps trailing empty columns, so the column count reflects the row as written
      String[] fields = row.trim().split(",", -1);
      if (!fields[0].trim().equals(wantedId)) {
        continue;
      }

      // Report a truncated row explicitly instead of failing with an index-out-of-bounds error.
      if (fields.length < 3) {
        throw new Exception("Malformed row for data set id: " + this.id
            + " in data sets csv (expected at least 3 columns): " + row);
      }

      uri = fields[1].trim();
      try {
        responseColumn = Integer.parseInt(fields[2].trim());
      } catch (NumberFormatException e) {
        throw new Exception("Invalid response column '" + fields[2] + "' for data set id: "
            + this.id + " in data sets csv.", e);
      }
      foundDataSet = true;
      break;
    }

    if (!foundDataSet) {
      throw new Exception("Couldn't find data set id: " + this.id + " in data sets csv.");
    }
  }

  /**
   * Parses the data file behind {@link #uri} into {@link #getFrame() the frame}. When
   * {@code regression} is false the response column is replaced by its categorical conversion and
   * the frame is written back to the DKV.
   *
   * @param regression {@code true} to leave the response column as parsed, {@code false} to
   *                   convert it to a categorical column
   * @throws IOException if the canonical path of the data file cannot be resolved
   */
  public void load(boolean regression) throws IOException {
    AccuracyTestingSuite.summaryLog.println("Loading data set: " + this.id);
    frame = TestUtil.parse_test_file(makeDataSetFile(this.uri).getCanonicalPath());

    if (!regression) {
      String responseColumnName = frame._names[responseColumn];
      AccuracyTestingSuite.summaryLog.println("Converting response column (idx/name): "
          + responseColumn + "/" + responseColumnName + " to categorical for dataset: " + this.id);
      // replace() hands back the vec that was swapped out; it is registered with Scope here
      // (presumably so it is cleaned up when the scope exits - confirm against Scope).
      Scope.track(frame.replace(responseColumn, frame.vecs()[responseColumn].toCategoricalVec()));
      DKV.put(frame);
    }
  }

  /** @return the id this data set was looked up by */
  public int getId() {
    return id;
  }

  /** @return the response column index read from the data sets CSV */
  public int getResponseColumn() {
    return responseColumn;
  }

  /** @return the frame assigned by {@link #load}, or {@code null} if it has not been called */
  public Frame getFrame() {
    return frame;
  }

  /**
   * Logs and removes the loaded frame; does nothing if no frame has been loaded. The field itself
   * is left untouched, so {@link #getFrame()} keeps returning the same object afterwards.
   */
  public void removeFrame() {
    if (frame != null) {
      // Plain concatenation renders a null key as "null" instead of throwing inside a log line.
      AccuracyTestingSuite.summaryLog.println("Removing frame: " + frame._key
          + " for data set id: " + id);
      frame.remove();
      frame.delete();
    }
  }

  /**
   * Maps a data set URI to a local file: the directory is chosen from the first marker found in
   * the URI ("bigdata", then "smalldata", then "tmp") and the file name is whatever follows the
   * last '/' of the trimmed URI.
   *
   * @param dataSetUri URI taken from the data sets CSV
   * @return the result of {@code locateFile} for the chosen directory plus the file name
   */
  private File makeDataSetFile(String dataSetUri) {
    final String directory;
    if (dataSetUri.contains("bigdata")) {
      directory = "bigdata/laptop/testng/";
    } else if (dataSetUri.contains("smalldata")) {
      directory = "smalldata/testng/";
    } else if (dataSetUri.contains("tmp")) {
      directory = "/tmp/";
    } else {
      directory = "";
    }

    // Last path segment; yields the whole string when there is no '/' at all.
    String trimmedUri = dataSetUri.trim();
    String fileName = trimmedUri.substring(trimmedUri.lastIndexOf('/') + 1);
    return locateFile(directory + fileName);
  }
}