package aima.core.learning.framework;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import aima.core.learning.data.DataResource;
import aima.core.util.Util;
/**
 * Factory for {@link DataSet} instances. Loads examples from delimited text
 * resources and supplies the specifications for the "restaurant" and "iris"
 * data sets.
 *
 * @author Ravi Mohan
 *
 */
public class DataSetFactory {

    /**
     * Reads the classpath resource {@code filename + ".csv"}, looked up
     * relative to {@link DataResource}, and turns every non-blank line into an
     * example of the returned data set.
     *
     * @param filename
     *            resource name without the {@code .csv} extension
     * @param spec
     *            specification used to create the data set and to parse each
     *            line
     * @param separator
     *            regular expression separating the values on a line (it is
     *            handed to {@link String#split(String)})
     * @return a data set holding one example per non-blank line
     * @throws FileNotFoundException
     *             if the resource cannot be found
     * @throws IllegalArgumentException
     *             if a line is rejected by the specification
     * @throws Exception
     *             if reading the resource fails
     */
    public DataSet fromFile(String filename, DataSetSpecification spec,
            String separator) throws Exception {
        // assumed file in data directory and ends in .csv
        String resourceName = filename + ".csv";
        DataSet ds = new DataSet(spec);

        // getResourceAsStream signals a missing resource with null; fail
        // with a message naming the resource instead of a bare
        // NullPointerException from the reader constructor.
        InputStream stream = DataResource.class
                .getResourceAsStream(resourceName);
        if (stream == null) {
            throw new FileNotFoundException("Data resource not found: "
                    + resourceName);
        }

        // Explicit charset so decoding does not depend on the platform
        // default. Closing the reader also closes the underlying stream.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Tolerate empty lines (e.g. a trailing newline at the end
                // of the file) rather than aborting the whole load.
                if (line.trim().isEmpty()) {
                    continue;
                }
                ds.add(exampleFromString(line, spec, separator));
            }
        }
        return ds;
    }

    /**
     * Parses one line of delimited text into an {@link Example}.
     * <p>
     * The line is split on {@code separator}; the resulting values are paired,
     * in order, with the attribute names of the specification, and each value
     * is converted by the matching attribute specification. The attribute
     * named by {@code dataSetSpec.getTarget()} is passed to the example as its
     * target.
     *
     * @param data
     *            the line to parse
     * @param dataSetSpec
     *            specification that validates the values and creates the
     *            attributes
     * @param separator
     *            regular expression separating the values (it is handed to
     *            {@link String#split(String)})
     * @return the example built from {@code data}
     * @throws IllegalArgumentException
     *             if {@code dataSetSpec.isValid} rejects the split values
     */
    public static Example exampleFromString(String data,
            DataSetSpecification dataSetSpec, String separator) {
        List<String> attributeValues = Arrays.asList(data.split(separator));
        if (!dataSetSpec.isValid(attributeValues)) {
            throw new IllegalArgumentException(
                    "Unable to construct Example from " + data);
        }

        Hashtable<String, Attribute> attributes = new Hashtable<String, Attribute>();
        Iterator<String> nameIter = dataSetSpec.getAttributeNames().iterator();
        Iterator<String> valueIter = attributeValues.iterator();
        // Walk names and values in lock step; stop at the shorter of the two.
        while (nameIter.hasNext() && valueIter.hasNext()) {
            String name = nameIter.next();
            AttributeSpecification attributeSpec = dataSetSpec
                    .getAttributeSpecFor(name);
            attributes.put(name, attributeSpec.createAttribute(valueIter.next()));
        }

        String targetAttributeName = dataSetSpec.getTarget();
        return new Example(attributes, attributes.get(targetAttributeName));
    }

    /**
     * Loads the "restaurant" resource using the specification from
     * {@link #createRestaurantDataSetSpec()} and whitespace as the value
     * separator.
     *
     * @return the restaurant data set
     * @throws Exception
     *             if the resource cannot be found, read or parsed
     */
    public static DataSet getRestaurantDataSet() throws Exception {
        DataSetSpecification spec = createRestaurantDataSetSpec();
        return new DataSetFactory().fromFile("restaurant", spec, "\\s+");
    }

    /**
     * Creates the specification of the restaurant data set: eleven string
     * attributes, defined in the order in which their values are expected on
     * each line.
     *
     * @return the restaurant data set specification
     */
    public static DataSetSpecification createRestaurantDataSetSpec() {
        DataSetSpecification dss = new DataSetSpecification();
        dss.defineStringAttribute("alternate", Util.yesno());
        dss.defineStringAttribute("bar", Util.yesno());
        dss.defineStringAttribute("fri/sat", Util.yesno());
        dss.defineStringAttribute("hungry", Util.yesno());
        dss.defineStringAttribute("patrons", new String[] { "None", "Some",
                "Full" });
        dss.defineStringAttribute("price", new String[] { "$", "$$", "$$$" });
        dss.defineStringAttribute("raining", Util.yesno());
        dss.defineStringAttribute("reservation", Util.yesno());
        dss.defineStringAttribute("type", new String[] { "French", "Italian",
                "Thai", "Burger" });
        dss.defineStringAttribute("wait_estimate", new String[] { "0-10",
                "10-30", "30-60", ">60" });
        dss.defineStringAttribute("will_wait", Util.yesno());
        // last attribute is the target attribute unless the target is
        // explicitly reset with dss.setTarget(name)
        return dss;
    }

    /**
     * Loads the "iris" resource using the specification from
     * {@link #createIrisDataSetSpec()} and a comma as the value separator.
     *
     * @return the iris data set
     * @throws Exception
     *             if the resource cannot be found, read or parsed
     */
    public static DataSet getIrisDataSet() throws Exception {
        DataSetSpecification spec = createIrisDataSetSpec();
        return new DataSetFactory().fromFile("iris", spec, ",");
    }

    /**
     * Creates the specification of the iris data set: four numeric
     * measurements followed by the string attribute {@code plant_category}.
     *
     * @return the iris data set specification
     */
    public static DataSetSpecification createIrisDataSetSpec() {
        DataSetSpecification dss = new DataSetSpecification();
        dss.defineNumericAttribute("sepal_length");
        dss.defineNumericAttribute("sepal_width");
        dss.defineNumericAttribute("petal_length");
        dss.defineNumericAttribute("petal_width");
        dss.defineStringAttribute("plant_category", new String[] { "setosa",
                "versicolor", "virginica" });
        return dss;
    }
}