package context.arch.widget; import java.io.FileReader; import java.sql.Date; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Enumeration; import context.arch.storage.Attribute; import context.arch.storage.Attributes; import weka.core.Instance; import weka.core.Instances; /** * Extends Widget to provide capabilities for reading non-constant Attributes from a WEKA .arff file. * It assumes that the last feature is the class feature and does NOT add it. * * @author Brian Y. Lim * */ public class ClassifierWidget extends Widget { /** * Names of features from the WEKA .arff file */ protected String[] FEATURES; /** * * @param id * @param widgetClassName * @param arffFilename file name of .arff file containing instances header */ @SuppressWarnings("unchecked") public ClassifierWidget(String id, String widgetClassName, String arffFilename) { super(id, widgetClassName); /* * Populate feature names from Weka ARFF file */ try { Instances dataset = new Instances(new FileReader(arffFilename)); Enumeration<weka.core.Attribute> attrs = dataset.enumerateAttributes(); FEATURES = new String[dataset.numAttributes() - 1]; // skip last attribute which is the class attribute for (int i = 0; i < FEATURES.length; i++) { weka.core.Attribute attr = attrs.nextElement(); String FEATURE = attr.name(); FEATURES[i] = FEATURE; // store into array // add to non-constant attributes addAttribute(Attribute.instance( FEATURE, wekaTypeToClass(attr.type()))); } } catch (Exception e) { e.printStackTrace(); } } /** * Utility method to convert a type from WEKA format to java class. * @param type * @return */ @SuppressWarnings("unchecked") public static <T extends Object & Comparable<? super T>> Class<T> wekaTypeToClass(int type) { switch (type) { case weka.core.Attribute.NUMERIC: return (Class<T>) Double.class; case weka.core.Attribute.NOMINAL: return (Class<T>) String.class; // TODO: would there be a better substitute class? case weka.core.Attribute.STRING: return (Class<T>) String.class; case weka.core.Attribute.DATE: return (Class<T>) Date.class; default: return (Class<T>) String.class; } } /** * Date format to parse date format used by WEKA: yyyy-MM-dd'T'HH:mm:ss */ public static final DateFormat wekaDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); /** * Utility method to extract value of a weka.core.Attribute from an Instance and cast to the appropriate object class. * @param instance * @param attribute * @return */ @SuppressWarnings("unchecked") public static <T extends Comparable<? super T>> T getValue(Instance instance, weka.core.Attribute attribute) { int type = attribute.type(); switch (type) { case weka.core.Attribute.NUMERIC: return (T) new Double(instance.value(attribute)); case weka.core.Attribute.NOMINAL: return (T) instance.stringValue(attribute); case weka.core.Attribute.STRING: return (T) instance.stringValue(attribute); case weka.core.Attribute.DATE: String strValue = instance.stringValue(attribute); try { return (T) wekaDateFormat.parse(strValue); } catch (ParseException e) { e.printStackTrace(); return null; // this should cause a very nasty error to help spot the bug } default: return null; } } /** * Convenience method to extract Attributes from a WEKA instance. * It does not include the last weka attribute that is assumed to be the class attribute. * @param instance * @return */ public static Attributes instanceToAttributes(Instance instance) { Attributes atts = new Attributes(); for (int a = 0; a < instance.numAttributes() - 1; a++) { // skip last attribute weka.core.Attribute attribute = instance.attribute(a); atts.addAttribute( attribute.name(), ClassifierWidget.getValue(instance, attribute)); } return atts; } /* * Get input feature names */ public String[] getFeatureNames() { return FEATURES; } }