package hu.u_szeged.ml; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Set; public abstract class DataHandler implements Cloneable { // dataset create/save/load /** * creates a new empty dataset using the underlying native datatype */ abstract public void createNewDataset(Map<String, Object> parameters); // /* // * returns the underlying native datatype // */ // public Object getNativeDataset(); public DataHandler createEmptyDataHandler() { try{ return this.getClass().newInstance(); }catch (InstantiationException e){ e.printStackTrace(); }catch (IllegalAccessException e){ e.printStackTrace(); } return null; } /** * loads a native dataset from the given source * * @param source * A String denotes the source of the native dataset it contains a native dataset implementation dependent * resource string */ abstract public void loadDataset(String source) throws DataMiningException; /** * saves the current dataset to the given target * * @param target * A String denotes the target of the native dataset it contains a native dataset implementation dependent * resource string */ abstract public void saveDataset(String target); // data manipulation /** * sets the class label of the given instance */ abstract public <T extends Comparable<?>> void setLabel(String instanceId, T label); /** * @return the class label f the given instance */ abstract public <T extends Comparable<?>> T getLabel(String instanceId); /** * Sets the value of a numeric feature * * @param instanceId * instance identifier * @param featureName * name of the feature * @param value */ abstract public void setNumericValue(String instanceId, String featureName, double value); /** * Sets the value of a nominal feature if this is a new nominal value it is added to the dataset * * @param instanceId * instance identifier * @param featureName * name of the feature * @param value */ abstract public void setNominalValue(String instanceId, String featureName, String value); /** * Sets the value of a binary feature * * @param instanceId * instance identifier * @param featureName * name of the feature * @param value */ abstract public void setBinaryValue(String instanceId, String featureName, Boolean value); abstract public void setBinaryValue(String instanceId, String featureName, Boolean value, boolean ternal); /** * Sets the value of a feature, the type of the feature is given by the beginning of the feature name b_ binary * feature n_ numeric feature m_ nominal feature t_ ternal feature * * @param instanceId * instance identifier * @param featureName * name of the feature * @param value */ abstract public <T extends Comparable<?>> void setValue(String instanceId, String featureName, T value) throws DataMiningException; abstract public Double getNumericValue(String instanceId, String featureName) throws DataMiningException; abstract public String getNominalValue(String instanceId, String featureName) throws DataMiningException; abstract public Boolean getBinaryValue(String instanceId, String featureName) throws DataMiningException; abstract public <T extends Comparable<?>> T getValue(String instanceId, String featureName) throws DataMiningException; // instance manipulation abstract public void removeInstance(String instanceId) throws DataMiningException; abstract public Set<String> getInstanceIds(); abstract public int getInstanceCount(); // feature manipulation abstract public void removeFeature(String featureName) throws DataMiningException; abstract public Set<String> getFeatureNames(); abstract public int getFeatureCount(); // The value set of a nominal feature. // It returns null for numerical and binary features. abstract public List<String> getFeatureValues(String featureName); // It throws exception for numerical and binary features. abstract public void setDefaultFeatureValue(String featureName, String value) throws DataMiningException; // classifier functions abstract public void initClassifier(Map<String, Object> parameters) throws DataMiningException; abstract public Model trainClassifier() throws DataMiningException; abstract public ClassificationResult classifyDataset(Model model) throws DataMiningException; /** * creates a subset of the dataset where only the given instances and/or features are present * * @param instancesSelected * @param featuresSelected * @return */ abstract public DataHandler createSubset(Collection<String> instancesSelected, Set<String> featuresSelected) throws DataMiningException; abstract public void addDataHandler(DataHandler dh) throws DataMiningException; }