package shared;

import java.io.Serializable;
import java.util.Arrays;

import util.ABAGAILArrays;
import util.linalg.DenseVector;
import util.linalg.Vector;

/**
 * A data set description contains information
 * about the attributes of a data set: the type of each attribute,
 * the per-attribute minimum and maximum values, and (optionally)
 * a nested description of the labels.
 * @author Andrew Guillory gtg008g@mail.gatech.edu
 * @version 1.0
 */
public class DataSetDescription implements Serializable {
    /**
     * The description of the label type, or null if there is none
     */
    private DataSetDescription labelDescription;

    /**
     * The types of the attributes
     */
    private AttributeType[] types;

    /**
     * The maximum value of each attribute
     */
    private Vector max;

    /**
     * The minimum value of each attribute
     */
    private Vector min;

    /**
     * Make a new data set description
     * @param types the types of the data
     * @param min the minimum value
     * @param max the maximum value
     * @param labelDescription the description of the label
     */
    public DataSetDescription(AttributeType[] types, Vector min, Vector max,
            DataSetDescription labelDescription) {
        this.types = types;
        this.max = max;
        this.min = min;
        this.labelDescription = labelDescription;
    }

    /**
     * Make a new data set description whose minimum is a freshly
     * allocated {@link DenseVector} with the same size as the maximum
     * @param types the types of the data
     * @param max the maximum value
     * @param labelDescription the description of the label
     */
    public DataSetDescription(AttributeType[] types, Vector max,
            DataSetDescription labelDescription) {
        this(types, new DenseVector(max.size()), max, labelDescription);
    }

    /**
     * Make a new data set description without a label description.
     * The minimum is left unset (null) by this constructor, so
     * {@link #getMin(int)} and {@link #getRange(int)} cannot be used until
     * a minimum is supplied through {@link #setMinVector(Vector)} or
     * induced through {@link #induceFrom(DataSet)}.
     * @param types the types of the data
     * @param max the maximum value
     */
    public DataSetDescription(AttributeType[] types, Vector max) {
        this(types, null, max, null);
    }

    /**
     * Make a new data set description without a label description
     * @param types the types of the data
     * @param min the minimum value
     * @param max the maximum value
     */
    public DataSetDescription(AttributeType[] types, Vector min, Vector max) {
        this(types, min, max, null);
    }

    /**
     * Make a new empty data set description
     */
    public DataSetDescription() {}

    /**
     * Make a new data set description induced from a data set
     * @param dataSet the data set to induce from
     */
    public DataSetDescription(DataSet dataSet) {
        induceFrom(dataSet);
    }

    /**
     * Get the discrete range of an attribute, computed as the
     * maximum value truncated to an int, plus one
     * @param i the attribute index
     * @return the discrete range of the attribute
     */
    public int getDiscreteRange(int i) {
        return (int) max.get(i) + 1;
    }

    /**
     * Get the discrete range of the first attribute
     * @return the range
     */
    public int getDiscreteRange() {
        return getDiscreteRange(0);
    }

    /**
     * Get the continuous range (max minus min) of an attribute
     * @param i the attribute index
     * @return the range of the attribute
     */
    public double getRange(int i) {
        return getMax(i) - getMin(i);
    }

    /**
     * Get the continuous range of the first attribute
     * @return the range
     */
    public double getRange() {
        return getRange(0);
    }

    /**
     * Get the continuous max
     * @param i the attribute index
     * @return the max of the attribute
     */
    public double getMax(int i) {
        return max.get(i);
    }

    /**
     * Get the continuous max of the first attribute
     * @return the max
     */
    public double getMax() {
        return getMax(0);
    }

    /**
     * Get the continuous min
     * @param i the attribute index
     * @return the min of the attribute
     */
    public double getMin(int i) {
        return min.get(i);
    }

    /**
     * Get the continuous min of the first attribute
     * @return the min
     */
    public double getMin() {
        return getMin(0);
    }

    /**
     * Get the description of the label
     * @return the description of the label
     */
    public DataSetDescription getLabelDescription() {
        return labelDescription;
    }

    /**
     * Get the maximum value
     * @return the maximum value
     */
    public Vector getMaxVector() {
        return max;
    }

    /**
     * Get the types of the data
     * @return the types
     */
    public AttributeType[] getAttributeTypes() {
        return types;
    }

    /**
     * Get the attribute count
     * @return the count
     */
    public int getAttributeCount() {
        return types.length;
    }

    /**
     * Get the min of the data
     * @return the min
     */
    public Vector getMinVector() {
        return min;
    }

    /**
     * Set the label description
     * @param description the new description
     */
    public void setLabelDescription(DataSetDescription description) {
        labelDescription = description;
    }

    /**
     * Set the max
     * @param instance the new max
     */
    public void setMaxVector(Vector instance) {
        max = instance;
    }

    /**
     * Set the min
     * @param instance the new min
     */
    public void setMinVector(Vector instance) {
        min = instance;
    }

    /**
     * Set the types
     * @param types the new types
     */
    public void setAttributeTypes(AttributeType[] types) {
        this.types = types;
    }

    /**
     * Induce from the given data set.
     * Null instances are skipped. Fields that are already set are kept
     * and refined: max and min are widened to cover the data, and each
     * attribute type is only ever promoted (binary to discrete to
     * continuous). If the data set holds no non-null instance, this
     * description is left unchanged.
     * @param data the data set
     */
    public void induceFrom(DataSet data) {
        // find the first non-null instance; the bound check stops an
        // empty or all-null data set from being indexed past its end
        int i = 0;
        while (i < data.size() && data.get(i) == null) {
            i++;
        }
        if (i >= data.size()) {
            return;
        }
        // seed any missing fields from the first non-null instance
        Instance first = data.get(i);
        if (max == null) {
            max = (Vector) first.getData().copy();
        }
        if (min == null) {
            min = (Vector) first.getData().copy();
        }
        if (types == null) {
            // start from the most restrictive type and promote as needed
            types = new AttributeType[first.size()];
            Arrays.fill(types, AttributeType.BINARY);
        }
        boolean hasLabels = false;
        for (; i < data.size(); i++) {
            Instance cur = data.get(i);
            if (cur == null) {
                continue;
            }
            hasLabels = hasLabels || cur.getLabel() != null;
            max.maxEquals(cur.getData());
            min.minEquals(cur.getData());
            for (int j = 0; j < types.length; j++) {
                // a value other than 0 or 1 rules out binary
                if (types[j] == AttributeType.BINARY
                        && cur.getContinuous(j) != 1
                        && cur.getContinuous(j) != 0) {
                    types[j] = AttributeType.DISCRETE;
                }
                // a value whose discrete and continuous readings
                // differ rules out discrete
                if (types[j] == AttributeType.DISCRETE
                        && cur.getDiscrete(j) != cur.getContinuous(j)) {
                    types[j] = AttributeType.CONTINUOUS;
                }
            }
        }
        if (hasLabels) {
            if (labelDescription == null) {
                labelDescription = new DataSetDescription();
            }
            labelDescription.induceFrom(data.getLabelDataSet());
        }
    }

    /**
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        String result = "Types : " + ABAGAILArrays.toString(types);
        result += "\nMax : " + max;
        result += "\nMin : " + min;
        if (labelDescription != null) {
            result += "\nLabel Description:\n" + labelDescription;
        }
        return result;
    }
}