package shared.filt; import shared.AttributeType; import shared.DataSet; import shared.DataSetDescription; import shared.Instance; import util.linalg.DenseVector; /** * A filter that changes attributes from discrete to binary * @author Andrew Guillory gtg008g@mail.gatech.edu * @version 1.0 */ public class DiscreteToBinaryFilter implements DataSetFilter { /** * @see shared.filt.DataSetFilter#filter(shared.DataSet) */ public void filter(DataSet dataSet) { if (dataSet.getDescription() == null) { dataSet.setDescription(new DataSetDescription(dataSet)); } // count how big the new data vector is int newAttributeCount = 0; DataSetDescription oldDescription = dataSet.getDescription(); for (int i = 0; i < oldDescription.getAttributeTypes().length; i++) { if (oldDescription.getAttributeTypes()[i] == AttributeType.DISCRETE) { newAttributeCount += oldDescription.getDiscreteRange(i); } else { newAttributeCount++; } } if (newAttributeCount == oldDescription.getAttributeCount()) { return; } // for each instance for (int i = 0; i < dataSet.size(); i++) { Instance instance = dataSet.get(i); int k = 0; double[] data = new double[newAttributeCount]; for (int j = 0; j < oldDescription.getAttributeTypes().length; j++) { if (oldDescription.getAttributeTypes()[j] == AttributeType.DISCRETE) { data[k + instance.getDiscrete(j)] = 1; k += oldDescription.getDiscreteRange(j); } else { data[k] = instance.getContinuous(j); k++; } } instance.setData(new DenseVector(data)); } // the description is no longer valid dataSet.setDescription(null); } }