package edu.fudan.ml.types.featurecluster;

import java.util.ArrayList;

import edu.fudan.ml.types.Instance;
import edu.fudan.ml.types.InstanceSet;
import gnu.trove.map.hash.TIntObjectHashMap;

/**
 * Converts an {@link InstanceSet} into a list of {@link ClassData} entries.
 *
 * <p>Construction runs two passes:
 * <ol>
 *   <li>{@code calCounts} tallies, for every cell of every instance, the slot
 *       {@code data[i][j] + golds[i]} in a counter array of length
 *       {@code feaSize};</li>
 *   <li>{@code genClassData} walks the counter array in consecutive groups
 *       whose width is decided by {@code getIndent}, and emits one
 *       {@code ClassData} per group that contains at least one non-zero
 *       count.</li>
 * </ol>
 *
 * <p>Each instance's data is cast to {@code int[][]} and its target to
 * {@code int[]}; instances carrying other types fail with a
 * {@link ClassCastException}.
 */
public class InstanceSet2ClassData {

    private final InstanceSet instanceSet;
    private final ArrayList<ClassData> classdataList;
    /** Maps the first slot of a group to its feature string. */
    private final TIntObjectHashMap<String> index;
    /** Occurrence count per slot; length is the {@code feaSize} given at construction. */
    private final int[] counts;
    private final int labelSize;

    /**
     * Collects the counts from {@code instanceSet} and immediately builds the
     * class-data list.
     *
     * @param instanceSet instances to count over
     * @param index       feature string for the first slot of every group; must
     *                    contain an entry for each group start visited
     * @param feaSize     length of the counter array
     * @param labelSize   number of labels; determines the group width
     *                    ({@code labelSize}, or {@code labelSize * labelSize}
     *                    for features whose string starts with {@code "0"})
     * @throws IllegalStateException if a group width is not positive while
     *                               there are slots left to process
     * @throws NullPointerException  if {@code index} has no entry for a group
     *                               start
     */
    public InstanceSet2ClassData(InstanceSet instanceSet,
            TIntObjectHashMap<String> index, int feaSize, int labelSize) {
        this.instanceSet = instanceSet;
        this.labelSize = labelSize;
        this.index = index;
        classdataList = new ArrayList<ClassData>();
        counts = new int[feaSize];
        calCounts();
        genClassData();
    }

    /**
     * @return the class-data list built at construction time (the internal
     *         list itself, not a copy)
     */
    public ArrayList<ClassData> getClassdataList() {
        return classdataList;
    }

    /**
     * Increments {@code counts[data[i][j] + golds[i]]} for every cell of every
     * instance, skipping cells whose resulting slot is negative.
     */
    private void calCounts() {
        for (int ii = 0; ii < instanceSet.size(); ii++) {
            Instance inst = instanceSet.getInstance(ii);
            int[][] data = (int[][]) inst.getData();
            int[] golds = (int[]) inst.getTarget();
            for (int i = 0; i < data.length; i++) {
                int[] row = data[i];
                // Bound by this row's own length: rows are not guaranteed to
                // be as long as data[0].
                for (int j = 0; j < row.length; j++) {
                    // NOTE(review): a negative cell combined with a positive
                    // gold label can still yield a non-negative slot and be
                    // counted; confirm that this is intended.
                    int idx = row[j] + golds[i];
                    if (idx >= 0) {
                        counts[idx]++;
                    }
                }
            }
        }
    }

    /**
     * Splits {@code counts} into consecutive groups and adds one
     * {@code ClassData} (keyed by the group's first slot) for each group that
     * has at least one non-zero count.
     */
    private void genClassData() {
        int start = 0;
        while (start < counts.length) {
            int width = getIndent(start);
            if (width <= 0) {
                // A zero width would never advance the loop; a negative one
                // would walk backwards. Fail fast instead.
                throw new IllegalStateException(
                        "Non-positive group width " + width + " at slot "
                        + start + " (labelSize=" + labelSize + ")");
            }
            double[] label = genLabel(start, start + width);
            if (hasNonZero(label)) {
                int total = getLabelCount(label);
                classdataList.add(new ClassData(start, label, total));
            }
            start += width;
        }
    }

    /**
     * Copies {@code counts[p..q)} into a new {@code double} array.
     *
     * @param p first slot, inclusive
     * @param q last slot, exclusive
     * @return the counts of the range as doubles
     */
    private double[] genLabel(int p, int q) {
        double[] label = new double[q - p];
        for (int i = p, j = 0; i < q; i++, j++) {
            label[j] = (double) counts[i];
        }
        return label;
    }

    /**
     * @param label per-slot counts of one group
     * @return the sum of all entries, truncated to {@code int}
     */
    private int getLabelCount(double[] label) {
        double c = 0;
        for (int i = 0; i < label.length; i++) {
            c += label[i];
        }
        return (int) c;
    }

    /**
     * Reports whether the group holds any non-zero count. When it does not,
     * a diagnostic line is written to standard output.
     *
     * @param label per-slot counts of one group
     * @return {@code true} if at least one entry is non-zero
     */
    private boolean hasNonZero(double[] label) {
        for (double data : label) {
            if (data != 0) {
                return true;
            }
        }
        System.out.println("Error: zero");
        return false;
    }

    /**
     * Returns the width of the group starting at slot {@code id}:
     * {@code labelSize * labelSize} when the feature string registered for
     * that slot starts with {@code "0"}, otherwise {@code labelSize}.
     *
     * @param id first slot of the group
     * @return number of slots the group occupies
     * @throws NullPointerException if {@code index} has no feature for
     *                              {@code id}
     */
    private int getIndent(int id) {
        String feature = index.get(id);
        if (feature == null) {
            // Same exception type as the unchecked dereference would raise,
            // but naming the offending slot.
            throw new NullPointerException(
                    "No feature registered in index for slot " + id);
        }
        if (feature.startsWith("0")) {
            return labelSize * labelSize;
        }
        return labelSize;
    }
}