package edu.isi.karma.cleaning.features; import java.io.File; import java.io.FileWriter; import java.util.Collection; import java.util.HashMap; import java.util.Vector; import au.com.bytecode.opencsv.CSVWriter; public class Data2Features { // convert the csv file to arff file // return the fpath of arff file /*public static void Data2arff(String csvpath, String arfpath) { try { CSVLoader loader = new CSVLoader(); loader.setSource(new File(csvpath)); Instances data = loader.getDataSet(); // save ARFF ArffSaver saver = new ArffSaver(); saver.setInstances(data); saver.setFile(new File(arfpath)); saver.setDestination(new File(arfpath)); saver.writeBatch(); } catch (Exception ex) { ex.printStackTrace(); } }*/ public static void Testdata2CSV(Vector<String> tests, String fpath,RecordFeatureSet rf) { try { CSVWriter writer = new CSVWriter(new FileWriter(new File(fpath))); // get attribute names // get attribute names Collection<String> attrStrings = rf.getFeatureNames(); String[] attr_names =attrStrings.toArray(new String[attrStrings.size()+1]); attr_names[attr_names.length - 1] = "label"; writer.writeNext(attr_names); for (String Record : tests) { Vector<String> row = new Vector<String>(); Collection<Feature> cf = rf.computeFeatures(Record,""); Feature[] x = cf.toArray(new Feature[cf.size()]); // row.add(f.getName()); for (int k = 0; k < cf.size(); k++) { row.add(String.valueOf(x[k].getScore())); } row.add(""); String[] dataEntry = row.toArray(new String[row.size()]); writer.writeNext(dataEntry); } writer.flush(); writer.close(); } catch (Exception ex) { ex.printStackTrace(); } } // class: records convert them into csv file public static void Traindata2CSV( HashMap<String, Vector<String>> class2Records, String fpath,RecordFeatureSet rf) { try { CSVWriter writer = new CSVWriter(new FileWriter(new File(fpath))); Vector<String> vsStrings = new Vector<String>(); for(Vector<String> vecs:class2Records.values()) { vsStrings.addAll(vecs); } rf.initialize(vsStrings); // get attribute names Collection<String> attrStrings = rf.getFeatureNames(); String[] attr_names =attrStrings.toArray(new String[attrStrings.size()+1]); attr_names[attr_names.length - 1] = "label"; writer.writeNext(attr_names); for (String label : class2Records.keySet()) { for (String Record : class2Records.get(label)) { Vector<String> row = new Vector<String>(); Collection<Feature> cf = rf.computeFeatures(Record, label); Feature[] x = cf.toArray(new Feature[cf.size()]); // row.add(f.getName()); for (int k = 0; k < cf.size(); k++) { row.add(String.valueOf(x[k].getScore())); } row.add(label); // change this according to the dataset. String[] dataEntry = row.toArray(new String[row.size()]); writer.writeNext(dataEntry); } } writer.flush(); writer.close(); } catch (Exception ex) { System.out.println("" + ex.toString()); } } }