package edu.isistan.daclassifier;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;
import weka.core.*;
@SuppressWarnings("deprecation")
public class ArffGenerator {
// Names of the feature attributes
public static final String sP = "P";
public static final String sP_DESC = "P_DESC";
public static final String sA0 = "A0";
public static final String sA0_DESC = "A0_DESC";
public static final String sA1 = "A1";
public static final String sA1_DESC = "A1_DESC";
public static final String sA2 = "A2";
public static final String sA2_DESC = "A2_DESC";

// Names of the class attributes (each one is registered as a nominal attribute with the values "0" and "1")
public static final String sIO = "IO";
public static final String sInput = "Input";
public static final String sEntry = "Entry";
public static final String sSelection = "Selection";
public static final String sOutput = "Output";
public static final String sDisplay = "Display";
public static final String sNotification = "Notification";
public static final String sData = "Data";
public static final String sRead = "Read";
public static final String sSingle = "Single";
public static final String sMultiple = "Multiple";
public static final String sWrite = "Write";
public static final String sCreate = "Create";
public static final String sUpdate = "Update";
public static final String sDelete = "Delete";
public static final String sProcess = "Process";
public static final String sCalculation = "Calculation";
public static final String sVerification = "Verification";
public static final String sCommunication = "Communication";
public static final String sIndoor = "Indoor";
public static final String sOutdoor = "Outdoor";
public static final String sUseCase = "UseCase";
public static final String sBegin = "Begin";
public static final String sFlow = "Flow";
public static final String sEnd = "End";
public static final String sNoise = "Noise";

// CSV column names that are not attributes themselves
public static final String sSentence = "Sentence";
public static final String sDomainActions = "DomainActions";

// Registry of every attribute by name, plus the same attributes split into features and classes
public static Map<String, Attribute> attributes;
private static List<Attribute> featureAttributes;
private static List<Attribute> classAttributes;

// Fill the registry once, when the class is initialized
static {
    initAttributesMap();
}
/**
 * Reads the given CSV files and turns every data row into an instance of a
 * freshly generated training set.
 *
 * Each file is parsed with ';' as separator. Its first row is taken as the
 * header and every following row is converted with
 * {@code generateInstance(heads, values, instances)}.
 *
 * @param filenames paths of the CSV files to read, processed in array order
 * @return the training set holding one instance per data row of all files
 * @throws IOException if a file cannot be opened, read or closed
 */
public static Instances readFromCSV(String[] filenames) throws IOException {
    Instances instances = generateTrainingSet();
    for(String filename : filenames) {
        CSVReader reader = new CSVReader(new FileReader(filename), ';');
        try {
            String[] heads = reader.readNext();
            List<String[]> lines = reader.readAll();
            for(String[] values : lines) {
                instances.add(generateInstance(heads, values, instances));
            }
        }
        finally {
            // Release the file handle even when parsing or conversion fails
            reader.close();
        }
    }
    return instances;
}
/**
 * Appends one CSV row to {@code lines}.
 *
 * The row holds the sentence followed by the eight feature values, in the
 * order P, P_DESC, A0, A0_DESC, A1, A1_DESC, A2, A2_DESC, and ends with an
 * empty tenth cell.
 *
 * @param lines    the row list that receives the new row
 * @param sentence value of the first cell
 * @param p        value of the P cell
 * @param p_desc   value of the P_DESC cell
 * @param a0       value of the A0 cell
 * @param a0_desc  value of the A0_DESC cell
 * @param a1       value of the A1 cell
 * @param a1_desc  value of the A1_DESC cell
 * @param a2       value of the A2 cell
 * @param a2_desc  value of the A2_DESC cell
 */
public static void appendWriteToCSV(List<String[]> lines, String sentence, String p, String p_desc, String a0, String a0_desc, String a1, String a1_desc, String a2, String a2_desc) {
    String[] row = {
        sentence,
        p, p_desc,
        a0, a0_desc,
        a1, a1_desc,
        a2, a2_desc,
        "" // last cell is always written empty
    };
    lines.add(row);
}
/**
 * Writes the given rows to a CSV file, using ';' as separator and preceded
 * by the fixed header row (Sentence, the eight feature names, DomainActions).
 *
 * @param filename path of the file to write; it is opened with
 *                 {@code new FileWriter(filename)}
 * @param lines    the data rows to write after the header
 * @throws IOException if the file cannot be opened or closed
 */
public static void writeToCSV(String filename, List<String[]> lines) throws IOException {
    String[] heads = { sSentence, sP, sP_DESC, sA0, sA0_DESC, sA1, sA1_DESC, sA2, sA2_DESC, sDomainActions };
    CSVWriter writer = new CSVWriter(new FileWriter(filename), ';');
    try {
        writer.writeNext(heads);
        writer.writeAll(lines);
    }
    finally {
        // Always close, so the file handle is released even if writing fails
        writer.close();
    }
}
/**
 * Debug helper: prints every non-empty cell of a row as "head: value" on
 * standard output, followed by a separator line.
 *
 * @param heads  the column names
 * @param values the row cells, read at the same positions as {@code heads}
 */
@SuppressWarnings("unused")
private static void prettyPrint(String[] heads, String[] values) {
    int column = 0;
    for(String head : heads) {
        String value = values[column++];
        if(value.isEmpty()) {
            continue;
        }
        System.out.println(head + ": " + value);
    }
    System.out.println("--------------");
}
/**
 * Builds the attribute registry: creates one Attribute per feature name and
 * per class name, stores them in {@code attributes} keyed by name, and fills
 * {@code featureAttributes} and {@code classAttributes} with the two groups.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private static void initAttributesMap() {
    attributes = new HashMap<String, Attribute>();
    featureAttributes = new ArrayList<Attribute>();
    classAttributes = new ArrayList<Attribute>();

    // Feature attributes are created with a null value list
    // (per the Weka Attribute documentation this yields a string attribute)
    String[] featureNames = { sP, sP_DESC, sA0, sA0_DESC, sA1, sA1_DESC, sA2, sA2_DESC };
    FastVector noValues = null;
    for(String featureName : featureNames) {
        attributes.put(featureName, new Attribute(featureName, noValues));
    }
    // At this point the map holds the features only
    featureAttributes.addAll(attributes.values());

    // Class attributes are nominal with the two values "0" and "1";
    // every attribute gets its own copy of the value list
    FastVector binaryValues = new FastVector(2);
    binaryValues.addElement("0");
    binaryValues.addElement("1");
    String[] classNames = {
        sIO, sInput, sEntry, sSelection, sOutput, sDisplay, sNotification,
        sData, sRead, sSingle, sMultiple, sWrite, sCreate, sUpdate, sDelete,
        sProcess, sCalculation, sVerification, sCommunication, sIndoor, sOutdoor,
        sUseCase, sBegin, sFlow, sEnd, sNoise
    };
    for(String className : classNames) {
        attributes.put(className, new Attribute(className, binaryValues.copy()));
    }

    // Classes = everything registered minus the features
    classAttributes.addAll(attributes.values());
    classAttributes.removeAll(featureAttributes);
}
/**
 * Creates an empty data set named "DA-TrainingSet" whose columns are the
 * eight feature attributes followed by the twenty-six class attributes,
 * taken from the {@code attributes} registry.
 *
 * @return the empty data set, created with an initial capacity of 1000
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static Instances generateTrainingSet() {
    // Column order of the data set: features first, then classes
    String[] columnNames = {
        sP, sP_DESC, sA0, sA0_DESC, sA1, sA1_DESC, sA2, sA2_DESC,
        sIO, sInput, sEntry, sSelection, sOutput, sDisplay, sNotification,
        sData, sRead, sSingle, sMultiple, sWrite, sCreate, sUpdate, sDelete,
        sProcess, sCalculation, sVerification, sCommunication, sIndoor, sOutdoor,
        sUseCase, sBegin, sFlow, sEnd, sNoise
    };
    FastVector columns = new FastVector();
    for(String columnName : columnNames) {
        columns.addElement(attributes.get(columnName));
    }
    return new Instances("DA-TrainingSet", columns, 1000);
}
// Lazily created template instance; see getModelInstance
private static Instance modelInstance = null;

/**
 * Returns the cached template instance, creating it on the first call.
 *
 * The template has every feature attribute set to the empty string and
 * every class attribute set to 0. Its data set is the {@code instances}
 * passed on the call that created it; later calls return the cached object
 * and ignore their argument.
 *
 * @param instances the data set attached to the template when it is created
 * @return the shared template instance
 */
private static Instance getModelInstance(Instances instances) {
    if(modelInstance != null) {
        return modelInstance;
    }
    modelInstance = new DenseInstance(attributes.size());
    modelInstance.setDataset(instances);

    String[] featureNames = { sP, sP_DESC, sA0, sA0_DESC, sA1, sA1_DESC, sA2, sA2_DESC };
    for(String featureName : featureNames) {
        modelInstance.setValue(attributes.get(featureName), "");
    }

    String[] classNames = {
        sIO, sInput, sEntry, sSelection, sOutput, sDisplay, sNotification,
        sData, sRead, sSingle, sMultiple, sWrite, sCreate, sUpdate, sDelete,
        sProcess, sCalculation, sVerification, sCommunication, sIndoor, sOutdoor,
        sUseCase, sBegin, sFlow, sEnd, sNoise
    };
    for(String className : classNames) {
        modelInstance.setValue(attributes.get(className), 0);
    }
    return modelInstance;
}
/**
 * Creates an instance carrying the given feature values and 0 for every
 * class attribute. The instance is not attached to any data set here.
 *
 * NOTE(review): values are placed through the Attribute objects of the
 * registry; presumably those only know their column position after
 * generateTrainingSet() has been called at least once - confirm callers
 * do that first.
 *
 * @param p       value for the P attribute
 * @param p_desc  value for the P_DESC attribute
 * @param a0      value for the A0 attribute
 * @param a0_desc value for the A0_DESC attribute
 * @param a1      value for the A1 attribute
 * @param a1_desc value for the A1_DESC attribute
 * @param a2      value for the A2 attribute
 * @param a2_desc value for the A2_DESC attribute
 * @return the new instance
 */
public static Instance generateTestInstance(String p, String p_desc, String a0, String a0_desc, String a1, String a1_desc, String a2, String a2_desc) {
    String[] featureNames = { sP, sP_DESC, sA0, sA0_DESC, sA1, sA1_DESC, sA2, sA2_DESC };
    String[] featureValues = { p, p_desc, a0, a0_desc, a1, a1_desc, a2, a2_desc };
    String[] classNames = {
        sIO, sInput, sEntry, sSelection, sOutput, sDisplay, sNotification,
        sData, sRead, sSingle, sMultiple, sWrite, sCreate, sUpdate, sDelete,
        sProcess, sCalculation, sVerification, sCommunication, sIndoor, sOutdoor,
        sUseCase, sBegin, sFlow, sEnd, sNoise
    };

    Instance instance = new DenseInstance(attributes.size());
    for(int i = 0; i < featureNames.length; i++) {
        instance.setValue(attributes.get(featureNames[i]), featureValues[i]);
    }
    // Every class starts out as 0
    for(String className : classNames) {
        instance.setValue(attributes.get(className), 0);
    }
    return instance;
}
/**
 * Converts one CSV row into an instance of the given data set.
 *
 * The instance starts as a copy of the template from
 * {@code getModelInstance} and is then filled column by column:
 * the Sentence column is skipped, the DomainActions column is handled by
 * {@code applyDomainActions}, and any other column is stored in the
 * attribute registered under the column name. An empty cell resets a
 * feature attribute to "" and a class attribute to 0.
 *
 * Cells are passed through {@code clean} before use. A row shorter than the
 * header is accepted; its missing cells count as empty (presumably what the
 * CSV parser yields for blank or truncated lines - confirm).
 *
 * @param heads     the header row naming each column
 * @param values    the cells of the row
 * @param instances the data set the new instance is attached to
 * @return the populated instance (not yet added to {@code instances})
 * @throws IllegalArgumentException if a column that holds a value has no
 *         registered attribute, or a domain action is not a class attribute
 */
private static Instance generateInstance(String[] heads, String[] values, Instances instances) {
    Instance instance = new DenseInstance(getModelInstance(instances));
    instance.setDataset(instances);
    for(int i = 0; i < heads.length; i++) {
        String head = heads[i];
        if(head.equals(sSentence)) {
            continue;
        }
        // Guard against rows with fewer cells than the header
        String value = (i < values.length) ? clean(values[i]) : "";
        if(head.equals(sDomainActions)) {
            applyDomainActions(instance, value);
            continue;
        }
        Attribute attribute = attributes.get(head);
        if(attribute == null) {
            // Unknown columns are ignored while empty, as before; a value
            // that cannot be stored is reported instead of failing with a
            // bare NullPointerException
            if(value.isEmpty()) {
                continue;
            }
            throw new IllegalArgumentException("Unknown column '" + head + "' with value '" + value + "'");
        }
        if(value.isEmpty()) {
            if(featureAttributes.contains(attribute)) {
                instance.setValue(attribute, "");
            }
            if(classAttributes.contains(attribute)) {
                instance.setValue(attribute, 0);
            }
        }
        else {
            instance.setValue(attribute, value);
        }
    }
    return instance;
}

/**
 * Sets the class attributes of {@code instance} from a comma-separated list
 * of class names: listed classes become 1, all other classes become 0.
 * Surrounding whitespace of each name is ignored and empty entries are skipped.
 */
private static void applyDomainActions(Instance instance, String value) {
    for(Attribute classAttribute : classAttributes) {
        instance.setValue(classAttribute, 0);
    }
    for(String token : value.split(",")) {
        String domainAction = token.trim();
        if(domainAction.isEmpty()) {
            continue;
        }
        Attribute attribute = attributes.get(domainAction);
        if(attribute == null || !classAttributes.contains(attribute)) {
            throw new IllegalArgumentException("Unknown domain action '" + domainAction + "'");
        }
        instance.setValue(attribute, 1);
    }
}
/**
 * Returns {@code value} with every single quote, double quote and "�"
 * character removed.
 *
 * NOTE(review): the last two removed literals look identical in this file;
 * presumably they were two different characters that got lost to a file
 * encoding problem - confirm against the original source encoding.
 *
 * @param value the text to clean; must not be null
 * @return the text without the removed characters
 */
public static String clean(String value) {
    return value
        .replace("'", "")
        .replace("\"", "")
        .replace("�", "")
        .replace("�", "");
}
/**
 * Reads the CSV files named by {@code Utils.getCSVFilenames()} and prints
 * the resulting data set to standard output.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if reading one of the CSV files fails
 */
public static void main(String[] args) throws IOException {
    Instances trainingSet = readFromCSV(Utils.getCSVFilenames());
    System.out.println(trainingSet.toString());
}
}