package edu.uncc.cs.watsonsim.researchers; import java.io.File; import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collection; import java.util.List; import edu.uncc.cs.watsonsim.Answer; import edu.uncc.cs.watsonsim.Question; import edu.uncc.cs.watsonsim.Score; import weka.core.Attribute; import weka.core.FastVector; import weka.core.Instance; import weka.core.Instances; import weka.core.converters.ArffSaver; import weka.core.converters.Saver; /** Pipe Answer scores to an ARFF file for Weka */ public class WekaTee extends Researcher { private final static List<Score> dataset = new ArrayList<>(); private static ArffSaver saver; private static int saved_schema_version = -1; // Make every run unique, but overwrite between questions // This way, you still get /something/ if you interrupt it private final Timestamp start_time; /** * Dump the training data to an ARFF file marked by the given timestamp * @param start_time */ public WekaTee(Timestamp start_time) { this.start_time = start_time; } @Override /** * Multithreaded counterpart to dump, which is synchronized */ public List<Answer> question(Question q, List<Answer> answers) { List<Score> new_entries = new ArrayList<>(); for (Answer a : answers) { new_entries.add(a.scores.clone()); } dump(new_entries, start_time); return answers; } /** File-writing serialized counterpart to question() * * @param new_entries The new arrays to dump * @param start_time The timestamp of the file to dump to */ private static synchronized void dump(List<Score> new_entries, Timestamp start_time) { dataset.addAll(new_entries); Collection<String> names = Score.latestSchema(); try { if (names.size() != saved_schema_version) { dump_from_scratch(names, start_time); } else { // Only do a few quick updates for (Score row : new_entries) saver.writeIncremental(new Instance(1.0, row.getEach(names))); } // There are synchronization issues otherwise. saver.getWriter().flush(); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException("Failed to write Weka Log!"); } } /** * When the score changes, rewrite the file. * This is really rare in practice, so don't bother optimizing it. */ private static void dump_from_scratch(Collection<String> names, Timestamp start_time) throws IOException { saved_schema_version = names.size(); FastVector attributes = new FastVector(); // Answer score names for (String name: names) attributes.addElement(new Attribute(name)); Instances data = new Instances("Watsonsim captured question stream", attributes, 0); // Save the results to a file saver = new ArffSaver(); saver.setStructure(data); saver.setRetrieval(Saver.INCREMENTAL); saver.setFile(new File("data/weka-log." + start_time + ".arff")); for (Score row : dataset) saver.writeIncremental(new Instance(1.0, row.getEach(names))); } }