package net.seninp.grammarviz.model;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Observable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import net.seninp.gi.GIAlgorithm;
import net.seninp.gi.logic.GrammarRuleRecord;
import net.seninp.gi.logic.GrammarRules;
import net.seninp.gi.logic.RuleInterval;
import net.seninp.gi.repair.RePairFactory;
import net.seninp.gi.repair.RePairGrammar;
import net.seninp.gi.sequitur.SAXRule;
import net.seninp.gi.sequitur.SequiturFactory;
import net.seninp.grammarviz.logic.GrammarVizChartData;
import net.seninp.jmotif.sax.NumerosityReductionStrategy;
import net.seninp.jmotif.sax.SAXProcessor;
import net.seninp.jmotif.sax.alphabet.NormalAlphabet;
import net.seninp.jmotif.sax.datastructure.SAXRecords;
import net.seninp.jmotif.sax.parallel.ParallelSAXImplementation;
import net.seninp.util.StackTrace;
/**
 * Implements the Sequitur Model component of MVC GUI pattern.
 *
 * The model keeps the name of the data file, the time series read from it, and the chart data
 * produced by the grammar induction run. Every state change and every status line is pushed to the
 * registered observers wrapped into a {@link GrammarVizMessage}.
 *
 * @author psenin
 *
 */
public class GrammarVizModel extends Observable {

  final static Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;

  // the logger
  //
  private static final Logger LOGGER = LoggerFactory.getLogger(GrammarVizModel.class);

  private static final String SPACE = " ";
  private static final String CR = "\n";

  /** Name of the file the grammar statistics are written to (in the current directory). */
  private static final String STATS_FILE_NAME = "grammar_stats.txt";

  /** The data filename. */
  private String dataFileName;

  /** If that data was read - it is stored here. */
  private double[] ts;

  /** Data structure that keeps the chart data. */
  private GrammarVizChartData chartData;

  /**
   * The file name getter.
   *
   * @return current filename.
   */
  public synchronized String getDataFileName() {
    return this.dataFileName;
  }

  /**
   * Set data source filename.
   *
   * @param filename the filename.
   */
  public synchronized void setDataSource(String filename) {

    LOGGER.info("setting {} as the data source", filename);

    // action
    this.dataFileName = filename;

    // notify the View
    this.setChanged();
    notifyObservers(new GrammarVizMessage(GrammarVizMessage.DATA_FNAME, this.getDataFileName()));

    // this notification tells GUI which file was selected as the data source
    this.log("set " + filename + " as the data source");
  }

  /**
   * Load the data which is supposedly in the file which is selected as the data source. Only the
   * first whitespace-separated column of each line is read; blank lines are skipped. If a line can
   * not be parsed, or the file can not be read, the error is reported and the values read so far
   * are kept. When not a single value was read, the previously loaded series (if any) is left
   * untouched and no time series message is sent.
   *
   * @param limitStr the limit of lines to read; null, empty, or a non-positive number means "no
   * limit".
   */
  public synchronized void loadData(String limitStr) {

    // check if everything is ready
    if ((null == this.dataFileName) || this.dataFileName.isEmpty()) {
      this.log("unable to load data - no data source selected yet");
      return;
    }

    // make sure the path exists
    Path path = Paths.get(this.dataFileName);
    if (!(Files.exists(path))) {
      this.log("file " + this.dataFileName + " doesn't exist.");
      return;
    }

    // set the lines limit, it is validated before the file is touched so a typo in the limit is
    // reported as such and not as a file reading error
    long loadLimit = 0L;
    if ((null != limitStr) && !(limitStr.trim().isEmpty())) {
      try {
        loadLimit = Long.parseLong(limitStr.trim());
      }
      catch (NumberFormatException e) {
        this.log("unable to load data - the lines limit \"" + limitStr + "\" is not a number");
        return;
      }
    }

    // read the input
    //
    ArrayList<Double> data = new ArrayList<>();

    // try-with-resources closes the reader also when parsing of a line fails
    try (BufferedReader reader = Files.newBufferedReader(path, DEFAULT_CHARSET)) {
      String line;
      while ((line = reader.readLine()) != null) {
        String trimmed = line.trim();
        if (trimmed.isEmpty()) {
          // a blank line carries no value, new BigDecimal("") would throw
          continue;
        }
        // we read only first column
        String[] lineSplit = trimmed.split("\\s+");
        data.add(new BigDecimal(lineSplit[0]).doubleValue());
        // break the load once exactly loadLimit values were read
        if ((loadLimit > 0) && (data.size() >= loadLimit)) {
          break;
        }
      }
    }
    catch (IOException | NumberFormatException e) {
      LOGGER.error("error while trying to read data from " + this.dataFileName, e);
      this.log("error while trying to read data from " + this.dataFileName + ":\n"
          + StackTrace.toString(e));
    }

    // nothing to convert and nothing to broadcast
    if (data.isEmpty()) {
      LOGGER.info("no data points were read from {}", this.dataFileName);
      this.log("no data points were read from " + this.dataFileName);
      return;
    }

    // convert to simple doubles array
    double[] series = new double[data.size()];
    for (int i = 0; i < series.length; i++) {
      series[i] = data.get(i);
    }
    this.ts = series;

    LOGGER.info("loaded " + this.ts.length + " points....");

    // notify that the process finished
    this.log("loaded " + this.ts.length + " points from " + this.dataFileName);

    // and send the timeseries
    setChanged();
    notifyObservers(new GrammarVizMessage(GrammarVizMessage.TIME_SERIES_MESSAGE, this.ts));
  }

  /**
   * Process data with GI algorithm. Instantiate, populate, and broadcast the ChartData object. If
   * the processing fails the error is logged and the chart data is broadcast anyway, without the
   * grammar rules being set by this call.
   *
   * @param algorithm the algorithm; {@link GIAlgorithm#SEQUITUR} runs Sequitur, any other value
   * runs RePair.
   * @param useSlidingWindow The use sliding window parameter.
   * @param numerosityReductionStrategy The numerosity reduction strategy.
   * @param windowSize The SAX sliding window size.
   * @param paaSize The SAX PAA size.
   * @param alphabetSize The SAX alphabet size.
   * @param normalizationThreshold The normalization threshold.
   * @param grammarOutputFileName The file name to where save the grammar (not used by this
   * method).
   * @throws IOException declared for the callers, the body itself catches and logs processing
   * errors.
   */
  public synchronized void processData(GIAlgorithm algorithm, boolean useSlidingWindow,
      NumerosityReductionStrategy numerosityReductionStrategy, int windowSize, int paaSize,
      int alphabetSize, double normalizationThreshold, String grammarOutputFileName)
      throws IOException {

    // check if the data is loaded
    //
    if (null == this.ts || this.ts.length == 0) {
      this.log("unable to \"Process data\" - no data were loaded ...");
      return;
    }

    boolean useSequitur = GIAlgorithm.SEQUITUR.equals(algorithm);

    // the logging block
    //
    StringBuilder sb = new StringBuilder("setting up GI with params: ");
    sb.append(useSequitur ? "algorithm: Sequitur, " : "algorithm: RePair, ");
    sb.append("sliding window ").append(useSlidingWindow);
    sb.append(", numerosity reduction ").append(numerosityReductionStrategy.toString());
    sb.append(", SAX window ").append(windowSize);
    sb.append(", PAA ").append(paaSize);
    sb.append(", Alphabet ").append(alphabetSize);
    LOGGER.info(sb.toString());
    this.log(sb.toString());

    LOGGER.debug("creating ChartDataStructure");
    this.chartData = new GrammarVizChartData(this.dataFileName, this.ts, useSlidingWindow,
        numerosityReductionStrategy, windowSize, paaSize, alphabetSize, normalizationThreshold);

    try {
      if (useSequitur) {

        NormalAlphabet na = new NormalAlphabet();
        SAXProcessor sp = new SAXProcessor();

        // discretize either with the sliding window or by chunking the whole series
        SAXRecords saxFrequencyData;
        if (useSlidingWindow) {
          saxFrequencyData = sp.ts2saxViaWindow(ts, windowSize, paaSize, na.getCuts(alphabetSize),
              numerosityReductionStrategy, normalizationThreshold);
        }
        else {
          saxFrequencyData = sp.ts2saxByChunking(ts, paaSize, na.getCuts(alphabetSize),
              normalizationThreshold);
        }

        SAXRule sequiturGrammar = SequiturFactory
            .runSequitur(saxFrequencyData.getSAXString(SPACE));

        GrammarRules rules = sequiturGrammar.toGrammarRulesData();
        SequiturFactory.updateRuleIntervals(rules, saxFrequencyData, useSlidingWindow, this.ts,
            windowSize, paaSize);

        this.chartData.setGrammarRules(rules);
      }
      else {

        ParallelSAXImplementation ps = new ParallelSAXImplementation();
        SAXRecords parallelRes = ps.process(ts, 2, windowSize, paaSize, alphabetSize,
            numerosityReductionStrategy, normalizationThreshold);

        RePairGrammar rePairGrammar = RePairFactory.buildGrammar(parallelRes);
        rePairGrammar.expandRules();
        rePairGrammar.buildIntervals(parallelRes, ts, windowSize);

        GrammarRules rules = rePairGrammar.toGrammarRulesData();
        this.chartData.setGrammarRules(rules);
      }
    }
    catch (Exception e) {
      // the failure is reported to both, the GUI and the log; the broadcast below still happens
      this.log("error while processing data " + StackTrace.toString(e));
      LOGGER.error("error while processing data", e);
    }

    this.log("processed data, broadcasting charts");
    LOGGER.info("process finished");

    setChanged();
    notifyObservers(new GrammarVizMessage(GrammarVizMessage.CHART_MESSAGE, this.chartData));
  }

  /**
   * Performs logging messages distribution.
   *
   * @param message the message to log.
   */
  private void log(String message) {
    this.setChanged();
    notifyObservers(new GrammarVizMessage(GrammarVizMessage.STATUS_MESSAGE, "model: " + message));
  }

  /**
   * Saves the grammar stats into the file "grammar_stats.txt" in the current working directory.
   * The file starts with a commented header describing the discretization parameters, followed by
   * one section per grammar rule. I/O errors are logged and not propagated.
   *
   * @param data the data for collecting stats.
   */
  protected void saveGrammarStats(GrammarVizChartData data) {

    // try-with-resources guarantees the writer is flushed and closed whatever happens below
    try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
        new FileOutputStream(new File(".").getCanonicalPath() + File.separator + STATS_FILE_NAME),
        DEFAULT_CHARSET))) {

      // the header
      StringBuilder header = new StringBuilder();
      header.append("# filename: ").append(this.dataFileName).append(CR);
      header.append("# sliding window: ").append(data.isSlidingWindowOn()).append(CR);
      if (data.isSlidingWindowOn()) {
        header.append("# window size: ").append(data.getSAXWindowSize()).append(CR);
      }
      header.append("# paa size: ").append(data.getSAXPaaSize()).append(CR);
      header.append("# alphabet size: ").append(data.getSAXAlphabetSize()).append(CR);
      bw.write(header.toString());

      // the rules, one section per rule
      for (GrammarRuleRecord ruleRecord : data.getGrammarRules()) {
        bw.write(formatRuleStats(ruleRecord));
      }
    }
    catch (IOException e) {
      LOGGER.error("Encountered an error while writing stats file: ", e);
    }
  }

  /**
   * Formats the statistics section of a single grammar rule.
   *
   * @param ruleRecord the rule to describe.
   * @return the multi-line text block for the rule.
   */
  private static String formatRuleStats(GrammarRuleRecord ruleRecord) {

    StringBuilder sb = new StringBuilder();

    sb.append("/// ").append(ruleRecord.getRuleName()).append(CR);
    sb.append(ruleRecord.getRuleName()).append(" -> \'").append(ruleRecord.getRuleString().trim())
        .append("\', expanded rule string: \'").append(ruleRecord.getExpandedRuleString())
        .append("\'").append(CR);

    int occurrences = ruleRecord.getRuleIntervals().size();
    if (occurrences > 0) {
      int[] starts = new int[occurrences];
      int[] lengths = new int[occurrences];
      int i = 0;
      for (RuleInterval sp : ruleRecord.getRuleIntervals()) {
        starts[i] = sp.getStart();
        lengths[i] = (sp.endPos - sp.startPos);
        i++;
      }
      sb.append("subsequences starts: ").append(Arrays.toString(starts)).append(CR)
          .append("subsequences lengths: ").append(Arrays.toString(lengths)).append(CR);
    }

    sb.append("rule occurrence frequency ").append(occurrences).append(CR);
    sb.append("rule use frequency ").append(ruleRecord.getRuleUseFrequency()).append(CR);

    // the min and max lengths come as a single " - " separated string, split it once
    String[] minMax = ruleRecord.minMaxLengthAsString().split(" - ");
    sb.append("min length ").append(minMax[0]).append(CR);
    sb.append("max length ").append(minMax[1]).append(CR);
    sb.append("mean length ").append(ruleRecord.getMeanLength()).append(CR);

    return sb.toString();
  }

}