package beast.util;
import static beast.util.OutputUtils.format;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import beast.app.BEASTVersion2;
import beast.app.util.Utils;
import beast.core.util.ESS;
import beast.core.util.Log;
/**
 * Reads a whitespace-separated trace log, discards a burn-in fraction of the
 * samples and computes per-column summary statistics (mean, standard
 * deviation, standard error, median, 95% HPD interval, ACT, ESS and
 * geometric mean).
 *
 * <p>Lines containing a {@code #} and lines without any alphanumeric character
 * are collected as preamble. The first remaining line supplies the column
 * labels; every later line that starts with a digit is a data row. The first
 * column is treated as the sample number and gets no statistics.
 */
public class LogAnalyser {
    /** Burn-in percentage used by the constructors that do not take one. */
    public static final int BURN_IN_PERCENTAGE = 10; // default

    /** Ruler printed above the progress stars. */
    final protected static String BAR = "|---------|---------|---------|---------|---------|---------|---------|---------|";

    /** Matches any line holding at least one ASCII letter or digit. */
    private static final Pattern CONTENT_LINE = Pattern.compile(".*[0-9a-zA-Z].*");

    /** Matches a line whose first character is an ASCII digit, i.e. a data row. */
    private static final Pattern DATA_LINE = Pattern.compile("[0-9].*");

    /** Log file being analysed; {@code null} when built with the no-arg constructor. */
    protected final String fileName;

    /**
     * column labels in log file *
     */
    protected String[] m_sLabels;

    /**
     * distinguish various column types *
     */
    protected enum type {
        REAL, INTEGER, BOOL, NOMINAL
    }

    /** Type of each column, indexed like {@link #m_sLabels}. */
    protected type[] m_types;

    /**
     * range of a column, if it is not a REAL *
     */
    protected List<String>[] m_ranges;

    /**
     * data from log file with burn-in removed *
     */
    protected Double[][] m_fTraces;

    /**
     * statistics on the data, one per column. First column (sample nr) is not set *
     */
    Double[] m_fMean, m_fStdError, m_fStdDev, m_fMedian, m_f95HPDup, m_f95HPDlow, m_fESS, m_fACT, m_fGeometricMean;

    /**
     * used for storing comments before the actual log file commences *
     */
    protected String m_sPreAmble;

    /**
     * If set, analyzer works in "quiet" mode.
     */
    protected boolean quiet = false;

    /** Column separator used by {@link #print(PrintStream)}. */
    final String SPACE = OutputUtils.SPACE;

    /**
     * Creates an analyser without a log file; data must be supplied through
     * one of the {@code setData} methods.
     */
    public LogAnalyser() {
        fileName = null;
    }

    /**
     * Analyses the file named by the last element of {@code args}.
     *
     * @param args             argument list; only the last element is used, as the file name
     * @param burnInPercentage burnInPercentage typical = 10; percentage of data that can be ignored
     * @throws IOException if the log file cannot be read
     */
    public LogAnalyser(String[] args, int burnInPercentage) throws IOException {
        this(args, burnInPercentage, false, true);
    }

    /**
     * Analyses the file named by the last element of {@code args}.
     *
     * @param args             argument list; only the last element is used, as the file name
     * @param burnInPercentage percentage of data rows to discard, 0..100
     * @param quiet            when true, no progress output is logged (including while loading)
     * @param calcStats        when true, statistics are computed immediately
     * @throws IOException if the log file cannot be read
     */
    public LogAnalyser(String[] args, int burnInPercentage, boolean quiet, boolean calcStats) throws IOException {
        // Delegating guarantees the quiet flag is in place before the file is loaded.
        this(args[args.length - 1], burnInPercentage, quiet, calcStats);
    }

    /**
     * Analyses the file named by the last element of {@code args} with the
     * default burn-in.
     *
     * @param args argument list; only the last element is used, as the file name
     * @throws IOException if the log file cannot be read
     */
    public LogAnalyser(String[] args) throws IOException {
        this(args, BURN_IN_PERCENTAGE, false, true);
    }

    /**
     * @param fileName         log file to analyse
     * @param burnInPercentage percentage of data rows to discard, 0..100
     * @throws IOException if the log file cannot be read
     */
    public LogAnalyser(String fileName, int burnInPercentage) throws IOException {
        this(fileName, burnInPercentage, false, true);
    }

    /**
     * @param fileName         log file to analyse
     * @param burnInPercentage percentage of data rows to discard, 0..100
     * @param quiet            when true, no progress output is logged
     * @throws IOException if the log file cannot be read
     */
    public LogAnalyser(String fileName, int burnInPercentage, boolean quiet) throws IOException {
        this(fileName, burnInPercentage, quiet, true);
    }

    /**
     * Analyses {@code fileName} with the default burn-in.
     *
     * @param fileName log file to analyse
     * @throws IOException if the log file cannot be read
     */
    public LogAnalyser(String fileName) throws IOException {
        this(fileName, BURN_IN_PERCENTAGE);
    }

    /**
     * @param fileName         log file to analyse
     * @param burnInPercentage percentage of data rows to discard, 0..100
     * @param quiet            when true, no progress output is logged
     * @param calcStats        when true, statistics are computed immediately
     * @throws IOException              if the log file cannot be read or has no label line
     * @throws IllegalArgumentException if {@code burnInPercentage} is outside 0..100
     */
    public LogAnalyser(String fileName, int burnInPercentage, boolean quiet, boolean calcStats) throws IOException {
        this.fileName = fileName;
        this.quiet = quiet;
        readLogFile(fileName, burnInPercentage);
        if (calcStats) {
            calcStats();
        }
    }

    /**
     * Loads the log file into {@link #m_fTraces}, skipping the burn-in rows,
     * and fills {@link #m_sLabels}, {@link #m_sPreAmble}, {@link #m_types} and
     * {@link #m_ranges}.
     *
     * <p>The file is read twice: once to find the labels and count the data
     * rows, once to store the values. Both passes classify lines identically,
     * so the number of stored rows always matches the allocated size.
     *
     * @param fileName         log file to read
     * @param burnInPercentage percentage of data rows to discard, 0..100
     * @throws IOException              if the file cannot be read or contains no label line
     * @throws IllegalArgumentException if {@code burnInPercentage} is outside 0..100
     */
    @SuppressWarnings("unchecked")
    protected void readLogFile(String fileName, int burnInPercentage) throws IOException {
        if (burnInPercentage < 0 || burnInPercentage > 100) {
            throw new IllegalArgumentException(
                    "burnInPercentage must be between 0 and 100 but was " + burnInPercentage);
        }
        log("\nLoading " + fileName);
        m_sPreAmble = "";
        m_sLabels = null;

        // first, sweep through the log file to determine size of the log
        int data = 0;
        StringBuilder preamble = new StringBuilder();
        try (BufferedReader fin = new BufferedReader(new FileReader(fileName))) {
            String str;
            while ((str = fin.readLine()) != null) {
                if (str.indexOf('#') < 0 && CONTENT_LINE.matcher(str).matches()) {
                    if (m_sLabels == null) {
                        m_sLabels = str.split("\\s");
                    } else if (DATA_LINE.matcher(str).matches()) {
                        // count only what the second pass will actually load
                        data++;
                    }
                } else {
                    preamble.append(str).append('\n');
                }
            }
        }
        m_sPreAmble = preamble.toString();
        if (m_sLabels == null) {
            throw new IOException("No column labels found in " + fileName);
        }
        int lines = Math.max(1, data / 80);

        // reserve memory
        int items = m_sLabels.length;
        m_ranges = new List[items];
        // long arithmetic: data * burnInPercentage can exceed the int range
        int burnIn = (int) ((long) data * burnInPercentage / 100);
        m_fTraces = new Double[items][data - burnIn];
        logln(", burnin " + burnInPercentage + "%, skipping " + burnIn + " log lines\n\n" + BAR);

        // grab data from the log, ignoring burn in samples
        m_types = new type[items];
        Arrays.fill(m_types, type.INTEGER);
        int row = -burnIn - 1; // becomes 0 on the first row after burn-in
        boolean headerSeen = false;
        try (BufferedReader fin = new BufferedReader(new FileReader(fileName))) {
            String str;
            while ((str = fin.readLine()) != null) {
                if (str.indexOf('#') < 0) {
                    if (!headerSeen) {
                        // the first content line holds the labels, never data
                        headerSeen = CONTENT_LINE.matcher(str).matches();
                    } else if (DATA_LINE.matcher(str).matches() && ++row >= 0) {
                        storeRow(str.split("\\s"), row);
                    }
                }
                if (row % lines == 0) {
                    log("*");
                }
            }
        }
        logln("");

        // determine types
        for (int i = 0; i < items; i++) {
            List<String> range = m_ranges[i];
            if (range == null) {
                continue;
            }
            boolean hasTrue = range.contains("true");
            boolean hasFalse = range.contains("false");
            boolean bool = (range.size() == 2 && hasTrue && hasFalse)
                    || (range.size() == 1 && (hasTrue || hasFalse));
            m_types[i] = bool ? type.BOOL : type.NOMINAL;
        }
    } // readLogFile

    /**
     * Stores one data row in {@link #m_fTraces}. Values that do not parse as a
     * number are recorded in the column's range list and stored as their index
     * in that list.
     */
    private void storeRow(String[] values, int row) {
        int i = 0;
        for (String value : values) {
            try {
                if (value.indexOf('.') >= 0) {
                    m_types[i] = type.REAL;
                }
                m_fTraces[i][row] = Double.parseDouble(value);
            } catch (NumberFormatException e) {
                if (m_ranges[i] == null) {
                    m_ranges[i] = new ArrayList<>();
                }
                if (!m_ranges[i].contains(value)) {
                    m_ranges[i].add(value);
                }
                m_fTraces[i][row] = 1.0 * m_ranges[i].indexOf(value);
            }
            i++;
        }
    }

    /**
     * calculate statistics on the data, one per column.
     * First column (sample nr) is not set *
     *
     * <p>Columns that are not REAL or INTEGER, and columns without samples,
     * get {@code NaN} for standard error, median, HPD bounds, ACT, ESS and
     * geometric mean.
     */
    public void calcStats() {
        logln("\nCalculating statistics\n\n" + BAR);
        int stars = 0;
        int items = m_sLabels.length;
        m_fMean = new Double[items];
        m_fStdError = new Double[items];
        m_fStdDev = new Double[items];
        m_fMedian = new Double[items];
        m_f95HPDlow = new Double[items];
        m_f95HPDup = new Double[items];
        m_fESS = new Double[items];
        m_fACT = new Double[items];
        m_fGeometricMean = new Double[items];
        // Sample interval is the gap between the first two sample numbers;
        // with fewer than two samples there is no gap to measure, so use 1.
        Double[] sampleNrs = m_fTraces[0];
        int sampleInterval = sampleNrs.length > 1 ? (int) (sampleNrs[1] - sampleNrs[0]) : 1;
        for (int i = 1; i < items; i++) {
            // calc mean and standard deviation
            Double[] trace = m_fTraces[i];
            double sum = 0, sum2 = 0;
            for (double f : trace) {
                sum += f;
                sum2 += f * f;
            }
            if (m_types[i] != type.NOMINAL) {
                m_fMean[i] = sum / trace.length;
                m_fStdDev[i] = Math.sqrt(sum2 / trace.length - m_fMean[i] * m_fMean[i]);
            } else {
                m_fMean[i] = Double.NaN;
                m_fStdDev[i] = Double.NaN;
            }
            boolean numeric = m_types[i] == type.REAL || m_types[i] == type.INTEGER;
            if (numeric && trace.length > 0) {
                // calc median, and 95% HPD interval
                Double[] sorted = trace.clone();
                Arrays.sort(sorted);
                m_fMedian[i] = sorted[trace.length / 2];
                // n instances cover 95% of the trace, reduced down by 1 to match Tracer
                int n = (int) ((sorted.length - 1) * 95.0 / 100.0);
                double minRange = Double.MAX_VALUE;
                int hpdIndex = 0;
                // slide a window of n+1 sorted values and keep the narrowest one
                for (int k = 0; k < sorted.length - n; k++) {
                    double range = sorted[k + n] - sorted[k];
                    if (range < minRange) {
                        minRange = range;
                        hpdIndex = k;
                    }
                }
                m_f95HPDlow[i] = sorted[hpdIndex];
                m_f95HPDup[i] = sorted[hpdIndex + n];
                // calc effective sample size
                m_fACT[i] = ESS.ACT(m_fTraces[i], sampleInterval);
                m_fStdError[i] = ESS.stdErrorOfMean(trace, sampleInterval);
                m_fESS[i] = trace.length / (m_fACT[i] / sampleInterval);
                // calc geometric mean
                if (sorted[0] > 0) {
                    // geometric mean is only defined when all elements are positive
                    double gm = 0;
                    for (double f : trace) {
                        gm += Math.log(f);
                    }
                    m_fGeometricMean[i] = Math.exp(gm / trace.length);
                } else {
                    m_fGeometricMean[i] = Double.NaN;
                }
            } else {
                // every statistic gets a value so getters and printers never see null
                m_fStdError[i] = Double.NaN;
                m_fMedian[i] = Double.NaN;
                m_f95HPDlow[i] = Double.NaN;
                m_f95HPDup[i] = Double.NaN;
                m_fACT[i] = Double.NaN;
                m_fESS[i] = Double.NaN;
                m_fGeometricMean[i] = Double.NaN;
            }
            while (stars < 80 * (i + 1) / items) {
                log("*");
                stars++;
            }
        }
        logln("\n");
    } // calcStats

    /**
     * Replaces the analysed data and recomputes all statistics.
     *
     * @param traces one array per column; the outer array is copied, the columns are not
     * @param labels column labels, same order as {@code traces}
     * @param types  column types, same order as {@code traces}
     */
    public void setData(Double[][] traces, String[] labels, type[] types) {
        m_fTraces = traces.clone();
        m_sLabels = labels.clone();
        m_types = types.clone();
        calcStats();
    }

    /**
     * Replaces the analysed data with a single REAL trace and recomputes all
     * statistics. A sample-number column {@code 0, sampleStep, 2*sampleStep, ...}
     * is generated as column 0; the trace becomes column 1.
     *
     * @param trace      values to analyse
     * @param sampleStep distance between consecutive generated sample numbers
     */
    public void setData(Double[] trace, int sampleStep) {
        Double[][] traces = new Double[2][];
        traces[0] = new Double[trace.length];
        for (int i = 0; i < trace.length; i++) {
            traces[0][i] = (double) i * sampleStep;
        }
        traces[1] = trace.clone();
        setData(traces, new String[]{"column", "data"}, new type[]{type.REAL, type.REAL});
    }

    /**
     * @param label column label to look up
     * @return the result of {@code CollectionUtils.indexof} for {@code label} in the label array
     */
    public int indexof(String label) {
        return CollectionUtils.indexof(label, m_sLabels);
    }

    /**
     * First column "Sample" (sample nr) needs to be removed
     *
     * @return the labels of all columns except the first; empty when there are fewer than two columns
     */
    public List<String> getLabels() {
        if (m_sLabels.length < 2) {
            return new ArrayList<>();
        }
        return CollectionUtils.toList(m_sLabels, 1, m_sLabels.length);
    }

    /** @return a copy of the trace stored for column {@code index} */
    public Double[] getTrace(int index) {
        return m_fTraces[index].clone();
    }

    /** @return a copy of the trace stored for the column labelled {@code label} */
    public Double[] getTrace(String label) {
        return m_fTraces[indexof(label)].clone();
    }

    /** @return mean of the column labelled {@code label} */
    public double getMean(String label) {
        return getMean(indexof(label));
    }

    /** @return standard error of the column labelled {@code label} */
    public double getStdError(String label) {
        return getStdError(indexof(label));
    }

    /** @return standard deviation of the column labelled {@code label} */
    public double getStdDev(String label) {
        return getStdDev(indexof(label));
    }

    /** @return median of the column labelled {@code label} */
    public double getMedian(String label) {
        return getMedian(indexof(label));
    }

    /** @return upper bound of the 95% HPD interval of the column labelled {@code label} */
    public double get95HPDup(String label) {
        return get95HPDup(indexof(label));
    }

    /** @return lower bound of the 95% HPD interval of the column labelled {@code label} */
    public double get95HPDlow(String label) {
        return get95HPDlow(indexof(label));
    }

    /** @return ESS of the column labelled {@code label} */
    public double getESS(String label) {
        return getESS(indexof(label));
    }

    /** @return ACT of the column labelled {@code label} */
    public double getACT(String label) {
        return getACT(indexof(label));
    }

    /** @return geometric mean of the column labelled {@code label} */
    public double getGeometricMean(String label) {
        return getGeometricMean(indexof(label));
    }

    /** @return mean of column {@code column}; statistics are not set for column 0 */
    public double getMean(int column) {
        return m_fMean[column];
    }

    /** @return standard deviation of column {@code column}; statistics are not set for column 0 */
    public double getStdDev(int column) {
        return m_fStdDev[column];
    }

    /** @return standard error of column {@code column}; statistics are not set for column 0 */
    public double getStdError(int column) {
        return m_fStdError[column];
    }

    /** @return median of column {@code column}; statistics are not set for column 0 */
    public double getMedian(int column) {
        return m_fMedian[column];
    }

    /** @return upper 95% HPD bound of column {@code column}; statistics are not set for column 0 */
    public double get95HPDup(int column) {
        return m_f95HPDup[column];
    }

    /** @return lower 95% HPD bound of column {@code column}; statistics are not set for column 0 */
    public double get95HPDlow(int column) {
        return m_f95HPDlow[column];
    }

    /** @return ESS of column {@code column}; statistics are not set for column 0 */
    public double getESS(int column) {
        return m_fESS[column];
    }

    /** @return ACT of column {@code column}; statistics are not set for column 0 */
    public double getACT(int column) {
        return m_fACT[column];
    }

    /** @return geometric mean of column {@code column}; statistics are not set for column 0 */
    public double getGeometricMean(int column) {
        return m_fGeometricMean[column];
    }

    /** Replaces the analysed data with {@code trace} (sample step 1) and returns its mean. */
    public double getMean(Double[] trace) {
        setData(trace, 1);
        return m_fMean[1];
    }

    /** Replaces the analysed data with {@code trace} (sample step 1) and returns its standard deviation. */
    public double getStdDev(Double[] trace) {
        setData(trace, 1);
        return m_fStdDev[1];
    }

    /** Replaces the analysed data with {@code trace} (sample step 1) and returns its median. */
    public double getMedian(Double[] trace) {
        setData(trace, 1);
        return m_fMedian[1];
    }

    /** Replaces the analysed data with {@code trace} (sample step 1) and returns its upper 95% HPD bound. */
    public double get95HPDup(Double[] trace) {
        setData(trace, 1);
        return m_f95HPDup[1];
    }

    /** Replaces the analysed data with {@code trace} (sample step 1) and returns its lower 95% HPD bound. */
    public double get95HPDlow(Double[] trace) {
        setData(trace, 1);
        return m_f95HPDlow[1];
    }

    /** Replaces the analysed data with {@code trace} (sample step 1) and returns its ESS. */
    public double getESS(Double[] trace) {
        setData(trace, 1);
        return m_fESS[1];
    }

    /** Replaces the analysed data with {@code trace} using {@code sampleStep} and returns its ACT. */
    public double getACT(Double[] trace, int sampleStep) {
        setData(trace, sampleStep);
        return m_fACT[1];
    }

    /** Replaces the analysed data with {@code trace} (sample step 1) and returns its geometric mean. */
    public double getGeometricMean(Double[] trace) {
        setData(trace, 1);
        return m_fGeometricMean[1];
    }

    /** @return the file name this analyser was constructed with, possibly {@code null} */
    public String getLogFile() {
        return fileName;
    }

    /**
     * print statistics for each column except first column (sample nr). *
     *
     * <p>If the system property {@code prefix} is set, its value is inserted
     * after the label on every row, and matching {@code prefix} header columns
     * are added (one per whitespace-separated word when there are several).
     *
     * @param out output stream
     */
    public void print(PrintStream out) {
        // set up header for prefix, if any is specified
        String prefix = System.getProperty("prefix");
        String prefixHead = (prefix == null ? "" : "prefix ");
        if (prefix != null) {
            String[] p = prefix.trim().split("\\s+");
            if (p.length > 1) {
                StringBuilder head = new StringBuilder();
                for (int i = 0; i < p.length; i++) {
                    head.append("prefix").append(i).append(' ');
                }
                prefixHead = head.toString();
            }
        }
        try {
            // delay so that stars can be flushed from stderr
            Thread.sleep(100);
        } catch (InterruptedException e) {
            // keep the interrupt visible to the caller instead of swallowing it
            Thread.currentThread().interrupt();
        }
        // The label column must be at least as wide as its "item" heading,
        // otherwise the substring(4) below would run past the padding.
        int max = "item".length();
        for (int i = 1; i < m_sLabels.length; i++) {
            max = Math.max(m_sLabels[i].length(), max);
        }
        StringBuilder padding = new StringBuilder(max);
        for (int i = 0; i < max; i++) {
            padding.append(' ');
        }
        String space = padding.toString();
        out.println("item" + space.substring(4) + " " + prefixHead +
                format("mean") + format("stderr") + format("stddev") + format("median") + format("95%HPDlo") + format("95%HPDup") + format("ACT") + format("ESS") + format("geometric-mean"));
        for (int i = 1; i < m_sLabels.length; i++) {
            out.println(m_sLabels[i] + space.substring(m_sLabels[i].length()) + SPACE + (prefix == null ? "" : prefix + SPACE) +
                    format(m_fMean[i]) + SPACE + format(m_fStdError[i]) + SPACE + format(m_fStdDev[i]) +
                    SPACE + format(m_fMedian[i]) + SPACE + format(m_f95HPDlow[i]) + SPACE + format(m_f95HPDup[i]) +
                    SPACE + format(m_fACT[i]) + SPACE + format(m_fESS[i]) + SPACE + format(m_fGeometricMean[i]));
        }
    }

    /**
     * Display header used in one-line mode.
     *
     * <p>Emits one tab-separated {@code label.statistic} entry per statistic
     * for every column except the first.
     *
     * @param out output stream
     */
    public void printOneLineHeader(PrintStream out) {
        String[] postFix = {
                "mean", "stderr", "stddev",
                "median", "95%HPDlo", "95%HPDup",
                "ACT", "ESS", "geometric-mean"
        };
        for (int paramIdx = 1; paramIdx < m_sLabels.length; paramIdx++) {
            for (int i = 0; i < postFix.length; i++) {
                if (paramIdx > 1 || i > 0) {
                    out.print("\t");
                }
                out.print(m_sLabels[paramIdx] + "." + postFix[i]);
            }
        }
        out.println();
    }

    /**
     * Display results for single log on one line.
     *
     * <p>Values are tab-separated, in the same order as the entries written by
     * {@link #printOneLineHeader(PrintStream)}.
     *
     * @param out output stream
     */
    public void printOneLine(PrintStream out) {
        for (int paramIdx = 1; paramIdx < m_sLabels.length; paramIdx++) {
            if (paramIdx > 1) {
                out.print("\t");
            }
            out.print(m_fMean[paramIdx] + "\t");
            out.print(m_fStdError[paramIdx] + "\t");
            out.print(m_fStdDev[paramIdx] + "\t");
            out.print(m_fMedian[paramIdx] + "\t");
            out.print(m_f95HPDlow[paramIdx] + "\t");
            out.print(m_f95HPDup[paramIdx] + "\t");
            out.print(m_fACT[paramIdx] + "\t");
            out.print(m_fESS[paramIdx] + "\t");
            out.print(m_fGeometricMean[paramIdx]);
        }
        out.println();
    }

    /** Writes {@code s} to {@code Log.warning} without a line break, unless in quiet mode. */
    protected void log(String s) {
        if (!quiet) {
            Log.warning.print(s);
        }
    }

    /** Writes {@code s} to {@code Log.warning} followed by a line break, unless in quiet mode. */
    protected void logln(String s) {
        if (!quiet) {
            Log.warning.println(s);
        }
    }

    /** Prints the command line usage to standard output and terminates the JVM with status 0. */
    static void printUsageAndExit() {
        System.out.println("LogAnalyser [-b <burninPercentage>] [file1] ... [filen]");
        System.out.println("-burnin <burninPercentage>");
        System.out.println("--burnin <burninPercentage>");
        System.out.println("-b <burninPercentage> percentage of log file to disregard, default " + BURN_IN_PERCENTAGE);
        System.out.println("-oneline Display only one line of output per file.\n" +
                "         Header is generated from the first file only.\n" +
                "         (Implies quiet mode.)");
        System.out.println("-quiet Quiet mode.  Avoid printing status updates to stderr.");
        System.out.println("-help");
        System.out.println("--help");
        System.out.println("-h print this message");
        System.out.println("[fileX] log file to analyse. Multiple files are allowed, each is analysed separately");
        System.exit(0);
    }

    /**
     * Command line entry point. Parses the options listed by
     * {@link #printUsageAndExit()}, then analyses every named file; when no
     * file is named, a file is requested through {@code Utils.getLoadFile}.
     *
     * @param args command line arguments
     */
    public static void main(String[] args) {
        try {
            LogAnalyser analyser;
            // process args
            int burninPercentage = BURN_IN_PERCENTAGE;
            boolean oneLine = false;
            boolean quiet = false;
            List<String> files = new ArrayList<>();
            int i = 0;
            while (i < args.length) {
                String arg = args[i];
                switch (arg) {
                    case "-b":
                    case "-burnin":
                    case "--burnin":
                        if (i + 1 >= args.length) {
                            Log.warning.println("-b argument requires another argument");
                            printUsageAndExit();
                        }
                        burninPercentage = Integer.parseInt(args[i + 1]);
                        i += 2;
                        break;
                    case "-oneline":
                        oneLine = true;
                        i += 1;
                        break;
                    case "-quiet":
                        quiet = true;
                        i += 1;
                        break;
                    case "-h":
                    case "-help":
                    case "--help":
                        printUsageAndExit();
                        break;
                    default:
                        if (arg.startsWith("-")) {
                            Log.warning.println("unrecognised command " + arg);
                            printUsageAndExit();
                        }
                        files.add(arg);
                        i++;
                }
            }
            if (files.isEmpty()) {
                // no file specified, open file dialog to select one
                BEASTVersion2 version = new BEASTVersion2();
                File file = Utils.getLoadFile("LogAnalyser " + version.getVersionString() + " - Select log file to analyse",
                        null, "BEAST log (*.log) Files", "log", "txt");
                if (file == null) {
                    return;
                }
                analyser = new LogAnalyser(file.getAbsolutePath(), burninPercentage, quiet);
                analyser.print(System.out);
            } else {
                // process files
                if (oneLine) {
                    for (int idx = 0; idx < files.size(); idx++) {
                        // one-line mode always runs quietly
                        analyser = new LogAnalyser(files.get(idx), burninPercentage, true);
                        if (idx == 0) {
                            analyser.printOneLineHeader(System.out);
                        }
                        analyser.printOneLine(System.out);
                    }
                } else {
                    for (String file : files) {
                        analyser = new LogAnalyser(file, burninPercentage, quiet);
                        analyser.print(System.out);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}