/*
* PersistentTS.java - Copyright(c) 2014 Joe Pasqua
* Provided under the MIT License. See the LICENSE file for details.
* Created: Nov 25, 2014
*/
package org.noroomattheinn.timeseries;
import com.google.common.collect.Range;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.math.BigInteger;
import java.util.Date;
import java.util.Timer;
import java.util.TimerTask;
import static org.noroomattheinn.timeseries.TSBase.logger;
/**
* PersistentTS: A persistent repository for time series data.
*
* A PersistentTS is represented by a header file and a data file.
*
* The header file contains two lines:
* VERSION:
* A number that corresponds to the implementation that wrote the repository
* STRING[\tSTRING]*
* A tab-separated list of strings. Each String represents the name of
* a column that is stored in the data file
*
* The data file contains lines that are either comments or data rows:
* COMMENT: Any line beginning with a # is an uninterpreted comment
* DATA ROW: All data rows have the form:
 *       TIMESTAMP BITVECTOR VAL[\tVAL]*
* where
 * TIMESTAMP is a long indicating the time of the sample. This value
 * is delta-encoded, meaning you must accumulate values up to a row in
 * order to know the timestamp of that row. If the stored value is negative
 * then it represents an absolute (not delta-encoded) timestamp given by its
 * absolute value;
*
* BITVECTOR is the hex representation of a 64-bit bit vector
* which indicates which samples were recorded at this timestamp
*
* VAL+ is a tab separated list of values. There must be as
* many values in this list as 1 bits in the bit vector.
* A value may be:<ul>
* <li>A double value represented as a String</li>
* <li>The literal "*" which indicates that this value
* is the same as the last recorded value of this column.</li>
* <li>The literal "!" which indicates that this value
* should be ignored and removed from the bit vector. This
* can be used to take the place of NaN or INF values.</li>
* </ul>
*
* @author Joe Pasqua <joe at NoRoomAtTheInn dot org>
*/
public class PersistentTS extends TSBase {
/*------------------------------------------------------------------------------
*
* Constants and Enums
*
*----------------------------------------------------------------------------*/
private static final int RepoVersion = 1;
private static final long FlushInterval = 20 * 1000L;
/*------------------------------------------------------------------------------
*
* Internal State
*
*----------------------------------------------------------------------------*/
private final Repo repo; // The underlying repository
private final Emitter emitter; // Used to write rows
private final Timer timer; // To manage flushing
private Row pendingRow; // Used to merge rows if needed
private long timeOfFirstRow; // The oldest data in the series
/*==============================================================================
* ------- -------
* ------- Public Interface To This Class -------
* ------- -------
*============================================================================*/
/**
* Create PersistentTimeSeries object that is ready to take writes
*
* @param container The directory that should contain the persistent store
* @param baseName The baseName of the persistent store files
* @param descriptor Describes the schema of the rows in the store
* @param forceOrdering If true, then all data added to the time series
* will be forced to have monotonically increasing
* timestamps. If a row or value is added whose time-
* stamp is less than a value that has already been
* added, the newer timestamp will be used.
* If false, an old timestamp will result in an
* IllegalArgumentException
*/
public PersistentTS(File container, String baseName, RowDescriptor schema, boolean forceOrdering)
throws IOException {
super(schema);
this.repo = Repo.getRepo(container, baseName, schema);
this.emitter = new Emitter(forceOrdering);
this.pendingRow = null;
this.timer = new Timer();
timer.schedule(
new TimerTask() { @Override public void run() { flush(); } },
FlushInterval);
timeOfFirstRow = Long.MAX_VALUE; // If no rows...
streamRows(Range.<Long>all(), new RowCollector() {
@Override public boolean collect(Row r) {
timeOfFirstRow = r.timestamp;
return false;
}
});
}
public static boolean repoExistsFor(File container, String baseName) {
return Repo.repoExistsFor(container, baseName);
}
/*------------------------------------------------------------------------------
*
* Methods overriden from TimeSeries
*
*----------------------------------------------------------------------------*/
@Override public long firstTime() { return timeOfFirstRow; }
@Override public synchronized Row storeRow(Row r) throws IllegalArgumentException {
if (pendingRow == null) {
pendingRow = r;
} else {
if (deflate(r.timestamp) == deflate(pendingRow.timestamp)) {
pendingRow.mergeWith(r);
logger.info("Merging");
} else {
emitter.emit(pendingRow);
pendingRow = r;
}
}
return r;
}
@Override public final synchronized void streamRows(
Range<Long> period, RowCollector collector) {
double accumulator[] = new double[schema.nColumns];
if (period == null) period = Range.all();
long fromTime = period.hasLowerBound() ? period.lowerEndpoint() : 0L;
long toTime = period.hasUpperBound() ? period.upperEndpoint() : Long.MAX_VALUE;
long prevTime = 0;
BufferedReader rdr = null;
try {
rdr = repo.getReader();
String line;
while ((line = rdr.readLine()) != null) {
if (line.startsWith("#")) { continue; }
String[] tokens = line.split("\t");
// The first entry on the line is the time in delta format
Long time = longValue(tokens[0]);
if (time == null) { continue; } // Invalid format, ignore this line
time = time < 0 ? -time : time + prevTime;
prevTime = time; // Keep a running tally of the current time
time = inflate(time);
if (time < fromTime) continue; // Out of range, ignore & move on
if (time > toTime) break; // Out of range, ignore & stop
Row row = new Row(time, 0L, schema.nColumns);
// The second element is a bitvector corresponding to which
// columns have values on this line
Long bitVector = longValue("0x" + tokens[1]);
if (bitVector == null) { continue; } // Invalid format, Ignore this line
row.bitVector = bitVector;
// The remaining entries are readings. There is one reading for
// each 1 bit in the bitvector. The positions in the bitvector
// correspond to the columns in the order initially specified
long bit = 1;
int tokenIndex = 2;
for (int i = 0; i < schema.nColumns; i++) {
row.values[i] = accumulator[i]; // Start off with the previous value
if (row.includes(bit)) {
String valString = tokens[tokenIndex++];
switch (valString) {
case "*": break;
case "!": row.clear(bit); break;
default:
Double val = doubleValue(valString);
if (val == null) { row.clear(bit); }
else { accumulator[i] = row.values[i] = val.doubleValue(); }
break;
}
} else {
row.values[i] = accumulator[i];
}
bit = bit << 1;
}
if (!collector.collect(row)) break;
}
} catch (IOException ex) {
logger.severe("Error loading from repository" + ex);
}
if (rdr != null) try {
rdr.close();
} catch (IOException e) {
logger.warning("Failure closing reader: " + e);
}
}
@Override public synchronized void flush() {
if (pendingRow != null) {
emitter.emit(pendingRow);
pendingRow = null;
}
repo.flush();
}
@Override public synchronized void close() {
flush();
repo.close();
timer.cancel();
}
/*------------------------------------------------------------------------------
*
* PRIVATE - Utility methods
*
*----------------------------------------------------------------------------*/
private static Long longValue(String valString) {
try {
return Long.decode(valString);
} catch (NumberFormatException e) {
logger.warning("Invalid Long in TimeSeries: " + valString);
return null;
}
}
private static Double doubleValue(String valString) {
try {
return Double.valueOf(valString);
} catch (NumberFormatException e) {
logger.warning("Invalid Double in TimeSeries: " + valString);
return null;
}
}
private static long deflate(long timestamp) { return timestamp/100; }
private static long inflate(long timestamp) { return timestamp*100; }
private class Emitter {
private Row lastRowEmitted;
private final PrintStream ps;
private final boolean forceOrdering;
Emitter(boolean forceOrdering) {
this.lastRowEmitted = null;
this.forceOrdering = forceOrdering;
this.ps = repo.getPrintStream();
}
Row emit(Row r) throws IllegalArgumentException {
// Emit the timestamp for the row
ps.print(adjustTimeIfNeeded(r.timestamp));
// Emit the bit vector describing which columns are included
ps.append("\t");
ps.append(Long.toHexString(r.bitVector));
// Emit the column values
long bitForColumn = 1;
for (int i = 0; i < schema.nColumns; i++) {
if (r.includes(bitForColumn)) {
ps.append("\t");
double val = r.values[i];
if (Double.isInfinite(val) || Double.isNaN(val)) {
ps.print("!");
} else if (lastRowEmitted != null && val == lastRowEmitted.values[i]) {
ps.print("*");
} else {
ps.print(val);
}
}
bitForColumn = bitForColumn << 1;
}
ps.println();
lastRowEmitted = r;
return r;
}
private long adjustTimeIfNeeded(long newTime) {
if (lastRowEmitted == null) { return -deflate(newTime); }
else {
long oldTime = lastRowEmitted.timestamp;
long time = deflate(newTime) - deflate(oldTime);
if (time < 0) {
if (forceOrdering) {
time = deflate(lastRowEmitted.timestamp);
logger.fine("Forcing timestamps into sequence: " +
newTime + ", " + oldTime);
} else throw new IllegalArgumentException(
"Timestamps out of sequence: " + newTime +
", " + oldTime);
}
return time;
}
}
}
/*------------------------------------------------------------------------------
*
* PRIVATE - The class implementing the filed-based repository
*
*----------------------------------------------------------------------------*/
private static class Repo {
private final RowDescriptor schema;
private final File dataFile;
private final File hdrFile;
private PrintStream ps;
private Repo(File container, String name, RowDescriptor schema) {
this.schema = schema;
this.dataFile = dataFile(container, name);
this.hdrFile = headerFile(container, name);
this.ps = null;
}
static boolean repoExistsFor(File container, String baseName) {
File header = headerFile(container, baseName);
File data = dataFile(container, baseName);
boolean hdrExists = header.exists();
boolean dataExists = data.exists();
return hdrExists && dataExists;
}
public void flush() { if (ps != null) ps.flush(); }
public void close() { if (ps != null) ps.close(); }
static Repo getRepo(File container, String name, RowDescriptor schema)
throws IOException {
Repo repo = new Repo(container, name, schema);
if (!repo.hdrFile.exists() && repo.dataFile.exists()) {
// Danger! The data file has become "disconnected" from the
// header file. Don't create a new data file - the data is valuable
// Don't just create a new header file because you don't know
// if the schemas match. It's safest to raise an exception.
throw new FileNotFoundException("Data file without Header file");
}
repo.ensureValidHeader();
if (!repo.dataFile.exists()) repo.createDataFile();
repo.ps = new PrintStream(new FileOutputStream(repo.dataFile, true));
return repo;
}
public PrintStream getPrintStream() { return ps; }
public BufferedReader getReader() throws FileNotFoundException {
return new BufferedReader(new FileReader(dataFile));
};
private void ensureValidHeader() throws IOException {
if (!hdrFile.exists()) {
createHeaderFile();
return;
}
// Read the existing header file and make sure it's valid
String line;
BufferedReader reader = new BufferedReader(new FileReader(hdrFile));
line = reader.readLine();
if (line == null) throw new IOException("Empty Header File");
int version = Integer.valueOf(line);
if (version > RepoVersion)
throw new IOException(
"Can't read newer repo version :" + version + " vs " + RepoVersion);
line = reader.readLine();
if (line == null) throw new IOException("Missing column name declarations");
String[] declaredNames = line.split("\t");
if (declaredNames.length > schema.nColumns) {
throw new IOException("Mismatched column names - too few supplied names");
}
for (int i = 0; i < declaredNames.length; i++) {
if (!declaredNames[i].equals(schema.columnNames[i])) {
throw new IOException("Mismatched column names");
}
}
reader.close();
if (schema.nColumns > declaredNames.length) {
logger.info("Adding new column(s)");
createHeaderFile(); // We've got new columns! Overwrite the header file
}
}
private void createHeaderFile() throws FileNotFoundException {
PrintStream writer = new PrintStream(new FileOutputStream(hdrFile, false));
writer.format("%d\n", RepoVersion);
int lastIndex = schema.nColumns-1;
int index = 0;
while (true) {
writer.append(schema.columnNames[index]);
if (index++ != lastIndex) writer.append("\t");
else break;
}
writer.close();
}
private void createDataFile() throws FileNotFoundException {
PrintStream writer = new PrintStream(new FileOutputStream(dataFile), false);
writer.format("# %s\n", (new Date().toString()));
writer.close();
}
private static File headerFile(File container, String baseName) {
return new File(container, baseName + ".pts.hdr");
}
private static File dataFile(File container, String baseName) {
return new File(container, baseName + ".pts.data");
}
}
}