/*
* PersistentTS.java - Copyright(c) 2014 Joe Pasqua
* Provided under the MIT License. See the LICENSE file for details.
* Created: Nov 25, 2014
*/
package org.noroomattheinn.timeseries;
import com.google.common.collect.Range;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.math.BigInteger;
import java.util.Date;
import java.util.Timer;
import java.util.TimerTask;
import static org.noroomattheinn.timeseries.TSBase.logger;
/**
* PersistentTS: A persistent repository for time series data.
*
* A PersistentTS is represented by a header file and a data file.
*
* The header file contains two lines:
* VERSION:
* A number that corresponds to the implementation that wrote the repository
* STRING[\tSTRING]*
* A tab-separated list of strings. Each String represents the name of
* a column that is stored in the data file
*
* The data file contains lines that are either comments or data rows:
* COMMENT: Any line beginning with a # is an uninterpreted comment
* DATA ROW: All data rows have the form:
 *       TIMESTAMP BITVECTOR VAL[\tVAL]*
* where
 * TIMESTAMP is a long indicating the time of the sample. This value
 * is delta-encoded, meaning you must accumulate values up to a row in
 * order to know the timestamp of that row. If the stored value is negative
 * then it represents an absolute (not delta-encoded) timestamp given by its
 * absolute value;
*
* BITVECTOR is the hex representation of a 64-bit bit vector
* which indicates which samples were recorded at this timestamp
*
* VAL+ is a tab separated list of values. There must be as
* many values in this list as 1 bits in the bit vector.
* A value may be:<ul>
* <li>A double value represented as a String</li>
* <li>The literal "*" which indicates that this value
* is the same as the last recorded value of this column.</li>
* <li>The literal "!" which indicates that this value
* should be ignored and removed from the bit vector. This
* can be used to take the place of NaN or INF values.</li>
* </ul>
*
* @author Joe Pasqua <joe at NoRoomAtTheInn dot org>
*/
public class PersistentTS extends TSBase {
/*------------------------------------------------------------------------------
*
* Constants and Enums
*
*----------------------------------------------------------------------------*/
private static final int RepoVersion = 1;
private static final long FlushInterval = 20 * 1000L;
/*------------------------------------------------------------------------------
*
* Internal State
*
*----------------------------------------------------------------------------*/
private final Repo repo; // The underlying repository
private final Emitter emitter; // Used to write rows
private final Timer timer; // To manage flushing
private Row pendingRow; // Used to merge rows if needed
private long timeOfFirstRow; // The oldest data in the series
/*==============================================================================
* ------- -------
* ------- Public Interface To This Class -------
* ------- -------
*============================================================================*/
/**
* Create PersistentTimeSeries object that is ready to take writes
*
* @param container The directory that should contain the persistent store
* @param baseName The baseName of the persistent store files
* @param descriptor Describes the schema of the rows in the store
* @param forceOrdering If true, then all data added to the time series
* will be forced to have monotonically increasing
* timestamps. If a row or value is added whose time-
* stamp is less than a value that has already been
* added, the newer timestamp will be used.
* If false, an old timestamp will result in an
* IllegalArgumentException
*/
public PersistentTS(File container, String baseName, RowDescriptor schema, boolean forceOrdering)
throws IOException {
super(schema);
this.repo = Repo.getRepo(container, baseName, schema);
this.emitter = new Emitter(forceOrdering);
this.pendingRow = null;
this.timer = new Timer();
timer.schedule(
new TimerTask() { @Override public void run() { flush(); } },
FlushInterval);
timeOfFirstRow = Long.MAX_VALUE; // If no rows...
streamRows(Range.<Long>all(), new RowCollector() {
@Override public boolean collect(Row r) {
timeOfFirstRow = r.timestamp;
return false;
}
});
}
public static boolean repoExistsFor(File container, String baseName) {
return Repo.repoExistsFor(container, baseName);
}
/*------------------------------------------------------------------------------
*
* Methods overriden from TimeSeries
*
*----------------------------------------------------------------------------*/
@Override public long firstTime() { return timeOfFirstRow; }
@Override public synchronized Row storeRow(Row r) throws IllegalArgumentException {
if (pendingRow == null) {
pendingRow = r;
} else {
if (deflate(r.timestamp) == deflate(pendingRow.timestamp)) {
pendingRow.mergeWith(r);
logger.info("Merging");
} else {
emitter.emit(pendingRow);
pendingRow = r;
}
}
return r;
}
@Override public final synchronized void streamRows(
Range<Long> period, RowCollector collector) {
double accumulator[] = new double[schema.nColumns];
if (period == null) period = Range.all();
long fromTime = period.hasLowerBound() ? period.lowerEndpoint() : 0L;
long toTime = period.hasUpperBound() ? period.upperEndpoint() : Long.MAX_VALUE;
long prevTime = 0;
BufferedReader rdr = null;
try {
rdr = repo.getReader();
String line;
while ((line = rdr.readLine()) != null) {
if (line.startsWith("#")) { continue; }
String[] tokens = line.split("\t");
// The first entry on the line is the time in delta format
Long time = longValue(tokens[0]);
if (time == null) { continue; } // Invalid format, ignore this line
time = time < 0 ? -time : time + prevTime;
prevTime = time; // Keep a running tally of the current time
time = inflate(time);
if (time < fromTime) continue; // Out of range, ignore & move on
if (time > toTime) break; // Out of range, ignore & stop
Row row = new Row(time, 0L, schema.nColumns);
// The second element is a bitvector corresponding to which
// columns have values on this line
Long bitVector = longValue("0x" + tokens[1]);
if (bitVector == null) { continue; } // Invalid format, Ignore this line
row.bitVector = bitVector;
// The remaining entries are readings. There is one reading for
// each 1 bit in the bitvector. The positions in the bitvector
// correspond to the columns in the order initially specified
long bit = 1;
int tokenIndex = 2;
for (int i = 0; i < schema.nColumns; i++) {
row.values[i] = accumulator[i]; // Start off with the previous value
if (row.includes(bit)) {
String valString = tokens[tokenIndex++];
switch (valString) {
case "*": break;
case "!": row.clear(bit); break;
default:
Double val = doubleValue(valString);
if (val == null) { row.clear(bit); }
else { accumulator[i] = row.values[i] = val.doubleValue(); }
break;
}
} else {
row.values[i] = accumulator[i];
}
bit = bit << 1;
}
if (!collector.collect(row)) break;
}
} catch (IOException ex) {
logger.severe("Error loading from repository" + ex);
}
if (rdr != null) try {
rdr.close();
} catch (IOException e) {
logger.warning("Failure closing reader: " + e);
}
}
@Override public synchronized void flush() {
if (pendingRow != null) {
emitter.emit(pendingRow);
pendingRow = null;
}
repo.flush();
}
@Override public synchronized void close() {
flush();
repo.close();
timer.cancel();
}
/*------------------------------------------------------------------------------
*
* PRIVATE - Utility methods
*
*----------------------------------------------------------------------------*/
private static Long longValue(String valString) {
try {
return Long.decode(valString);
} catch (NumberFormatException e) {
logger.warning("Invalid Long in TimeSeries: " + valString);
return null;
}
}
private static Double doubleValue(String valString) {
try {
return Double.valueOf(valString);
} catch (NumberFormatException e) {
logger.warning("Invalid Double in TimeSeries: " + valString);
return null;
}
}
private static long deflate(long timestamp) { return timestamp/100; }
private static long inflate(long timestamp) { return timestamp*100; }
private class Emitter {
private Row lastRowEmitted;
private final PrintStream ps;
private final boolean forceOrdering;
Emitter(boolean forceOrdering) {
this.lastRowEmitted = null;
this.forceOrdering = forceOrdering;
this.ps = repo.getPrintStream();
}
Row emit(Row r) throws IllegalArgumentException {
// Emit the timestamp for the row
ps.print(adjustTimeIfNeeded(r.timestamp));
// Emit the bit vector describing which columns are included
ps.append("\t");
ps.append(Long.toHexString(r.bitVector));
// Emit the column values
long bitForColumn = 1;
for (int i = 0; i < schema.nColumns; i++) {
if (r.includes(bitForColumn)) {
ps.append("\t");
double val = r.values[i];
if (Double.isInfinite(val) || Double.isNaN(val)) {
ps.print("!");
} else if (lastRowEmitted != null && val == lastRowEmitted.values[i]) {
ps.print("*");
} else {
ps.print(val);
}
}
bitForColumn = bitForColumn << 1;
}
ps.println();
lastRowEmitted = r;
return r;
}
private long adjustTimeIfNeeded(long newTime) {
if (lastRowEmitted == null) { return -deflate(newTime); }
else {
long oldTime = lastRowEmitted.timestamp;
long time = deflate(newTime) - deflate(oldTime);
if (time < 0) {
if (forceOrdering) {
time = deflate(lastRowEmitted.timestamp);
logger.fine("Forcing timestamps into sequence: " +
newTime + ", " + oldTime);
} else throw new IllegalArgumentException(
"Timestamps out of sequence: " + newTime +
", " + oldTime);
}
return time;
}
}
}
/*------------------------------------------------------------------------------
*
* PRIVATE - The class implementing the filed-based repository
*
*----------------------------------------------------------------------------*/
private static class Repo {
private final RowDescriptor schema;
private final File dataFile;
private final File hdrFile;
private PrintStream ps;
private Repo(File container, String name, RowDescriptor schema) {
this.schema = schema;
this.dataFile = dataFile(container, name);
this.hdrFile = headerFile(container, name);
this.ps = null;
}
static boolean repoExistsFor(File container, String baseName) {
File header = headerFile(container, baseName);
File data = dataFile(container, baseName);
boolean hdrExists = header.exists();
boolean dataExists = data.exists();
return hdrExists && dataExists;
}
public void flush() { if (ps != null) ps.flush(); }
public void close() { if (ps != null) ps.close(); }
static Repo getRepo(File container, String name, RowDescriptor schema)
throws IOException {
Repo repo = new Repo(container, name, schema);
if (!repo.hdrFile.exists() && repo.dataFile.exists()) {
// Danger! The data file has become "disconnected" from the
// header file. Don't create a new data file - the data is valuable
// Don't just create a new header file because you don't know
// if the schemas match. It's safest to raise an exception.
throw new FileNotFoundException("Data file without Header file");
}
repo.ensureValidHeader();
if (!repo.dataFile.exists()) repo.createDataFile();
repo.ps = new PrintStream(new FileOutputStream(repo.dataFile, true));
return repo;
}
public PrintStream getPrintStream() { return ps; }
public BufferedReader getReader() throws FileNotFoundException {
return new BufferedReader(new FileReader(dataFile));
};
private void ensureValidHeader() throws IOException {
if (!hdrFile.exists()) {
createHeaderFile();
return;
}
// Read the existing header file and make sure it's valid
String line;
BufferedReader reader = new BufferedReader(new FileReader(hdrFile));
line = reader.readLine();
if (line == null) throw new IOException("Empty Header File");
int version = Integer.valueOf(line);
if (version > RepoVersion)
throw new IOException(
"Can't read newer repo version :" + version + " vs " + RepoVersion);
line = reader.readLine();
if (line == null) throw new IOException("Missing column name declarations");
String[] declaredNames = line.split("\t");
if (declaredNames.length > schema.nColumns) {
throw new IOException("Mismatched column names - too few supplied names");
}
for (int i = 0; i < declaredNames.length; i++) {
if (!declaredNames[i].equals(schema.columnNames[i])) {
throw new IOException("Mismatched column names");
}
}
reader.close();
if (schema.nColumns > declaredNames.length) {
logger.info("Adding new column(s)");
createHeaderFile(); // We've got new columns! Overwrite the header file
}
}
private void createHeaderFile() throws FileNotFoundException {
PrintStream writer = new PrintStream(new FileOutputStream(hdrFile, false));
writer.format("%d\n", RepoVersion);
int lastIndex = schema.nColumns-1;
int index = 0;
while (true) {
writer.append(schema.columnNames[index]);
if (index++ != lastIndex) writer.append("\t");
else break;
}
writer.close();
}
private void createDataFile() throws FileNotFoundException {
PrintStream writer = new PrintStream(new FileOutputStream(dataFile), false);
writer.format("# %s\n", (new Date().toString()));
writer.close();
}
private static File headerFile(File container, String baseName) {
return new File(container, baseName + ".pts.hdr");
}
private static File dataFile(File container, String baseName) {
return new File(container, baseName + ".pts.data");
}
}
}