/*******************************************************************************
* Copyright (c) 2011 The Board of Trustees of the Leland Stanford Junior University
* as Operator of the SLAC National Accelerator Laboratory.
* Copyright (c) 2011 Brookhaven National Laboratory.
* EPICS archiver appliance is distributed subject to a Software License Agreement found
* in file LICENSE that is included with this distribution.
*******************************************************************************/
package edu.stanford.slac.archiverappliance.PlainPB;
import static java.nio.file.StandardCopyOption.ATOMIC_MOVE;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.FileTime;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.log4j.Logger;
import org.epics.archiverappliance.Event;
import org.epics.archiverappliance.EventStream;
import org.epics.archiverappliance.NoDataException;
import org.epics.archiverappliance.StoragePlugin;
import org.epics.archiverappliance.common.BasicContext;
import org.epics.archiverappliance.common.PartitionGranularity;
import org.epics.archiverappliance.common.TimeUtils;
import org.epics.archiverappliance.config.ArchDBRTypes;
import org.epics.archiverappliance.config.ConfigService;
import org.epics.archiverappliance.config.PVNameToKeyMapping;
import org.epics.archiverappliance.etl.ConversionFunction;
import org.epics.archiverappliance.etl.ETLBulkStream;
import org.epics.archiverappliance.etl.ETLContext;
import org.epics.archiverappliance.etl.ETLDest;
import org.epics.archiverappliance.etl.ETLInfo;
import org.epics.archiverappliance.etl.ETLSource;
import org.epics.archiverappliance.etl.StorageMetrics;
import org.epics.archiverappliance.etl.StorageMetricsContext;
import org.epics.archiverappliance.retrieval.CallableEventStream;
import org.epics.archiverappliance.retrieval.RemotableEventStreamDesc;
import org.epics.archiverappliance.retrieval.postprocessors.DefaultRawPostProcessor;
import org.epics.archiverappliance.retrieval.postprocessors.PostProcessor;
import org.epics.archiverappliance.retrieval.postprocessors.PostProcessorWithConsolidatedEventStream;
import org.epics.archiverappliance.retrieval.postprocessors.PostProcessors;
import org.epics.archiverappliance.utils.nio.ArchPaths;
import org.epics.archiverappliance.utils.ui.URIUtils;
import edu.stanford.slac.archiverappliance.PB.EPICSEvent;
/**
* The plain PB storage plugin stores data in a chunk per PV per partition in sequential form.
* No index is maintained, simple search algorithms are used to locate events.
* This plugin has these configuration parameters.
* <dl>
* <dt>name</dt><dd>This serves to identify this plugin; mandatory</dd>
 * <dt>rootFolder</dt><dd>This serves as the rootFolder that is prepended to the path generated for a PV+chunk; mandatory.
* One can use environment variables here; for example, <code>pb://localhost?name=STS&rootFolder=${ARCHAPPL_SHORT_TERM_FOLDER}&partitionGranularity=PARTITION_HOUR</code> where the value for ${ARCHAPPL_SHORT_TERM_FOLDER} is picked up from the environment/system properties.
* </dd>
 * <dt>partitionGranularity</dt><dd>Defines the time partition granularity for this plugin. For example, if the granularity is <code>PARTITION_HOUR</code>, then a new chunk is created for each hour of data. The partitions are clean; that is, they contain data only for that partition. It is possible to predict which chunk contains data for a particular instant in time and which chunks contain data for a particular time period. This is a mandatory field.</dd>
* <dt>compress</dt><dd>This is an optional field that defines the compression mode.
* The support for zip compression is experimental.
 * If zip compression is used, the <code>rootFolder</code> is prefixed with <code>{@link org.epics.archiverappliance.utils.nio.ArchPaths#ZIP_PREFIX ZIP_PREFIX}</code>.
 * If this prefix is absent from the <code>rootFolder</code>, the initialization code automatically adds it.
* </dd>
* <dt>hold & gather</dt><dd><code>hold</code> and <code>gather</code> are optional fields that work together to implement high/low watermarks for data transfer.
* By default, both <code>hold</code> and <code>gather</code> are 0 which leads to data being transferred out of this plugin as soon as the partition boundary is reached.
* You can <code>hold</code> a certain number of partitions in this store (perhaps because this store is a high performing one).
* In this case, ETL does not start until the first event in this store is older than <code>hold</code> partitions.
* Once ETL begins, you can transfer <code>gather</code> partitions at a time.
 * For example, <code>hold=5&gather=3</code> lets you keep at least <code>5-3=2</code> partitions in this store. ETL kicks in once the oldest event is older than <code>5</code> partitions and data is moved <code>3</code> partitions at a time.
* </dd>
* <dt>pp</dt><dd>An optional parameter, this contains a list of {@link org.epics.archiverappliance.retrieval.postprocessors.PostProcessor post processing operators} that are computed and cached during ETL.
* During retrieval, if an exact match is found, then the data from the cached copy is used (greatly improving retrieval performance).
* Otherwise, the post processor is applied and the data is computed at runtime.
 * To specify multiple post processors, use standard URL syntax, like so: <code>pp=rms&pp=mean_3600</code>.
* </dd>
* <dt>consolidateOnShutdown</dt><dd>This lets you control if ETL should push data to the subsequent store on appserver shutdown. This is useful if you are using a RAMDisk for the short term store.</dd>
* <dt>reducedata</dt><dd>An optional parameter; use this parameter to reduce the data as you move it into this store. You can use any of the <a href="http://slacmshankar.github.io/epicsarchiver_docs/userguide.html#post_processing">post processors</a> that can be used with the <code>pp</code> argument.
* For example, if you define the LTS as <code>pb://localhost?name=LTS&rootFolder=${ARCHAPPL_LONG_TERM_FOLDER}&partitionGranularity=PARTITION_YEAR&reducedata=firstSample_3600</code>, then when moving data into this store, ETL will apply the <code>firstSample_3600</code> operator on the raw data to reduce the data and store only the reduced data.
* The difference between this parameter and the <code>pp</code> parameter is that in the <code>reducedata</code> case, only the reduced data is stored. The raw data is thrown away.
* If you specify both the <code>pp</code> and the <code>reducedata</code>, you may get unpredictable results because the raw data is necessary to precompute the caches.
* </dd>
* <dt>etlIntoStoreIf</dt><dd>An optional parameter; use this parameter to control if ETL should move data into this store.
* If the named flag specified by this parameter is false, this plugin will behave like the blackhole plugin (and you will lose data).
 * Note that named flags are false by default; so if you specify this flag and forget to set the named flag, the default behavior is to lose data.
 * If you don't set this flag at all, this plugin behaves normally and will accept all the ETL data coming in.
 * For example, if you add <code>etlIntoStoreIf=testFlag</code>, then data will be moved into this store only if the value of the named flag <code>testFlag</code> is true.
* </dd>
* <dt>etlOutofStoreIf</dt><dd>An optional parameter; use this parameter to control if ETL should move data out of this store.
* If the named flag specified by this parameter is false, this plugin will behave like a bag of holding and accumulate all the data it can.
 * Note that named flags are false by default; so if you specify this flag and forget to set the named flag, the default behavior is to collect data until you run out of space.
 * If you don't set this flag at all, this plugin behaves normally and will move data out as before.
 * For example, if you add <code>etlOutofStoreIf=testFlag</code>, then data will be moved out of this store only if the value of the named flag <code>testFlag</code> is true.
* </dd>
* </dl>
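 * <p>
 * As a usage sketch, one would typically configure an instance from its URL representation; the snippet below does this directly
 * (the URL, the root folder, and the <code>configService</code> handle are illustrative):
 * </p>
 * <pre>
 * PlainPBStoragePlugin sts = new PlainPBStoragePlugin();
 * sts.initialize("pb://localhost?name=STS&rootFolder=/arch/sts&partitionGranularity=PARTITION_HOUR", configService);
 * </pre>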
* @author mshankar
*
*/
public class PlainPBStoragePlugin implements StoragePlugin, ETLSource, ETLDest, StorageMetrics {
private static Logger logger = Logger.getLogger(PlainPBStoragePlugin.class.getName());
public static final String PB_EXTENSION = ".pb";
public static final String APPEND_EXTENSION = ".pbappend";
private String rootFolder = "/tmp";
private String name;
private ConfigService configService;
private PVNameToKeyMapping pv2key;
/**
* Support for ZIP_PER_PV is still experimental.
* @author mshankar
*/
public enum CompressionMode {
NONE,
ZIP_PER_PV
}
private String desc = "Plain PB plugin";
// By default, we partition based on a year's boundary.
PartitionGranularity partitionGranularity = PartitionGranularity.PARTITION_YEAR;
/**
 * Should we back up the affected partitions before letting ETL touch them?
 * This has performance implications, as we will be copying the file on each ETL run.
*/
private boolean backupFilesBeforeETL = false;
private CompressionMode compressionMode = CompressionMode.NONE;
private List<String> postProcessorUserArgs = null;
private String reducedataPostProcessor = null;
private ConcurrentHashMap<String, AppendDataStateData> appendDataStates = new ConcurrentHashMap<String, AppendDataStateData>();
private int holdETLForPartions = 0;
private int gatherETLinPartitions = 0;
private boolean consolidateOnShutdown = false;
/**
 * Most of the time, this will be null.
 * However, if specified, we use the value of the named flag identified by this variable to control whether this plugin behaves like a black hole plugin or not.
*/
private String etlIntoStoreIf;
private String etlOutofStoreIf;
public List<Callable<EventStream>> getDataForPV(BasicContext context, String pvName, Timestamp startTime, Timestamp endTime) throws IOException {
DefaultRawPostProcessor postProcessor = new DefaultRawPostProcessor();
return getDataForPV(context, pvName, startTime, endTime, postProcessor);
}
/*
* (non-Javadoc)
* @see org.epics.archiverappliance.Reader#getDataForPV(java.lang.String, java.sql.Timestamp, java.sql.Timestamp, boolean)
*/
@Override
public List<Callable<EventStream>> getDataForPV(BasicContext context, String pvName, Timestamp startTime, Timestamp endTime, PostProcessor postProcessor) throws IOException {
try {
Path[] paths = null;
String extension = "." + postProcessor.getExtension();
boolean userWantsRawData = extension.equals(PB_EXTENSION);
boolean askingForProcessedDataButAbsentInCache = false;
// We assume that if things are cached then all of the caches are available.
// There's probably a more accurate but slightly slower way to do this where we check if each partition has its cached data and if not return a wrapped version.
// For now, we assume that ETL is doing its job.
// If this is not the case, we should switch to the more accurate algorithm.
if(userWantsRawData) {
logger.debug("User wants raw data.");
paths = PlainPBPathNameUtility.getPathsWithData(context.getPaths(), rootFolder, pvName, startTime, endTime, PB_EXTENSION, partitionGranularity, this.compressionMode, this.pv2key);
} else {
paths = PlainPBPathNameUtility.getPathsWithData(context.getPaths(), rootFolder, pvName, startTime, endTime, extension, partitionGranularity, this.compressionMode, this.pv2key);
if(paths == null || paths.length == 0) {
logger.debug("Did not find any cached entries for " + pvName + " for post processor " + extension + ". Defaulting to using the raw streams and computing the data at runtime.");
askingForProcessedDataButAbsentInCache = true;
paths = PlainPBPathNameUtility.getPathsWithData(context.getPaths(), rootFolder, pvName, startTime, endTime, PB_EXTENSION, partitionGranularity, this.compressionMode, this.pv2key);
} else {
logger.debug("Found " + paths.length + " cached entries for " + pvName + " for post processor " + extension);
}
}
logger.debug(desc + " Found " + (paths != null ? paths.length : 0) + " matching files for pv " + pvName + " in store " + this.getName());
boolean useSearchForPositions = (this.compressionMode == CompressionMode.NONE);
boolean doNotuseSearchForPositions = !useSearchForPositions;
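// Seek-based searches for event positions within a file are only attempted for uncompressed data; zipped streams have to be read sequentially.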
ArrayList<Callable<EventStream>> ret = new ArrayList<Callable<EventStream>>();
// Regardless of what we find, we add the last event from the partition before the start time
// This takes care of several multi-year bugs and hopefully does not introduce more.
// The mergededup consumer will digest this using its buffers and serve data appropriately.
Callable<EventStream> lastEventOfPreviousStream = getLastEventOfPreviousPartitionBeforeTimeAsStream(context, pvName, startTime, postProcessor, askingForProcessedDataButAbsentInCache);
if(lastEventOfPreviousStream != null) ret.add(lastEventOfPreviousStream);
if(paths != null && paths.length == 1) {
PBFileInfo fileInfo = new PBFileInfo(paths[0]);
ArchDBRTypes dbrtype = fileInfo.getType();
if(fileInfo.getLastEventEpochSeconds() <= TimeUtils.convertToEpochSeconds(startTime)) {
logger.debug("All we can get from this store is the last known event at " + TimeUtils.convertToHumanReadableString(fileInfo.getLastEventEpochSeconds()));
ret.add(CallableEventStream.makeOneEventCallable(fileInfo.getLastEvent(), new RemotableEventStreamDesc(dbrtype, pvName, fileInfo.getDataYear()), postProcessor, askingForProcessedDataButAbsentInCache));
} else {
ret.add(CallableEventStream.makeOneStreamCallable(new FileBackedPBEventStream(pvName, paths[0], dbrtype, startTime, endTime, doNotuseSearchForPositions), postProcessor, askingForProcessedDataButAbsentInCache));
}
} else if(paths != null && paths.length > 1) {
PBFileInfo fileInfo = new PBFileInfo(paths[0]);
ArchDBRTypes dbrtype = fileInfo.getType();
int pathsCount = paths.length;
for(int pathid = 0; pathid < pathsCount; pathid++) {
if(pathid == 0 || pathid == pathsCount - 1) {
// The first and last files may contain events outside the requested range; constrain the stream to [startTime, endTime].
ret.add(CallableEventStream.makeOneStreamCallable(new FileBackedPBEventStream(pvName, paths[pathid], dbrtype, startTime, endTime, doNotuseSearchForPositions), postProcessor, askingForProcessedDataButAbsentInCache));
} else {
// Files in the middle lie wholly within the requested range; stream them in full.
ret.add(CallableEventStream.makeOneStreamCallable(new FileBackedPBEventStream(pvName, paths[pathid], dbrtype), postProcessor, askingForProcessedDataButAbsentInCache));
}
}
} else {
logger.debug("Ret should have only the last event of the previous partition for pv " + pvName);
}
return ret;
} catch (NoDataException nex) {
logger.warn(desc + ": did not find any data for " + pvName + " returning null", nex);
return null;
} catch (Exception ex) {
throw new IOException("Exception retrieving data from " + desc + " for pv " + pvName, ex);
}
}
private Callable<EventStream> getLastEventOfPreviousPartitionBeforeTimeAsStream(BasicContext context, String pvName, Timestamp startTime, PostProcessor postProcessor, boolean askingForProcessedDataButAbsentInCache) throws Exception, IOException {
Path mostRecentPath = PlainPBPathNameUtility.getPreviousPartitionBeforeTime(context.getPaths(), rootFolder, pvName, startTime, PB_EXTENSION, partitionGranularity, this.compressionMode, this.pv2key);
if(mostRecentPath != null) {
// Should we use these two here?
// boolean useSearchForPositions = (this.compressionMode == CompressionMode.NONE);
// boolean doNotuseSearchForPositions = !useSearchForPositions;
logger.debug("Last known event for PV comes from " + mostRecentPath.toString());
PBFileInfo fileInfo = new PBFileInfo(mostRecentPath);
ArchDBRTypes dbrtype = fileInfo.getType();
RemotableEventStreamDesc lastKnownEventDesc = new RemotableEventStreamDesc(dbrtype, pvName, fileInfo.getDataYear());
lastKnownEventDesc.setSource("Last known event from " + this.getName() + " from " + mostRecentPath.getFileName());
return CallableEventStream.makeOneEventCallable(fileInfo.getLastEvent(), lastKnownEventDesc, postProcessor, askingForProcessedDataButAbsentInCache);
}
logger.debug(desc + ": did not even find the most recent file with data for " + pvName + " returning null.");
return null;
}
private AppendDataStateData getAppendDataState(BasicContext context, String pvName) throws IOException {
if(appendDataStates.containsKey(pvName)) {
return appendDataStates.get(pvName);
} else {
logger.debug("Creating new append data state for pv " + pvName);
AppendDataStateData state = new AppendDataStateData(this.partitionGranularity, this.rootFolder, this.desc, getLastKnownTimestampForAppend(context, pvName), this.compressionMode, this.pv2key);
appendDataStates.put(pvName, state);
return state;
}
}
/* (non-Javadoc)
 * @see org.epics.archiverappliance.Writer#appendData(java.lang.String, org.epics.archiverappliance.EventStream)
* Append the data to the end of the file.
* For now we are assuming that the caller is doing the partitioning by year.
*/
@Override
public boolean appendData(BasicContext context, String pvName, EventStream stream) throws IOException {
AppendDataStateData state = getAppendDataState(context, pvName);
state.partitionBoundaryAwareAppendData(context, pvName, stream, PB_EXTENSION, null);
return true;
}
/* (non-Javadoc)
* Append the data to the end of the ETL append data file.
*/
@Override
public boolean appendToETLAppendData(String pvName, EventStream stream, ETLContext context) throws IOException {
if(this.etlIntoStoreIf != null) {
boolean namedFlagValue = this.configService.getNamedFlag(this.etlIntoStoreIf);
if(!namedFlagValue) {
logger.info("Skipping ETL append data for " + pvName + " as named flag " + this.etlIntoStoreIf + " is false.");
return true;
}
}
AppendDataStateData state = getAppendDataState(context, pvName);
if(this.reducedataPostProcessor != null) {
try {
PostProcessor postProcessor = PostProcessors.findPostProcessor(this.reducedataPostProcessor);
postProcessor.initialize(reducedataPostProcessor, pvName);
stream = CallableEventStream.makeOneStreamCallable(stream, postProcessor, true).call();
logger.debug("Wrapped stream with post processor " + this.reducedataPostProcessor + " for pv " + pvName);
if(postProcessor instanceof PostProcessorWithConsolidatedEventStream) {
stream = ((PostProcessorWithConsolidatedEventStream) postProcessor).getConsolidatedEventStream();
logger.debug("Using consolidated event stream for pv " + pvName);
}
} catch (Exception ex) {
logger.error("Exception moving reduced data for pv " + pvName + " to store " + this.getName() + " using operator " + this.reducedataPostProcessor, ex);
return false;
}
}
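// Prefer a bulk (file level) transfer when the source stream supports it; otherwise fall back to an event-by-event append below.
// When backupFilesBeforeETL is set, data is appended into .pbappend files; commitETLAppendData() later moves these over the live .pb files.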
boolean bulkInserted = false;
if(stream instanceof ETLBulkStream) {
ETLBulkStream bulkStream = (ETLBulkStream) stream;
if(backupFilesBeforeETL) {
bulkInserted = state.bulkAppend(pvName, context, bulkStream, APPEND_EXTENSION, PB_EXTENSION);
} else {
bulkInserted = state.bulkAppend(pvName, context, bulkStream, PB_EXTENSION, null);
}
}
if(!bulkInserted) {
if(backupFilesBeforeETL) {
state.partitionBoundaryAwareAppendData(context, pvName, stream, APPEND_EXTENSION, PB_EXTENSION);
} else {
state.partitionBoundaryAwareAppendData(context, pvName, stream, PB_EXTENSION, null);
}
}
return true;
}
@Override
public String getDescription() {
return desc;
}
@Override
public void initialize(String configURL, ConfigService configService) throws IOException {
this.configService = configService;
this.pv2key = this.configService.getPVNameToKeyConverter();
assert(pv2key != null);
try {
URI srcURI = new URI(configURL);
HashMap<String, String> queryNVPairs = URIUtils.parseQueryString(srcURI);
if(queryNVPairs.containsKey("name")) {
name = queryNVPairs.get("name");
} else {
throw new IOException("Cannot initialize the plugin; this plugin implements the storage metrics API which needs an identity");
}
String rootFolderStr = null;
if(queryNVPairs.containsKey("rootFolder")) {
rootFolderStr = queryNVPairs.get("rootFolder");
} else {
throw new IOException("Cannot initialize the plugin; this needs both the rootFolder and the partitionGranularity to be specified");
}
if(queryNVPairs.containsKey("partitionGranularity")) {
this.setPartitionGranularity(PartitionGranularity.valueOf(queryNVPairs.get("partitionGranularity")));
} else {
throw new IOException("Cannot initialize the plugin; this needs both the rootFolder and the partitionGranularity to be specified");
}
if(queryNVPairs.containsKey("hold")) {
this.setHoldETLForPartions(Integer.parseInt(queryNVPairs.get("hold")));
}
if(queryNVPairs.containsKey("gather")) {
this.setGatherETLinPartitions(Integer.parseInt(queryNVPairs.get("gather")));
}
if(queryNVPairs.containsKey("compress")) {
compressionMode = CompressionMode.valueOf(queryNVPairs.get("compress"));
if(compressionMode != CompressionMode.NONE) {
if(!rootFolderStr.startsWith(ArchPaths.ZIP_PREFIX)) {
String rootFolderWithPath = ArchPaths.ZIP_PREFIX + rootFolderStr;
logger.debug("Automatically adding url scheme for compression to rootfolder " + rootFolderWithPath);
rootFolderStr = rootFolderWithPath;
}
}
}
setRootFolder(rootFolderStr);
this.postProcessorUserArgs = URIUtils.getMultiValuedParamFromQueryString(srcURI, "pp");
if(queryNVPairs.containsKey("reducedata")) {
reducedataPostProcessor = queryNVPairs.get("reducedata");
}
if(queryNVPairs.containsKey("consolidateOnShutdown")) {
this.consolidateOnShutdown = Boolean.parseBoolean(queryNVPairs.get("consolidateOnShutdown"));
}
if(queryNVPairs.containsKey("etlIntoStoreIf")) {
this.etlIntoStoreIf = queryNVPairs.get("etlIntoStoreIf");
}
if(queryNVPairs.containsKey("etlOutofStoreIf")) {
this.etlOutofStoreIf = queryNVPairs.get("etlOutofStoreIf");
}
this.setDesc("PlainPBStorage plugin - " + name + " with rootFolder " + rootFolder + " and granularity " + partitionGranularity);
} catch(URISyntaxException ex) {
throw new IOException(ex);
}
}
/**
* Return a URL representation of this plugin suitable for parsing by StoragePluginURLParser
 * @return A URL representation of this plugin
*/
public String getURLRepresentation() {
try {
StringBuilder buf = new StringBuilder();
buf.append("pb://localhost?name=");
buf.append(URLEncoder.encode(name, "UTF-8"));
buf.append("&rootFolder=");
buf.append(URLEncoder.encode(rootFolder, "UTF-8"));
buf.append("&partitionGranularity=");
buf.append(partitionGranularity.toString());
if(this.holdETLForPartions != 0) {
buf.append("&hold=");
buf.append(Integer.toString(holdETLForPartions));
}
if(this.gatherETLinPartitions != 0) {
buf.append("&gather=");
buf.append(Integer.toString(gatherETLinPartitions));
}
if(this.consolidateOnShutdown) {
buf.append("&consolidateOnShutdown=");
buf.append(Boolean.toString(consolidateOnShutdown));
}
if(this.compressionMode != CompressionMode.NONE) {
buf.append("&compress=");
buf.append(compressionMode.toString());
}
if(this.postProcessorUserArgs != null && !this.postProcessorUserArgs.isEmpty()) {
for(String postProcessorUserArg : postProcessorUserArgs) {
buf.append("&pp=");
buf.append(postProcessorUserArg);
}
}
if(this.reducedataPostProcessor != null) {
buf.append("&reducedata=");
buf.append(reducedataPostProcessor);
}
if(this.etlIntoStoreIf != null) {
buf.append("&etlIntoStoreIf=");
buf.append(this.etlIntoStoreIf);
}
if(this.etlOutofStoreIf != null) {
buf.append("&etlOutofStoreIf=");
buf.append(this.etlOutofStoreIf);
}
String ret = buf.toString();
logger.debug("URL representation " + ret);
return ret;
} catch(Exception ex) {
logger.error("Exception generating URL representation of plugin", ex);
return null;
}
}
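// Eagerly exercise the protobuf-generated event classes by building and serializing a dummy ScalarDouble, so that classloading and static initialization happen up front.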
private static void loadPBclasses() {
try {
EPICSEvent.ScalarDouble.newBuilder()
.setSecondsintoyear(0)
.setNano(0)
.setVal(0)
.setSeverity(0)
.setStatus(0)
.build().toByteArray();
} catch(Exception ex) {
logger.error(ex.getMessage(), ex);
}
}
public void setRootFolder(String rootFolder) throws IOException {
this.rootFolder = rootFolder;
logger.debug("Setting root folder to " + rootFolder);
try(ArchPaths paths = new ArchPaths()) {
if(this.compressionMode == CompressionMode.NONE) {
Path path = paths.get(this.rootFolder);
if(!Files.exists(path)) {
logger.warn(desc + ": The root folder specified does not exist - " + rootFolder + ". Creating it");
Files.createDirectories(path);
return;
}
if(!Files.isDirectory(path)) {
logger.error(desc + ": The root folder specified is not a directory - " + rootFolder);
return;
}
} else {
Path path = paths.get(this.rootFolder.replace(ArchPaths.ZIP_PREFIX, "/"));
if(!Files.exists(path)) {
logger.warn(desc + ": The root folder specified does not exist - " + rootFolder + " Creating it");
Files.createDirectories(path);
return;
}
if(!Files.isDirectory(path)) {
logger.error(desc + ": The root folder specified is not a directory - " + rootFolder);
return;
}
}
}
loadPBclasses();
}
public void setDesc(String newDesc) {
this.desc = newDesc;
}
public String getRootFolder() {
return rootFolder;
}
public String getDesc() {
return desc;
}
@Override
public PartitionGranularity getPartitionGranularity() {
return partitionGranularity;
}
public void setPartitionGranularity(PartitionGranularity partitionGranularity) {
this.partitionGranularity = partitionGranularity;
}
@Override
public List<ETLInfo> getETLStreams(String pvName, Timestamp currentTime, ETLContext context) throws IOException {
if(etlOutofStoreIf != null) {
boolean namedFlagValue = this.configService.getNamedFlag(etlOutofStoreIf);
if(!namedFlagValue) {
logger.info("Skipping getting ETL Streams for " + pvName + " as named flag " + this. etlOutofStoreIf + " is false.");
return new LinkedList<ETLInfo>();
}
}
Path[] paths = PlainPBPathNameUtility.getPathsBeforeCurrentPartition(context.getPaths(), rootFolder, pvName, currentTime, PB_EXTENSION, partitionGranularity, this.compressionMode, this.pv2key);
if(paths == null || paths.length == 0) {
if(logger.isDebugEnabled()) {
logger.debug("No files for ETL for pv " + pvName + " for time " + TimeUtils.convertToISO8601String(currentTime));
}
return null;
}
if((holdETLForPartions - gatherETLinPartitions) < 0) {
logger.error("holdETLForPartions - gatherETLinPartitions is invalid for hold=" + holdETLForPartions + " and gather=" + gatherETLinPartitions);
}
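// Compute the hold and gather cutoffs in epoch seconds.
// A partition is a candidate for ETL only if its first event is at or before these cutoffs:
// hold gates whether ETL starts at all; gather controls how many partitions are moved in one run.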
long holdInEpochSeconds = TimeUtils.getPreviousPartitionLastSecond(TimeUtils.convertToEpochSeconds(currentTime) - partitionGranularity.getApproxSecondsPerChunk()*holdETLForPartions, partitionGranularity);
long gatherInEpochSeconds = TimeUtils.getPreviousPartitionLastSecond(TimeUtils.convertToEpochSeconds(currentTime) - partitionGranularity.getApproxSecondsPerChunk()*(holdETLForPartions - (gatherETLinPartitions - 1)), partitionGranularity);
boolean skipHoldAndGather = (holdETLForPartions == 0) && (gatherETLinPartitions == 0);
ArrayList<ETLInfo> etlreadystreams = new ArrayList<ETLInfo>();
boolean holdOk = false;
for(Path path : paths) {
try {
if(!Files.exists(path)) {
logger.warn("Path " + path + " does not seem to exist for ETL at time " + TimeUtils.convertToISO8601String(currentTime));
continue;
}
if(Files.size(path) <= 0) {
logger.warn("Path " + path + " is of size zero bytes at time " + TimeUtils.convertToISO8601String(currentTime));
long lastModifiedInMillis = Files.getLastModifiedTime(path).toMillis();
long currentTimeInMillis = currentTime.getTime();
if((currentTimeInMillis - lastModifiedInMillis) > ((this.holdETLForPartions+1) * this.getPartitionGranularity().getApproxSecondsPerChunk() * 1000L)) { // approxSecondsPerChunk is in seconds; convert to milliseconds for this comparison
logger.warn("Zero byte file is older than current ETL time by holdETLForPartions; deleting it " + path.toAbsolutePath().toString());
try {
Files.delete(path);
} catch(Exception ex) {
logger.error("Exception deleting file " + path.toAbsolutePath().toString(), ex);
}
}
continue;
}
PBFileInfo fileinfo = new PBFileInfo(path);
ETLInfo etlInfo = new ETLInfo(pvName, fileinfo.getType(), path.toAbsolutePath().toString(), partitionGranularity, new FileStreamCreator(pvName, path, fileinfo), fileinfo.getFirstEvent(), Files.size(path));
if(skipHoldAndGather) {
logger.debug("Skipping computation of hold and gather");
etlreadystreams.add(etlInfo);
} else {
if(fileinfo.getFirstEvent() == null) {
logger.debug("We seem to have an empty file " + path.toAbsolutePath().toString());
long lastModifiedInMillis = Files.getLastModifiedTime(path).toMillis();
long currentTimeInMillis = currentTime.getTime();
if((currentTimeInMillis - lastModifiedInMillis) > ((this.holdETLForPartions+1) * this.getPartitionGranularity().getApproxSecondsPerChunk() * 1000L)) { // approxSecondsPerChunk is in seconds; convert to milliseconds for this comparison
logger.warn("Empty file is older than current ETL time by holdETLForPartions; deleting it " + path.toAbsolutePath().toString());
try {
Files.delete(path);
} catch(Exception ex) {
logger.error("Exception deleting file " + path.toAbsolutePath().toString(), ex);
}
}
continue;
}
if(!holdOk) {
if(fileinfo.getFirstEventEpochSeconds() <= holdInEpochSeconds) {
holdOk = true;
} else {
logger.debug("Hold not satisfied for first event " + TimeUtils.convertToISO8601String(fileinfo.getFirstEventEpochSeconds()) + " and hold = " + TimeUtils.convertToISO8601String(holdInEpochSeconds));
return etlreadystreams;
}
}
if(fileinfo.getFirstEventEpochSeconds() <= gatherInEpochSeconds) {
etlreadystreams.add(etlInfo);
} else {
logger.debug("Gather not satisfied for first event " + TimeUtils.convertToISO8601String(fileinfo.getFirstEventEpochSeconds()) + " and gather = " + TimeUtils.convertToISO8601String(gatherInEpochSeconds));
}
}
} catch(IOException ex) {
logger.error("Skipping ading " + path.toAbsolutePath().toString() + " to ETL list due to exception. Should we go ahead and mark this file for deletion in this case? ", ex);
}
}
return etlreadystreams;
}
@Override
public void markForDeletion(ETLInfo info, ETLContext context) {
try {
Path path = context.getPaths().get(info.getKey());
long size = Files.size(path);
long sizeFromInfo = info.getSize();
if(sizeFromInfo == -1) {
logger.error("We are missing size information from ETLInfo for " + info.getKey());
Files.delete(path);
} else {
if(sizeFromInfo == size) {
Files.delete(path);
} else {
logger.error("The path " + info.getKey() + " has changed since we generate the ETLInfo. Not deleting it this time around. If this persists, please manually remove the file. Current Size " + size + ". Size from info " + sizeFromInfo);
}
}
} catch(Exception ex) {
logger.error("Exception deleting " + info.getKey() + ". Please manually remove this file", ex);
}
}
@Override
public Event getLastKnownEvent(BasicContext context, String pvName) throws IOException {
try {
Path[] paths = PlainPBPathNameUtility.getAllPathsForPV(context.getPaths(), rootFolder, pvName, PB_EXTENSION, partitionGranularity, this.compressionMode, this.pv2key);
logger.debug(desc + " Found " + (paths != null ? paths.length : 0) + " matching files for pv " + pvName);
if(paths != null && paths.length > 0) {
for(int i = paths.length-1; i >=0; i--) {
if(logger.isDebugEnabled()) logger.debug("Looking for last known event in file " + paths[i].toAbsolutePath().toString());
try {
if(Files.size(paths[i]) <= 0) {
logger.debug("Ignoring zero byte file " + paths[i].toAbsolutePath().toString());
continue;
}
PBFileInfo fileInfo = new PBFileInfo(paths[i]);
if(fileInfo.getLastEvent() != null) return fileInfo.getLastEvent();
} catch(Exception ex) {
logger.warn("Exception determing header information from file " + paths[i].toAbsolutePath().toString(), ex);
}
}
}
} catch(NoSuchFileException ex) {
// We expect a NoSuchFileException if the file does not exist.
return null;
} catch (Exception ex) {
logger.error("Exception determining last known event for " + pvName, ex);
}
return null;
}
@Override
public Event getFirstKnownEvent(BasicContext context, String pvName) throws IOException {
try {
Path[] paths = PlainPBPathNameUtility.getAllPathsForPV(context.getPaths(), rootFolder, pvName, PB_EXTENSION, partitionGranularity, this.compressionMode, this.pv2key);
logger.debug(desc + " Found " + (paths != null ? paths.length : 0) + " matching files for pv " + pvName);
if(paths != null && paths.length > 0) {
for(int i = 0; i < paths.length; i++) {
if(logger.isDebugEnabled()) logger.debug("Looking for first known event in file " + paths[i].toAbsolutePath().toString());
try {
PBFileInfo fileInfo = new PBFileInfo(paths[i]);
if(fileInfo.getFirstEvent() != null) return fileInfo.getFirstEvent();
} catch(Exception ex) {
logger.warn("Exception determing header information from file " + paths[i].toAbsolutePath().toString(), ex);
}
}
}
} catch(NoSuchFileException ex) {
// We expect a NoSuchFileException if the file does not exist.
return null;
} catch (Exception ex) {
logger.error("Exception determining first known event for " + pvName, ex);
}
return null;
}
/**
 * Get the last known timestamp for append purposes. If the last event is not known, we return time(0).
 * @param context BasicContext
 * @param pvName The name of the PV.
 * @return Timestamp The last known timestamp
 * @throws IOException
 */
private Timestamp getLastKnownTimestampForAppend(BasicContext context, String pvName) throws IOException {
Event event = getLastKnownEvent(context, pvName);
if(event != null) {
return event.getEventTimeStamp();
}
return new Timestamp(0);
}
@Override
public boolean prepareForNewPartition(String pvName, Event ev, ArchDBRTypes archDBRType, ETLContext context) throws IOException {
// The functionality in AppendDataState should take care of automatically preparing partitions and the like.
return true;
}
@Override
public boolean commitETLAppendData(String pvName, ETLContext context) throws IOException {
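// When backups are enabled, ETL appends went into .pbappend files; now that the transfer is complete, atomically move them over the live .pb files.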
if(compressionMode == CompressionMode.NONE) {
if(backupFilesBeforeETL) {
// Get all append data files for the specified PV name and partition granularity.
Path[] appendDataPaths = PlainPBPathNameUtility.getAllPathsForPV(context.getPaths(), rootFolder, pvName, APPEND_EXTENSION, partitionGranularity, this.compressionMode, this.pv2key);
if (appendDataPaths == null) {
logger.debug("No " + APPEND_EXTENSION + " files found for PV " + pvName);
return true;
}
if(logger.isDebugEnabled()) logger.debug(desc + " Found " + appendDataPaths.length + " matching files for pv " + pvName);
for(Path srcPath : appendDataPaths) {
Path destPath = context.getPaths().get(srcPath.toUri().toString().replace(APPEND_EXTENSION, PB_EXTENSION));
Files.move(srcPath, destPath, REPLACE_EXISTING, ATOMIC_MOVE);
}
}
}
return true;
}
@Override
public boolean runPostProcessors(String pvName, ArchDBRTypes dbrtype, ETLContext context) throws IOException {
if(postProcessorUserArgs != null && !postProcessorUserArgs.isEmpty()) {
for(String postProcessorUserArg : postProcessorUserArgs) {
PostProcessor postProcessor = PostProcessors.findPostProcessor(postProcessorUserArg);
if(postProcessor == null) {
logger.error("Cannot find post processor for " + postProcessorUserArg);
continue;
}
postProcessor.initialize(postProcessorUserArg, pvName);
String ppExt = "." + postProcessor.getExtension();
List<PPMissingPaths> missingOrOlderPPPaths = getListOfPathsWithMissingOrOlderPostProcessorData(context, pvName, postProcessor);
if(missingOrOlderPPPaths != null && !missingOrOlderPPPaths.isEmpty()) {
for(PPMissingPaths missingOrOlderPath : missingOrOlderPPPaths) {
if(logger.isDebugEnabled()) logger.debug("Generating pp data for " + missingOrOlderPath.ppsPath.toString() + " from " + missingOrOlderPath.srcPath.toString() + " and pp with extension" + ppExt + ". Size of src before " + Files.size(missingOrOlderPath.srcPath));
Callable<EventStream> callable = CallableEventStream.makeOneStreamCallable(new FileBackedPBEventStream(pvName, missingOrOlderPath.srcPath, dbrtype), postProcessor, true);
try(EventStream stream = callable.call()) {
// The post processor data can be generated at any time in any sequence; so we suspend the initial monotonicity checks for the post processor where we compare with the last known event.
// Ideally this should be the first event of the source stream minus some buffer.
Timestamp timezero = TimeUtils.convertFromEpochSeconds(0, 0);
AppendDataStateData state = new AppendDataStateData(this.partitionGranularity, this.rootFolder, this.desc, timezero, this.compressionMode, this.pv2key);
int eventsAppended = state.partitionBoundaryAwareAppendData(context, pvName, stream, ppExt, null);
if(logger.isDebugEnabled()) logger.debug("Done generating pp data for " + missingOrOlderPath.ppsPath.toString() + " from " + missingOrOlderPath.srcPath.toString() + " appending " + eventsAppended + " events. Size of src after " + Files.size(missingOrOlderPath.srcPath));
} catch(Exception ex) {
logger.error("Exception appending pp data for pv " + pvName + " for source " + missingOrOlderPath.srcPath.toString() + " for " + postProcessorUserArg);
}
}
} else {
logger.debug("All paths are current for pv " + pvName + " for pp " + postProcessorUserArg);
}
}
}
return true;
}
public boolean isBackupFilesBeforeETL() {
return backupFilesBeforeETL;
}
public void setBackupFilesBeforeETL(boolean backupFilesBeforeETL) {
this.backupFilesBeforeETL = backupFilesBeforeETL;
}
/**
* The hold and gather are used to implement a high/low watermark for ETL.
* ETL is skipped until the first known event in the partitions available for ETL is earlier than <i>hold</i> partitions.
* Once this is true, we then include in the ETL list all partitions whose first event is earlier than <i>hold - gather</i> partitions.
 * For example, with <code>PARTITION_DAY</code> granularity, if you want to run ETL once every 7 days but move 5 days' worth of data to the destination on each run, set hold to 7 and gather to 5.
* Hold and gather default to a scenario where we aggressively push data to the destination as soon as it is available.
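 * <p>For example (illustrative URL): <code>pb://localhost?name=MTS&rootFolder=/arch/mts&partitionGranularity=PARTITION_DAY&hold=7&gather=5</code></p>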
* @return holdETLForPartions
*/
public int getHoldETLForPartions() {
return holdETLForPartions;
}
public void setHoldETLForPartions(int holdETLForPartions) throws IOException {
if((holdETLForPartions - gatherETLinPartitions) < 0) throw new IOException("holdETLForPartions - gatherETLinPartitions is invalid for hold=" + holdETLForPartions + " and gather=" + gatherETLinPartitions);
this.holdETLForPartions = holdETLForPartions;
}
public int getGatherETLinPartitions() {
return gatherETLinPartitions;
}
public void setGatherETLinPartitions(int gatherETLinPartitions) throws IOException {
if((holdETLForPartions - gatherETLinPartitions) < 0) throw new IOException("holdETLForPartions - gatherETLinPartitions is invalid for hold=" + holdETLForPartions + " and gather=" + gatherETLinPartitions);
this.gatherETLinPartitions = gatherETLinPartitions;
}
@Override
public String getName() {
return this.name;
}
@Override
public long getTotalSpace(StorageMetricsContext storageMetricsContext) throws IOException {
return storageMetricsContext.getFileStore(this.getRootFolder()).getTotalSpace();
}
@Override
public long getUsableSpace(StorageMetricsContext storageMetricsContext) throws IOException {
return storageMetricsContext.getFileStore(this.getRootFolder()).getUsableSpace();
}
@Override
public long spaceConsumedByPV(String pvName) throws IOException {
long spaceConsumed = 0;
// A blank extension matches all of the files for this PV - the raw .pb files plus any cached post processor files.
try(ArchPaths archPaths = new ArchPaths()) {
Path[] rawPaths = PlainPBPathNameUtility.getAllPathsForPV(archPaths, rootFolder, pvName, "", partitionGranularity, this.compressionMode, this.pv2key);
if(rawPaths != null) {
for(Path f : rawPaths) {
spaceConsumed = spaceConsumed + Files.size(f);
}
}
}
return spaceConsumed;
}
public void setName(String name) {
this.name = name;
}
public CompressionMode getCompressionMode() {
return compressionMode;
}
private class PPMissingPaths {
Path srcPath;
Path ppsPath;
PPMissingPaths(Path srcPath, Path ppsPath) {
this.srcPath = srcPath;
this.ppsPath = ppsPath;
}
}
private List<PPMissingPaths> getListOfPathsWithMissingOrOlderPostProcessorData(BasicContext context, String pvName, PostProcessor postProcessor) throws IOException {
String ppExt = "." + postProcessor.getExtension();
logger.debug("Looking for missing " + ppExt + " paths based on the list of " + PB_EXTENSION + " paths");
Path[] rawPaths = PlainPBPathNameUtility.getAllPathsForPV(context.getPaths(), this.rootFolder, pvName, PB_EXTENSION, this.partitionGranularity, this.compressionMode, this.pv2key);
if(rawPaths == null) return new LinkedList<PPMissingPaths>();
Path[] ppPaths = PlainPBPathNameUtility.getAllPathsForPV(context.getPaths(), this.rootFolder, pvName, ppExt, this.partitionGranularity, this.compressionMode, this.pv2key);
HashMap<String, Path> ppPathsMap = new HashMap<String, Path>();
if(ppPaths != null) {
for(Path ppPath : ppPaths) {
ppPathsMap.put(ppPath.toUri().toString(), ppPath);
}
}
LinkedList<PPMissingPaths> ret = new LinkedList<PPMissingPaths>();
for(Path rawPath : rawPaths) {
String expectedPPPath = rawPath.toUri().toString().replace(PB_EXTENSION, ppExt);
if(!ppPathsMap.containsKey(expectedPPPath)) {
if(logger.isDebugEnabled()) logger.debug("Missing pp path " + expectedPPPath);
ret.add(new PPMissingPaths(rawPath, context.getPaths().get(expectedPPPath)));
} else {
if(logger.isDebugEnabled()) logger.debug("pp path " + expectedPPPath + " already present");
Path actualPPPath = ppPathsMap.get(expectedPPPath);
FileTime rawPathTime = Files.getLastModifiedTime(rawPath);
FileTime ppPathTime = Files.getLastModifiedTime(actualPPPath);
if(logger.isDebugEnabled()) logger.debug("Modification time of src " + rawPathTime.toString() + " and of pp file " + ppPathTime.toString());
if(rawPathTime.compareTo(ppPathTime) > 0) {
logger.debug("Raw file is newer than PP file for " + expectedPPPath);
ret.add(new PPMissingPaths(rawPath, context.getPaths().get(expectedPPPath)));
}
}
}
return ret;
}
@Override
public boolean consolidateOnShutdown() {
return consolidateOnShutdown;
}
private List<String> getPPExtensions() {
LinkedList<String> ret = new LinkedList<String>();
if(postProcessorUserArgs == null) return ret;
for(String postProcessorUserArg : postProcessorUserArgs) {
PostProcessor postProcessor = PostProcessors.findPostProcessor(postProcessorUserArg);
if(postProcessor == null) {
logger.error("Cannot find post processor for " + postProcessorUserArg);
continue;
}
String ppExt = "." + postProcessor.getExtension();
ret.add(ppExt);
}
return ret;
}
/* (non-Javadoc)
* @see org.epics.archiverappliance.StoragePlugin#renamePV(java.lang.String, java.lang.String, org.epics.archiverappliance.config.PVTypeInfo, org.epics.archiverappliance.config.PVTypeInfo)
*
* We need to do these things here
* <ol>
* <li>Move the files to the new location as determined from the new name.</li>
* <li>Change the name in the header/MessageInfo. Since the header is the first line in the PB file, we have to copy over to the new location.</li>
 * </ol>
 * Note this applies to the PB data and also any post processor data that is stored.
*/
@Override
public void renamePV(BasicContext context, String oldName, String newName) throws IOException {
// Copy data for the main pb file.
{
Path[] paths = PlainPBPathNameUtility.getAllPathsForPV(context.getPaths(), rootFolder, oldName, PB_EXTENSION, partitionGranularity, this.compressionMode, this.pv2key);
if(paths != null && paths.length > 0) {
for(Path path : paths) {
logger.debug("Copying over data from " + path.toString() + " to new pv " + newName);
PBFileInfo info = new PBFileInfo(path);
this.appendData(context, newName, new FileBackedPBEventStream(oldName, path, info.getType()));
}
}
}
// Copy data for the post processors...
for(String ppExt : getPPExtensions()) {
Path[] paths = PlainPBPathNameUtility.getAllPathsForPV(context.getPaths(), rootFolder, oldName, ppExt, partitionGranularity, this.compressionMode, this.pv2key);
if(paths != null && paths.length > 0) {
for(Path path : paths) {
logger.debug("Copying over data from " + path.toString() + " to new pv " + newName + " for extension " + ppExt);
PBFileInfo info = new PBFileInfo(path);
AppendDataStateData state = getAppendDataState(context, newName);
state.partitionBoundaryAwareAppendData(context, newName, new FileBackedPBEventStream(oldName, path, info.getType()), ppExt, null);
}
}
}
}
/* (non-Javadoc)
* @see org.epics.archiverappliance.StoragePlugin#convert(org.epics.archiverappliance.common.BasicContext, org.epics.archiverappliance.etl.ConversionFunction)
*
 * We find all the paths for this PV and apply the conversion function to each of them.
 * The converted data is written to temporary files which are then atomically moved over the originals.
*/
@Override
public void convert(BasicContext context, String pvName, ConversionFunction conversionFunction) throws IOException {
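// The conversion happens in two phases: first, converted streams are written to temporary files carrying a random suffix;
// then the temporary files are atomically moved over the originals so a failure mid-conversion does not corrupt the live data.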
// Convert data for the main pb file.
Random r = new Random();
int randomInt = r.nextInt();
String randSuffix = "_tmp_" + randomInt;
{
Path[] paths = PlainPBPathNameUtility.getAllPathsForPV(context.getPaths(), rootFolder, pvName, PB_EXTENSION, partitionGranularity, this.compressionMode, this.pv2key);
if(paths != null && paths.length > 0) {
for(Path path : paths) {
logger.info("Converting data in " + path.toString() + " for pv " + pvName);
PBFileInfo info = new PBFileInfo(path);
AppendDataStateData state = new AppendDataStateData(this.partitionGranularity, this.rootFolder, this.desc, new Timestamp(0), this.compressionMode, this.pv2key);
state.partitionBoundaryAwareAppendData(context, pvName, conversionFunction.convertStream(new FileBackedPBEventStream(pvName, path, info.getType())), PB_EXTENSION + randSuffix, null);
}
}
}
// Convert data for the post processors...
for(String ppExt : getPPExtensions()) {
Path[] paths = PlainPBPathNameUtility.getAllPathsForPV(context.getPaths(), rootFolder, pvName, ppExt, partitionGranularity, this.compressionMode, this.pv2key);
if(paths != null && paths.length > 0) {
for(Path path : paths) {
logger.info("Converting data in " + path.toString() + " for pv " + pvName + " for extension " + ppExt);
PBFileInfo info = new PBFileInfo(path);
AppendDataStateData state = new AppendDataStateData(this.partitionGranularity, this.rootFolder, this.desc, new Timestamp(0), this.compressionMode, this.pv2key);
state.partitionBoundaryAwareAppendData(context, pvName, conversionFunction.convertStream(new FileBackedPBEventStream(pvName, path, info.getType())), ppExt + randSuffix, null);
}
}
}
// Switch the files for the main pb file
{
Path[] paths = PlainPBPathNameUtility.getAllPathsForPV(context.getPaths(), rootFolder, pvName, PB_EXTENSION + randSuffix, partitionGranularity, this.compressionMode, this.pv2key);
if(paths != null && paths.length > 0) {
for(Path path : paths) {
Path destPath = context.getPaths().get(path.toString().replace(randSuffix, ""));
logger.info("Moving path " + path + " to " + destPath);
Files.move(path, destPath, StandardCopyOption.ATOMIC_MOVE);
}
}
}
// Switch the files for the post processors...
{
for(String ppExt : getPPExtensions()) {
Path[] paths = PlainPBPathNameUtility.getAllPathsForPV(context.getPaths(), rootFolder, pvName, ppExt + randSuffix, partitionGranularity, this.compressionMode, this.pv2key);
if(paths != null && paths.length > 0) {
for(Path path : paths) {
Path destPath = context.getPaths().get(path.toString().replace(randSuffix, ""));
logger.info("Moving path " + path + " to " + destPath);
Files.move(path, destPath, StandardCopyOption.ATOMIC_MOVE);
}
}
}
}
}
}