package org.epics.archiverappliance.etl;
import java.io.File;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.sql.Timestamp;
import java.text.DecimalFormat;
import java.util.ArrayList;

import junit.framework.TestCase;

import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.epics.archiverappliance.common.BasicContext;
import org.epics.archiverappliance.common.TimeUtils;
import org.epics.archiverappliance.common.YearSecondTimestamp;
import org.epics.archiverappliance.config.ArchDBRTypes;
import org.epics.archiverappliance.config.ConfigServiceForTests;
import org.epics.archiverappliance.config.PVTypeInfo;
import org.epics.archiverappliance.config.StoragePluginURLParser;
import org.epics.archiverappliance.config.exception.AlreadyRegisteredException;
import org.epics.archiverappliance.data.ScalarValue;
import org.epics.archiverappliance.engine.membuf.ArrayListEventStream;
import org.epics.archiverappliance.etl.bpl.reports.ApplianceMetricsDetails;
import org.epics.archiverappliance.retrieval.RemotableEventStreamDesc;
import org.epics.archiverappliance.utils.simulation.SimulationEvent;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import edu.stanford.slac.archiverappliance.PlainPB.PlainPBStoragePlugin;
/**
* An ETL benchmark. Generate some data for PVs and then time the movement to the next store.
* Use the following exports to control ETL src and ETL dest.
* <pre>
* export ARCHAPPL_SHORT_TERM_FOLDER=/dev/shm/test
* export ARCHAPPL_MEDIUM_TERM_FOLDER=/scratch/LargeDisk/ArchiverStore
* </pre>
*
* @author mshankar
*
*/
public class ETLTimeTest extends TestCase {
	// final: the logger reference is never reassigned after class initialization.
	private static final Logger logger = Logger.getLogger(ETLTimeTest.class.getName());
	String shortTermFolderName = ConfigServiceForTests.getDefaultShortTermFolder() + "/shortTerm";
	String mediumTermFolderName = ConfigServiceForTests.getDefaultPBTestFolder() + "/mediumTerm";
	PlainPBStoragePlugin storageplugin1;
	PlainPBStoragePlugin storageplugin2;
	short currentYear = TimeUtils.getCurrentYear();
	ArchDBRTypes type = ArchDBRTypes.DBR_SCALAR_DOUBLE;
	private ConfigServiceForTests configService;

	/**
	 * Start from a clean slate: wipe and recreate both folders, then create an
	 * hourly-partitioned source (STS) plugin and a yearly-partitioned destination (MTS) plugin.
	 */
	@Before
	public void setUp() throws Exception {
		configService = new ConfigServiceForTests(new File("./bin"));
		// Delete any leftovers from a previous run so the file counts/sizes measured below are accurate.
		if(new File(shortTermFolderName).exists()) {
			FileUtils.deleteDirectory(new File(shortTermFolderName));
		}
		if(new File(mediumTermFolderName).exists()) {
			FileUtils.deleteDirectory(new File(mediumTermFolderName));
		}
		new File(shortTermFolderName).mkdirs();
		new File(mediumTermFolderName).mkdirs();
		storageplugin1 = (PlainPBStoragePlugin) StoragePluginURLParser.parseStoragePlugin("pb://localhost?name=STS&rootFolder=" + shortTermFolderName + "/&partitionGranularity=PARTITION_HOUR", configService);
		storageplugin2 = (PlainPBStoragePlugin) StoragePluginURLParser.parseStoragePlugin("pb://localhost?name=MTS&rootFolder=" + mediumTermFolderName + "/&partitionGranularity=PARTITION_YEAR", configService);
	}

	@After
	public void tearDown() throws Exception {
		configService.shutdownNow();
	}

	/**
	 * Register 200K PVs with STS+MTS data stores, generate up to an hour's worth of
	 * data per PV in the short term store, then time a single manual ETL run that
	 * moves everything to the medium term store. Logs throughput estimates and
	 * asserts that every file moved out of the source folder.
	 *
	 * @throws AlreadyRegisteredException if a PV is registered twice (should not happen with unique names)
	 * @throws IOException               on data generation or file-walk failures
	 * @throws InterruptedException      if waiting on the external sync process is interrupted
	 */
	@Test
	public void testTime() throws AlreadyRegisteredException, IOException, InterruptedException {
		ArrayList<String> pvs = new ArrayList<String>();
		for(int i = 0; i < 200000; i++) {
			// 200 PVs share each "table" prefix in the generated PV names.
			int tableName = i / 200;
			String pvName = "ArchUnitTest" + tableName + ":ETLTimeTest" + i;
			PVTypeInfo typeInfo = new PVTypeInfo(pvName, ArchDBRTypes.DBR_SCALAR_DOUBLE, true, 1);
			String[] dataStores = new String[] { storageplugin1.getURLRepresentation(), storageplugin2.getURLRepresentation() };
			typeInfo.setDataStores(dataStores);
			configService.updateTypeInfoForPV(pvName, typeInfo);
			configService.registerPVToAppliance(pvName, configService.getMyApplianceInfo());
			pvs.add(pvName);
		}
		// Take over ETL scheduling so the single timed run below is the only ETL activity.
		configService.getETLLookup().manualControlForUnitTests();

		logger.info("Generating data for " + pvs.size() + " pvs");
		for(int m = 0; m < pvs.size(); m++) {
			String pvnameTemp = pvs.get(m);
			try(BasicContext context = new BasicContext()) {
				// Generate subset of data for one hour. We vary the amount of data we generate to mimic LCLS distribution...
				int totalNum = 1;
				if(m < 500) {
					totalNum = 10*60*60; // first 500 PVs: one event every 10Hz-tick for an hour
				} else if(m < 5000) {
					totalNum = 60*60; // next 4500 PVs: one event per second for an hour
				}
				ArrayListEventStream testData = new ArrayListEventStream(totalNum, new RemotableEventStreamDesc(type, pvnameTemp, currentYear));
				for(int s = 0; s < totalNum; s++) {
					testData.add(new SimulationEvent(s*10, currentYear, type, new ScalarValue<Double>((double) s*10)));
				}
				storageplugin1.appendData(context, pvnameTemp, testData);
			}
		}
		logger.info("Done generating data for " + pvs.size() + " pvs");

		// Measure how much data is about to be moved, before ETL empties the source.
		CountFiles sizeVisitor = new CountFiles();
		Files.walkFileTree(Paths.get(shortTermFolderName), sizeVisitor);

		long time1 = System.currentTimeMillis();
		// Pretend it is well into next year so every hourly partition is eligible for ETL.
		YearSecondTimestamp yts = new YearSecondTimestamp((short) (currentYear+1), 6*60*24*10+100, 0);
		Timestamp etlTime = TimeUtils.convertFromYearSecondTimestamp(yts);
		logger.info("Running ETL as if it was " + TimeUtils.convertToHumanReadableString(etlTime));
		ETLExecutor.runETLs(configService, etlTime);
		// Flush OS write caches so the measured interval includes the physical writes.
		for(int i = 0; i < 5; i++) {
			logger.info("Calling sync " + i);
			ProcessBuilder pBuilder = new ProcessBuilder("sync");
			pBuilder.inheritIO();
			int exitValue = pBuilder.start().waitFor();
			assertTrue("Nonzero exit from sync " + exitValue, exitValue == 0);
		}
		long time2 = System.currentTimeMillis();

		DecimalFormat twoSignificantDigits = new DecimalFormat("###,###,###,###,###,###.##");
		// Extrapolate the measured wall-clock time to the full 200K-PV workload
		// (renamed from hundredKPVEstimateTimeSecs - the estimate is for 200K PVs, per the log message).
		double estimated200KPVTimeSecs = ((time2-time1)/1000.0)*(200000.0/pvs.size());
		double dataSizeInGBPerHour = sizeVisitor.totalSize/(1024.0*1024.0*1024.0);
		double fudgeFactor = 5.0; // Inner sectors; read/write; varying event rates etc.
		logger.info("Time for moving "
				+ pvs.size() + " pvs"
				+ " with data " + twoSignificantDigits.format(dataSizeInGBPerHour) + "(GB/Hr) and " + twoSignificantDigits.format(dataSizeInGBPerHour*24) + "(GB/day)"
				+ " from " + shortTermFolderName
				+ " to " + mediumTermFolderName
				+ " in " + (time2-time1) + "(ms)."
				+ " Estimated time for 200K PVs is " + twoSignificantDigits.format(estimated200KPVTimeSecs) + "(s)"
				+ " Estimated capacity consumed = " + twoSignificantDigits.format(estimated200KPVTimeSecs*100*fudgeFactor/3600.0));

		// No pb files should exist in short term folder...
		CountFiles postETLSrcVisitor = new CountFiles();
		Files.walkFileTree(Paths.get(shortTermFolderName), postETLSrcVisitor);
		CountFiles postETLDestVisitor = new CountFiles();
		Files.walkFileTree(Paths.get(mediumTermFolderName), postETLDestVisitor);
		assertTrue("We have some files that have not moved " + postETLSrcVisitor.filesPresent, postETLSrcVisitor.filesPresent == 0);
		assertTrue("Dest file count " + postETLDestVisitor.filesPresent + " is not the same as PV count " + pvs.size(), postETLDestVisitor.filesPresent == pvs.size());
		// Clean up only on success; otherwise leave the folders behind for post-mortem debugging.
		if(postETLSrcVisitor.filesPresent == 0) {
			FileUtils.deleteDirectory(new File(shortTermFolderName));
			FileUtils.deleteDirectory(new File(mediumTermFolderName));
		}
		logger.info(ApplianceMetricsDetails.getETLMetricsDetails(configService));
	}
}
/**
 * File-tree visitor that counts regular files and accumulates their total size in bytes.
 * Extends {@link SimpleFileVisitor} instead of implementing {@link FileVisitor} directly,
 * so the no-op directory callbacks need not be spelled out.
 */
class CountFiles extends SimpleFileVisitor<Path> {
	/** Number of files visited so far. */
	public long filesPresent = 0;
	/** Sum of the sizes (in bytes) of the files visited so far. */
	public long totalSize = 0;

	@Override
	public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
		filesPresent++;
		totalSize += Files.size(file);
		return FileVisitResult.CONTINUE;
	}

	@Override
	public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
		// Unlike SimpleFileVisitor's default (rethrow), keep walking past unreadable
		// files so we still get a best-effort count - matches the original behavior.
		return FileVisitResult.CONTINUE;
	}
}