/**
*
*/
package edu.washington.escience.myria.perfenforce;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.channels.FileChannel;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.slf4j.LoggerFactory;
import com.google.common.base.Joiner;
import edu.washington.escience.myria.api.encoding.PerfEnforceQueryMetadataEncoding;
import edu.washington.escience.myria.parallel.Server;
/**
* This class focuses on running the learning algorithm for PerfEnforce
*/
public class PerfEnforceOnlineLearning {
private final Server server;
List<String> previousDataPoints;
private PerfEnforceQueryMetadataEncoding currentQuery;
private final String onlineLearningPath;
private final Double[] queryPredictions;
private int currentConfiguration;
private int selectedTier;
private int queryCounter;
protected static final org.slf4j.Logger LOGGER =
LoggerFactory.getLogger(PerfEnforceOnlineLearning.class);
/**
* The constructor for the PerfEnforceOnlineLearning class
*
* @param server an instance of the server class
* @param tier the tier selected by the user
*/
public PerfEnforceOnlineLearning(final Server server, final int tier) {
selectedTier = tier;
currentConfiguration = PerfEnforceDriver.configurations.get(tier);
currentQuery = new PerfEnforceQueryMetadataEncoding();
queryPredictions = new Double[PerfEnforceDriver.configurations.size()];
onlineLearningPath =
PerfEnforceDriver.configurationPath.resolve("PerfEnforceScaling").toString();
previousDataPoints = new ArrayList<String>();
this.server = server;
}
/**
* Replaces the query's reference to the fact table based on the current cluster configuration
*
* @param queryText the query that the user will run
* @param configuration the current configuration
* @return the final query string after modifying the current cluster size
*/
public String convertQueryForConfiguration(String queryText, int configuration) {
String convertedQuery = "";
String factTableName = PerfEnforceDriver.factTableDesc.relationKey.getRelationName();
if (queryText.contains(factTableName)) {
convertedQuery = queryText.replace(factTableName, factTableName + configuration);
}
return convertedQuery;
}
/**
* Finds the SLA for a given query
*
* @param querySQL the query from the user
* @throws PerfEnforceException if there is an error computing the query's SLA
*/
public void findSLA(final String querySQL) throws PerfEnforceException {
String pslaPath = PerfEnforceDriver.configurationPath.resolve("PSLAGeneration").toString();
int currentConfigurationSize = PerfEnforceDriver.configurations.get(selectedTier);
String currentQueryForConfiguration =
convertQueryForConfiguration(querySQL, currentConfigurationSize);
String currentQueryFeatures =
PerfEnforceUtils.getMaxFeature(
server, currentQueryForConfiguration, currentConfigurationSize);
try (PrintWriter featureWriter =
new PrintWriter(Paths.get(pslaPath, "current-q-features.arff").toString(), "UTF-8")) {
featureWriter.write("@relation testing \n");
featureWriter.write("@attribute numberTables numeric \n");
featureWriter.write("@attribute postgesEstCostMin numeric \n");
featureWriter.write("@attribute postgesEstCostMax numeric \n");
featureWriter.write("@attribute postgesEstNumRows numeric \n");
featureWriter.write("@attribute postgesEstWidth numeric \n");
featureWriter.write("@attribute numberOfWorkers numeric \n");
featureWriter.write("@attribute realTime numeric \n");
featureWriter.write("\n");
featureWriter.write("@data \n");
featureWriter.write(currentQueryFeatures + "\n");
featureWriter.close();
// predict the runtime
String[] cmd = {
"java",
"-classpath",
Paths.get(pslaPath, "weka.jar").toString(),
"weka.classifiers.rules.M5Rules",
"-M",
"4.0",
"-t",
Paths.get(pslaPath, "training.arff").toString(),
"-T",
Paths.get(pslaPath, "current-q-features.arff").toString(),
"-p",
"0",
"-classifications",
"weka.classifiers.evaluation.output.prediction.CSV -file \""
+ Paths.get(pslaPath, "current-q-results.txt").toString()
+ "\""
};
Process p = Runtime.getRuntime().exec(cmd);
try (BufferedReader reader = new BufferedReader(new InputStreamReader(p.getInputStream()))) {
while ((reader.readLine()) != null) {}
}
String querySLA = "";
try (BufferedReader predictionReader =
new BufferedReader(
new FileReader(Paths.get(pslaPath, "current-q-results.txt").toString()));) {
predictionReader.readLine();
querySLA = predictionReader.readLine().split(",")[2];
}
currentQuery =
new PerfEnforceQueryMetadataEncoding(
queryCounter, Double.parseDouble(querySLA), querySQL);
} catch (Exception e) {
throw new PerfEnforceException("Error finding SLA");
}
}
/**
* Determines the best configuration size for currentQuery
* @throws PerfEnforceException if there is an error selecting the best configuration size
*/
public void findBestConfigurationSize() throws PerfEnforceException {
try {
for (int c : PerfEnforceDriver.configurations) {
int currentConfigurationSize = c;
String currentQueryForConfiguration =
convertQueryForConfiguration(currentQuery.getQueryText(), currentConfigurationSize);
String currentQueryFeatures =
PerfEnforceUtils.getMaxFeature(
server, currentQueryForConfiguration, currentConfigurationSize);
FileWriter featureWriterForConfiguration;
featureWriterForConfiguration =
new FileWriter(Paths.get(onlineLearningPath, "features", String.valueOf(c)).toString());
featureWriterForConfiguration.write(currentQueryFeatures + '\n');
featureWriterForConfiguration.close();
}
} catch (Exception e) {
throw new PerfEnforceException("Error selecting best configuration size");
}
List<Thread> threadList = new ArrayList<Thread>();
for (int i = 0; i < PerfEnforceDriver.configurations.size(); i++) {
final int configurationIndex = i;
Thread thread =
new Thread(
new Runnable() {
@Override
public void run() {
try {
trainOnlineQueries(configurationIndex);
} catch (Exception e) {
e.printStackTrace();
}
}
});
threadList.add(thread);
}
for (Thread t : threadList) {
t.start();
}
for (Thread t : threadList) {
try {
t.join();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
double maxScore = 0;
int winnerIndex = 0;
for (int currentState = 0;
currentState < PerfEnforceDriver.configurations.size();
currentState++) {
double onlinePrediction = queryPredictions[currentState];
onlinePrediction = (onlinePrediction < 0) ? 0 : onlinePrediction;
double currentRatio = 0;
if (currentQuery.slaRuntime == 0) {
currentRatio = onlinePrediction / 1;
} else {
currentRatio = onlinePrediction / currentQuery.slaRuntime;
}
double currentScore = closeToOneScore(currentRatio);
if (currentScore > maxScore) {
winnerIndex = currentState;
maxScore = currentScore;
}
}
currentConfiguration = PerfEnforceDriver.configurations.get(winnerIndex);
}
/**
* Given a configuration size, this method predicts the runtime of the currentQuery
*
* @param configurationIndex the configuration size in consideration
* @throws PerfEnforceException if there is an error during the training phase
*/
public void trainOnlineQueries(final int configurationIndex) throws PerfEnforceException {
String MOAFileName = Paths.get(onlineLearningPath, "moa.jar").toString();
String trainingFileName = Paths.get(onlineLearningPath, "training.arff").toString();
String modifiedTrainingFileName =
Paths.get(onlineLearningPath, "training-modified-" + configurationIndex + ".arff")
.toString();
String predictionsFileName =
Paths.get(onlineLearningPath, "predictions" + configurationIndex + ".txt").toString();
try {
PrintWriter outputWriter = new PrintWriter(modifiedTrainingFileName);
outputWriter.close();
PrintWriter predictionWriter = new PrintWriter(predictionsFileName);
predictionWriter.close();
// copy training file to new file
FileChannel src = new FileInputStream(trainingFileName).getChannel();
FileChannel dest = new FileOutputStream(modifiedTrainingFileName).getChannel();
dest.transferFrom(src, 0, src.size());
src.close();
dest.close();
// Append all previous data points
FileWriter appendDataWriter = new FileWriter(modifiedTrainingFileName, true);
for (String s : previousDataPoints) {
appendDataWriter.write(s + "\n");
}
// Append the current point
String newPoint = getQueryFeature(configurationIndex, 0);
appendDataWriter.write(newPoint + "\n");
appendDataWriter.close();
String moaCommand =
String.format(
"EvaluatePrequentialRegression -l (rules.functions.Perceptron -d -l %s) -s (ArffFileStream -f %s) -e (WindowRegressionPerformanceEvaluator -w 1) -f 1 -o %s",
.04,
modifiedTrainingFileName,
predictionsFileName);
String[] arrayCommand =
new String[] {"java", "-classpath", MOAFileName, "moa.DoTask", moaCommand};
Process p = Runtime.getRuntime().exec(arrayCommand);
try (BufferedReader reader = new BufferedReader(new InputStreamReader(p.getInputStream()))) {
while ((reader.readLine()) != null) {}
}
parsingOnlineFile(configurationIndex, predictionsFileName);
} catch (Exception e) {
throw new PerfEnforceException("Error during training");
}
}
/**
* Helps parse the output file from MOA to read the runtime prediction
*
* @param configurationIndex the configuration size in consideration
* @param predictionFileName the file that contains the prediction
* @throws PerfEnforceException if there is an error from the parsing the predictions output
*/
public void parsingOnlineFile(final int configurationIndex, final String predictionFileName)
throws PerfEnforceException {
try (BufferedReader streamReader = new BufferedReader(new FileReader(predictionFileName))) {
String currentLine = "";
double nextQueryPrediction = 0;
while ((currentLine = streamReader.readLine()) != null) {
nextQueryPrediction = Double.parseDouble((currentLine.split(",")[0]).split(":")[1]);
}
queryPredictions[configurationIndex] = nextQueryPrediction;
} catch (Exception e) {
throw new PerfEnforceException("Error parsing online predictions file");
}
}
/**
* Returns features for a particular query
*
* @param configurationIndex the configuration size in consideration
* @param queryRuntime the runtime of the query
* @return the string containing features for a query
* @throws PerfEnforceException if there is an error collecting the query features
*/
public String getQueryFeature(final int configurationIndex, final double queryRuntime)
throws PerfEnforceException {
String featureFilePath =
Paths.get(
onlineLearningPath,
"features",
String.valueOf(PerfEnforceDriver.configurations.get(configurationIndex)))
.toString();
try (BufferedReader featureReader = new BufferedReader(new FileReader(featureFilePath))) {
String result = featureReader.readLine();
if (queryRuntime != 0) {
String[] parts = result.split(",");
result = Joiner.on(",").join(Arrays.copyOfRange(parts, 0, parts.length - 1));
result += "," + queryRuntime;
}
return result;
} catch (Exception e) {
throw new PerfEnforceException("Error collecting query feature");
}
}
/**
* Returns a score to determine the distance between the real runtime and the SLA runtime
*
* @param ratio the ratio between the real runtime and the SLA runtime
*/
public double closeToOneScore(final double ratio) {
if (ratio == 1.0) {
return Double.MAX_VALUE;
} else {
return Math.abs(1 / (ratio - 1.0));
}
}
/**
* Records the real runtime of the query. Used for learning.
*
* @param queryRuntime the runtime of the query
*/
public void recordRealRuntime(final double queryRuntime) throws PerfEnforceException {
previousDataPoints.add(
getQueryFeature(
PerfEnforceDriver.configurations.indexOf(currentConfiguration), queryRuntime));
}
/**
* Returns metadata about the current query
*
*/
public PerfEnforceQueryMetadataEncoding getCurrentQuery() {
return currentQuery;
}
/**
* Gets the current cluster size
*
*/
public int getClusterSize() {
return currentConfiguration;
}
/**
* Returns the tier selected by the user
*
*/
public int getSelectedTier() {
return selectedTier;
}
}