package org.wikibrain.loader.pipeline; import com.typesafe.config.Config; import net.sourceforge.jeval.EvaluationException; import net.sourceforge.jeval.Evaluator; import org.apache.commons.lang3.ArrayUtils; import org.wikibrain.core.lang.Language; import org.wikibrain.core.lang.LanguageInfo; import org.wikibrain.core.lang.LanguageSet; import org.wikibrain.core.model.MetaInfo; import org.wikibrain.utils.JvmUtils; import java.io.IOException; import java.util.*; /** * @author Shilad Sen */ public class PipelineStage { /** * Name of the stage */ private final String name; /** * The class whose main method should be run for this stage */ private final Class klass; /** * Stages required to be run before this stage. */ private List<PipelineStage> dependsOn = new ArrayList<PipelineStage>(); /** * (One of) the class that is loaded during this stage. */ private final String loadsClass; /** * Stage-specific args that should be appended to any standard args. */ private final String extraArgs[]; // Explicit user request, if it exists private Boolean shouldRun; // Explicit arguments requested by user; takes precidence over extraArgs. private String [] argsOverride; /** * Information about what was loaded for this stage at the beginning of Pipeline execution. */ private MetaInfo loadedInfo; /** * If true, don't actually run things. Just record what you would have run. */ private boolean dryRun = false; /** * Arguments used during the previous run. * Null indicates that the stage was not run. */ private String actualArgs[] = null; /** * Whether or not the stage has already been run this Pipeline execution. */ private boolean hasBeenRun = false; /** * Time the stage started. */ private Date startTime = null; /** * Time the stage required. */ private double elapsedSeconds = 0; /** * Whether the stage succeded or failed */ private Boolean succeeded = null; /** * Equation used to estimate the time required for a particular stage. */ private final String timeEstimateEquation; /** * Equation used to estimate the disk space required for a particular stage in MBs. */ private final String diskEstimateEquation; /** * Equation used to estimate the disk space required for a particular stage in MBs. */ private final String downloadEstimateEquation; public PipelineStage(Config config, Collection<PipelineStage> previousStages, Map<String, MetaInfo> loadedInfo) throws ClassNotFoundException { this.name = config.getString("name"); this.klass = Class.forName(config.getString("class")); this.extraArgs = config.getStringList("extraArgs").toArray(new String[0]); this.loadsClass = config.hasPath("loadsClass") ? config.getString("loadsClass") : null; if (config.hasPath("dependsOnStage")) { Object obj = config.getAnyRef("dependsOnStage"); if (obj instanceof String) { dependsOn.add(getStage(previousStages, (String)obj)); } else if (obj instanceof List) { for (String s : (List<String>)obj) { dependsOn.add(getStage(previousStages, s)); } } else { throw new IllegalArgumentException("Invalid dependsOn value for pipeline stage " + name + ": " + obj); } } this.timeEstimateEquation = config.getString("runtime"); this.diskEstimateEquation = config.getString("diskSpace"); if (config.hasPath("downloadSize")) { this.downloadEstimateEquation = config.getString("downloadSize"); } else { this.downloadEstimateEquation = "0.0"; } this.loadedInfo = loadsClass == null ? null : loadedInfo.get(loadsClass); } public void setOverrideOptions(Boolean run, String args[]) { this.shouldRun = run; this.argsOverride = args; } public boolean isNeeded(boolean forceRerun) { if (hasBeenRun()) { // if run this execution cycle, skip return false; } else if (shouldRun != null && !shouldRun) { // if user said not to run, skip return false; } else if (forceRerun) { // if we should rerun everything, rerun return true; } else { // check to see if the class is loaded return loadedInfo == null || loadedInfo.getNumRecords() == 0; } } public void runWithDependenciesIfNeeded(String [] cmdLineArgs, boolean forceRerun) throws IOException, InterruptedException, StageFailedException { for (PipelineStage stage : dependsOn) { stage.runWithDependenciesIfNeeded(cmdLineArgs, forceRerun); } if (isNeeded(forceRerun)) { run(cmdLineArgs); } } public void run(String [] cmdLineArgs) throws IOException, InterruptedException, StageFailedException { if (argsOverride == null) { actualArgs = ArrayUtils.addAll(cmdLineArgs, extraArgs); } else { actualArgs = ArrayUtils.addAll(cmdLineArgs, argsOverride); } if (!dryRun) { startTime = new Date(); long before = System.currentTimeMillis(); Process p = JvmUtils.launch(klass, actualArgs); int retVal = p.waitFor(); if (retVal != 0) { hasBeenRun = true; succeeded = false; throw new StageFailedException(this, retVal); } succeeded = true; long after = System.currentTimeMillis(); elapsedSeconds = (after - before) / 1000.0; } hasBeenRun = true; } public void setDryRun(boolean dryRun) { reset(); this.dryRun = dryRun; } public String getName() { return name; } public Class getKlass() { return klass; } public boolean hasBeenRun() { return hasBeenRun; } public Boolean getShouldRun() { return shouldRun; } @Override public String toString() { String deps = new String(); for (PipelineStage s : dependsOn) { if (deps.length() > 0) { deps += ", "; } deps += s; } return "PipelineStage{" + "name='" + name + '\'' + ", klass=" + klass + ", dependsOn=" + deps + ", loadsClass='" + loadsClass + '\'' + ", extraArgs=" + Arrays.toString(extraArgs) + ", shouldRun=" + shouldRun + ", argsOverride=" + Arrays.toString(argsOverride) + ", loadedInfo=" + loadedInfo + ", hasBeenRun=" + hasBeenRun + '}'; } public void reset() { dryRun = false; hasBeenRun = false; argsOverride = null; } public String[] getActualArgs() { return actualArgs; } public Date getStartTime() { return startTime; } public double getElapsedSeconds() { return elapsedSeconds; } public Boolean getSucceeded() { return succeeded; } public double estimateSeconds(LanguageSet langs) { int numArticles = 0; int numLinks = 0; for (Language lang : langs) { LanguageInfo li = LanguageInfo.getByLanguage(lang); numLinks += li.getNumLinks(); numArticles += li.getNumArticles(); } Evaluator mathEvaluator = new Evaluator(); Map<String, String> variables = new HashMap<String, String>(); variables.put("singleCoreSpeed", ""+CpuBenchmarker.getSingleCoreSpeed()); variables.put("multiCoreSpeed", ""+CpuBenchmarker.getMultiCoreSpeed()); variables.put("links", ""+numLinks); variables.put("articles", ""+numArticles); mathEvaluator.setVariables(variables); try { return mathEvaluator.getNumberResult(timeEstimateEquation); } catch (EvaluationException e) { throw new RuntimeException(e); } } public double estimateDiskMegabytes(LanguageSet langs) { int numArticles = 0; int numLinks = 0; for (Language lang : langs) { LanguageInfo li = LanguageInfo.getByLanguage(lang); numLinks += li.getNumLinks(); numArticles += li.getNumArticles(); } Evaluator mathEvaluator = new Evaluator(); Map<String, String> variables = new HashMap<String, String>(); variables.put("links", ""+numLinks); variables.put("articles", ""+numArticles); mathEvaluator.setVariables(variables); try { return mathEvaluator.getNumberResult(diskEstimateEquation); } catch (EvaluationException e) { throw new RuntimeException(e); } } public double estimateDownloadMegabytes(LanguageSet langs) { int numArticles = 0; int numLinks = 0; for (Language lang : langs) { LanguageInfo li = LanguageInfo.getByLanguage(lang); numLinks += li.getNumLinks(); numArticles += li.getNumArticles(); } Evaluator mathEvaluator = new Evaluator(); Map<String, String> variables = new HashMap<String, String>(); variables.put("links", ""+numLinks); variables.put("articles", ""+numArticles); mathEvaluator.setVariables(variables); try { return mathEvaluator.getNumberResult(downloadEstimateEquation); } catch (EvaluationException e) { throw new RuntimeException(e); } } private PipelineStage getStage(Collection<PipelineStage> previousStages, String stage) { for (PipelineStage s : previousStages) { if (s.name.equalsIgnoreCase(stage)) { return s; } } throw new IllegalArgumentException("Unknown pipeline stage: " + stage); } }