/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.core.workflow; import static com.google.common.base.Preconditions.checkState; import static fr.ens.biologie.genomique.eoulsan.EoulsanLogger.getLogger; import static fr.ens.biologie.genomique.eoulsan.Globals.STEP_RESULT_EXTENSION; import static fr.ens.biologie.genomique.eoulsan.core.Step.StepState.ABORTED; import static fr.ens.biologie.genomique.eoulsan.core.Step.StepState.DONE; import static fr.ens.biologie.genomique.eoulsan.core.Step.StepState.FAILED; import static fr.ens.biologie.genomique.eoulsan.core.Step.StepState.READY; import static fr.ens.biologie.genomique.eoulsan.core.Step.StepState.WORKING; import static fr.ens.biologie.genomique.eoulsan.core.Step.StepType.DESIGN_STEP; import static fr.ens.biologie.genomique.eoulsan.core.Step.StepType.GENERATOR_STEP; import static fr.ens.biologie.genomique.eoulsan.core.Step.StepType.STANDARD_STEP; import static java.util.Objects.requireNonNull; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; import com.google.common.base.Joiner; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Multimap; import fr.ens.biologie.genomique.eoulsan.Common; import fr.ens.biologie.genomique.eoulsan.EoulsanLogger; import fr.ens.biologie.genomique.eoulsan.EoulsanRuntime; import fr.ens.biologie.genomique.eoulsan.EoulsanRuntimeException; import fr.ens.biologie.genomique.eoulsan.Globals; import fr.ens.biologie.genomique.eoulsan.core.FileNaming; import fr.ens.biologie.genomique.eoulsan.core.InputPort; import fr.ens.biologie.genomique.eoulsan.core.Naming; import fr.ens.biologie.genomique.eoulsan.core.OutputPort; import fr.ens.biologie.genomique.eoulsan.core.Step.StepState; import fr.ens.biologie.genomique.eoulsan.core.Step.StepType; import fr.ens.biologie.genomique.eoulsan.core.schedulers.TaskScheduler; import fr.ens.biologie.genomique.eoulsan.core.schedulers.TaskSchedulerFactory; import fr.ens.biologie.genomique.eoulsan.data.Data; import fr.ens.biologie.genomique.eoulsan.data.DataFile; import fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol; import fr.ens.biologie.genomique.eoulsan.design.Design; import fr.ens.biologie.genomique.eoulsan.design.Sample; /** * This class define a token manager for a step. * @author Laurent Jourdren * @since 2.0 */ public class TokenManager implements Runnable { private static final int CHECKING_DELAY_MS = 1000; private final AbstractStep step; private final TaskScheduler scheduler; private final StepInputPorts inputPorts; private final StepOutputPorts outputPorts; private final Set<Integer> receivedTokens = new HashSet<>(); private final Multimap<InputPort, Data> inputTokens = ArrayListMultimap.create(); private final Multimap<OutputPort, Data> outputTokens = ArrayListMultimap.create(); private final Set<InputPort> closedPorts = new HashSet<>(); private final Set<ImmutableMap<InputPort, Data>> cartesianProductsUsed = new HashSet<>(); private final Set<Data> failedOutputDataToRemove = new HashSet<>(); private volatile boolean endOfStep; private boolean isStarted; // // Getters // /** * Test if there is no token to be received by the token manager. * @return true if no token to be received by the token manager */ public boolean isNoTokenToReceive() { return this.inputPorts.size() == this.closedPorts.size(); } /** * Get the number of context created by the token manager. * @return the number of context created by the token manager */ public int getContextCount() { return this.inputPorts.size() == 0 ? 1 : this.cartesianProductsUsed.size(); } /** * Test if this is the end of the step. * @return true if this is the end of the step */ public boolean isEndOfStep() { return this.endOfStep; } // // Port checking methods // /** * Check if all the ports has received some data. * @return true if all the ports has received some data */ private boolean checkIfAllPortsHasReceivedSomeData() { for (StepInputPort port : this.inputPorts) { if (this.inputTokens.get(port).isEmpty()) { return false; } } return true; } /** * Check if all the list ports are closed. * @return true if all the list ports are closed */ private boolean checkIfAllListPortsAreClosed() { for (StepInputPort port : this.inputPorts) { if (!port.isList()) { break; } if (!this.closedPorts.contains(port)) { return false; } } return true; } // // Token handling methods // /** * Log the token that are send by the step. * @param outputPort the output port * @param token the token sent */ public void logSendingToken(final StepOutputPort outputPort, final Token token) { Objects.requireNonNull(token); Objects.requireNonNull(outputPort); // Test if the token is an end token if (!token.isEndOfStepToken()) { // Get the data final Data data = token.getData(); synchronized (this.outputTokens) { for (Data e : data.getListElements()) { this.outputTokens.put(outputPort, e); } } // Create compatibility link for result files if (EoulsanRuntime.getSettings() .getBooleanSetting("debug.compatibility.result.file.links") && this.step.getType() == StepType.STANDARD_STEP) { createCompatibilityLinkResultFiles(data); } // Create symbolic links in output directory createSymlinksInOutputDirectory(data); // Get the metadata storage final DataMetadataStorage metadataStorage = DataMetadataStorage .getInstance(this.step.getAbstractWorkflow().getOutputDirectory()); // Store token metadata only if step is not skipped if (!this.step.isSkip()) { metadataStorage.saveMetaData(data); } } } /** * This method allow to create symbolic link on step result file with the same * name as in Eoulsan 1.x. * @param data the data */ private void createCompatibilityLinkResultFiles(final Data data) { requireNonNull(data, "data argument cannot be null"); for (Data e : data.getListElements()) { // Get the sample id from metadata final int sampleNumber = e.getMetadata().getSampleNumber(); // Do nothing if sample id is not set in metadata if (sampleNumber == -1) { continue; } // For all data for (DataFile f : WorkflowDataUtils.getDataFiles(e)) { try { final DataFile parentDir = f.getParent(); // Do something only if local file if (!parentDir.isLocalFile()) { continue; } // Parse the filename final FileNaming name = FileNaming.parse(f.getName()); name.setSampleNumber(sampleNumber); // Create link name final DataFile link = new DataFile(parentDir, name.compatibilityFilename()); // Create link only if not already exists if (!link.exists()) { f.symlink(link, true); } } catch (IOException exp) { getLogger().warning( "Error while creating compatibility link: " + exp.getMessage()); } } } } /** * Create symbolic links in output directory. * @param data the data */ private void createSymlinksInOutputDirectory(final Data data) { requireNonNull(data, "data argument cannot be null"); final DataFile outputDir = this.step.getAbstractWorkflow().getOutputDirectory(); final DataFile workingDir = this.step.getStepOutputDirectory(); // Nothing to to if the step working directory is the output directory if (this.step.getType() == DESIGN_STEP || outputDir.equals(workingDir)) { return; } for (Data dataElement : data.getListElements()) { for (DataFile file : WorkflowDataUtils.getDataFiles(dataElement)) { final DataFile link = new DataFile(outputDir, file.getName()); try { // Remove existing symlink if (link.exists(false)) { if (link.getMetaData().isSymbolicLink()) { link.delete(); } else { throw new IOException(); } } // Create symbolic link file.symlink(link, true); } catch (IOException e) { EoulsanLogger.getLogger() .severe("Cannot create symbolic link: " + link); } } } } /** * Post a token to the the token manager. * @param inputPort port where the token must be posted * @param token the token to post */ public void postToken(final StepInputPort inputPort, final Token token) { Objects.requireNonNull(token); Objects.requireNonNull(inputPort); // Check origin step state final StepState originStepState = token.getOrigin().getStep().getState(); checkState(originStepState.isWorkingState() || originStepState == DONE, "Invalid token step origin state: " + originStepState); // Check if token has already been processed checkState(!this.receivedTokens.contains(token.getId()), "Token has been already received: " + token.getId()); // Check if the input is linked to the step checkState(this.inputPorts.contains(inputPort), "Unknown port: " + inputPort); // Check if the origin of the token and the input port are linked checkState(inputPort.getLink() == token.getOrigin(), "The input port (" + inputPort + ") and the output port (" + token.getOrigin() + ") are not linked:"); // Check if the input port is closed checkState(!this.closedPorts.contains(inputPort), "The input port is closed for the step " + this.step.getId() + ": " + inputPort.getName()); synchronized (this.receivedTokens) { this.receivedTokens.add(token.getId()); } // Test if the token is an end token if (token.isEndOfStepToken()) { // Check if input port is empty for non skipped steps checkState( !(!this.step.isSkip() && this.inputTokens.get(inputPort).isEmpty()), "No data receive for port on step " + this.step.getId() + ": " + inputPort.getName()); // The input port must be closed this.closedPorts.add(inputPort); } else { // Register data to process final Data data = token.getData(); // Synchronized this part to avoid the lost of some data when creating // Cartesian product synchronized (this.cartesianProductsUsed) { if (data.isList()) { for (Data e : data.getListElements()) { addData(inputPort, e); } } else { addData(inputPort, data); } } } } /** * Add data to the token manager. * @param inputPort inputPort port for the data * @param data the data */ private void addData(final StepInputPort inputPort, final Data data) { if (!inputPort.isList()) { synchronized (this.inputTokens) { this.inputTokens.put(inputPort, data); } } else { // Get or create the data list final DataList dataList; final Collection<Data> inputData = this.inputTokens.get(inputPort); // Design is required by metadata final Design design = this.step.getAbstractWorkflow().getDesign(); synchronized (inputData) { if (inputData.size() == 0) { dataList = new DataList(inputPort, design); inputData.add(dataList); } else { dataList = (DataList) inputData.iterator().next(); } // Add the data to the data list dataList.getModifiableList().add(data); } } } /** * Send end of step tokens. */ private void sendEndOfStepTokens() { for (StepOutputPort outputPort : this.outputPorts) { this.step.sendToken(new Token(outputPort)); } } /** * Send all the tokens of a skip step. */ private void sendSkipStepTokens() { // Create a map with the samples final Map<String, Sample> samples = new HashMap<>(); for (Sample sample : this.step.getWorkflow().getDesign().getSamples()) { samples.put(Naming.toValidName(sample.getId()), sample); } for (StepOutputPort port : this.outputPorts) { // If port is not linked or only connected to skipped steps there is need // to check if output data exists if (port.getLinks().isEmpty() || port.isAllLinksToSkippedSteps()) { continue; } final Set<Data> existingData = port.getExistingData(); if (existingData.size() == 0) { throw new EoulsanRuntimeException("No output files of the step \"" + this.step.getId() + "\" matching with " + WorkflowFileNaming.glob(port) + " found"); } for (Data data : existingData) { // Get the metadata storage final DataMetadataStorage metadataStorage = DataMetadataStorage .getInstance(this.step.getAbstractWorkflow().getOutputDirectory()); // Set the metadata of data from the storage of metadata final boolean isMetadataSet = metadataStorage.loadMetadata(data); // If metadata has not been found from metadata storage if (!isMetadataSet) { // Set the metadata from sample metadata if (samples.containsKey(data.getName())) { WorkflowDataUtils.setDataMetaData(data, samples.get(data.getName())); } } this.step.sendToken(new Token(port, data)); } } // Send end of step token sendEndOfStepTokens(); } // // TaskContext creation methods // /** * Create output data for a new context. * @return a map with the output data */ private Map<OutputPort, AbstractData> createContextOutputData() { // Design is required by metadata final Design design = this.step.getAbstractWorkflow().getDesign(); final Map<OutputPort, AbstractData> result = new HashMap<>(); for (StepOutputPort outputPort : this.outputPorts) { final AbstractData data; if (outputPort.isList()) { data = new DataList(outputPort, design); } else { data = new DataElement(outputPort, design); } result.put(outputPort, data); } return result; } /** * Create the contexts of the step. * @param workflowContext the Workflow context * @return a set with the context */ private Set<TaskContextImpl> createContexts( final WorkflowContext workflowContext) { final Set<TaskContextImpl> result = new HashSet<>(); final Set<ImmutableMap<InputPort, Data>> cartesianProductToProcess; // Process only the cartesian products of data that have not been converted // in Context synchronized (this.cartesianProductsUsed) { if (!checkIfAllPortsHasReceivedSomeData() || !checkIfAllListPortsAreClosed()) { cartesianProductToProcess = Collections.emptySet(); } else { cartesianProductToProcess = this.step.getDataProduct() .makeProduct(this.inputPorts, this.inputTokens); } cartesianProductToProcess.removeAll(this.cartesianProductsUsed); this.cartesianProductsUsed.addAll(cartesianProductToProcess); } // For each result of the cartesian product, create a context object for (Map<InputPort, Data> inputData : cartesianProductToProcess) { // Create the Data object for the output port Map<OutputPort, AbstractData> outputData = createContextOutputData(); // Create the context object result.add(new TaskContextImpl(workflowContext, this.step, inputData, outputData)); } return result; } /** * Create a context when no input port exists. * @param workflowContext the workflow context * @return a singleton set with the context */ private Set<TaskContextImpl> createContextWhenNoInputPortExist( final WorkflowContext workflowContext) { // Empty input Data for the context Map<InputPort, Data> inputData = Collections.emptyMap(); // Create the Data object for the output port Map<OutputPort, AbstractData> outputData = createContextOutputData(); return Collections.singleton( new TaskContextImpl(workflowContext, this.step, inputData, outputData)); } // // Step results methods // /** * This method write the step result in a file. */ private void writeStepResult(final StepResult result) { if (result == null) { return; } // Step result file DataFile logFile = new DataFile(this.step.getAbstractWorkflow().getJobDirectory(), this.step.getId() + STEP_RESULT_EXTENSION); try { result.write(logFile, false); } catch (IOException e) { Common.showAndLogErrorMessage( "Unable to create log file for " + this.step.getId() + " step."); } // Write the result file in old format if (EoulsanRuntime.getSettings().isUseOldEoulsanResultFormat()) { // Step result file logFile = new DataFile(this.step.getAbstractWorkflow().getJobDirectory(), this.step.getId() + Globals.STEP_RESULT_OLD_FORMAT_EXTENSION); try { result.write(logFile, true); } catch (IOException e) { Common.showAndLogErrorMessage( "Unable to create log file for " + this.step.getId() + " step."); } } } // // Cleanup methods // /** * Add a failed task. * @param failedContext failed task context */ void addFailedOutputData(final TaskContextImpl failedContext) { requireNonNull(failedContext, "failedContext cannot be null"); for (OutputPort port : this.outputPorts) { this.failedOutputDataToRemove.add(failedContext.getOutputData(port)); } } /** * Remove outputs to discard. */ void removeOutputsToDiscard() { // Remove output only for standard steps if (this.step.getType() != STANDARD_STEP) { return; } final DataFile outputStepDir = this.step.getStepOutputDirectory(); final DataFile expectedOutputStepDir = StepOutputDirectory.getInstance().workflowDirectory( this.step.getAbstractWorkflow(), this.step, this.step.getModule()); final DataFile outputWorkflowDir = this.step.getAbstractWorkflow().getOutputDirectory(); // Only remove symbolic links if the output directory of the step is not the // expected output directory final boolean remove = !expectedOutputStepDir.equals(outputStepDir); // In debug mode do not remove links if (!remove && EoulsanRuntime.getSettings() .getBooleanSetting("debug.keep.step.output.links")) { return; } for (Data entry : this.outputTokens.values()) { for (Data data : entry.getListElements()) { // Standard data file if (data.getFormat().getMaxFilesCount() < 2) { if (remove) { removeFileAndSymLink(data.getDataFile(), outputWorkflowDir); } else { removeSymLink(data.getDataFile(), outputWorkflowDir); } } // Multi file data file else { for (int i = 0; i < data.getDataFileCount(); i++) { if (remove) { removeFileAndSymLink(data.getDataFile(i), outputWorkflowDir); } else { removeSymLink(data.getDataFile(i), outputWorkflowDir); } } } } } } /** * Remove all outputs of the step. */ void removeAllOutputs() { // Remove output only for standard steps if (!(this.step.getType() == STANDARD_STEP || this.step.getType() == GENERATOR_STEP)) { return; } final DataFile outputWorkflowDir = this.step.getAbstractWorkflow().getOutputDirectory(); final List<Data> list = new ArrayList<>(); list.addAll(this.outputTokens.values()); list.addAll(this.failedOutputDataToRemove); for (Data entry : list) { for (Data data : entry.getListElements()) { // Standard data file if (data.getFormat().getMaxFilesCount() < 2) { removeFileAndSymLink(data.getDataFile(), outputWorkflowDir); } // Multi file data file else { for (int i = 0; i < data.getDataFileCount(); i++) { removeFileAndSymLink(data.getDataFile(i), outputWorkflowDir); } } } } } /** * Remove a file and its symbolic link. * @param file file to remove * @param symlinkDir the directory where is the symbolic link to remove */ private void removeFileAndSymLink(final DataFile file, final DataFile symlinkDir) { // Remove the file getLogger().fine("Remove output file: " + file); try { if (HDFSPathDataProtocol.PROTOCOL_NAME .equals(file.getProtocol().getName())) { // If file is on HDFS, file removing must be recursive file.delete(true); } else { // In other case, do not use recursion is more safe file.delete(); } } catch (IOException e) { getLogger().severe("Cannot remove data to discard: " + file + " (" + e.getMessage() + ")"); } // Remove the symbolic link removeSymLink(file, symlinkDir); } /** * Remove the symbolic link of a file. * @param file file to remove * @param symlinkDir the directory where is the symbolic link to remove */ private void removeSymLink(final DataFile file, final DataFile symlinkDir) { final DataFile link = new DataFile(symlinkDir, file.getName()); try { if (link.exists(false) && link.getMetaData().isSymbolicLink()) { getLogger().fine("Remove symbolic link: " + link); link.delete(); } } catch (IOException e) { getLogger().severe("Cannot remove data symbolic link to discard: " + link + " (" + e.getMessage() + ")"); } } // // Thread methods // /** * Start the Token manager thread. */ void start() { // Check if the thread has been already started checkState(!this.isStarted, "The token manager thread for step " + this.step.getId() + " is already started"); // Start the thread new Thread(this, "TokenManager_" + this.step.getId()).start(); this.isStarted = true; } /** * Test if the thread for the token is started. * @return true if the thread for the token is started */ public boolean isStarted() { return this.isStarted; } /** * Stop the Token manager thread. */ void stop() { // Check if the thread has been started checkState(this.isStarted, "The token manager thread for step " + this.step.getId() + " is not started"); this.isStarted = false; this.endOfStep = true; } @Override public void run() { try { boolean firstSubmission = true; do { try { Thread.sleep(CHECKING_DELAY_MS); } catch (InterruptedException e) { getLogger().severe(e.getMessage()); } // Do nothing until the step is not ready final StepState state = this.step.getState(); if (!(state == READY || state.isWorkingState())) { continue; } // Set the step to the working state if (state == READY) { this.step.setState(WORKING); } // Create new contexts to submit final Set<TaskContextImpl> contexts; synchronized (this) { // Get the Workflow context final WorkflowContext workflowContext = this.step.getAbstractWorkflow().getWorkflowContext(); if (this.inputPorts.size() > 0) { // Standard case contexts = createContexts(workflowContext); } else { // When the step has no input port contexts = createContextWhenNoInputPortExist(workflowContext); } } // Submit execution of the available contexts if (!this.step.isSkip()) { // Create the step output directory if this is the first submission if (firstSubmission) { final DataFile outputDirectory = this.step.getStepOutputDirectory(); if (!outputDirectory.exists()) { outputDirectory.mkdirs(); } firstSubmission = false; } this.scheduler.submit(this.step, contexts); } // If no more token to receive if (isNoTokenToReceive()) { // Log received tokens logReceivedTokens(); if (!this.step.isSkip()) { // Wait end of all context this.scheduler.waitEndOfTasks(this.step); if (this.step.getState() != ABORTED) { // Get the result final StepResult result = this.scheduler.getResult(this.step); // Set the result immutable result.setImmutable(); // Change Step state if (result.isSuccess()) { this.step.setState(DONE); // Write step result if (this.step.isCreateLogFiles()) { writeStepResult(result); } // Send end of step tokens sendEndOfStepTokens(); } } else { this.step.setState(FAILED); } } else { // If the step is skip the result is always OK this.step.setState(DONE); // Send all the tokens of step tokens sendSkipStepTokens(); } // Log sent tokens logSentTokens(); this.endOfStep = true; } } while (!this.endOfStep); } catch (Throwable exception) { // Stop the analysis this.step.getAbstractWorkflow().emergencyStop(exception, "Error while executing the workflow"); } } /** * Log received tokens. */ private void logReceivedTokens() { String msg = "Step #" + this.step.getNumber() + " " + this.step.getId() + " has received tokens: "; if (this.inputTokens.size() == 0) { msg += "no token received"; } else { List<String> list = new ArrayList<>(); for (InputPort port : this.inputTokens.keySet()) { list.add(port.getName() + " (" + port.getFormat().getName() + "): " + this.inputTokens.get(port).size()); } msg += Joiner.on(", ").join(list); } getLogger().fine(msg); } /** * Log sent tokens. */ private void logSentTokens() { String msg = "Step #" + this.step.getNumber() + " " + this.step.getId() + " has sent tokens: "; if (this.outputTokens.size() == 0) { msg += " no token sent"; } else { List<String> list = new ArrayList<>(); for (OutputPort port : this.outputTokens.keySet()) { list.add(port.getName() + " (" + port.getFormat().getName() + "): " + this.outputTokens.get(port).size()); } msg += Joiner.on(", ").join(list); } getLogger().fine(msg); } // // Constructor // /** * Constructor. * @param step step that tokens must be managed by this instance */ TokenManager(final AbstractStep step) { requireNonNull(step, "step argument cannot be null"); this.step = step; this.inputPorts = step.getWorkflowInputPorts(); this.outputPorts = step.getWorkflowOutputPorts(); // Get the scheduler this.scheduler = TaskSchedulerFactory.getScheduler(); } }