/** * Copyright (C) 2012-2013 Selventa, Inc. * * This file is part of the OpenBEL Framework. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The OpenBEL Framework is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public * License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with the OpenBEL Framework. If not, see <http://www.gnu.org/licenses/>. * * Additional Terms under LGPL v3: * * This license does not authorize you and you are prohibited from using the * name, trademarks, service marks, logos or similar indicia of Selventa, Inc., * or, in the discretion of other licensors or authors of the program, the * name, trademarks, service marks, logos or similar indicia of such authors or * licensors, in any marketing or advertising materials relating to your * distribution of the program or any covered product. This restriction does * not waive or limit your obligation to keep intact all copyright notices set * forth in the program as delivered to you. * * If you distribute the program in whole or in part, or any modified version * of the program, and you assume contractual liability to the recipient with * respect to the program or modified version, then you will indemnify the * authors and licensors of the program for any liabilities that these * contractual assumptions directly impose on those licensors and authors. */ package org.openbel.framework.tools; import static java.lang.String.format; import static java.lang.System.currentTimeMillis; import static java.util.Collections.emptySet; import static org.openbel.framework.common.BELUtilities.asPath; import static org.openbel.framework.common.BELUtilities.createDirectories; import static org.openbel.framework.common.BELUtilities.noItems; import static org.openbel.framework.common.Strings.EXPAND_NAMED_COMPLEXES_HELP; import static org.openbel.framework.common.Strings.EXPAND_PROTEIN_FAMILIES_HELP; import static org.openbel.framework.common.Strings.EXPECTED_ONE_NETWORK; import static org.openbel.framework.common.Strings.GS_INJECTION_DISABLED; import static org.openbel.framework.common.Strings.INJECTIONS_DISABLED; import static org.openbel.framework.common.Strings.INPUT_FILE_UNREADABLE; import static org.openbel.framework.common.Strings.NC_INJECTION_DISABLED; import static org.openbel.framework.common.Strings.NOT_A_PHASE2_DIR; import static org.openbel.framework.common.Strings.NO_GENE_SCAFFOLDING_HELP; import static org.openbel.framework.common.Strings.NO_NAMED_COMPLEXES_HELP; import static org.openbel.framework.common.Strings.NO_PHASE_THREE; import static org.openbel.framework.common.Strings.NO_PROTEIN_FAMILIES_HELP; import static org.openbel.framework.common.Strings.NO_ORTHOLOGY_HELP; import static org.openbel.framework.common.Strings.ORTHO_INJECTION_DISABLED; import static org.openbel.framework.common.Strings.PF_INJECTION_DISABLED; import static org.openbel.framework.common.Strings.PHASE3_STAGE1_HDR; import static org.openbel.framework.common.Strings.PHASE3_STAGE2_HDR; import static org.openbel.framework.common.Strings.PHASE3_STAGE3_HDR; import static org.openbel.framework.common.Strings.PHASE3_STAGE4_HDR; import static org.openbel.framework.common.Strings.PHASE3_STAGE5_HDR; import static org.openbel.framework.common.Strings.PHASE3_STAGE6_HDR; import static org.openbel.framework.common.Strings.PHASE3_NO_ORTHOLOGY_LONG_OPTION; import static org.openbel.framework.common.cfg.SystemConfiguration.getSystemConfiguration; import static org.openbel.framework.common.enums.ExitCode.FAILED_TO_MERGE_PROTO_NETWORKS; import static org.openbel.framework.common.enums.ExitCode.NO_CONVERTED_DOCUMENTS; import static org.openbel.framework.common.enums.ExitCode.NO_PROTO_NETWORKS_SAVED; import static org.openbel.framework.common.enums.ExitCode.NO_VALID_DOCUMENTS; import static org.openbel.framework.core.df.cache.ResourceType.BEL; import static org.openbel.framework.core.df.cache.ResourceType.fromLocation; import static org.openbel.framework.tools.PhaseThreeOptions.phaseThreeOptions; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Iterator; import java.util.List; import java.util.Set; import javax.xml.stream.XMLStreamException; import org.apache.commons.cli.Option; import org.openbel.framework.common.Strings; import org.openbel.framework.common.cfg.SystemConfiguration; import org.openbel.framework.common.enums.ExitCode; import org.openbel.framework.common.index.Index; import org.openbel.framework.common.index.ResourceIndex; import org.openbel.framework.common.index.ResourceLocation; import org.openbel.framework.common.model.Document; import org.openbel.framework.common.model.EquivalenceDataIndex; import org.openbel.framework.common.protonetwork.model.ProtoNetwork; import org.openbel.framework.common.protonetwork.model.ProtoNetworkError; import org.openbel.framework.common.util.BELPathFilters.GlobalProtonetworkFilter; import org.openbel.framework.compiler.DefaultPhaseOne; import org.openbel.framework.compiler.DefaultPhaseOne.Stage1Output; import org.openbel.framework.compiler.DefaultPhaseThree; import org.openbel.framework.compiler.DefaultPhaseThree.DocumentModificationResult; import org.openbel.framework.compiler.DefaultPhaseTwo; import org.openbel.framework.compiler.PhaseOneImpl; import org.openbel.framework.compiler.PhaseThreeImpl; import org.openbel.framework.compiler.PhaseTwoImpl; import org.openbel.framework.core.BELConverterService; import org.openbel.framework.core.BELConverterServiceImpl; import org.openbel.framework.core.BELValidatorService; import org.openbel.framework.core.BELValidatorServiceImpl; import org.openbel.framework.core.XBELConverterService; import org.openbel.framework.core.XBELValidatorService; import org.openbel.framework.core.annotation.AnnotationDefinitionService; import org.openbel.framework.core.annotation.AnnotationService; import org.openbel.framework.core.annotation.DefaultAnnotationDefinitionService; import org.openbel.framework.core.annotation.DefaultAnnotationService; import org.openbel.framework.core.compiler.SemanticService; import org.openbel.framework.core.compiler.SemanticServiceImpl; import org.openbel.framework.core.compiler.ValidationError; import org.openbel.framework.core.compiler.expansion.ExpansionService; import org.openbel.framework.core.compiler.expansion.ExpansionServiceImpl; import org.openbel.framework.core.df.cache.CacheLookupService; import org.openbel.framework.core.df.cache.CacheableResourceService; import org.openbel.framework.core.df.cache.DefaultCacheLookupService; import org.openbel.framework.core.df.cache.DefaultCacheableResourceService; import org.openbel.framework.core.df.cache.ResolvedResource; import org.openbel.framework.core.df.cache.ResourceType; import org.openbel.framework.core.equivalence.EquivalenceIndexerService; import org.openbel.framework.core.equivalence.EquivalenceIndexerServiceImpl; import org.openbel.framework.core.equivalence.EquivalenceMapResolutionFailure; import org.openbel.framework.core.namespace.DefaultNamespaceService; import org.openbel.framework.core.namespace.NamespaceIndexerService; import org.openbel.framework.core.namespace.NamespaceIndexerServiceImpl; import org.openbel.framework.core.namespace.NamespaceService; import org.openbel.framework.core.protocol.ResourceDownloadError; import org.openbel.framework.core.protonetwork.BinaryProtoNetworkDescriptor; import org.openbel.framework.core.protonetwork.BinaryProtoNetworkExternalizer; import org.openbel.framework.core.protonetwork.ProtoNetworkExternalizer; import org.openbel.framework.core.protonetwork.ProtoNetworkService; import org.openbel.framework.core.protonetwork.ProtoNetworkServiceImpl; import org.openbel.framework.core.protonetwork.TextProtoNetworkExternalizer; /** * BEL phase three compiler. */ public final class PhaseThreeApplication extends PhaseApplication { private final DefaultPhaseThree p3; private final DefaultPhaseTwo p2; private final DefaultPhaseOne p1; private final CacheableResourceService cache; private final SystemConfiguration sysconfig; /* Phase-specific command-line options. */ private final static String EXPAND_PF_LONG_OPT = "expand-protein-families"; private final static String EXPAND_NC_LONG_OPT = "expand-named-complexes"; private final static String NO_PF_LONG_OPT = "no-protein-families"; private final static String NO_NC_LONG_OPT = "no-named-complexes"; private final static String NO_GS_LONG_OPT = "no-gene-scaffolding"; private final static String NO_P3_LONG_OPT = "no-phaseIII"; /** Phase three artifact directory. */ public final static String DIR_ARTIFACT = "phaseIII"; /** Phase three pruned PF proto-network. */ public final static String PRUNED_PF_NAME = "pruned-protein-families"; /** Phase three pruned GS proto-network. */ public final static String PRUNED_GS_NAME = "pruned-gene-scaffolding"; /** Phase three pruned NC proto-network. */ public final static String PRUNED_NC_NAME = "pruned-named-complexes"; /** Intermediate output. */ private final static String STAGE1_OUTPUT = "stage1"; private final static String STAGE2_OUTPUT = "stage2"; private final static String STAGE3_OUTPUT = "stage3"; public final static String INJECTED_PF_NETWORK = "injected-protfam"; public final static String INJECTED_GS_NETWORK = "injected-genescaff"; private final static String NUM_PHASES = "6"; /** * Phase three application constructor. * * @param args Command-line arguments */ public PhaseThreeApplication(String[] args) { super(args); sysconfig = getSystemConfiguration(); cache = new DefaultCacheableResourceService(); final ProtoNetworkService protonetService = new ProtoNetworkServiceImpl(); final EquivalenceIndexerService indexer = new EquivalenceIndexerServiceImpl(); final PhaseTwoImpl phaseTwo = new PhaseTwoImpl(cache, indexer, protonetService); phaseTwo.setReportable(getReportable()); p2 = phaseTwo; p3 = new PhaseThreeImpl(protonetService, p2); final XBELValidatorService validator = createValidator(); final XBELConverterService converter = createConverter(); final BELValidatorService belValidator = new BELValidatorServiceImpl(); final BELConverterService belConverter = new BELConverterServiceImpl(); final NamespaceIndexerService nsindexer = new NamespaceIndexerServiceImpl(); final CacheLookupService cacheLookup = new DefaultCacheLookupService(); final NamespaceService nsService = new DefaultNamespaceService( cache, cacheLookup, nsindexer); final SemanticService semantics = new SemanticServiceImpl(nsService); final ExpansionService expansion = new ExpansionServiceImpl(); final AnnotationService annotationService = new DefaultAnnotationService(); final AnnotationDefinitionService annotationDefinitionService = new DefaultAnnotationDefinitionService(cache, cacheLookup); p1 = new PhaseOneImpl(validator, converter, belValidator, belConverter, nsService, semantics, expansion, protonetService, annotationService, annotationDefinitionService); } /** * Determine whether phase III will be skipped (such as with the --no-phaseIII * command line option). * * @return */ public boolean isSkipped() { // --no-phaseIII is the same as: // --no-gene-scaffolding --no-named-complexes --no-protein-families --no-orthology return (hasOption(NO_P3_LONG_OPT) || (hasOption(NO_GS_LONG_OPT) && hasOption(NO_NC_LONG_OPT) && hasOption(NO_PF_LONG_OPT) && hasOption(PHASE3_NO_ORTHOLOGY_LONG_OPTION))); } /** * {@inheritDoc} */ @Override public void start() { super.start(); PhaseThreeOptions phasecfg = getPhaseConfiguration(); if (hasOption(EXPAND_NC_LONG_OPT)) { phasecfg.setExpandNamedComplexes(true); } if (hasOption(EXPAND_PF_LONG_OPT)) { phasecfg.setExpandProteinFamilies(true); } if (hasOption(NO_NC_LONG_OPT)) { phasecfg.setInjectNamedComplexes(false); } if (hasOption(NO_PF_LONG_OPT)) { phasecfg.setInjectProteinFamilies(false); } if (hasOption(NO_GS_LONG_OPT)) { phasecfg.setInjectGeneScaffolding(false); } if (hasOption(PHASE3_NO_ORTHOLOGY_LONG_OPTION)) { phasecfg.setInjectOrthology(false); } phaseOutput(format("=== %s ===", getApplicationName())); if (isSkipped()) { final StringBuilder bldr = new StringBuilder(); bldr.append(getApplicationShortName()); bldr.append(" has been skipped."); phaseOutput(bldr.toString()); return; } // load the resource index for phase III use. String resourceIndexURL = sysconfig.getResourceIndexURL(); try { final ResolvedResource resource = cache.resolveResource( ResourceType.RESOURCE_INDEX, resourceIndexURL); ResourceIndex.INSTANCE.loadIndex(resource.getCacheResourceCopy()); } catch (ResourceDownloadError e) { failIndex(phasecfg, e.getUserFacingMessage()); } catch (FileNotFoundException e) { failIndex(phasecfg, e.getMessage()); } catch (XMLStreamException e) { failIndex(phasecfg, e.getMessage()); } processOutputDirectory(); } /** * Logic to recover from a missing resource index. * * @param phasecfg * @param errorMessage */ private void failIndex(PhaseThreeOptions phasecfg, String errorMessage) { stageError(errorMessage); final StringBuilder bldr = new StringBuilder(); bldr.append("Could not find resource index file."); bldr.append("Expansion of protein families, named complexes, "); bldr.append("gene scaffolding, and orthology will not occur."); stageError(bldr.toString()); ResourceIndex.INSTANCE.loadIndex(); phasecfg.setInjectProteinFamilies(false); phasecfg.setInjectNamedComplexes(false); phasecfg.setInjectGeneScaffolding(false); phasecfg.setInjectOrthology(false); } /* * Processes the output directory. */ private void processOutputDirectory() { final String root = outputDirectory.getAbsolutePath(); final String leaf = PhaseTwoApplication.DIR_ARTIFACT; final String path = asPath(root, leaf); final File phaseIIPath = new File(path); // Fail if the working path doesn't contain a phase II artifact if (!phaseIIPath.isDirectory()) { error(NOT_A_PHASE2_DIR + ": " + phaseIIPath); failUsage(); } // Fail if the working path doesn't contain any proto-networks final File[] networks = phaseIIPath.listFiles(new GlobalProtonetworkFilter()); if (networks.length == 0) { final String err = Strings.NO_GLOBAL_PROTO_NETWORK; error(err + " in: " + phaseIIPath); failUsage(); } // Fail if the working path contains more than one proto-network if (networks.length > 1) { error(EXPECTED_ONE_NETWORK + ", found " + networks.length); failUsage(); } // Create the directory artifact or fail artifactPath = createDirectoryArtifact(outputDirectory, DIR_ARTIFACT); reconstituteNetwork(networks[0]); } /* * Reconstitutes the global proto-network. */ private void reconstituteNetwork(final File file) { BinaryProtoNetworkDescriptor inputProtoNetworkDesc = new BinaryProtoNetworkDescriptor(file); ProtoNetworkExternalizer pne = new BinaryProtoNetworkExternalizer(); ProtoNetwork global = null; try { global = pne.readProtoNetwork(inputProtoNetworkDesc); } catch (ProtoNetworkError e) { error(e.getUserFacingMessage()); final ExitCode ec = ExitCode.NO_GLOBAL_PROTO_NETWORK; exit(ec); } processNetwork(global); } /** * Runs the phase three stages over the input proto-network. * * @param pn Proto-network input by user */ private void processNetwork(final ProtoNetwork pn) { // Inject protein families ProtoNetwork pfamMerged = stage1(pn); // Inject named complexes ProtoNetwork ncMerged = stage2(pfamMerged); // Inject gene scaffolding ProtoNetwork geneMerged = stage3(ncMerged); // Inject homology knowledge ProtoNetwork orthoMerged = stage4(geneMerged); // Equivalence the network ProtoNetwork equived = stage5(orthoMerged); stage6(equived); } /** * Runs stage two protein family injection, if requested by the user. * * @param pn Proto-network input by user * @return Merged proto-network */ private ProtoNetwork stage1(final ProtoNetwork pn) { beginStage(PHASE3_STAGE1_HDR, "1", NUM_PHASES); final StringBuilder bldr = new StringBuilder(); // Load the pfam resource from the resource index. Index resourceIndex = ResourceIndex.INSTANCE.getIndex(); ResourceLocation pfResource = resourceIndex.getProteinFamilyResource(); if (pfResource == null || pfResource.getResourceLocation() == null) { getPhaseConfiguration().setInjectProteinFamilies(false); stageError("Resource location for protein family is not set in " + "resource index, disabling protein family expansion."); markEndStage(bldr); return pn; } if (!withProteinFamilyInjection()) { bldr.append(PF_INJECTION_DISABLED); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } String pfLocation = pfResource.getResourceLocation(); File pfamResource = null; final ResourceType pfamType = fromLocation(pfLocation); try { ResolvedResource resolvedResource = cache.resolveResource(pfamType, pfLocation); pfamResource = resolvedResource.getCacheResourceCopy(); } catch (ResourceDownloadError e) { return failProteinFamilies(pn, bldr, pfLocation, e.getUserFacingMessage()); } // Fail if the protein family file is not readable if (!pfamResource.canRead()) { stageError(INPUT_FILE_UNREADABLE + pfamResource); failUsage(); } stageOutput("Processing protein families"); long t1 = currentTimeMillis(); final Stage1Output output; if (pfamType == BEL) { output = p1.stage1BELValidation(pfamResource); } else { output = p1.stage1XBELValidation(pfamResource); } if (output.hasValidationErrors()) { for (final ValidationError error : output.getValidationErrors()) { stageError(error.getUserFacingMessage()); } bail(NO_VALID_DOCUMENTS); return pn; // Dead code } if (output.hasConversionError()) { stageError(output.getConversionError().getUserFacingMessage()); bail(NO_CONVERTED_DOCUMENTS); return pn; // Dead code } if (output.getSymbolWarning() != null) { stageError(output.getSymbolWarning().getUserFacingMessage()); } long t2 = currentTimeMillis(); Document pfDoc = output.getDocument(); bldr.setLength(0); markTime(bldr, t1, t2); stageOutput(bldr.toString()); stageOutput("Pruning protein families"); boolean expand = withProteinFamilyExpansion(); t1 = currentTimeMillis(); DocumentModificationResult r = p3.pruneFamilies(expand, pfDoc, pn); t2 = currentTimeMillis(); bldr.setLength(0); markTime(bldr, t1, t2); stageOutput(bldr.toString()); if (r.isFailure()) { for (final String error : r.getErrors()) { stageError(error); } return pn; } for (final String warning : r.getWarnings()) { stageError(warning); } if (r.getRemainingStatements() == 0) { bldr.setLength(0); bldr.append("No statements remain after pruning"); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } bldr.setLength(0); bldr.append(r.getRemainingStatements()); bldr.append(" of "); bldr.append(r.getTotalStatements()); bldr.append(" statements remain after pruning"); stageOutput(bldr.toString()); stageOutput("Inferring protein family relationships"); t1 = currentTimeMillis(); r = p3.inferFamilies(pfDoc, pn); t2 = currentTimeMillis(); bldr.setLength(0); markTime(bldr, t1, t2); stageOutput(bldr.toString()); if (r.isFailure()) { for (final String error : r.getErrors()) { stageError(error); } return pn; } for (final String warning : r.getWarnings()) { stageError(warning); } bldr.setLength(0); if (r.getDeltaStatements() == 0) { bldr.append("No statements inferred for protein families"); } else { bldr.append(r.getDeltaStatements()); bldr.append(" of "); bldr.append(r.getTotalStatements()); bldr.append(" statements inferred for protein families"); } stageOutput(bldr.toString()); stageOutput("Compiling pruned protein families"); t1 = currentTimeMillis(); ProtoNetwork pfpn = p3.compile(pfDoc); t2 = currentTimeMillis(); bldr.setLength(0); markTime(bldr, t1, t2); stageOutput(bldr.toString()); if (withDebug()) { final String rootpath = artifactPath.getAbsolutePath(); final String pfpath = asPath(rootpath, PRUNED_PF_NAME); createDirectories(pfpath); try { TextProtoNetworkExternalizer textExternalizer = new TextProtoNetworkExternalizer(); textExternalizer.writeProtoNetwork(pfpn, pfpath); } catch (ProtoNetworkError e) { stageError(e.getUserFacingMessage()); } } stageOutput("Merging proto-networks"); t1 = currentTimeMillis(); try { p3.merge(pn, pfpn); // Change the input proto-network descriptor to merged network ProtoNetworkExternalizer pne = new BinaryProtoNetworkExternalizer(); final String artpath = artifactPath.getAbsolutePath(); final String s1path = asPath(artpath, STAGE1_OUTPUT); createDirectoryArtifact(artifactPath, STAGE1_OUTPUT); pne.writeProtoNetwork(pn, s1path); // if debug, then save the text-based merged network if (withDebug()) { TextProtoNetworkExternalizer textExternalizer = new TextProtoNetworkExternalizer(); textExternalizer.writeProtoNetwork(pn, s1path); } } catch (ProtoNetworkError e) { stageError(e.getUserFacingMessage()); bail(FAILED_TO_MERGE_PROTO_NETWORKS); return pn; // Dead code } t2 = currentTimeMillis(); bldr.setLength(0); markTime(bldr, t1, t2); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } /** * Logic to recover from a failed protein family document. * * @param pn * @param bldr * @param pfLocation * @param e * @return */ private ProtoNetwork failProteinFamilies(final ProtoNetwork pn, final StringBuilder bldr, String pfLocation, String errorMessage) { bldr.append("PROTEIN FAMILY RESOLUTION FAILURE in "); bldr.append(pfLocation); bldr.append("\n\treason: "); bldr.append(errorMessage); stageWarning(bldr.toString()); // could not resolve protein family resource so return original // proto network. return pn; } /** * Runs stage two named complexes injection, if requested by the user. * * @param pn Proto-network input by user * @return Merged proto-network */ private ProtoNetwork stage2(final ProtoNetwork pn) { beginStage(PHASE3_STAGE2_HDR, "2", NUM_PHASES); final StringBuilder bldr = new StringBuilder(); // Load the named complexes resource from the resource index. Index resourceIndex = ResourceIndex.INSTANCE.getIndex(); ResourceLocation ncResource = resourceIndex.getNamedComplexesResource(); if (ncResource == null || ncResource.getResourceLocation() == null) { getPhaseConfiguration().setInjectNamedComplexes(false); stageError("Resource location for named complexes is not set " + "in resource index, disabling named complex expansion."); markEndStage(bldr); return pn; } if (!withNamedComplexInjection()) { bldr.append(NC_INJECTION_DISABLED); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } String ncLocation = ncResource.getResourceLocation(); File ncFile = null; final ResourceType ncType = fromLocation(ncLocation); try { ResolvedResource resolvedResource = cache.resolveResource( ncType, ncLocation); ncFile = resolvedResource.getCacheResourceCopy(); } catch (ResourceDownloadError e) { return failNamedComplexes(pn, bldr, ncLocation, e.getUserFacingMessage()); } // Fail if the named complexes file is not readable if (!ncFile.canRead()) { stageError(INPUT_FILE_UNREADABLE + ncFile); failUsage(); } stageOutput("Processing named complexes"); long t1 = currentTimeMillis(); final Stage1Output output; if (ncType == BEL) { output = p1.stage1BELValidation(ncFile); } else { output = p1.stage1XBELValidation(ncFile); } if (output.hasValidationErrors()) { for (final ValidationError error : output.getValidationErrors()) { stageError(error.getUserFacingMessage()); } bail(NO_VALID_DOCUMENTS); return pn; // Dead code } if (output.hasConversionError()) { stageError(output.getConversionError().getUserFacingMessage()); bail(NO_CONVERTED_DOCUMENTS); return pn; // Dead code } if (output.getSymbolWarning() != null) { stageError(output.getSymbolWarning().getUserFacingMessage()); } long t2 = currentTimeMillis(); Document ncDoc = output.getDocument(); bldr.setLength(0); markTime(bldr, t1, t2); stageOutput(bldr.toString()); stageOutput("Pruning named complexes"); boolean expand = withNamedComplexExpansion(); t1 = currentTimeMillis(); DocumentModificationResult pr = p3.pruneComplexes(expand, ncDoc, pn); t2 = currentTimeMillis(); bldr.setLength(0); markTime(bldr, t1, t2); stageOutput(bldr.toString()); if (pr.isFailure()) { for (final String error : pr.getErrors()) { stageError(error); } return pn; } for (final String warning : pr.getWarnings()) { stageError(warning); } if (pr.getRemainingStatements() == 0) { bldr.setLength(0); bldr.append("No statements remain after pruning"); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } bldr.setLength(0); bldr.append(pr.getRemainingStatements()); bldr.append(" of "); bldr.append(pr.getTotalStatements()); bldr.append(" statements remain after pruning"); stageOutput(bldr.toString()); stageOutput("Compiling named complexes proto-network"); t1 = currentTimeMillis(); ProtoNetwork ncpn = p3.compile(ncDoc); t2 = currentTimeMillis(); bldr.setLength(0); markTime(bldr, t1, t2); stageOutput(bldr.toString()); // if debug, then save named complexes proto network if (withDebug()) { final String rootpath = artifactPath.getAbsolutePath(); final String ncpath = asPath(rootpath, PRUNED_NC_NAME); createDirectories(ncpath); try { TextProtoNetworkExternalizer textExternalizer = new TextProtoNetworkExternalizer(); textExternalizer.writeProtoNetwork(ncpn, ncpath); } catch (ProtoNetworkError e) { stageError(e.getUserFacingMessage()); } } stageOutput("Merging proto-networks"); t1 = currentTimeMillis(); try { p3.merge(pn, ncpn); // Change the input proto-network descriptor to merged network ProtoNetworkExternalizer pne = new BinaryProtoNetworkExternalizer(); final String artpath = artifactPath.getAbsolutePath(); final String s2path = asPath(artpath, STAGE2_OUTPUT); createDirectoryArtifact(artifactPath, STAGE2_OUTPUT); pne.writeProtoNetwork(pn, s2path); // if debug, then save the text-based merged network if (withDebug()) { TextProtoNetworkExternalizer textExternalizer = new TextProtoNetworkExternalizer(); textExternalizer.writeProtoNetwork(pn, s2path); } } catch (ProtoNetworkError e) { stageError(e.getUserFacingMessage()); bail(FAILED_TO_MERGE_PROTO_NETWORKS); return pn; // Dead code } t2 = currentTimeMillis(); bldr.setLength(0); markTime(bldr, t1, t2); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } /** * Logic to recover from a failure to resolve a named complexes resource. * * @param pn * @param bldr * @param ncLocation * @param errorMessage * @return */ private ProtoNetwork failNamedComplexes(final ProtoNetwork pn, final StringBuilder bldr, String ncLocation, String errorMessage) { bldr.append("NAMED COMPLEXES RESOLUTION FAILURE in "); bldr.append(ncLocation); bldr.append("\n\treason: "); bldr.append(errorMessage); stageWarning(bldr.toString()); // could not resolve named complexes resource so return original // proto network. return pn; } /** * Runs stage three gene scaffolding, if requested by the user. * * @param pn Proto-network input by user * @return Merged proto-network */ private ProtoNetwork stage3(final ProtoNetwork pn) { beginStage(PHASE3_STAGE3_HDR, "3", NUM_PHASES); final StringBuilder bldr = new StringBuilder(); // Load the gene scaffolding resource from the resource index. Index resourceIndex = ResourceIndex.INSTANCE.getIndex(); ResourceLocation gsResource = resourceIndex.getGeneScaffoldingResource(); if (gsResource == null || gsResource.getResourceLocation() == null) { getPhaseConfiguration().setInjectGeneScaffolding(false); bldr.append("Resource location for gene scaffolding is not set "); bldr.append("in resource index, disabling gene scaffolding "); bldr.append("expansion."); stageError(bldr.toString()); markEndStage(bldr); return pn; } if (!withGeneScaffoldingInjection()) { bldr.append(GS_INJECTION_DISABLED); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } // TODO Timing for cache resolving. String gsloc = gsResource.getResourceLocation(); File gsFile = null; final ResourceType gsType = fromLocation(gsloc); try { ResolvedResource resolvedResource = cache.resolveResource(gsType, gsloc); gsFile = resolvedResource.getCacheResourceCopy(); } catch (ResourceDownloadError e) { return failGeneScaffolding(pn, bldr, gsloc, e.getUserFacingMessage()); } // Fail if the gene scaffolding file is not readable if (!gsFile.canRead()) { stageError(INPUT_FILE_UNREADABLE + gsFile); failUsage(); } stageOutput("Processing gene scaffolding"); long t1 = currentTimeMillis(); final Stage1Output output; if (gsType == BEL) { output = p1.stage1BELValidation(gsFile); } else { output = p1.stage1XBELValidation(gsFile); } if (output.hasValidationErrors()) { for (final ValidationError error : output.getValidationErrors()) { stageError(error.getUserFacingMessage()); } bail(NO_VALID_DOCUMENTS); return pn; // Dead code } if (output.hasConversionError()) { stageError(output.getConversionError().getUserFacingMessage()); bail(NO_CONVERTED_DOCUMENTS); return pn; // Dead code } if (output.getSymbolWarning() != null) { stageError(output.getSymbolWarning().getUserFacingMessage()); } long t2 = currentTimeMillis(); Document gsDoc = output.getDocument(); bldr.setLength(0); markTime(bldr, t1, t2); stageOutput(bldr.toString()); stageOutput("Pruning gene scaffolding"); t1 = currentTimeMillis(); DocumentModificationResult pr = p3.pruneGene(gsDoc, pn); t2 = currentTimeMillis(); bldr.setLength(0); markTime(bldr, t1, t2); stageOutput(bldr.toString()); if (pr.isFailure()) { for (final String error : pr.getErrors()) { stageError(error); } } for (final String warning : pr.getWarnings()) { stageError(warning); } if (pr.getRemainingStatements() == 0) { bldr.setLength(0); bldr.append("No statements remain after pruning"); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } bldr.setLength(0); bldr.append(pr.getRemainingStatements()); bldr.append(" of "); bldr.append(pr.getTotalStatements()); bldr.append(" statements remain after pruning"); stageOutput(bldr.toString()); stageOutput("Compiling gene scaffolding proto-network"); t1 = currentTimeMillis(); ProtoNetwork gpn = p3.compile(gsDoc); t2 = currentTimeMillis(); bldr.setLength(0); markTime(bldr, t1, t2); stageOutput(bldr.toString()); // if debug, write out proto network for gene scaffolds if (withDebug()) { final String rootpath = artifactPath.getAbsolutePath(); final String gpath = asPath(rootpath, PRUNED_NC_NAME); createDirectories(gpath); try { TextProtoNetworkExternalizer textExternalizer = new TextProtoNetworkExternalizer(); textExternalizer.writeProtoNetwork(gpn, gpath); } catch (ProtoNetworkError e) { stageError(e.getUserFacingMessage()); } } stageOutput("Merging proto-networks"); t1 = currentTimeMillis(); try { p3.merge(pn, gpn); // Change the input proto-network descriptor to merged network ProtoNetworkExternalizer pne = new BinaryProtoNetworkExternalizer(); final String artpath = artifactPath.getAbsolutePath(); final String s3path = asPath(artpath, STAGE3_OUTPUT); createDirectoryArtifact(artifactPath, STAGE3_OUTPUT); pne.writeProtoNetwork(pn, s3path); // if debug, then save the text-based merged network if (withDebug()) { TextProtoNetworkExternalizer textExternalizer = new TextProtoNetworkExternalizer(); textExternalizer.writeProtoNetwork(pn, s3path); } } catch (ProtoNetworkError e) { stageError(e.getUserFacingMessage()); bail(FAILED_TO_MERGE_PROTO_NETWORKS); return pn; // Dead code } t2 = currentTimeMillis(); bldr.setLength(0); markTime(bldr, t1, t2); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } /** * Logic to recover from a failure to resolve the gene scaffolding resource. * * @param pn * @param bldr * @param gsLocation * @param errorMessage * @return */ private ProtoNetwork failGeneScaffolding(final ProtoNetwork pn, final StringBuilder bldr, String gsLocation, String errorMessage) { bldr.append("GENE SCAFFOLDING RESOURCE RESOLUTION FAILURE in "); bldr.append(gsLocation); bldr.append("\n\treason: "); bldr.append(errorMessage); stageWarning(bldr.toString()); // could not resolve gene scaffolding resource so return original // proto network. return pn; } /** * Runs stage four injecting of homology knowledge. * * @param pn {@link ProtoNetwork} * @return the {@link ProtoNetwork} with homology knowledge injected */ private ProtoNetwork stage4(final ProtoNetwork pn) { beginStage(PHASE3_STAGE4_HDR, "4", NUM_PHASES); if (!getPhaseConfiguration().getInjectOrthology()) { final StringBuilder bldr = new StringBuilder(); bldr.append(ORTHO_INJECTION_DISABLED); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } // create output directory for orthologized proto network artifactPath = createDirectoryArtifact(outputDirectory, DIR_ARTIFACT); final Index index = ResourceIndex.INSTANCE.getIndex(); final Set<ResourceLocation> resources = index.getOrthologyResources(); if (noItems(resources)) { final StringBuilder bldr = new StringBuilder(); bldr.append("No orthology documents included."); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } // equivalence network first, before pruning Set<EquivalenceDataIndex> equivs; try { equivs = p2.stage2LoadNamespaceEquivalences(); } catch (EquivalenceMapResolutionFailure e) { stageError(e.getUserFacingMessage()); equivs = emptySet(); } try { p2.stage3EquivalenceParameters(pn, equivs); } catch (IOException e) { } final Iterator<ResourceLocation> it = resources.iterator(); final ResourceLocation first = it.next(); final ProtoNetwork orthoMerge = pruneResource(pn, first); while (it.hasNext()) { final ResourceLocation resource = it.next(); final ProtoNetwork opn = pruneResource(pn, resource); try { p3.merge(orthoMerge, opn); } catch (ProtoNetworkError e) { e.printStackTrace(); } } try { runPhaseThree(orthoMerge); } catch (ProtoNetworkError e) { stageError(e.getUserFacingMessage()); bail(ExitCode.GENERAL_FAILURE); } try { p3.merge(pn, orthoMerge); } catch (ProtoNetworkError e) { stageError(e.getUserFacingMessage()); bail(ExitCode.GENERAL_FAILURE); } return pn; } private void runPhaseThree(final ProtoNetwork orthoPn) throws ProtoNetworkError { final Index resourceIndex = ResourceIndex.INSTANCE.getIndex(); final ResourceLocation pfResource = resourceIndex .getProteinFamilyResource(); final Document pfdoc = readResource(pfResource); p3.pruneFamilies(false, pfdoc, orthoPn); p3.inferFamilies(pfdoc, orthoPn); final ProtoNetwork pfpn = p3.compile(pfdoc); p3.merge(orthoPn, pfpn); final ResourceLocation ncResource = resourceIndex .getNamedComplexesResource(); final Document ncdoc = readResource(ncResource); p3.pruneComplexes(false, ncdoc, orthoPn); final ProtoNetwork ncpn = p3.compile(ncdoc); p3.merge(orthoPn, ncpn); final ResourceLocation gsResource = resourceIndex .getGeneScaffoldingResource(); final Document gsdoc = readResource(gsResource); p3.pruneGene(gsdoc, orthoPn); final ProtoNetwork gspn = p3.compile(gsdoc); p3.merge(orthoPn, gspn); } private ProtoNetwork pruneResource(final ProtoNetwork pn, final ResourceLocation resource) { Document doc = readResource(resource); long t1 = currentTimeMillis(); stageOutput(format("Processing orthology document '%s'", doc.getName())); p3.pruneOrthologyDocument(doc, pn); final ProtoNetwork opn = p3.compile(doc); long t2 = currentTimeMillis(); final StringBuilder bldr = new StringBuilder(); markTime(bldr, t1, t2); stageOutput(bldr.toString()); return opn; } private Document readResource(final ResourceLocation resource) { final String rloc = resource.getResourceLocation(); final ResourceType type = fromLocation(rloc); File res = null; try { final ResolvedResource rsv = cache.resolveResource(type, rloc); res = rsv.getCacheResourceCopy(); } catch (ResourceDownloadError e) { e.printStackTrace(); return null; } final Stage1Output output; if (type == BEL) { output = p1.stage1BELValidation(res); } else { output = p1.stage1XBELValidation(res); } if (output.hasValidationErrors()) { for (final ValidationError error : output.getValidationErrors()) { stageError(error.getUserFacingMessage()); } bail(NO_VALID_DOCUMENTS); return null; // Dead code } if (output.hasConversionError()) { stageError(output.getConversionError().getUserFacingMessage()); bail(NO_CONVERTED_DOCUMENTS); return null; // Dead code } if (output.getSymbolWarning() != null) { stageError(output.getSymbolWarning().getUserFacingMessage()); } Document doc = output.getDocument(); return doc; } /** * Runs stage five equivalencing of the proto-network. * * @param pn Proto-network, post-scaffolding * @return Equivalenced proto-network */ private ProtoNetwork stage5(ProtoNetwork pn) { beginStage(PHASE3_STAGE5_HDR, "5", NUM_PHASES); final StringBuilder bldr = new StringBuilder(); if (!withGeneScaffoldingInjection() && !withNamedComplexInjection() && !withProteinFamilyInjection()) { bldr.append(INJECTIONS_DISABLED); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } // load equivalences Set<EquivalenceDataIndex> equivs; try { equivs = p2.stage2LoadNamespaceEquivalences(); } catch (EquivalenceMapResolutionFailure e) { stageError(e.getUserFacingMessage()); equivs = emptySet(); } long t1 = currentTimeMillis(); int pct = stage5Parameter(pn, equivs, bldr); stage5Term(pn, pct); stage5Statement(pn, pct); long t2 = currentTimeMillis(); final int paramct = pn.getParameterTable().getTableParameters().size(); final int termct = pn.getTermTable().getTermValues().size(); final int stmtct = pn.getStatementTable().getStatements().size(); bldr.setLength(0); bldr.append(stmtct); bldr.append(" statements, "); bldr.append(termct); bldr.append(" terms, "); bldr.append(paramct); bldr.append(" parameters"); stageOutput(bldr.toString()); bldr.setLength(0); markTime(bldr, t1, t2); markEndStage(bldr); stageOutput(bldr.toString()); return pn; } /** * Stage five parameter equivalencing. * * @param network the {@link ProtoNetwork network} to equivalence * @param equivalences the {@link Set set} of {@link EquivalenceDataIndex} * @param bldr the {@link StringBuilder} * @return the {@code int} count of parameter equivalences */ private int stage5Parameter(final ProtoNetwork network, Set<EquivalenceDataIndex> equivalences, final StringBuilder bldr) { bldr.append("Equivalencing parameters"); stageOutput(bldr.toString()); ProtoNetwork ret = network; int ct = 0; try { ct = p2.stage3EquivalenceParameters(ret, equivalences); stageOutput("(" + ct + " equivalences)"); } catch (IOException ioex) { final String err = ioex.getMessage(); fatal(err); } return ct; } /** * Stage five term equivalencing. * * @param network the {@link ProtoNetwork network} to equivalence * @param pct the parameter equivalencing count to control output */ private void stage5Term(final ProtoNetwork network, int pct) { if (pct > 0) { stageOutput("Equivalencing terms"); int tct = p2.stage3EquivalenceTerms(network); stageOutput("(" + tct + " equivalences)"); } else { stageOutput("Skipping term equivalencing"); } } /** * Stage five statement equivalencing. * * @param network the {@link ProtoNetwork network} to equivalence * @param pct the parameter equivalencing count to control output */ private void stage5Statement(final ProtoNetwork network, int pct) { if (pct > 0) { stageOutput("Equivalencing statements"); int sct = p2.stage3EquivalenceStatements(network); stageOutput("(" + sct + " equivalences)"); } else { stageOutput("Skipping statement equivalencing"); } } /** * Runs stage six network saving. * * @param pn Proto-network */ private void stage6(ProtoNetwork pn) { beginStage(PHASE3_STAGE6_HDR, "6", NUM_PHASES); stageOutput("Saving augmented network"); final String rootpath = artifactPath.getAbsolutePath(); long t1 = currentTimeMillis(); try { p3.write(rootpath, pn); if (withDebug()) { try { TextProtoNetworkExternalizer textExternalizer = new TextProtoNetworkExternalizer(); textExternalizer.writeProtoNetwork(pn, rootpath); } catch (ProtoNetworkError e) { error("Could not write out equivalenced proto network."); } } } catch (ProtoNetworkError e) { stageError(e.getUserFacingMessage()); bail(NO_PROTO_NETWORKS_SAVED); } long t2 = currentTimeMillis(); final StringBuilder bldr = new StringBuilder(); markTime(bldr, t1, t2); markEndStage(bldr); stageOutput(bldr.toString()); } /** * {@inheritDoc} */ @Override public PhaseThreeOptions getPhaseConfiguration() { return phaseThreeOptions(); } /** * {@inheritDoc} */ @Override public boolean validCommandLine() { // We only need output from phase two. Any command-line is valid. return true; } /** * Returns * {@code "Phase III: Expansion and augmentation of composite network"}. * * @return String */ @Override public String getApplicationName() { return "Phase III: Expansion and augmentation of composite network"; } /** * Returns {@code "Phase III"}. * * @return String */ @Override public String getApplicationShortName() { return "Phase III"; } /** * {@inheritDoc} */ @Override public String getApplicationDescription() { return "Performs expansion and augmentation of a merged composite network."; } /** * {@inheritDoc} */ @Override public String getUsage() { StringBuilder bldr = new StringBuilder(); bldr.append("[OPTION]..."); return bldr.toString(); } /** * {@inheritDoc} */ @Override public List<Option> getCommandLineOptions() { List<Option> ret = super.getCommandLineOptions(); String help; help = EXPAND_PROTEIN_FAMILIES_HELP; ret.add(new Option(null, EXPAND_PF_LONG_OPT, false, help)); help = EXPAND_NAMED_COMPLEXES_HELP; ret.add(new Option(null, EXPAND_NC_LONG_OPT, false, help)); help = NO_PROTEIN_FAMILIES_HELP; ret.add(new Option(null, NO_PF_LONG_OPT, false, help)); help = NO_NAMED_COMPLEXES_HELP; ret.add(new Option(null, NO_NC_LONG_OPT, false, help)); help = NO_GENE_SCAFFOLDING_HELP; ret.add(new Option(null, NO_GS_LONG_OPT, false, help)); help = NO_ORTHOLOGY_HELP; ret.add(new Option(null, PHASE3_NO_ORTHOLOGY_LONG_OPTION, false, help)); help = NO_PHASE_THREE; ret.add(new Option(null, NO_P3_LONG_OPT, false, help)); return ret; } /** * Returns {@link #getPhaseConfiguration()} * {@link PhaseThreeOptions#getExpandNamedComplexes()}, a delegate method * for convenience. * * @return boolean */ private boolean withNamedComplexExpansion() { return getPhaseConfiguration().getExpandNamedComplexes(); } /** * Returns {@link #getPhaseConfiguration()} * {@link PhaseThreeOptions#getExpandProteinFamilies()}, a delegate method * for convenience. * * @return boolean */ private boolean withProteinFamilyExpansion() { return getPhaseConfiguration().getExpandProteinFamilies(); } /** * Returns {@link #getPhaseConfiguration()} * {@link PhaseThreeOptions#getInjectNamedComplexes()}, a delegate method * for convenience. * * @return boolean */ private boolean withNamedComplexInjection() { return getPhaseConfiguration().getInjectNamedComplexes(); } /** * Returns {@link #getPhaseConfiguration()} * {@link PhaseThreeOptions#getInjectProteinFamilies()}, a delegate method * for convenience. * * @return boolean */ private boolean withProteinFamilyInjection() { return getPhaseConfiguration().getInjectProteinFamilies(); } /** * Returns {@link #getPhaseConfiguration()} * {@link PhaseThreeOptions#getInjectGeneScaffolding()}, a delegate method * for convenience. * * @return boolean */ private boolean withGeneScaffoldingInjection() { return getPhaseConfiguration().getInjectGeneScaffolding(); } /** * Invokes {@link #harness(PhaseApplication)} for * {@link PhaseThreeApplication}. * * @param args Command-line arguments */ public static void main(String[] args) { harness(new PhaseThreeApplication(args)); } /** * {@inheritDoc} */ public static String getRequiredArguments() { // Nothing is required return ""; } }