/**
 * Copyright 2014 VU University Medical Center.
 * Licensed under the Apache License version 2.0 (see http://www.apache.org/licenses/LICENSE-2.0.html).
 */

package nl.vumc.biomedbridges.examples;

import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;

import nl.vumc.biomedbridges.core.Constants;
import nl.vumc.biomedbridges.core.DefaultWorkflowFactory;
import nl.vumc.biomedbridges.core.FileUtils;
import nl.vumc.biomedbridges.core.Workflow;
import nl.vumc.biomedbridges.core.WorkflowFactory;
import nl.vumc.biomedbridges.core.WorkflowType;
import nl.vumc.biomedbridges.galaxy.configuration.GalaxyConfiguration;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This class contains a simple example of the workflow running functionality: the line, word, and character count
 * workflow returns these statistics for the input file.
 *
 * You can run this example using the following Maven command:
 * mvn compile exec:java -Dexec.mainClass="nl.vumc.biomedbridges.examples.LineCountExample"
 *
 * @author <a href="mailto:f.debruijn@vumc.nl">Freek de Bruijn</a>
 */
public class LineCountExample extends AbstractBaseExample {
    /**
     * The name of the Galaxy history.
     */
    protected static final String HISTORY_NAME = "Line, word, and character count history";

    /**
     * The name of the output dataset.
     */
    protected static final String OUTPUT_NAME = "Line/Word/Character count on data 1";

    /**
     * The header line for the expected output.
     */
    protected static final String HEADER_LINE = "#lines\twords";

    /**
     * The logger for this class.
     */
    private static final Logger logger = LoggerFactory.getLogger(LineCountExample.class);

    /**
     * Construct the line count example.
     *
     * @param workflowFactory the workflow factory to use.
     */
    public LineCountExample(final WorkflowFactory workflowFactory) {
        super(null, workflowFactory, logger);
    }

    /**
     * Main method: downloads a book to a temporary file and runs the line count workflow on it, comparing the
     * result against the fixed expected lines from {@link #getExpectedLines()}.
     *
     * @param arguments unused command-line arguments.
     * @throws MalformedURLException when the book URL is invalid.
     */
    // CHECKSTYLE_OFF: UncommentedMain
    public static void main(final String[] arguments) throws MalformedURLException {
        final LineCountExample example = new LineCountExample(new DefaultWorkflowFactory());

        // Use a book classic to do some counting: The Adventures of Sherlock Holmes, by Arthur Conan Doyle.
        final URL bookUrl = new URL("https://www.gutenberg.org/ebooks/1661.txt.utf-8");
        final File bookFile = FileUtils.createTemporaryFileFromURL(bookUrl);

        example.run(Constants.THE_HYVE_GALAXY_URL, Constants.LINE_COUNT_WORKFLOW, bookFile, false);
    }
    // CHECKSTYLE_ON: UncommentedMain

    /**
     * Run this example workflow and return the result.
     *
     * When the book file is null, the workflow is created but not run. In all cases a thread dump is printed to
     * standard output before the example is finished.
     *
     * @param galaxyInstanceUrl the URL of the Galaxy instance to use.
     * @param workflowName      the name of the workflow.
     * @param bookFile          the file to count in (may be null, in which case the workflow is not run).
     * @param useInternalCounts whether to use the internalCounts (true) or getExpectedLines (false) method.
     * @return whether the workflow ran successfully.
     */
    public boolean run(final String galaxyInstanceUrl, final String workflowName, final File bookFile,
                       final boolean useInternalCounts) {
        initializeExample(logger, "LineCountExample.runExample");

        final GalaxyConfiguration galaxyConfiguration = new GalaxyConfiguration().setDebug(httpLogging);
        galaxyConfiguration.buildConfiguration(galaxyInstanceUrl, null, HISTORY_NAME);
        final Workflow workflow = workflowFactory.getWorkflow(WorkflowType.GALAXY, galaxyConfiguration,
                                                              workflowName);

        if (bookFile != null) {
            try {
                workflow.addInput("Input Dataset", bookFile);
                workflow.setParameter(2, "options", Arrays.asList("lines", "words"));

                workflow.run();

                final List<String> expectedLines = useInternalCounts
                                                   ? internalCounts(bookFile)
                                                   : getExpectedLines();
                checkWorkflowSingleOutput(workflow, OUTPUT_NAME, expectedLines);
            } catch (final InterruptedException e) {
                logger.error("Exception while running workflow {}.", workflow.getName(), e);
                // Restore the interrupt status so code further up the call stack can still observe it.
                Thread.currentThread().interrupt();
            } catch (final IOException e) {
                logger.error("Exception while running workflow {}.", workflow.getName(), e);
            }
        }

        System.out.println();
        System.out.println("Thread dump:");
        System.out.println(getThreadDump());

        return finishExample(workflow);
    }

    /**
     * Retrieve information about the Java threads: for each thread its name, state, and stack trace (limited to
     * 100 frames per thread).
     *
     * @return a string with information about the Java threads.
     */
    private static String getThreadDump() {
        final StringBuilder threadDump = new StringBuilder();
        final ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean();
        final int maxThreadDepth = 100;
        for (final ThreadInfo threadInfo : threadMXBean.getThreadInfo(threadMXBean.getAllThreadIds(),
                                                                      maxThreadDepth)) {
            threadDump.append('"');
            threadDump.append(threadInfo.getThreadName());
            threadDump.append("\" \n java.lang.Thread.State: ");
            threadDump.append(threadInfo.getThreadState());
            for (final StackTraceElement stackTraceElement : threadInfo.getStackTrace()) {
                threadDump.append("\n at ");
                threadDump.append(stackTraceElement);
            }
            threadDump.append("\n\n");
        }
        return threadDump.toString();
    }

    /**
     * Get the expected output lines: the header line followed by fixed line and word counts.
     *
     * @return the expected output lines.
     */
    protected List<String> getExpectedLines() {
        return Arrays.asList(HEADER_LINE, "13052\t107533");
    }

    /**
     * Simple internal implementation of wc to determine the expected output of the workflow.
     *
     * The file is read as UTF-8. The line count is the number of lines returned by
     * {@link Files#readAllLines(java.nio.file.Path, Charset)}; the word count is the total number of non-empty
     * whitespace-separated tokens, so blank lines and leading whitespace do not add words.
     *
     * @param inputFile the file to count in.
     * @return the expected output lines: the header line and a tab-separated line with the line and word counts.
     * @throws IOException when reading from the file fails.
     */
    protected static List<String> internalCounts(final File inputFile) throws IOException {
        final List<String> lines = Files.readAllLines(inputFile.toPath(), Charset.forName("UTF-8"));
        long wordCount = 0;
        for (final String line : lines) {
            wordCount += countWords(line);
        }
        return Arrays.asList(HEADER_LINE, String.format("%d\t%d", lines.size(), wordCount));
    }

    /**
     * Count the words (non-empty whitespace-separated tokens) in a single line.
     *
     * @param line the line to count words in.
     * @return the number of words in the line; zero for an empty or whitespace-only line.
     */
    private static long countWords(final String line) {
        long count = 0;
        // String.split yields one empty token for an empty line and a leading empty token when the line starts
        // with whitespace; those are not words and must be skipped.
        for (final String token : line.split("\\s+")) {
            if (!token.isEmpty()) {
                count++;
            }
        }
        return count;
    }
}