/*
 * Copyright (C) 2013 SeqWare
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package io.seqware.pipeline.plugins;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import io.seqware.common.model.ProcessingStatus;
import io.seqware.common.model.WorkflowRunStatus;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import joptsimple.ArgumentAcceptingOptionSpec;
import joptsimple.OptionSpecBuilder;
import net.sourceforge.seqware.common.model.FileProvenanceParam;
import net.sourceforge.seqware.common.module.ReturnValue;
import net.sourceforge.seqware.common.util.Log;
import net.sourceforge.seqware.pipeline.plugin.Plugin;
import net.sourceforge.seqware.pipeline.plugin.PluginInterface;
import net.sourceforge.seqware.pipeline.plugins.fileprovenance.ProvenanceUtility;
import org.apache.commons.dbutils.DbUtils;
import org.apache.commons.dbutils.QueryRunner;
import org.apache.commons.dbutils.handlers.MapListHandler;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.openide.util.lookup.ServiceProvider;

/**
 * Pulls back a file provenance report, runs an arbitrarily complex SQL query on the results, and saves the results as a protobuf binary for
 * use as a part of interpreted language deciders.
 *
 * @author dyuen
 * @version 1.1.0
 */
@ServiceProvider(service = PluginInterface.class)
public class FileProvenanceQueryTool extends Plugin {

    /** Name of the temporary table that the user's SQL query runs against. */
    private static final String TABLE_NAME = "FILE_REPORT";

    private final ArgumentAcceptingOptionSpec<String> outFileSpec;
    private final ArgumentAcceptingOptionSpec<String> querySpec;
    private final ArgumentAcceptingOptionSpec<String> inFileSpec;
    private final OptionSpecBuilder useH2InMemorySpec;

    /**
     * Registers the command-line options: an optional pre-fetched report ("--in"), a required
     * output path ("--out"), an optional in-memory H2 switch ("--H2mem"), and the required SQL
     * query ("--query") to run against {@link #TABLE_NAME}.
     */
    public FileProvenanceQueryTool() {
        ProvenanceUtility.configureFileProvenanceParams(parser);
        this.inFileSpec = parser.accepts(
                "in",
                "The tab separated file that will be used instead of pulling back a " + "fresh file provenance report. "
                        + "Must be a tab separated file with a fixed number of columns with "
                        + "a provided header (that will be used for column names). ").withRequiredArg();
        this.outFileSpec = parser.accepts("out", "The tab separated file into which the results will be written.").withRequiredArg()
                .required();
        this.useH2InMemorySpec = parser.accepts("H2mem", "Use H2 in-memory database for better performance (but with memory limits)");
        this.querySpec = parser.accepts("query", "The standard SQL query that should be run. Table queried should be " + TABLE_NAME)
                .withRequiredArg().required();
    }

    @Override
    public String get_description() {
        return "Pulls back a file provenance report (or a previous tab-separated file), runs an arbitrarily complex SQL query on the results and saves the results as a tab separated file for use as a part of interpreted language deciders.";
    }

    /**
     * Validates options: when no "--in" file is supplied, one of the standard file provenance
     * constraints (or "--all") must be present so a fresh report can be pulled.
     *
     * @return INVALIDPARAMETERS when neither an input file nor a valid constraint is given
     */
    @Override
    public ReturnValue init() {
        if (!options.has(inFileSpec) && !ProvenanceUtility.checkForValidOptions(options)) {
            println("One of the various constraints or '--all' must be specified.");
            println(this.get_syntax());
            return new ReturnValue(ReturnValue.INVALIDPARAMETERS);
        }
        return new ReturnValue();
    }

    @Override
    public ReturnValue do_test() {
        return new ReturnValue();
    }

    /**
     * Main entry point: obtains a tab-separated provenance report (from "--in" or the web
     * service), converts it into an import file quoted for H2's CSVREAD, loads it into a
     * temporary H2 database table, runs the user-supplied SQL query, and writes the result set
     * as a tab-separated file to "--out". All temporary files and the database connection are
     * cleaned up on both success and failure.
     *
     * @return a successful ReturnValue; fatal conditions either exit the JVM (empty query
     *         result) or surface as a RuntimeException wrapping the underlying cause
     */
    @Override
    public ReturnValue do_run() {
        Path randomTempDirectory = null;
        Path originalReport = null;
        Path bulkImportFile = null;
        try {
            if (options.has(this.inFileSpec)) {
                originalReport = FileSystems.getDefault().getPath(options.valueOf(inFileSpec));
            } else {
                originalReport = populateOriginalReportFromWS();
            }

            List<String> headers;
            List<Boolean> numericDataType;
            // construct column name and datatypes from the header row, then convert the
            // file provenance report into a bulk-load file quoted for CSVREAD
            try (BufferedReader originalReader = Files.newBufferedReader(originalReport, Charset.defaultCharset())) {
                String headerLine = originalReader.readLine();
                headers = Lists.newArrayList();
                numericDataType = Lists.newArrayList();
                for (String column : headerLine.split("\t")) {
                    String editedColumnName = StringUtils.lowerCase(column).replaceAll(" ", "_").replaceAll("-", "_");
                    headers.add(editedColumnName);
                    // note that Parent Sample SWID is a silly column that has colons in it,
                    // so it must stay a VARCHAR even though it contains "swid"
                    numericDataType.add(!editedColumnName.contains("parent_sample") && (editedColumnName.contains("swid")));
                }
                bulkImportFile = Files.createTempFile("import", "txt");
                try (BufferedWriter derbyImportWriter = Files.newBufferedWriter(bulkImportFile, Charset.defaultCharset())) {
                    Log.debug("Bulk import file written to " + bulkImportFile.toString());
                    // NOTE(review): ready() is used as an EOF check; this relies on the
                    // report being fully buffered line-by-line — kept as-is
                    while (originalReader.ready()) {
                        String line = originalReader.readLine();
                        StringBuilder builder = new StringBuilder();
                        int i = 0;
                        for (String colValue : line.split("\t")) {
                            if (i != 0) {
                                builder.append("\t");
                            }
                            if (numericDataType.get(i)) {
                                // blank numeric cells are emitted as empty (SQL NULL on import)
                                if (!colValue.trim().isEmpty()) {
                                    builder.append(colValue);
                                }
                            } else {
                                // assume that this is a string
                                // need to double quotes to preserve them, see
                                // https://db.apache.org/derby/docs/10.4/tools/ctoolsimportdefaultformat.html
                                builder.append("\"").append(colValue.replaceAll("\"", "\"\"")).append("\"");
                            }
                            i++;
                        }
                        derbyImportWriter.write(builder.toString());
                        derbyImportWriter.newLine();
                    }
                }
            }

            randomTempDirectory = Files.createTempDirectory("randomFileProvenanceQueryDir");
            // try using in-memory for better performance
            String protocol = "jdbc:h2:";
            if (options.has(useH2InMemorySpec)) {
                protocol = protocol + "mem:";
            }
            Connection connection = spinUpEmbeddedDB(randomTempDirectory, "org.h2.Driver", protocol);
            // FIX: the connection was previously only closed on the success path; an
            // exception during import or query would leak it (and keep the temp DB files
            // locked). Close it in a finally block instead.
            try {
                // drop table if it exists already (running in IDE?)
                Statement dropTableStatement = null;
                try {
                    dropTableStatement = connection.createStatement();
                    dropTableStatement.executeUpdate("DROP TABLE " + TABLE_NAME);
                } catch (SQLException e) {
                    Log.debug("Report table didn't exist (normal)");
                } finally {
                    DbUtils.closeQuietly(dropTableStatement);
                }

                // create table creation query
                StringBuilder tableCreateBuilder = new StringBuilder();
                tableCreateBuilder.append("CREATE TABLE " + TABLE_NAME + " (");
                for (int i = 0; i < headers.size(); i++) {
                    if (i != 0) {
                        tableCreateBuilder.append(",");
                    }
                    if (numericDataType.get(i)) {
                        tableCreateBuilder.append(headers.get(i)).append(" INT ");
                    } else {
                        tableCreateBuilder.append(headers.get(i)).append(" VARCHAR ");
                    }
                }
                tableCreateBuilder.append(")");

                bulkImportH2(tableCreateBuilder, connection, bulkImportFile);

                // query the database and dump the results to the output file
                try (BufferedWriter outputWriter = Files.newBufferedWriter(Paths.get(options.valueOf(outFileSpec)),
                        Charset.defaultCharset(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
                    QueryRunner runner = new QueryRunner();
                    List<Map<String, Object>> mapList = runner.query(connection, options.valueOf(querySpec), new MapListHandler());
                    // output header
                    if (mapList.isEmpty()) {
                        Log.fatal("Query had no results");
                        System.exit(-1);
                    }
                    StringBuilder builder = new StringBuilder();
                    for (String columnName : mapList.get(0).keySet()) {
                        if (builder.length() != 0) {
                            builder.append("\t");
                        }
                        builder.append(StringUtils.lowerCase(columnName));
                    }
                    outputWriter.append(builder);
                    outputWriter.newLine();
                    for (Map<String, Object> rowMap : mapList) {
                        StringBuilder rowBuilder = new StringBuilder();
                        for (Entry<String, Object> e : rowMap.entrySet()) {
                            if (rowBuilder.length() != 0) {
                                rowBuilder.append("\t");
                            }
                            rowBuilder.append(e.getValue());
                        }
                        outputWriter.append(rowBuilder);
                        outputWriter.newLine();
                    }
                }
            } finally {
                DbUtils.closeQuietly(connection);
            }
            Log.stdoutWithTime("Wrote output to " + options.valueOf(outFileSpec));
            return new ReturnValue();
        } catch (IOException | SQLException | ClassNotFoundException | InstantiationException | IllegalAccessException ex) {
            throw new RuntimeException(ex);
        } finally {
            if (originalReport != null) {
                FileUtils.deleteQuietly(originalReport.toFile());
            }
            if (bulkImportFile != null) {
                FileUtils.deleteQuietly(bulkImportFile.toFile());
            }
            if (randomTempDirectory != null && randomTempDirectory.toFile().exists()) {
                FileUtils.deleteQuietly(randomTempDirectory.toFile());
            }
        }
    }

    /**
     * Creates {@link #TABLE_NAME} in one shot by appending an H2 CSVREAD clause to the partially
     * built CREATE TABLE statement and executing it.
     *
     * @param tableCreateBuilder CREATE TABLE statement built up to (and including) the column list
     * @param connection open H2 connection
     * @param importFile tab-separated, double-quote-escaped bulk import file
     * @throws SQLException if table creation or the bulk import fails
     */
    private void bulkImportH2(StringBuilder tableCreateBuilder, Connection connection, Path importFile) throws SQLException {
        tableCreateBuilder.append("AS SELECT * FROM CSVREAD('").append(importFile.toString()).append("', null, 'fieldSeparator=\t')");
        Log.debug("Table creation query is: " + tableCreateBuilder.toString());
        Statement createTableStatement = null;
        try {
            createTableStatement = connection.createStatement();
            createTableStatement.executeUpdate(tableCreateBuilder.toString());
        } finally {
            DbUtils.closeQuietly(createTableStatement);
        }
    }

    /**
     * Loads the given JDBC driver and opens a connection to an embedded database rooted in the
     * supplied temp directory (ignored by H2's "mem:" protocol, which keeps data in memory).
     *
     * @param randomTempDirectory directory to hold the on-disk database files
     * @param driver fully qualified JDBC driver class name
     * @param protocol JDBC URL prefix, e.g. "jdbc:h2:" or "jdbc:h2:mem:"
     * @return an open connection; callers are responsible for closing it
     */
    private Connection spinUpEmbeddedDB(Path randomTempDirectory, String driver, String protocol) throws IllegalAccessException,
            SQLException, ClassNotFoundException, InstantiationException {
        Class.forName(driver).newInstance();
        Connection connection = DriverManager.getConnection(protocol + randomTempDirectory.toString() + "/tempDB;create=true");
        return connection;
    }

    /**
     * Pulls a fresh file provenance report from the web service into a temp file, constrained to
     * skipped=false, completed workflow runs, and successful processing events.
     *
     * @return path to the freshly written tab-separated report (caller deletes it when done)
     * @throws IOException if the temp file cannot be created or written
     */
    private Path populateOriginalReportFromWS() throws IOException {
        Map<FileProvenanceParam, List<String>> map = ProvenanceUtility.convertOptionsToMap(options, metadata);
        // specify some standard filters that are required for filters
        map.put(FileProvenanceParam.skip, new ImmutableList.Builder<String>().add("false").build());
        map.put(FileProvenanceParam.workflow_run_status, new ImmutableList.Builder<String>().add(WorkflowRunStatus.completed.toString())
                .build());
        map.put(FileProvenanceParam.processing_status, new ImmutableList.Builder<String>().add(ProcessingStatus.success.toString())
                .build());
        Path originalReport = Files.createTempFile("file_provenance", "txt");
        Log.debug("Original report written to " + originalReport.toString());
        try (BufferedWriter originalWriter = Files.newBufferedWriter(originalReport, Charset.defaultCharset())) {
            metadata.fileProvenanceReport(map, originalWriter);
        }
        return originalReport;
    }

    @Override
    public ReturnValue clean_up() {
        return new ReturnValue();
    }
}