/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package act.installer.reachablesexplorer;
import com.act.utils.CLIUtil;
import com.act.utils.TSVWriter;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.mongojack.DBCursor;
import org.mongojack.DBQuery;
import org.mongojack.JacksonDBCollection;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Exports the contents of a reachables collection as a TSV file that can be consumed by the web services that
 * support the reachables wiki (e.g. the substructure search service).
 *
 * <p>Every exported reachable becomes one row with the columns listed in {@code HEADER}.  By default all documents
 * in the collection are exported; the {@code -m/--only-export} option restricts the export to a set of ids.</p>
 */
public class WikiWebServicesExporter {
  private static final Logger LOGGER = LogManager.getFormatterLogger(WikiWebServicesExporter.class);

  // Column names of the output TSV; shared by the header and the per-row maps so they cannot drift apart.
  private static final String COLUMN_INCHI = "inchi";
  private static final String COLUMN_INCHI_KEY = "inchi_key";
  private static final String COLUMN_DISPLAY_NAME = "display_name";
  private static final String COLUMN_IMAGE_NAME = "image_name";
  private static final List<String> HEADER =
      Arrays.asList(COLUMN_INCHI, COLUMN_INCHI_KEY, COLUMN_DISPLAY_NAME, COLUMN_IMAGE_NAME);

  private static final String OPTION_INPUT_DB = "d";
  private static final String OPTION_INPUT_DB_HOST = "H";
  private static final String OPTION_INPUT_DB_PORT = "p";
  private static final String OPTION_INPUT_DB_COLLECTION = "c";
  private static final String OPTION_OUTPUT_FILE = "o";
  private static final String OPTION_EXPORT_SOME = "m";
  private static final String OPTION_INPUT_SEQUENCE_COLLECTION = "s";

  private static final String DEFAULT_HOST = "localhost";
  private static final String DEFAULT_PORT = "27017";
  // TODO change the defaults to something more plain/easy to know why they are like they are (No version numbers etc.)
  private static final String DEFAULT_DB = "wiki_reachables";
  private static final String DEFAULT_COLLECTION = "reachablesv7";
  private static final String DEFAULT_SEQUENCES_COLLECTION = "sequencesv7";
  private static final String DEFAULT_RENDERING_CACHE = "/tmp";

  // We won't touch this db, but need it for the Loader's constructor
  private static final String UNUSED_SOURCE_DB = "dummy_source_db";

  private static final List<Option.Builder> OPTION_BUILDERS = buildOptionBuilders();

  private static final String HELP_MESSAGE = StringUtils.join(new String[] {
      "This class writes the contents of a Reachables collection as a TSV that can be consumed by the substructure ",
      "search service"
  }, "");

  private static final HelpFormatter HELP_FORMATTER = new HelpFormatter();

  static {
    HELP_FORMATTER.setWidth(100);
  }

  /**
   * Builds the command line option definitions for this tool.
   *
   * <p>A plain list is populated here instead of using double-brace initialization, which would create an
   * anonymous {@code ArrayList} subclass just to hold the options.</p>
   *
   * @return the option builders, in the order they should be registered
   */
  private static List<Option.Builder> buildOptionBuilders() {
    List<Option.Builder> builders = new ArrayList<>();
    builders.add(Option.builder(OPTION_INPUT_DB)
        .argName("db name")
        .desc(String.format("The name of the reachables database to read (default: %s)", DEFAULT_DB))
        .hasArg()
        .longOpt("db")
    );
    builders.add(Option.builder(OPTION_INPUT_DB_HOST)
        .argName("db host")
        .desc(String.format("The host to which connect when reading from a DB (default: %s)", DEFAULT_HOST))
        .hasArg()
        .longOpt("host")
    );
    builders.add(Option.builder(OPTION_INPUT_DB_PORT)
        .argName("db port")
        .desc(String.format("The port to which connect when reading from a DB (default: %s)", DEFAULT_PORT))
        .hasArg()
        .longOpt("port")
    );
    builders.add(Option.builder(OPTION_INPUT_DB_COLLECTION)
        .argName("collection")
        .desc(String.format("The collection from which to read reachables documents (default: %s)", DEFAULT_COLLECTION))
        .hasArg()
        .longOpt("collection")
    );
    builders.add(Option.builder(OPTION_OUTPUT_FILE)
        .argName("file name")
        .desc("The name of the output tsv to write")
        .hasArg().required()
        .longOpt("out")
    );
    builders.add(Option.builder(OPTION_INPUT_SEQUENCE_COLLECTION)
        .argName("sequence-collection")
        .desc(String.format("The sequence collection that should be used for export (default: %s)",
            DEFAULT_SEQUENCES_COLLECTION))
        .hasArg()
        .longOpt("sequence-collection")
    );
    builders.add(Option.builder(OPTION_EXPORT_SOME)
        .argName("ids")
        .desc("Only export molecules with the specified ids")
        .hasArgs().valueSeparator(',')
        .longOpt("only-export")
    );
    return builders;
  }

  /**
   * Parses the command line, connects to the reachables DB and writes one TSV row per selected reachable.
   *
   * @param args command line arguments; see {@code OPTION_BUILDERS} for the accepted options
   * @throws Exception if the arguments are malformed (e.g. a non-numeric port or id) or if reading from the DB or
   *                   writing the output file fails
   */
  public static void main(String[] args) throws Exception {
    CLIUtil cliUtil = new CLIUtil(WikiWebServicesExporter.class, HELP_MESSAGE, OPTION_BUILDERS);
    CommandLine cl = cliUtil.parseCommandLine(args);

    String host = cl.getOptionValue(OPTION_INPUT_DB_HOST, DEFAULT_HOST);
    Integer port = Integer.parseInt(cl.getOptionValue(OPTION_INPUT_DB_PORT, DEFAULT_PORT));
    String dbName = cl.getOptionValue(OPTION_INPUT_DB, DEFAULT_DB);
    String collection = cl.getOptionValue(OPTION_INPUT_DB_COLLECTION, DEFAULT_COLLECTION);
    String sequenceCollection = cl.getOptionValue(OPTION_INPUT_SEQUENCE_COLLECTION, DEFAULT_SEQUENCES_COLLECTION);

    LOGGER.info("Attempting to connect to DB %s:%d/%s, collection %s", host, port, dbName, collection);
    Loader loader =
        new Loader(host, port, UNUSED_SOURCE_DB, dbName, collection, sequenceCollection, DEFAULT_RENDERING_CACHE);
    JacksonDBCollection<Reachable, String> reachables = loader.getJacksonReachablesCollection();
    LOGGER.info("Connected to DB, reading reachables");

    List<Long> exportIds = parseExportIds(cl);

    TSVWriter<String, String> tsvWriter = new TSVWriter<>(HEADER);
    tsvWriter.open(new File(cl.getOptionValue(OPTION_OUTPUT_FILE)));
    try {
      // An empty id list means "no filter": export every document in the collection.
      // NOTE(review): the collection is declared with String keys but is queried with Long ids -- confirm that
      // this matches how _id is actually stored.
      DBCursor<Reachable> cursor = exportIds.isEmpty()
          ? reachables.find()
          : reachables.find(DBQuery.in("_id", exportIds));
      try {
        int written = 0;
        while (cursor.hasNext()) {
          tsvWriter.append(toRow(cursor.next()));
          // Flushed per row, as before; presumably so that rows already exported are not lost if a later row fails.
          tsvWriter.flush();
          written++;
        }
        LOGGER.info("Wrote %d reachables to output TSV", written);
      } finally {
        // Release the cursor even when the export fails part-way through.
        cursor.close();
      }
    } finally {
      tsvWriter.close();
    }
  }

  /**
   * Reads the optional list of ids that the export should be restricted to.
   *
   * @param cl the parsed command line
   * @return the requested ids, or an empty list if the option was not given
   * @throws NumberFormatException if one of the supplied values is not a valid long
   */
  private static List<Long> parseExportIds(CommandLine cl) {
    if (!cl.hasOption(OPTION_EXPORT_SOME)) {
      return Collections.emptyList();
    }
    return Arrays.stream(cl.getOptionValues(OPTION_EXPORT_SOME))
        .map(String::trim) // Tolerate "1, 2, 3": Long.valueOf rejects surrounding whitespace.
        .map(Long::valueOf)
        .collect(Collectors.toList());
  }

  /**
   * Converts a reachable into a TSV row keyed by the column names in {@code HEADER}.
   *
   * @param reachable the document to export
   * @return a map from column name to the value for that column
   */
  private static Map<String, String> toRow(Reachable reachable) {
    Map<String, String> row = new HashMap<>();
    row.put(COLUMN_INCHI, reachable.getInchi());
    row.put(COLUMN_INCHI_KEY, reachable.getInchiKey());
    row.put(COLUMN_DISPLAY_NAME, reachable.getPageName());
    row.put(COLUMN_IMAGE_NAME, reachable.getStructureFilename());
    return row;
  }
}