/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.utils.parser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.loader.UniprotProxySequenceReader;
import org.biojava.nbio.core.util.XMLHelper;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads a Uniprot XML protein sequence file into a DOM {@link Document} and a BioJava
 * {@link ProteinSequence}.
 *
 * <p>Typical use: construct with the file, call {@link #init()} once, then use the getters. The
 * getters throw {@link IllegalStateException} if {@link #init()} has not completed successfully.
 * The class can also be run from the command line with {@code -p <file>}.
 */
public class UniprotInterpreter {
  private static final Logger LOGGER = LogManager.getFormatterLogger(UniprotInterpreter.class);
  private static final String OPTION_UNIPROT_PATH = "p";

  public static final String HELP_MESSAGE = StringUtils.join(new String[]{
      "This class parses Uniprot Protein sequence files. It can be used on the command line with ",
      "a file path as a parameter."}, "");

  public static final List<Option.Builder> OPTION_BUILDERS = buildOptionBuilders();

  public static final HelpFormatter HELP_FORMATTER = new HelpFormatter();

  static {
    HELP_FORMATTER.setWidth(100);
  }

  private final File xmlFile;
  private Document xmlDocument;
  private ProteinSequence seq;

  /**
   * Creates an interpreter for the given file. Nothing is read until {@link #init()} is called.
   *
   * @param uniprotFile the Uniprot XML file to parse
   */
  public UniprotInterpreter(File uniprotFile) {
    xmlFile = uniprotFile;
  }

  /**
   * Builds the command line option definitions in a plain {@link ArrayList} (rather than an
   * anonymous subclass created through double-brace initialization).
   */
  private static List<Option.Builder> buildOptionBuilders() {
    List<Option.Builder> builders = new ArrayList<>();
    builders.add(Option.builder(OPTION_UNIPROT_PATH)
        .argName("uniprot file")
        .desc("uniprot protein sequence file containing sequence and annotations")
        .hasArg()
        .longOpt("uniprot")
        .required()
    );
    builders.add(Option.builder("h")
        .argName("help")
        .desc("Example of usage: -p filepath.xml")
        .longOpt("help")
    );
    return builders;
  }

  /**
   * Reads the file, parses it into a DOM document and builds the protein sequence from it.
   *
   * <p>Each line is trimmed and the lines are concatenated without separators before parsing
   * (presumably to drop indentation-only text between elements).
   *
   * @throws IOException if the file cannot be opened or read
   * @throws SAXException if the XML parser rejects the content
   * @throws ParserConfigurationException if the XML parser cannot be configured
   * @throws CompoundNotFoundException if the sequence cannot be built from the document
   */
  public void init() throws IOException, SAXException, ParserConfigurationException, CompoundNotFoundException {
    StringBuilder compactXml = new StringBuilder();
    // ISO-8859-1 maps every byte to exactly one char and back, so the file's bytes reach the XML
    // parser unchanged (apart from the trimmed ASCII whitespace and removed line breaks) no matter
    // what the platform default charset is; the parser then applies the document's own encoding.
    // try-with-resources guarantees the file handle is released even if reading fails.
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(new FileInputStream(xmlFile), StandardCharsets.ISO_8859_1))) {
      String line;
      while ((line = reader.readLine()) != null) {
        compactXml.append(line.trim());
      }
    }
    byte[] xmlBytes = compactXml.toString().getBytes(StandardCharsets.ISO_8859_1);

    Document document = XMLHelper.inputStreamToDocument(new ByteArrayInputStream(xmlBytes));
    AminoAcidCompoundSet aminoAcidCompoundSet = AminoAcidCompoundSet.getAminoAcidCompoundSet();
    UniprotProxySequenceReader uniprotProxySequenceReader =
        new UniprotProxySequenceReader(document, aminoAcidCompoundSet);
    ProteinSequence sequence = new ProteinSequence(uniprotProxySequenceReader);

    // Publish the parsed state only once both steps have succeeded.
    xmlDocument = document;
    seq = sequence;
  }

  /**
   * Verifies that {@link #init()} has populated both the document and the sequence.
   *
   * @throws IllegalStateException if either is still missing
   */
  private void checkInit() {
    if (xmlDocument == null || seq == null) {
      String msg = "Class hasn't been appropriately initialized, no Document and/or ProteinSequence object";
      LOGGER.error(msg);
      throw new IllegalStateException(msg);
    }
  }

  /**
   * @return the DOM document parsed by {@link #init()}
   * @throws IllegalStateException if {@link #init()} has not completed successfully
   */
  public Document getXmlDocument() {
    checkInit();
    return this.xmlDocument;
  }

  /**
   * @return the protein sequence as a string
   * @throws IllegalStateException if {@link #init()} has not completed successfully
   */
  public String getSequence() {
    checkInit();
    return seq.getSequenceAsString();
  }

  /**
   * Command line entry point: parses the options, then loads the file given with {@code -p}.
   * Prints usage and exits with status 1 when parsing fails or help is requested.
   *
   * @param args command line arguments
   * @throws RuntimeException if the given file does not exist
   */
  public static void main(String[] args) throws ParserConfigurationException, IOException, SAXException,
      CompoundNotFoundException {
    Options opts = new Options();
    for (Option.Builder b : OPTION_BUILDERS) {
      opts.addOption(b.build());
    }

    CommandLine cl = null;
    try {
      CommandLineParser parser = new DefaultParser();
      cl = parser.parse(opts, args);
    } catch (ParseException e) {
      LOGGER.error("Argument parsing failed: %s", e.getMessage());
      HELP_FORMATTER.printHelp(UniprotInterpreter.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
      System.exit(1);
    }

    if (cl.hasOption("help")) {
      HELP_FORMATTER.printHelp(UniprotInterpreter.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
      System.exit(1);
    }

    File uniprotFile = new File(cl.getOptionValue(OPTION_UNIPROT_PATH));
    if (!uniprotFile.exists()) {
      String msg = String.format("Uniprot file does not exist: %s", uniprotFile.getAbsolutePath());
      // Passed as an argument so a '%' in the path is not treated as a format specifier.
      LOGGER.error("%s", msg);
      throw new RuntimeException(msg);
    }

    UniprotInterpreter reader = new UniprotInterpreter(uniprotFile);
    reader.init();
  }
}