package fr.orsay.lri.varna.factories;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.InputSource;
import fr.orsay.lri.varna.exceptions.ExceptionExportFailed;
import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
import fr.orsay.lri.varna.models.rna.ModeleBP;
import fr.orsay.lri.varna.models.rna.ModeleBase;
import fr.orsay.lri.varna.models.rna.RNA;
import fr.orsay.lri.varna.models.rna.RNAMLParser;
public class RNAFactory {
/**
 * File formats that the loaders of this factory can be asked to use.
 * {@code FILE_TYPE_UNKNOWN} requests automatic detection by trying the
 * individual parsers one after another.
 */
public enum RNAFileType {
    /** Handled by {@code loadSecStrBPSEQ}. */
    FILE_TYPE_BPSEQ,

    /** Handled by {@code loadSecStrCT}. */
    FILE_TYPE_CT,

    /** Handled by {@code loadSecStrDBN}. */
    FILE_TYPE_DBN,

    /** Handled by {@code loadSecStrRNAML}. */
    FILE_TYPE_RNAML,

    /** No format known in advance. */
    FILE_TYPE_UNKNOWN
}
/**
 * Reads every molecule found in an RNAML (XML) document.
 *
 * <p>The document is parsed with a non-validating, non-namespace-aware SAX
 * parser feeding an {@link RNAMLParser}. Each parsed molecule becomes one
 * {@link RNA}: it is first initialised with the sequence and an all-unpaired
 * structure, then the parsed base pairs are applied on top.
 *
 * <p>Side effect: the {@code javax.xml.parsers.SAXParserFactory} system
 * property is overwritten on every call.
 *
 * @param r source of the XML content; not closed by this method
 * @return the molecules built so far; any failure other than an
 *         {@link IOException} is printed to standard error and the
 *         (possibly empty or partial) list is still returned
 * @throws ExceptionLoadingFailed if reading from {@code r} fails
 * @throws ExceptionPermissionDenied declared by the signature; not raised
 *         directly here
 * @throws ExceptionFileFormatOrSyntax declared by the signature; not raised
 *         directly here
 */
public static Collection<RNA> loadSecStrRNAML(Reader r) throws ExceptionPermissionDenied,
        ExceptionLoadingFailed, ExceptionFileFormatOrSyntax {
    ArrayList<RNA> molecules = new ArrayList<RNA>();
    try {
        // Force the JAXP lookup onto the named SAX factory implementation.
        System.setProperty("javax.xml.parsers.SAXParserFactory", "com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl");
        SAXParserFactory factory = javax.xml.parsers.SAXParserFactory.newInstance();
        factory.setValidating(false);
        factory.setXIncludeAware(false);
        factory.setNamespaceAware(false);
        SAXParser parser = factory.newSAXParser();

        RNAMLParser handler = new RNAMLParser();
        parser.parse(new InputSource(r), handler);

        for (RNAMLParser.RNATmp parsed : handler.getMolecules()) {
            RNA rna = new RNA();

            // Start from the bare sequence with every position unpaired (-1).
            List<String> bases = parsed.getSequence();
            int[] partners = new int[bases.size()];
            java.util.Arrays.fill(partners, -1);
            rna.setRNA(bases, partners);

            // Convert each parsed pair into a styled base pair on this RNA.
            Vector<RNAMLParser.BPTemp> parsedPairs = parsed.getStructure();
            ArrayList<ModeleBP> pairs = new ArrayList<ModeleBP>();
            for (RNAMLParser.BPTemp bp : parsedPairs) {
                ModeleBase fivePrime = rna.getBaseAt(bp.pos5);
                ModeleBase threePrime = rna.getBaseAt(bp.pos3);
                pairs.add(bp.createBPStyle(fivePrime, threePrime));
            }
            rna.applyBPs(pairs);

            molecules.add(rna);
        }
    } catch (IOException ioe) {
        throw new ExceptionLoadingFailed(
                "Couldn't load file due to I/O or security policy issues.",
                "");
    } catch (Exception ge) {
        // Anything else (e.g. malformed XML) is only reported on stderr.
        ge.printStackTrace();
    }
    return molecules;
}
/**
 * Reads a single RNA from dot-bracket (DBN) text.
 *
 * <p>Lines are trimmed and consumed in order. A line starting with
 * {@code >} sets the title (the text after the marker). Of the remaining
 * lines, the first non-empty one is taken as the sequence and the next
 * non-empty one as the structure; reading stops once a structure has been
 * found.
 *
 * @param r source of the text; not closed by this method
 * @return a one-element collection when a structure line was found,
 *         otherwise an empty collection
 * @throws ExceptionLoadingFailed if reading from {@code r} fails
 * @throws ExceptionPermissionDenied declared by the signature; not raised
 *         directly here
 * @throws ExceptionUnmatchedClosingParentheses presumably propagated from
 *         {@code RNA.setRNA} for an unbalanced structure (not visible here)
 * @throws ExceptionFileFormatOrSyntax presumably propagated from
 *         {@code RNA.setRNA} (not visible here)
 */
public static Collection<RNA> loadSecStrDBN(Reader r) throws ExceptionLoadingFailed,
        ExceptionPermissionDenied, ExceptionUnmatchedClosingParentheses,
        ExceptionFileFormatOrSyntax {
    ArrayList<RNA> loaded = new ArrayList<RNA>();
    RNA rna = new RNA();
    try {
        BufferedReader in = new BufferedReader(r);
        String name = "";
        String sequence = "";
        String structure = "";

        for (String raw = in.readLine();
                (raw != null) && structure.equals("");
                raw = in.readLine()) {
            String text = raw.trim();
            if (text.startsWith(">")) {
                name = text.substring(1).trim();
            } else if (sequence.equals("")) {
                sequence = text;
            } else {
                structure = text;
            }
        }

        if (structure.length() == 0) {
            // No structure line: report "nothing loaded" to the caller.
            return loaded;
        }
        rna.setRNA(sequence, structure);
        rna.setName(name);
    } catch (IOException e) {
        throw new ExceptionLoadingFailed(e.getMessage(), "");
    }
    loaded.add(rna);
    return loaded;
}
/**
 * Loads RNA structures from a reader without knowing the format in advance.
 * Delegates to {@code loadSecStr(Reader, RNAFileType)} with
 * {@code FILE_TYPE_UNKNOWN}.
 *
 * @param r source of the file content
 * @return the structures produced by the two-argument overload
 * @throws ExceptionFileFormatOrSyntax if the two-argument overload fails
 */
public static Collection<RNA> loadSecStr(Reader r) throws ExceptionFileFormatOrSyntax {
    final RNAFileType undetermined = RNAFileType.FILE_TYPE_UNKNOWN;
    return loadSecStr(r, undetermined);
}
/**
 * Loads RNA structures from a reader using the given file format.
 *
 * <p>For a concrete format only the matching parser is run. For
 * {@code FILE_TYPE_UNKNOWN} (or {@code null}) the content is read through a
 * marked {@link BufferedReader} and the CT, BPSEQ, DBN and RNAML parsers are
 * tried in that order, rewinding to the mark between attempts. The first
 * parser returning a non-empty collection wins.
 *
 * <p>Parser failures are treated as "not this format": they never escape
 * directly and are instead summarised by the final
 * {@link ExceptionFileFormatOrSyntax}. During probing, DBN and RNAML failures
 * are additionally printed to standard error.
 *
 * @param r source of the file content; not closed by this method
 * @param fileType expected format; {@code null} is treated as
 *        {@code FILE_TYPE_UNKNOWN}
 * @return the non-empty collection produced by the first successful parser
 * @throws ExceptionFileFormatOrSyntax if no attempted parser produced at
 *         least one RNA
 */
public static Collection<RNA> loadSecStr(Reader r, RNAFileType fileType) throws ExceptionFileFormatOrSyntax
{
    // A missing type is handled like an unknown one instead of failing in the switch.
    RNAFileType type = (fileType == null) ? RNAFileType.FILE_TYPE_UNKNOWN : fileType;
    switch (type)
    {
    case FILE_TYPE_DBN:
    {
        try {
            Collection<RNA> result = loadSecStrDBN(r);
            if (!result.isEmpty()) return result;
        } catch (Exception ignored) {
            // Reported through the ExceptionFileFormatOrSyntax thrown below.
        }
    }
        break;
    case FILE_TYPE_CT:
    {
        try {
            Collection<RNA> result = loadSecStrCT(r);
            if (!result.isEmpty()) return result;
        } catch (Exception ignored) {
            // Reported through the ExceptionFileFormatOrSyntax thrown below.
        }
    }
        break;
    case FILE_TYPE_BPSEQ:
    {
        try {
            Collection<RNA> result = loadSecStrBPSEQ(r);
            if (!result.isEmpty()) return result;
        } catch (Exception ignored) {
            // Reported through the ExceptionFileFormatOrSyntax thrown below.
        }
    }
        break;
    case FILE_TYPE_RNAML:
    {
        try {
            Collection<RNA> result = loadSecStrRNAML(r);
            if (!result.isEmpty()) return result;
        } catch (Exception ignored) {
            // Reported through the ExceptionFileFormatOrSyntax thrown below.
        }
    }
        break;
    case FILE_TYPE_UNKNOWN:
    {
        BufferedReader buf = new BufferedReader(r);
        try {
            // Remember the start so each parser sees the content from the
            // beginning. reset() is only guaranteed while no more than this
            // many characters have been read since the mark.
            buf.mark(1000000);
            try {
                Collection<RNA> result = loadSecStrCT(buf);
                if (!result.isEmpty()) return result;
            } catch (Exception ignored) {
                // Not CT: try the next format.
            }
            buf.reset();
            try {
                Collection<RNA> result = loadSecStrBPSEQ(buf);
                if (!result.isEmpty()) return result;
            } catch (Exception ignored) {
                // Not BPSEQ: try the next format.
            }
            buf.reset();
            try {
                Collection<RNA> result = loadSecStrDBN(buf);
                if (!result.isEmpty()) return result;
            } catch (Exception e) {
                e.printStackTrace();
            }
            buf.reset();
            // RNAML is the last candidate and is attempted exactly once: a
            // second attempt without rewinding could only see an exhausted
            // stream.
            try {
                Collection<RNA> result = loadSecStrRNAML(buf);
                if (!result.isEmpty()) return result;
            } catch (Exception e) {
                e.printStackTrace();
            }
        } catch (IOException e2) {
            // mark/reset failed (e.g. content exceeded the read-ahead limit).
            e2.printStackTrace();
        }
    }
        break;
    }
    throw new ExceptionFileFormatOrSyntax("");
}
/**
 * Guesses the file format from the extension of a path.
 *
 * <p>Only the text after the last {@code '.'} of the last path component is
 * examined, case-insensitively. An extension ending in {@code ml} maps to
 * RNAML, {@code dbn} or {@code faa} to DBN, {@code ct} to CT and
 * {@code bpseq} to BPSEQ. A name without any extension is never classified,
 * so names that merely end in those letters (such as {@code abstract}) are
 * left to content-based detection.
 *
 * @param path file name or path; may be {@code null}
 * @return the guessed type, or {@code FILE_TYPE_UNKNOWN} when {@code path}
 *         is {@code null}, has no extension, or the extension is not
 *         recognised
 */
public static RNAFileType guessFileTypeFromExtension(String path)
{
    if (path == null) {
        return RNAFileType.FILE_TYPE_UNKNOWN;
    }
    // Restrict the search to the last path component so that dots in
    // directory names are not mistaken for an extension separator.
    int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
    String fileName = path.substring(lastSeparator + 1);
    int dot = fileName.lastIndexOf('.');
    if (dot < 0) {
        return RNAFileType.FILE_TYPE_UNKNOWN;
    }
    // Fixed locale: the comparison must not depend on the user's settings.
    String extension = fileName.substring(dot + 1).toLowerCase(java.util.Locale.ENGLISH);

    if (extension.endsWith("ml")) {
        return RNAFileType.FILE_TYPE_RNAML;
    } else if (extension.endsWith("dbn") || extension.endsWith("faa")) {
        return RNAFileType.FILE_TYPE_DBN;
    } else if (extension.endsWith("ct")) {
        return RNAFileType.FILE_TYPE_CT;
    } else if (extension.endsWith("bpseq")) {
        return RNAFileType.FILE_TYPE_BPSEQ;
    }
    return RNAFileType.FILE_TYPE_UNKNOWN;
}
/**
 * Loads RNA structures from a file on disk.
 *
 * <p>The format is first guessed from the file extension; the file is then
 * parsed through {@code loadSecStr(Reader, RNAFileType)}. The underlying
 * file handle is always closed before this method returns or throws.
 *
 * @param path location of the file to read
 * @return the structures parsed from the file
 * @throws FileNotFoundException if the file cannot be opened for reading
 * @throws ExceptionFileFormatOrSyntax if the content could not be parsed;
 *         the exception carries {@code path}
 * @throws ExceptionExportFailed declared by the signature; not raised
 *         directly here
 * @throws ExceptionPermissionDenied declared by the signature; not raised
 *         directly here
 * @throws ExceptionLoadingFailed declared by the signature; not raised
 *         directly here
 * @throws ExceptionUnmatchedClosingParentheses declared by the signature;
 *         not raised directly here
 */
public static Collection<RNA> loadSecStr(String path) throws ExceptionExportFailed,
        ExceptionPermissionDenied, ExceptionLoadingFailed,
        ExceptionFileFormatOrSyntax, ExceptionUnmatchedClosingParentheses,
        FileNotFoundException {
    FileReader fr = new FileReader(path);
    try {
        RNAFileType type = guessFileTypeFromExtension(path);
        return loadSecStr(fr, type);
    } catch (ExceptionFileFormatOrSyntax e) {
        // Tell the caller which file was at fault.
        e.setPath(path);
        throw e;
    } finally {
        // Parsing is eager, so the handle can be released on every exit
        // path, success included.
        try {
            fr.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
    }
}
/**
 * Reads a single RNA from BPSEQ text.
 *
 * <p>A data line has at least three whitespace-separated fields: the
 * position, the base, and one or more partner positions. A partner of
 * {@code 0} in the last field means "unpaired"; every other partner value is
 * recorded as a pair. The first position seen defines the numbering origin.
 * When positions skip ahead, the missing positions are filled with
 * {@code "X"} bases <em>before</em> the current base is stored, so every
 * base sits at the slot matching its position.
 *
 * <p>The title is assembled from comment lines ({@code #}) and from the
 * {@code Filename:}, {@code Organism:} and {@code Accession Number:} header
 * lines; {@code Citation} lines are skipped.
 *
 * @param r source of the text; not closed by this method
 * @return a one-element collection when at least one base pair was read,
 *         otherwise an empty collection
 * @throws ExceptionFileFormatOrSyntax if a numeric field of a data line is
 *         not an integer
 * @throws ExceptionLoadingFailed if reading from {@code r} fails or the RNA
 *         model cannot be built from the parsed data
 * @throws ExceptionPermissionDenied declared by the signature; not raised
 *         directly here
 */
public static Collection<RNA> loadSecStrBPSEQ(Reader r) throws ExceptionPermissionDenied,
        ExceptionLoadingFailed, ExceptionFileFormatOrSyntax {
    boolean loadOk = false;
    ArrayList<RNA> result = new ArrayList<RNA>();
    RNA current = new RNA();
    try {
        BufferedReader fr = new BufferedReader(r);
        ArrayList<String> seqTmp = new ArrayList<String>();
        // Position -> partner positions, both in the file's own numbering.
        Hashtable<Integer, Vector<Integer>> strTmp = new Hashtable<Integer, Vector<Integer>>();
        int minIndex = -1;
        String title = "";
        String filenameStr = "Filename:";
        String organismStr = "Organism:";
        String accessionStr = "Accession Number:";

        String line = fr.readLine();
        while (line != null) {
            line = line.trim();
            String[] tokens = line.split("\\s+");
            boolean dataLine = (tokens.length >= 3)
                    && !tokens[0].contains("#")
                    && !line.startsWith(organismStr)
                    && !line.startsWith("Citation")
                    && !line.startsWith(filenameStr)
                    && !line.startsWith(accessionStr);
            if (dataLine)
            {
                int bpFrom = Integer.parseInt(tokens[0]);
                if (minIndex < 0) {
                    minIndex = bpFrom;
                }
                // Fill any numbering gap first so that the base below ends
                // up at offset (bpFrom - minIndex), where its pairs expect it.
                while (seqTmp.size() < (bpFrom - minIndex)) {
                    seqTmp.add("X");
                }
                seqTmp.add(tokens[1]);

                for (int i = 2; i < tokens.length; i++)
                {
                    int bpTo = Integer.parseInt(tokens[i]);
                    // A trailing 0 marks an unpaired base; anything else is a partner.
                    if ((bpTo != 0) || (i != tokens.length - 1))
                    {
                        if (!strTmp.containsKey(bpFrom)) {
                            strTmp.put(bpFrom, new Vector<Integer>());
                        }
                        strTmp.get(bpFrom).add(bpTo);
                    }
                }
            }
            else if (tokens[0].startsWith("#"))
            {
                int occur = line.indexOf("#");
                String tmp = line.substring(occur + 1);
                title += tmp.trim() + " ";
            }
            else if (tokens[0].startsWith(filenameStr))
            {
                int occur = line.indexOf(filenameStr);
                String tmp = line.substring(occur + filenameStr.length());
                title += tmp.trim();
            }
            else if (tokens[0].startsWith(organismStr))
            {
                // The organism goes in front of whatever title exists so far.
                int occur = line.indexOf(organismStr);
                String tmp = line.substring(occur + organismStr.length());
                if (title.length() != 0)
                {
                    title = "/" + title;
                }
                title = tmp.trim() + title;
            }
            else if (line.contains(accessionStr))
            {
                int occur = line.indexOf(accessionStr);
                String tmp = line.substring(occur + accessionStr.length());
                if (title.length() != 0)
                {
                    title += " ";
                }
                title += "(" + tmp.trim() + ")";
            }
            line = fr.readLine();
        }

        if (strTmp.size() != 0) {
            // Start from an all-unpaired structure, then apply the pairs.
            int[] str = new int[seqTmp.size()];
            for (int i = 0; i < str.length; i++) {
                str[i] = -1;
            }
            current.setRNA(seqTmp, str, minIndex);
            ArrayList<ModeleBP> allbps = new ArrayList<ModeleBP>();
            for (int i : strTmp.keySet())
            {
                for (int j : strTmp.get(i))
                {
                    ModeleBase mb = current.getBaseAt(i - minIndex);
                    ModeleBase part = current.getBaseAt(j - minIndex);
                    allbps.add(new ModeleBP(mb, part));
                }
            }
            current.applyBPs(allbps);
            current.setName(title);
            loadOk = true;
        }
    }
    catch (NumberFormatException e) {
        // Same reporting as the CT loader: malformed numbers are a syntax error.
        throw new ExceptionFileFormatOrSyntax(e.getMessage(), "");
    } catch (IOException e) {
        throw new ExceptionLoadingFailed(e.getMessage(), "");
    }
    catch (Exception e) {
        throw new ExceptionLoadingFailed(e.getMessage(), "");
    }
    if (loadOk) {
        result.add(current);
    }
    return result;
}
/**
 * Reads a single RNA from connectivity-table (CT) text.
 *
 * <p>A data line has at least six whitespace-separated fields, of which the
 * first, third, fourth, fifth and sixth must be integers. The first is the
 * position, the second the base and the fifth the partner position
 * ({@code 0} meaning unpaired). Lines failing that check are ignored as data.
 * The first accepted position defines the numbering origin; when positions
 * skip ahead the gap is filled with unpaired {@code "X"} bases.
 *
 * <p>Any line containing {@code "ENERGY = "} or {@code "dG = "} with at
 * least four fields contributes to the title: the fields after the fourth,
 * followed by the fourth field formatted as an energy.
 *
 * @param r source of the text; not closed by this method
 * @return a one-element collection when at least one data line was read,
 *         otherwise an empty collection
 * @throws ExceptionLoadingFailed if reading from {@code r} fails
 * @throws ExceptionFileFormatOrSyntax if a {@link NumberFormatException}
 *         escapes the per-line handling
 * @throws ExceptionPermissionDenied declared by the signature; not raised
 *         directly here
 */
public static Collection<RNA> loadSecStrCT(Reader r) throws ExceptionPermissionDenied,
        ExceptionLoadingFailed, ExceptionFileFormatOrSyntax {
    boolean loadOk = false;
    ArrayList<RNA> result = new ArrayList<RNA>();
    RNA current = new RNA();
    try {
        BufferedReader fr = new BufferedReader(r);
        ArrayList<String> seq = new ArrayList<String>();
        Vector<Integer> strTmp = new Vector<Integer>();
        int minIndex = -1;
        String title = "";

        String line = fr.readLine();
        while (line != null)
        {
            line = line.trim();
            String[] tokens = line.split("\\s+");
            if (tokens.length >= 6) {
                try {
                    // Validate every numeric column before touching any
                    // state, so a rejected line (e.g. a header) cannot
                    // define the numbering origin.
                    int rawFrom = Integer.parseInt(tokens[0]);
                    int rawTo = Integer.parseInt(tokens[4]);
                    Integer.parseInt(tokens[2]);
                    Integer.parseInt(tokens[3]);
                    Integer.parseInt(tokens[5]);

                    if (minIndex == -1) {
                        minIndex = rawFrom;
                    }
                    int bpFrom = rawFrom - minIndex;
                    int bpTo = (rawTo != 0) ? (rawTo - minIndex) : -1;

                    // Fill skipped positions with unpaired placeholders.
                    while (bpFrom > seq.size()) {
                        seq.add("X");
                        strTmp.add(-1);
                    }
                    seq.add(tokens[1]);
                    strTmp.add(bpTo);
                }
                catch (NumberFormatException ignored) {
                    // Not a data line (typically the header): skip it.
                }
            }
            if (line.contains("ENERGY = ") || line.contains("dG = "))
            {
                if (tokens.length >= 4)
                {
                    String energy = tokens[3];
                    for (int i = 4; i < tokens.length; i++)
                    {
                        title += tokens[i] + " ";
                    }
                    title += "(E=" + energy + " kcal/mol)";
                }
            }
            line = fr.readLine();
        }

        if (strTmp.size() != 0) {
            int[] str = new int[strTmp.size()];
            for (int i = 0; i < str.length; i++) {
                str[i] = strTmp.elementAt(i).intValue();
            }
            current.setRNA(seq, str, minIndex);
            current.setName(title);
            loadOk = true;
        }
    } catch (IOException e) {
        throw new ExceptionLoadingFailed(e.getMessage(), "");
    } catch (NumberFormatException e) {
        throw new ExceptionFileFormatOrSyntax(e.getMessage(), "");
    }
    if (loadOk) {
        result.add(current);
    }
    return result;
}
}