/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$
*/
package org.eurocarbdb.resourcesdb.io;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.eurocarbdb.resourcesdb.GlycanNamescheme;
import org.eurocarbdb.resourcesdb.ResourcesDbException;
import org.eurocarbdb.resourcesdb.ResourcesDbObject;
import org.eurocarbdb.resourcesdb.monosaccharide.CoreModificationTemplate;
import org.eurocarbdb.resourcesdb.monosaccharide.Monosaccharide;
import org.eurocarbdb.resourcesdb.monosaccharide.MonosaccharideDataBuilder;
import org.eurocarbdb.resourcesdb.monosaccharide.MonosaccharideSynonym;
import org.eurocarbdb.resourcesdb.monosaccharide.MonosaccharideValidation;
import org.eurocarbdb.resourcesdb.monosaccharide.Ringtype;
import org.eurocarbdb.resourcesdb.monosaccharide.Substitution;
import org.eurocarbdb.resourcesdb.representation.*;
import org.eurocarbdb.resourcesdb.util.FileUtils;
import org.eurocarbdb.resourcesdb.util.NumberUtils;
import org.eurocarbdb.resourcesdb.util.Utils;
import org.jdom.Document;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
/**
* Class to import seed files for filling MonoSaccharideDB
* @author Thomas Luetteke
*
*/
public class MsdbSeedImporter extends ResourcesDbObject {
/**
* Namescheme used for entries that do not resolve a namescheme of their own.
* It is assigned by parseMsdbSeed (root attribute "namescheme") and by
* parseSynonyms (root attribute "ms_scheme"); when the root element lacks
* that attribute the field is left untouched, i.e. it keeps its previous
* value (initially null).
*/
private GlycanNamescheme globalScheme;
/**
* the root path for import of representations
* (prepended to the text content of "representation" tags to form the
* name of the file that holds the representation data).
* NOTE(review): this is an absolute, developer-specific path - confirm
* whether it should be made configurable.
*/
private static String rootpath = "/home/thomas/eclipse_projects/ResourcesDB/lib/";
//*****************************************************************************
//*** methods to read seed file: **********************************************
//*****************************************************************************
/**
 * Reads a monosaccharide seed file and processes a window of its entries.
 * <p>
 * If the root element has a "namescheme" attribute, the named scheme becomes the
 * global namescheme of this importer; if the name cannot be resolved, a message is
 * written to stderr and {@code GlycanNamescheme.AUTO} is used instead.
 * The child elements of the root are numbered from 1 in document order, and every
 * child whose number n satisfies {@code offset < n <= offset + quantity} is handed to
 * readTemplateFromXmlTree.
 *
 * @param seedFileUrl location of the xml seed file
 * @param offset number of leading entries to skip
 * @param quantity maximum number of entries to process
 * @param writeToDb passed through to the per-entry processing
 * @throws ResourcesDbException if building the xml document fails with a JDOMException or an IOException
 */
public void parseMsdbSeed(URL seedFileUrl, int offset, int quantity, boolean writeToDb) throws ResourcesDbException {
    SAXBuilder parser = new SAXBuilder();
    try {
        Document doc = parser.build(seedFileUrl);
        org.jdom.Element root = doc.getRootElement();
        String schemeName = root.getAttributeValue("namescheme");
        if(schemeName != null) {
            GlycanNamescheme scheme = GlycanNamescheme.forName(schemeName);
            if(scheme != null) {
                this.globalScheme = scheme;
            } else {
                System.err.println("cannot get namescheme '" + schemeName + "'");
                this.globalScheme = GlycanNamescheme.AUTO;
            }
        }
        // The upper bound is computed in long arithmetic: with int arithmetic a call such as
        // (offset = 10, quantity = Integer.MAX_VALUE) wraps around to a negative bound and
        // silently processes nothing.
        long lastIndex = (long) offset + (long) quantity;
        long count = 0;
        List<?> templateList = root.getChildren();
        for(Object child : templateList) {
            count++;
            if(count > lastIndex) {
                // everything behind the requested window is ignored anyway
                break;
            }
            if(count > offset) {
                readTemplateFromXmlTree((org.jdom.Element) child, writeToDb);
            }
        }
    } catch (JDOMException je) {
        // NOTE(review): only the message is kept here; if ResourcesDbException offers a
        // constructor taking a cause, the original exception should be passed along.
        throw new ResourcesDbException("JDOMException: " + je.getMessage());
    } catch (IOException ie) {
        throw new ResourcesDbException("IOException: " + ie.getMessage());
    }
}
/**
 * Processes a single child element of the seed file root.
 * <p>
 * Elements that are not named "monosaccharide" (case-insensitive) are ignored.
 * For a monosaccharide element, the name and namescheme are taken from the "name" /
 * "scheme" attributes and may be overridden by a "name" child tag; "count",
 * "representation" and "alias" child tags are collected as well, any other tag is
 * reported on stderr.
 * Entries without a name and entries whose name contains "anhydro" are skipped.
 * Otherwise the name is parsed into a Monosaccharide, the parsing result is compared
 * with the input name, and - unless the residue is fuzzy or rejected by one of the
 * checks below - the residue is reported as ok and stored if {@code writeToDb} is set.
 * Exceptions raised while building or storing the residue are reported on stderr and
 * do not propagate.
 *
 * @param xmlElement the xml element to process
 * @param writeToDb if true, accepted monosaccharides are stored via HibernateAccess
 */
private void readTemplateFromXmlTree(org.jdom.Element xmlElement, boolean writeToDb) {
    if(!xmlElement.getName().equalsIgnoreCase("monosaccharide")) {
        return;
    }
    String msName = xmlElement.getAttributeValue("name");
    GlycanNamescheme scheme = null;
    String schemeAttribute = xmlElement.getAttributeValue("scheme");
    if(schemeAttribute != null) {
        scheme = GlycanNamescheme.forName(schemeAttribute);
    }
    int msCount = 0;
    ArrayList<ResidueRepresentation> repList = new ArrayList<ResidueRepresentation>();
    ArrayList<MonosaccharideSynonym> aliasList = new ArrayList<MonosaccharideSynonym>();
    List<?> propList = xmlElement.getChildren();
    if(propList != null) {
        for(Object child : propList) {
            org.jdom.Element propertyTag = (org.jdom.Element) child;
            String propertyName = propertyTag.getName().toLowerCase();
            String propertyValue = propertyTag.getValue();
            if(propertyValue == null) {
                propertyValue = "";
            }
            if(propertyName.equalsIgnoreCase("name")) {
                // a name tag overrides the name attribute of the enclosing element
                msName = propertyValue;
                if(propertyTag.getAttributeValue("scheme") != null) {
                    scheme = GlycanNamescheme.forName(propertyTag.getAttributeValue("scheme"));
                }
            } else if(propertyName.equalsIgnoreCase("count")) {
                msCount = NumberUtils.parseIntStr(propertyValue, Integer.valueOf(-1));
            } else if(propertyName.equalsIgnoreCase("representation")) {
                ResidueRepresentation rep = getRepresentationFromXmlTag(propertyTag);
                if(rep != null) {
                    repList.add(rep);
                }
            } else if(propertyName.equalsIgnoreCase("alias")) {
                MonosaccharideSynonym alias = getAliasFromXmlTag(propertyTag);
                if(alias != null) {
                    aliasList.add(alias);
                }
            } else {
                System.err.println("unknown tag: " + propertyName);
            }
        }
    }
    if(msName == null || msName.length() == 0) {
        // Without this guard the name check below fails with a NullPointerException,
        // which would abort the import of all remaining entries.
        System.out.flush();
        System.err.println("skipped monosaccharide entry without name...");
        System.err.flush();
        return;
    }
    System.out.println("name / count: " + msName + " / " + msCount);
    if(msName.toLowerCase().indexOf("anhydro") >= 0) {
        System.out.flush();
        System.err.println("skipped anhydro residue...");
        System.err.flush();
        return;
    }
    if(scheme == null) {
        scheme = this.globalScheme;
    }
    try {
        Monosaccharide ms = new Monosaccharide(scheme, msName);
        MonosaccharideDataBuilder.buildDerivativeData(ms, this.getTemplateContainer());
        System.out.println("ms: " + ms.toString());
        // compare the name generated from the parsed residue with the input name
        MonosaccharideSynonym alias = ms.getPrimaryAliasObject(scheme);
        String aliasName = alias.getName();
        if(msName.equalsIgnoreCase(aliasName)) {
            System.out.print(" identical after parsing.");
        } else {
            System.out.flush();
            System.err.println("\n mismatch: " + aliasName);
            if(alias.getExternalSubstList().size() > 0) {
                System.err.println(" Subst:");
                for(Substitution subst : alias.getExternalSubstList()) {
                    System.err.println(" " + subst.toString());
                }
            }
            System.err.flush();
        }
        ms.setFuzzy(MonosaccharideValidation.checkFuzziness(ms));
        if(ms.isFuzzy()) {
            // fuzzy residues are only reported, never stored
            System.out.println(" - fuzzy");
        } else {
            System.out.println("");
            ms.buildRepresentations();
            ms.addRepresentations(repList);
            ms.addSynonyms(aliasList, true);
            if(ms.getRingStart() > 0 && ms.getSubstitutionsByPosition(ms.getRingStart()).size() > 0) {
                System.out.println("will not enter into db because of substitution at anomeric center: " + ms.getName());
            } else if(ms.getRingStart() > 0 && ms.hasCoreModification(CoreModificationTemplate.DEOXY, ms.getRingStart())) {
                System.out.println("will not enter into db because of deoxy modification at anomeric center: " + ms.getName());
            } else if(ms.getRingtype().equals(Ringtype.OPEN) && ms.hasCoreModification(CoreModificationTemplate.ANHYDRO)) {
                System.out.println("will not enter into db open chain residue with anhydro modification: " + ms.getName());
            } else {
                System.out.println("ok to insert into db: " + ms.getName());
                if(writeToDb) {
                    ms = HibernateAccess.storeOrUpdateMonosaccharide(ms, this.getTemplateContainer());
                    System.out.println("dbId: " + ms.getDbId());
                }
            }
        }
    } catch(Exception ex) {
        // best effort import: a failing entry is reported and the import continues
        System.out.flush();
        System.err.println("Exception: " + ex);
        System.err.flush();
    }
    System.out.println();
}
/**
 * Builds a residue representation from a "representation" tag of the seed file.
 * <p>
 * The tag text is appended to the import root path to form the name of the data file;
 * the attributes "type" and "format" select the representation type and format, and
 * "width" / "height" give the size (0 if not parseable).
 *
 * @param repTag the representation tag
 * @return the representation including its data, or null if the tag has no text,
 * type or format cannot be resolved, or no data was obtained from the file
 */
private static ResidueRepresentation getRepresentationFromXmlTag(org.jdom.Element repTag) {
    String dataFile = MsdbSeedImporter.rootpath + repTag.getValue();
    if(dataFile.length() <= MsdbSeedImporter.rootpath.length()) {
        // nothing was appended to the root path, i.e. no file is given
        return null;
    }
    ResidueRepresentationType repType = ResidueRepresentationType.forName(repTag.getAttributeValue("type"));
    ResidueRepresentationFormat repFormat = ResidueRepresentationFormat.forName(repTag.getAttributeValue("format"));
    if(repType == null || repFormat == null) {
        return null;
    }
    int repWidth = NumberUtils.parseIntStr(repTag.getAttributeValue("width"), 0);
    int repHeight = NumberUtils.parseIntStr(repTag.getAttributeValue("height"), 0);
    ResidueRepresentation representation = new ResidueRepresentation(repType, repFormat);
    representation.setSize(repWidth, repHeight);
    if(repFormat.isBinary()) {
        representation.setData(FileUtils.readBinaryFile(dataFile));
    } else {
        representation.setData(FileUtils.readTextFile(dataFile));
    }
    return representation.getData() == null ? null : representation;
}
/**
 * Builds a monosaccharide synonym from an "alias" tag of the seed file.
 * <p>
 * The namescheme is read from the attribute "scheme" (an exception raised while
 * resolving it is reported on stderr), the attribute "primary" marks the synonym as
 * primary (default: false), and the tag text is the synonym name.
 *
 * @param aliasTag the alias tag
 * @return the synonym, or null if no namescheme was resolved or the tag text is empty
 */
private static MonosaccharideSynonym getAliasFromXmlTag(org.jdom.Element aliasTag) {
    GlycanNamescheme aliasScheme;
    try {
        aliasScheme = GlycanNamescheme.forName(aliasTag.getAttributeValue("scheme"));
    } catch(Exception ex) {
        System.err.println("Cannot assign namescheme '" + aliasTag.getAttributeValue("scheme") + "':");
        System.err.println(ex);
        aliasScheme = null;
    }
    boolean primaryFlag = Utils.parseTrueFalseString(aliasTag.getAttributeValue("primary"), false);
    String synonymText = aliasTag.getValue();
    boolean usable = aliasScheme != null && synonymText != null && synonymText.length() > 0;
    if(!usable) {
        return null;
    }
    return new MonosaccharideSynonym(aliasScheme, synonymText, primaryFlag);
}
//*****************************************************************************
//*** methods to read alias file: *********************************************
//*****************************************************************************
/**
 * Reads a synonyms file and processes a window of its entries.
 * <p>
 * If the root element has an "ms_scheme" attribute, the named scheme becomes the
 * global namescheme of this importer; if the name cannot be resolved, a message is
 * written to stderr and {@code GlycanNamescheme.AUTO} is used instead.
 * The child elements of the root are numbered from 1 in document order, and every
 * child whose number n satisfies {@code offset < n <= offset + quantity} is handed to
 * parseAliasTagFromSynonymsFile.
 *
 * @param synonymFileUrl location of the xml synonyms file
 * @param offset number of leading entries to skip
 * @param quantity maximum number of entries to process
 * @throws ResourcesDbException if building the xml document fails with a JDOMException or
 * an IOException, or if processing an entry throws a ResourcesDbException
 */
public void parseSynonyms(URL synonymFileUrl, int offset, int quantity) throws ResourcesDbException {
    SAXBuilder parser = new SAXBuilder();
    try {
        Document doc = parser.build(synonymFileUrl);
        org.jdom.Element root = doc.getRootElement();
        String schemeName = root.getAttributeValue("ms_scheme");
        if(schemeName != null) {
            GlycanNamescheme scheme = GlycanNamescheme.forName(schemeName);
            if(scheme != null) {
                this.globalScheme = scheme;
            } else {
                System.err.println("cannot assign namescheme '" + schemeName + "'");
                this.globalScheme = GlycanNamescheme.AUTO;
            }
        }
        // The upper bound is computed in long arithmetic: with int arithmetic a call such as
        // (offset = 10, quantity = Integer.MAX_VALUE) wraps around to a negative bound and
        // silently processes nothing.
        long lastIndex = (long) offset + (long) quantity;
        long count = 0;
        List<?> templateList = root.getChildren();
        for(Object child : templateList) {
            count++;
            if(count > lastIndex) {
                // everything behind the requested window is ignored anyway
                break;
            }
            if(count > offset) {
                parseAliasTagFromSynonymsFile((org.jdom.Element) child);
            }
        }
    } catch (JDOMException je) {
        // NOTE(review): only the message is kept here; if ResourcesDbException offers a
        // constructor taking a cause, the original exception should be passed along.
        throw new ResourcesDbException("JDOMException: " + je.getMessage());
    } catch (IOException ie) {
        throw new ResourcesDbException("IOException: " + ie.getMessage());
    }
}
/**
 * Handles one entry of the synonyms file.
 * <p>
 * The entry names a monosaccharide (attributes "ms" and, optionally, "ms_scheme") and a
 * synonym for it (attributes "name" and "scheme"); the attribute "secondary" marks the
 * synonym as non-primary and "skip" excludes the entry. Entries lacking "ms" or "name"
 * are ignored silently.
 * If the monosaccharide is found in the database the synonym is added to it, otherwise
 * the monosaccharide is built together with the synonym and stored, unless it is fuzzy
 * or rejected by one of the checks below. Exceptions raised during this part are
 * reported on stderr and do not propagate.
 *
 * @param aliasTag the xml element of the entry
 * @throws ResourcesDbException if the namescheme of the synonym cannot be assigned
 */
private void parseAliasTagFromSynonymsFile(org.jdom.Element aliasTag) throws ResourcesDbException {
    final String residueName = aliasTag.getAttributeValue("ms");
    if(residueName == null || residueName.length() == 0) {
        return;
    }
    final String synonymName = aliasTag.getAttributeValue("name");
    if(synonymName == null || synonymName.length() == 0) {
        return;
    }
    boolean skipRequested = Utils.parseTrueFalseString(aliasTag.getAttributeValue("skip"), false);
    if(skipRequested) {
        System.out.println("skipped ms " + residueName);
        return;
    }
    final GlycanNamescheme synonymScheme = GlycanNamescheme.forName(aliasTag.getAttributeValue("scheme"));
    if(synonymScheme == null) {
        throw new ResourcesDbException("Cannot assign alias namescheme (" + aliasTag.getAttributeValue("scheme") + ")");
    }
    // entries without a resolvable scheme of their own use the scheme of the file
    GlycanNamescheme declaredScheme = GlycanNamescheme.forName(aliasTag.getAttributeValue("ms_scheme"));
    final GlycanNamescheme residueScheme = (declaredScheme != null) ? declaredScheme : this.globalScheme;
    boolean secondary = Utils.parseTrueFalseString(aliasTag.getAttributeValue("secondary"), false);
    final MonosaccharideSynonym synonym = new MonosaccharideSynonym(synonymScheme, synonymName, !secondary);
    try {
        System.out.println("process ms " + residueName);
        Monosaccharide ms = new Monosaccharide(residueScheme, residueName);
        ms.buildName();
        System.out.println("ms: " + ms.toString());
        Monosaccharide stored = HibernateAccess.getMonosaccharideFromDB(ms.getName());
        if(stored == null) {
            //*** monosaccharide is not yet present in database, enter it if appropriate: ***
            MonosaccharideDataBuilder.buildDerivativeData(ms, this.getTemplateContainer());
            ms.setFuzzy(MonosaccharideValidation.checkFuzziness(ms));
            if(ms.isFuzzy()) {
                System.out.println("Monosaccharide is fuzzy - will not enter into db.");
            } else {
                ms.buildRepresentations();
                ms.addSynonym(synonym);
                // first matching reason for keeping the residue out of the database, if any
                String refusal = null;
                if(ms.getRingStart() > 0 && ms.getSubstitutionsByPosition(ms.getRingStart()).size() > 0) {
                    refusal = "will not enter into db because of substitution at anomeric center: ";
                } else if(ms.getRingStart() > 0 && ms.hasCoreModification(CoreModificationTemplate.DEOXY, ms.getRingStart())) {
                    refusal = "will not enter into db because of deoxy modification at anomeric center: ";
                } else if(ms.getRingtype().equals(Ringtype.OPEN) && ms.hasCoreModification(CoreModificationTemplate.ANHYDRO)) {
                    refusal = "will not enter into db open chain residue with anhydro modification: ";
                }
                if(refusal != null) {
                    System.out.println(refusal + ms.getName());
                } else {
                    System.out.println("ok to insert into db: " + ms.getName());
                    ms = HibernateAccess.storeOrUpdateMonosaccharide(ms, this.getTemplateContainer());
                    System.out.println("dbId: " + ms.getDbId());
                    System.out.println(" ms: " + ms.toString());
                }
            }
        } else if(stored.addSynonym(synonym)) {
            //*** monosaccharide is already present in database and accepted the synonym: ***
            HibernateAccess.storeOrUpdateMonosaccharideSynonym(synonym);
            HibernateAccess.updateMonosaccharide(stored);
            System.out.println("added alias " + synonym + " to ms id " + stored.getDbId());
        } else {
            System.out.println("alias " + synonym + " was not added to ms id " + stored.getDbId());
        }
    } catch(Exception ex) {
        System.out.flush();
        System.err.println("Exception: " + ex);
        System.err.flush();
        ex.printStackTrace();
    }
    System.out.println();
}
}