/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1549 $ by $Author: glycoslave $ on $Date:: 2009-07-19 #$
*/
package org.eurocarbdb.action.admin;
import org.eurocarbdb.action.EurocarbAction;
import org.eurocarbdb.dataaccess.Eurocarb;
import org.eurocarbdb.dataaccess.EntityManager;
import org.eurocarbdb.dataaccess.core.GlycanSequence;
import org.eurocarbdb.dataaccess.core.Taxonomy;
import org.eurocarbdb.dataaccess.core.BiologicalContext;
import org.eurocarbdb.dataaccess.core.Reference;
import org.eurocarbdb.dataaccess.core.ExternalDatabaseReference;
import org.eurocarbdb.sugar.SugarSequence;
import com.opensymphony.xwork.Action;
import au.com.bytecode.opencsv.CSVReader;
import java.util.*;
import java.io.*;
import org.apache.commons.io.*;
import org.apache.log4j.Logger;
import static org.eurocarbdb.dataaccess.Eurocarb.getEntityManager;
public class ImportGlycomeDB extends EurocarbAction implements org.eurocarbdb.action.RequiresAdminLogin
{
static final Logger log = Logger.getLogger( ImportGlycomeDB.class );
private File glycomedbFile = null;
private String glycomedbFileContentType = null;
private String glycomedbFileFileName = null;
private int from_line = -1;
private int to_line = -1;
public File getGlycomedbFile()
{
return this.glycomedbFile;
}
public void setGlycomedbFile(File file)
{
this.glycomedbFile = file;
}
public String getGlycomedbFileContentType()
{
return this.glycomedbFileContentType;
}
public void setGlycomedbFileContentType(String contentType)
{
this.glycomedbFileContentType = contentType;
}
public String getGlycomedbFileFileName()
{
return this.glycomedbFileFileName;
}
public void setGlycomedbFileFileName(String filename)
{
this.glycomedbFileFileName = filename;
}
public void setFromLine(int l) {
from_line = l;
}
public int getFromLine() {
return from_line;
}
public void setToLine(int l) {
to_line = l;
}
public int getToLine() {
return to_line;
}
public String execute() throws Exception
{
if( glycomedbFile==null ) {
this.addFieldError( "glycomedbFile", "You must specify the file containing the glycomedb exported as CSV" );
return "error";
}
try
{
getEntityManager().beginUnitOfWork();
// parse CSV
CSVReader parser = new CSVReader(new FileReader(glycomedbFile),',','\"');
int line_num = 0;
int count = 0;
String[] line;
while( (line = parser.readNext())!=null ) {
// check format
if( line.length!=4 )
throw new Exception("Invalid number of fields in record " + count);
// check from/to
line_num++;
if( from_line>=0 && line_num<from_line )
continue;
if( to_line>=0 && line_num>to_line )
break;
log.debug("line " + line_num);
// commit if necessary
if( count>0 && (count%100)==0 ) {
getEntityManager().endUnitOfWork();
getEntityManager().beginUnitOfWork();
}
// create glycan sequence
GlycanSequence glycanSequence = null;
try {
SugarSequence seq = new SugarSequence( line[0] );
glycanSequence = GlycanSequence.lookupOrCreateNew( seq );
glycanSequence.validate();
log.info("sequence validated");
}
catch(Exception ex) {
log.error(ex.getMessage());
continue;
}
if( glycanSequence.getGlycanSequenceId()<=0 ) {
log.debug("storing sequence");
Eurocarb.getEntityManager().store(glycanSequence);
}
else
log.debug("sequence exists " + glycanSequence.getGlycanSequenceId());
// add biological context
if( line[1]!=null && line[1].length()>0 ) {
Taxonomy tax = Taxonomy.lookupNcbiId( Integer.valueOf(line[1]) );
if( tax!=null ) {
Eurocarb.getEntityManager().refresh(tax);
BiologicalContext bc = new BiologicalContext();
bc.setTaxonomy(tax);
Eurocarb.getEntityManager().store(bc);
glycanSequence.addBiologicalContext(bc);
}
}
// add reference
if( line[2]!=null && line[2].length()>0 ) {
if( line[2].equals("carbbank") )
line[2] = "Carbbank";
Reference ref = ExternalDatabaseReference.lookupOrCreateNew(line[2],line[3]);
if( ref.getReferenceId()<=0 )
Eurocarb.getEntityManager().store(ref);
glycanSequence.addReference(ref);
}
// update
Eurocarb.getEntityManager().update(glycanSequence);
count++;
}
// final commit
getEntityManager().endUnitOfWork();
}
catch(Exception e) {
// rollback on errors
getEntityManager().abortUnitOfWork();
throw e;
}
return "success";
}
public static int main(String[] args) throws Exception {
if( args.length==0 ) {
System.err.println("You must specify the file containing the glycomedb exported as CSV" );
return(-1);
}
ImportGlycomeDB action = new ImportGlycomeDB();
action.setGlycomedbFile(new File(args[0]));
if( args.length>1 )
action.setFromLine(Integer.valueOf(args[1]));
action.execute();
return 0;
}
}