package org.seqcode.genome;
import java.util.*;
import java.io.*;
import java.sql.*;
import org.seqcode.data.connections.DatabaseConnectionManager;
import org.seqcode.data.connections.DatabaseException;
import org.seqcode.data.connections.DatabaseSequence;
import org.seqcode.data.connections.UnknownRoleException;
import org.seqcode.genome.location.ChromosomeInfo;
import org.seqcode.gseutils.*;
/**
* Genome represents one version (or genome build) of some species.
* <i>Note</i>: We assume 1-based, inclusive coordinates.
*/
public class Genome{
//static cache of all Genomes
// Genomes cached under their version string (as stored by getAllGenomes and findGenome(String))
private static Map<String,Genome> staticGenomes = new HashMap<String,Genome>();
// Genomes cached under their database ID
private static Map<Integer, Genome> genomeids = new HashMap<Integer,Genome>();
// Species this genome build belongs to
private Species species;
// Name of the genome version (build)
private String version;
// Database ID of the genome; -1 for genomes built from a file or a length map
private int dbid;
// Chromosomes of this genome, looked up by chromosome name
private Map<String,ChromosomeInfo> chromsByName;
// The same chromosomes, looked up by chromosome database ID
private Map<Integer,ChromosomeInfo> chromsByID;
/**
 * Constructs a new Genome from a Species and a genome version by looking up
 * the genome's database ID and its chromosomes through the "core" connection role.
 *
 * @param species the species this genome build belongs to
 * @param version the name of the genome version (build)
 * @throws NotFoundException if no genome row matches the species and version
 * @throws DatabaseException if the query fails, or the core connection cannot be
 *         opened or closed
 */
public Genome(Species species, String version) throws NotFoundException {
    this.species = species;
    this.version = version;
    Connection cxn = null;
    PreparedStatement ps = null;
    ResultSet rs = null;
    try {
        cxn = DatabaseConnectionManager.getConnection("core");
        // Bind the values rather than concatenating them, so that a version
        // containing a quote can neither break nor alter the query.
        ps = cxn.prepareStatement("select id from genome where species = ? and version = ?");
        ps.setInt(1, species.getDBID());
        ps.setString(2, version);
        rs = ps.executeQuery();
        if (rs.next()) {
            dbid = rs.getInt(1);
        } else {
            throw new NotFoundException("Couldn't find " + species.getName() + " version " + version);
        }
        fillChroms(cxn);
    } catch (SQLException ex) {
        ex.printStackTrace();
        throw new DatabaseException("Couldn't find " + species + ": "+ ex.toString(),ex);
    } catch (UnknownRoleException ex) {
        ex.printStackTrace();
        // Keep the cause so the original failure is not lost
        throw new DatabaseException("Couldn't connect with role core", ex);
    } finally {
        // Closing is best-effort for the result set and statement
        if (rs != null) { try { rs.close(); } catch (SQLException ex) { } }
        if (ps != null) { try { ps.close(); } catch (SQLException ex) { } }
        if (cxn != null) {
            try {
                cxn.close();
            } catch (Exception ex) {
                throw new DatabaseException("Couldn't close connection with role core", ex);
            }
        }
    }
}
/**
 * Constructs a new Genome from almost complete info.
 * Only used when a connection is open already for populating chromosome lengths.
 * Only used within the static methods below to populate a table of all genomes.
 *
 * @param s the species this genome build belongs to
 * @param dbid the database ID of the genome
 * @param version the name of the genome version (build)
 * @param cxn an open connection used to load the chromosomes; it is not closed here
 * @throws NotFoundException declared for callers; not thrown by this body
 * @throws DatabaseException if the chromosomes cannot be read from the database
 */
private Genome(Species s, int dbid, String version, Connection cxn) throws NotFoundException {
    this.species = s;
    this.version = version;
    this.dbid = dbid;
    try {
        fillChroms(cxn);
    } catch (SQLException e) {
        // Fail loudly: swallowing this would hand out (and cache) a Genome whose
        // chromosome tables are empty or only partially filled.
        throw new DatabaseException("Couldn't load chromosomes for genome " + version + ": " + e.toString(), e);
    }
}
/**
 * Construct a Genome from a file of chromosome lengths.
 * Each usable line holds whitespace-separated columns: chromosome name, length and,
 * when IDs are not invented, a chromosome ID. A leading "chromosome", "chrom" or
 * "chr" is stripped from the name. Lines with fewer than two columns are ignored.
 * If the file does not exist the JVM is terminated with exit status 1.
 * Read errors and unparsable numbers are reported on stderr and stop the reading;
 * the chromosomes read so far are kept.
 *
 * @param tempName name used as the version of this genome
 * @param chrLengths file of chromosome lengths
 * @param inventids if true, IDs are assigned sequentially starting at 0;
 *        otherwise the ID is read from the third column of each line
 */
public Genome(String tempName, File chrLengths, boolean inventids) {
    species = new Species(-1, "FakeOrganism");
    version = tempName;
    dbid = -1;
    chromsByName = new HashMap<String,ChromosomeInfo>();
    chromsByID = new HashMap<Integer,ChromosomeInfo>();
    if(!chrLengths.isFile()){System.err.println("Invalid genome info file name");System.exit(1);}
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new FileReader(chrLengths));
        String line;
        int id = 0;
        // Without invented IDs a line must also carry the ID column
        int requiredColumns = inventids ? 2 : 3;
        while ((line = reader.readLine()) != null) {
            String[] words = line.trim().split("\\s+");
            if (words.length < 2) {
                continue;
            }
            if (words.length < requiredColumns) {
                // Previously this indexed words[2] unguarded and died with an
                // ArrayIndexOutOfBoundsException; report the line and move on.
                System.err.println("Skipping genome info line without a chromosome ID column: " + line);
                continue;
            }
            String chr = words[0].replaceFirst("^chromosome", "");
            chr = chr.replaceFirst("^chrom", "");
            chr = chr.replaceFirst("^chr", "");
            int length = Integer.parseInt(words[1]);
            ChromosomeInfo info;
            if (inventids) {
                info = new ChromosomeInfo(id++, length, chr);
            } else {
                info = new ChromosomeInfo(Integer.parseInt(words[2]), length, chr);
            }
            chromsByName.put(info.getName(), info);
            chromsByID.put(info.getDBID(), info);
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (NumberFormatException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always release the file handle; a failure to close is not actionable here
        if (reader != null) { try { reader.close(); } catch (IOException ex) { } }
    }
}
/**
 * Builds a Genome from chromosome names mapped to their lengths
 * (mostly used to merge fake genomes that are data generated).
 * Chromosome IDs are made up: the first chromosome gets 0 and each following
 * one gets the previous ID minus one.
 *
 * @param tempName name used as the version of this genome
 * @param chrLengthMap chromosome name to chromosome length
 */
public Genome(String tempName, Map<String, Integer> chrLengthMap) {
    this.species = new Species(-1, "FakeOrganism");
    this.version = tempName;
    this.dbid = -1;
    this.chromsByName = new HashMap<String,ChromosomeInfo>();
    this.chromsByID = new HashMap<Integer,ChromosomeInfo>();
    int nextId = 0;
    for (Map.Entry<String, Integer> entry : chrLengthMap.entrySet()) {
        ChromosomeInfo chromosome = new ChromosomeInfo(nextId, entry.getValue(), entry.getKey());
        nextId--;
        this.chromsByName.put(chromosome.getName(), chromosome);
        this.chromsByID.put(chromosome.getDBID(), chromosome);
    }
}
/**
 * Retrieves the chromosomes for this Genome from the database and fills
 * the relevant data structures: chromsByName and chromsByID.
 * Both maps are replaced by fresh ones before the query runs.
 *
 * @param cxn open connection to query; it is left open
 * @throws SQLException if the query fails
 * @throws IllegalArgumentException if a chromosome name or ID occurs more than once
 */
private void fillChroms(Connection cxn) throws SQLException {
    chromsByName = new HashMap<String,ChromosomeInfo>();
    chromsByID = new HashMap<Integer,ChromosomeInfo>();
    Statement stmt = null;
    ResultSet rs = null;
    try {
        stmt = cxn.createStatement();
        rs = stmt.executeQuery("select c.id, c.name, cs.len from chromosome c, chromsequence cs " +
                "where c.id=cs.id and c.genome=" + dbid);
        while (rs.next()) {
            // Named chromID so it does not shadow the genome's own dbid field
            int chromID = rs.getInt(1);
            String name = rs.getString(2);
            int length = rs.getInt(3);
            ChromosomeInfo info = new ChromosomeInfo(chromID, length, name);
            if (chromsByName.containsKey(name) || chromsByID.containsKey(chromID)) {
                throw new IllegalArgumentException("Duplicate name \"" + name +
                        "\" seems to exist in genome " + version);
            }
            chromsByName.put(name, info);
            chromsByID.put(chromID, info);
        }
    } finally {
        // Release the statement and result set on failure paths too
        if (rs != null) { try { rs.close(); } catch (SQLException ex) { } }
        if (stmt != null) { try { stmt.close(); } catch (SQLException ex) { } }
    }
}
//Accessors
/** @return the name of this genome version (build) */
public String getVersion() {
    return version;
}
/** @return the database ID of this genome */
public int getDBID() {
    return dbid;
}
/** @return the species this genome belongs to */
public Species getSpecies() {
    return species;
}
/** @return the name of this genome's species */
public String getSpeciesName() {
    return species.getName();
}
/** @return the database ID of this genome's species */
public int getSpeciesDBID() {
    return species.getDBID();
}
/** @return the species name and the version, separated by a comma */
@Override
public String toString() {
    return getSpeciesName() + "," + getVersion();
}
//Chromosome-related accessors
/** @return the chromosomes of this genome, or null if the chromosome table is unset */
public Collection<ChromosomeInfo> getChromInfo() {
    if (chromsByName == null) {
        return null;
    }
    return chromsByName.values();
}
/** @return a new list holding the names of all chromosomes */
public List<String> getChromList() {
    return new LinkedList<String>(chromsByName.keySet());
}
/** @return the chromosome with the given name, or null if there is none */
public ChromosomeInfo getChrom(String name) {
    return chromsByName.get(name);
}
/** @return true if this genome has a chromosome with the given name */
public boolean containsChromName(String chromName) {
    return chromsByName.containsKey(chromName);
}
/**
 * @param chromID database ID of a chromosome
 * @return the name of that chromosome
 * @throws NullPointerException if this genome has no chromosome with that ID
 */
public String getChromName(int chromID) {
    ChromosomeInfo info = chromsByID.get(chromID);
    if (info == null) {
        // Same exception type as before, but now it says which ID was missing
        throw new NullPointerException("Null chromosome for ID " + chromID);
    }
    return info.getName();
}
/**
 * @param chromName name of a chromosome
 * @return the database ID of that chromosome
 * @throws NullPointerException if this genome has no chromosome with that name
 */
public int getChromID(String chromName) {
    ChromosomeInfo info = chromsByName.get(chromName);
    if (info == null) {
        throw new NullPointerException("Null chromosome for " + chromName);
    }
    return info.getDBID();
}
/**
 * @param chromName name of a chromosome
 * @return the length of that chromosome
 * @throws NullPointerException if this genome has no chromosome with that name
 */
public int getChromLength(String chromName) {
    ChromosomeInfo info = chromsByName.get(chromName);
    if (info == null) {
        // Same exception type as before, but now it says which name was missing
        throw new NullPointerException("Null chromosome for " + chromName);
    }
    return info.getLength();
}
/** @return a new map from chromosome name to chromosome length */
public Map<String,Integer> getChromLengthMap() {
    Map<String,Integer> chromLengths = new HashMap<String,Integer>();
    for (Map.Entry<String,ChromosomeInfo> e : chromsByName.entrySet()) {
        chromLengths.put(e.getKey(), e.getValue().getLength());
    }
    return chromLengths;
}
/**
 * Describes the genome as text: one line per chromosome, each made of the
 * chromosome name, a tab, the chromosome length and a newline.
 *
 * @return the chromosome name/length listing
 */
public String getGenomeInfo() {
    StringBuilder listing = new StringBuilder();
    for (Map.Entry<String,ChromosomeInfo> entry : chromsByName.entrySet()) {
        listing.append(entry.getKey());
        listing.append("\t");
        listing.append(entry.getValue().getLength());
        listing.append("\n");
    }
    return listing.toString();
}
/**
 * Adds up the lengths of all chromosomes of this genome.
 *
 * @return the summed chromosome length, as a double
 */
public double getGenomeLength() {
    double sum = 0;
    Iterator<ChromosomeInfo> chromosomes = chromsByName.values().iterator();
    while (chromosomes.hasNext()) {
        sum = sum + (double) chromosomes.next().getLength();
    }
    return sum;
}
//Roman numeral to integer translation helpers
private static int[] romvals;
// Numeral for each units digit 1-9; slot 0 holds "X", which is appended once per ten.
// Built eagerly: the former lazy fill published the array before its slots were set,
// so a concurrent caller could read null entries.
private static final String[] intvals = {
    "X", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX"
};
/**
 * Converts a decimal chromosome number given as text into a "chr"-prefixed
 * Roman numeral name.
 *
 * @param c decimal chromosome number, e.g. "4"
 * @return the Roman name, e.g. "chrIV"
 * @throws NumberFormatException if c is not a parsable integer
 */
public static String convertChromNameToRoman(String c) {
    return convertChromNameToRoman(Integer.parseInt(c));
}
/**
 * Converts a chromosome number into a "chr"-prefixed Roman numeral name.
 * Only the symbols X, V and I are produced: every full ten becomes one "X",
 * so numbers from 40 upwards are written additively ("XXXX...") and not
 * with L or C. Zero and negative numbers yield just "chr".
 *
 * @param chrom chromosome number
 * @return the Roman name, e.g. "chrXIV" for 14
 */
public static String convertChromNameToRoman(int chrom) {
    StringBuilder sb = new StringBuilder("chr");
    while (chrom >= 10) {
        chrom -= 10;
        sb.append(intvals[0]);
    }
    if (chrom > 0) {
        sb.append(intvals[chrom]);
    }
    return sb.toString();
}
/**
 * Normalizes a chromosome name: strips a trailing sequence-file extension
 * matched by <code>\.fa?s?t?a$</code> (e.g. ".fa", ".fasta") and a leading "chr"
 * in any letter case, then checks that what remains consists of the
 * characters 0-9, M, m, t, U, n, X, Y, optionally followed by "_random" and
 * one of L, R or h.
 * Despite its name this method does not translate Roman numerals; names such
 * as "chrIV" are rejected. See convertYeastChromNameFromRoman for that.
 *
 * @param chrom chromosome name, e.g. "chr12.fa"
 * @return the normalized name, e.g. "12"
 * @throws NumberFormatException if the stripped name has any other form
 */
public static String convertChromNameFromRoman(String chrom) {
    // The Roman digit table that used to be set up here was never read by this method
    String chr = chrom.replaceAll("\\.fa?s?t?a$","");
    if (chr.matches("^[cC][hH][rR].*")) {
        chr = chr.substring(3);
    }
    if (chr.matches("^[1234567890MmtUnXY]+(_random)?[LRh]?$")) {
        return chr;
    }
    throw new NumberFormatException("Can't fix chrom name " + chrom + "," + chr);
}
/**
 * Normalizes a yeast chromosome name and translates Roman numerals to decimal.
 * A trailing sequence-file extension matched by <code>\.fa?s?t?a$</code> and a
 * leading "chr" (any letter case) are stripped first. Then:
 * <ul>
 * <li>a name made only of X, V and I, optionally followed by "_random", is read
 *     as a Roman numeral ("IV" gives "4", "IV_random" gives "4_random"); note
 *     that "X" therefore becomes "10";</li>
 * <li>a name made of the characters 0-9, M, U, X, Y, optionally followed by
 *     "_random" and one of L, R or h, is returned as is;</li>
 * <li>"Mito" becomes "mt".</li>
 * </ul>
 *
 * @param chrom chromosome name, e.g. "chrXVI.fa"
 * @return the normalized name, e.g. "16"
 * @throws NumberFormatException if the stripped name has none of these forms
 */
public static String convertYeastChromNameFromRoman(String chrom) {
    String chr = chrom.replaceAll("\\.fa?s?t?a$","");
    if (chr.matches("^[cC][hH][rR].*")) {
        chr = chr.substring(3);
    }
    // Split off "_random" before testing for a Roman numeral. The previous code
    // looked for the suffix only after the name had already matched ^[XVI]+$,
    // so it could never be found and "IV_random" was rejected.
    String suffix = "";
    String core = chr;
    if (chr.endsWith("_random")) {
        suffix = "_random";
        core = chr.substring(0, chr.length() - suffix.length());
    }
    if (core.matches("^[XVI]+$")) {
        return Integer.toString(parseYeastRoman(core)) + suffix;
    }
    if (chr.matches("^[1234567890MUXY]+(_random)?[LRh]?$")) {
        return chr;
    } else if (chr.matches("Mito")) {
        return "mt";
    } else {
        throw new NumberFormatException("Can't fix chrom name " + chrom + "," + chr);
    }
}
/** Value of a non-empty Roman numeral made only of the symbols X, V and I. */
private static int parseYeastRoman(String roman) {
    int total = 0;
    char last = roman.charAt(0);
    int lastval = romanDigitValue(last);
    // buffer collects a run of equal symbols until it is known whether the
    // next symbol is larger (subtractive pair) or smaller (run is added)
    int buffer = lastval;
    for (int pos = 1; pos < roman.length(); pos++) {
        char cur = roman.charAt(pos);
        int curval = romanDigitValue(cur);
        if (curval > lastval) {
            total += curval - lastval;
            buffer = 0;
        } else if (cur != last) {
            total += buffer;
            buffer = curval;
        } else {
            buffer += curval;
        }
        last = cur;
        lastval = curval;
    }
    return total + buffer;
}
/** Value of one Roman symbol: X=10, V=5, I=1, anything else 0. */
private static int romanDigitValue(char symbol) {
    switch (symbol) {
        case 'X': return 10;
        case 'V': return 5;
        case 'I': return 1;
        default: return 0;
    }
}
/**
 * Opens a connection to the annotation database of this genome. The
 * connection role is "ucsc_" followed by the genome version, in which every
 * character other than a word character or '-' is replaced by '_'.
 *
 * @return the connection obtained from the DatabaseConnectionManager
 * @throws SQLException if the connection manager reports one
 * @throws DatabaseException if the role is unknown
 */
public Connection getAnnotationDBConnection() throws SQLException {
    //We should store these table names in core and load at runtime
    String role = "ucsc_" + this.getVersion().replaceAll("[^\\w\\-]","_");
    try {
        return DatabaseConnectionManager.getConnection(role);
    } catch (UnknownRoleException ex) {
        String message = "Couldn't create a database connection for genome " + getVersion();
        throw new DatabaseException(message, ex);
    }
}
/**
 * Returns all Genomes, loading them from the database when the static cache
 * is empty or a refresh is forced, and otherwise from the cache.
 * On a load the caches are replaced only after every row has been read, so a
 * database failure leaves the previously cached genomes in place.
 * A genome that raises NotFoundException while being built is reported on
 * stderr and left out; the remaining genomes are still loaded.
 *
 * @param forceRefreshFromDB if true, reload from the database even when the cache is filled
 * @return a new list of the genomes
 * @throws DatabaseException if the query fails or the core connection cannot be
 *         opened or closed
 */
public static Collection<Genome> getAllGenomes(boolean forceRefreshFromDB){
    List<Genome> gens = new ArrayList<Genome>();
    if (!staticGenomes.isEmpty() && !forceRefreshFromDB) {
        gens.addAll(staticGenomes.values());
        return gens;
    }
    Connection cxn = null;
    Statement stmt = null;
    ResultSet rs = null;
    try {
        cxn = DatabaseConnectionManager.getConnection("core");
        stmt = cxn.createStatement();
        rs = stmt.executeQuery("select id, species, version from genome");
        while (rs.next()) {
            int genomeID = rs.getInt(1);
            int speciesID = rs.getInt(2);
            String genomeVersion = rs.getString(3);
            try {
                gens.add(new Genome(Species.getSpecies(speciesID), genomeID, genomeVersion, cxn));
            } catch (NotFoundException e) {
                // Caught per row: one unresolvable genome must not hide all the
                // genomes that come after it in the result set.
                e.printStackTrace();
            }
        }
    } catch (SQLException ex) {
        ex.printStackTrace();
        throw new DatabaseException("mySQL error: " + ex.toString(), ex);
    } catch (UnknownRoleException ex) {
        ex.printStackTrace();
        throw new DatabaseException("Couldn't connect with role core", ex);
    } finally {
        if (rs != null) { try { rs.close(); } catch (SQLException ex) { } }
        if (stmt != null) { try { stmt.close(); } catch (SQLException ex) { } }
        if (cxn != null) {
            try {
                cxn.close();
            } catch (Exception ex) {
                throw new DatabaseException("Couldn't close connection with role core", ex);
            }
        }
    }
    // Only reached after a complete load: swap the cache contents now
    staticGenomes.clear();
    genomeids.clear();
    for (Genome gen : gens) {
        staticGenomes.put(gen.getVersion(), gen);
        genomeids.put(gen.getDBID(), gen);
    }
    return gens;
}
/**
 * Collects the cached Genomes whose species name equals the name of the given
 * Species. If the cache is empty it is first loaded from the database.
 *
 * @param s the species whose genomes are wanted
 * @return a new list of the matching genomes, possibly empty
 * @throws NotFoundException declared for callers; not thrown by this body
 */
public static Collection<Genome> getAllGenomesBySpecies(Species s) throws NotFoundException {
    if (staticGenomes.isEmpty()) {
        getAllGenomes(true);
    }
    List<Genome> matches = new ArrayList<Genome>();
    Iterator<Genome> cached = staticGenomes.values().iterator();
    while (cached.hasNext()) {
        Genome candidate = cached.next();
        boolean sameSpecies = candidate.getSpeciesName().equals(s.getName());
        if (sameSpecies) {
            matches.add(candidate);
        }
    }
    return matches;
}
/**
 * Finds a Genome by its database ID, using the static cache first and the
 * database second. A genome found in the database is added to both caches.
 *
 * @param gid database ID of the genome
 * @return the genome with that ID
 * @throws NotFoundException if no genome has that ID
 * @throws DatabaseException if the query fails or the core connection cannot be
 *         opened or closed
 */
public static Genome findGenome(int gid) throws NotFoundException {
    if (staticGenomes.isEmpty()) {
        getAllGenomes(false);
    }
    if (genomeids.containsKey(gid)) {
        return genomeids.get(gid);
    }
    Connection cxn = null;
    Statement stmt = null;
    ResultSet rs = null;
    try {
        cxn = DatabaseConnectionManager.getConnection("core");
        stmt = cxn.createStatement();
        rs = stmt.executeQuery("select version, species from genome where id=" + gid);
        if (!rs.next()) {
            throw new NotFoundException("Couldn't find genome: " + gid);
        }
        String genomeName = rs.getString(1);
        int orgID = rs.getInt(2);
        Genome g = new Genome(Species.getSpecies(orgID), genomeName);
        genomeids.put(gid, g);
        // The name cache is keyed by version everywhere else; this used to store
        // the genome under its species name, where name lookups never find it.
        staticGenomes.put(g.getVersion(), g);
        return g;
    } catch (SQLException se) {
        throw new DatabaseException("SQLException: " + se.getMessage(), se);
    } catch (UnknownRoleException ex) {
        throw new DatabaseException("Couldn't connect with role core", ex);
    } finally {
        if (rs != null) { try { rs.close(); } catch (SQLException ex) { } }
        if (stmt != null) { try { stmt.close(); } catch (SQLException ex) { } }
        if (cxn != null) {
            try {
                cxn.close();
            } catch (Exception ex) {
                throw new DatabaseException("Couldn't close connection with role core", ex);
            }
        }
    }
}
/**
 * Finds a Genome by its version name, using the static cache first and the
 * database second. A genome found in the database is added to both caches.
 *
 * @param genomeName version name of the genome
 * @return the genome with that version name
 * @throws NotFoundException if no genome has that version name
 * @throws DatabaseException if the query fails or the core connection cannot be
 *         opened or closed
 */
public static Genome findGenome(String genomeName) throws NotFoundException {
    if (staticGenomes.isEmpty()) {
        getAllGenomes(false);
    }
    if (staticGenomes.containsKey(genomeName)) {
        return staticGenomes.get(genomeName);
    }
    Connection cxn = null;
    PreparedStatement ps = null;
    ResultSet rs = null;
    try {
        cxn = DatabaseConnectionManager.getConnection("core");
        // Bind the name rather than concatenating it, so that a quote in the
        // name can neither break nor alter the query.
        ps = cxn.prepareStatement("select species from genome where version = ?");
        ps.setString(1, genomeName);
        rs = ps.executeQuery();
        if (!rs.next()) {
            throw new NotFoundException("Couldn't find genome: " + genomeName);
        }
        int orgID = rs.getInt(1);
        Genome g = new Genome(Species.getSpecies(orgID), genomeName);
        staticGenomes.put(genomeName, g);
        genomeids.put(g.getDBID(), g);
        return g;
    } catch (SQLException se) {
        throw new DatabaseException("SQLException: " + se.getMessage(), se);
    } catch (UnknownRoleException ex) {
        throw new DatabaseException("Couldn't connect with role core", ex);
    } finally {
        if (rs != null) { try { rs.close(); } catch (SQLException ex) { } }
        if (ps != null) { try { ps.close(); } catch (SQLException ex) { } }
        if (cxn != null) {
            try {
                cxn.close();
            } catch (Exception ex) {
                throw new DatabaseException("Couldn't close connection with role core", ex);
            }
        }
    }
}
/**
 * Returns the names under which genomes are held in the static cache,
 * loading the cache from the database first when it is empty or a refresh
 * is forced.
 *
 * @param forceRefreshFromDB if true, reload the cache from the database first
 * @return a new set of the cached names; changing it does not affect the cache
 */
public static Collection<String> getAllGenomeNames(boolean forceRefreshFromDB) {
    if (staticGenomes.isEmpty() || forceRefreshFromDB) {
        getAllGenomes(forceRefreshFromDB);
    }
    // Hand out a copy: the live key set let callers remove genomes from the
    // cache, and iterating it failed when the cache was refreshed meanwhile.
    return new LinkedHashSet<String>(staticGenomes.keySet());
}
/**
 * Insert a new Genome into the database. The static caches are not touched.
 *
 * @param species the species the new genome belongs to
 * @param version the name of the new genome version (build)
 * @throws SQLException if the insert fails
 * @throws DatabaseException if the core role is unknown or the connection
 *         cannot be closed
 */
public static void insertGenome(Species species, String version) throws SQLException {
    Connection cxn = null;
    PreparedStatement ps = null;
    try {
        cxn = DatabaseConnectionManager.getConnection("core");
        // The ID expression is SQL text supplied by DatabaseSequence and has to be
        // spliced in; the caller-supplied values are bound as parameters so that a
        // quote in the version can neither break nor alter the statement.
        String nextIdString = DatabaseSequence.getInsertSQL(cxn, "genome_id");
        ps = cxn.prepareStatement("insert into genome(id, species, version) values (" + nextIdString + ", ?, ?)");
        ps.setInt(1, species.getDBID());
        ps.setString(2, version);
        ps.executeUpdate();
    } catch (UnknownRoleException ex) {
        throw new DatabaseException("Couldn't connect with role core", ex);
    } finally {
        if (ps != null) { try { ps.close(); } catch (SQLException ex) { } }
        if (cxn != null) {
            try {
                cxn.close();
            } catch (Exception ex) {
                throw new DatabaseException("Couldn't close connection with role core", ex);
            }
        }
    }
}
/**
 * Hash code built from the species name and the version, matching equals.
 */
@Override
public int hashCode() {
    int speciesHash = getSpeciesName().hashCode();
    int versionHash = getVersion().hashCode();
    return speciesHash * 37 + versionHash;
}
/**
 * Two Genomes are equal when both their species names and their versions
 * are equal; any other object is unequal.
 */
@Override
public boolean equals(Object o) {
    if (!(o instanceof Genome)) {
        return false;
    }
    Genome that = (Genome) o;
    if (!getSpeciesName().equals(that.getSpeciesName())) {
        return false;
    }
    return getVersion().equals(that.getVersion());
}
}