/* * Copyright (C) Lennart Martens * * Contact: lennart.martens AT UGent.be (' AT ' to be replaced with '@') */ /** * Created by IntelliJ IDEA. * User: Lennart * Date: 1-jul-2004 * Time: 15:08:17 */ package com.compomics.util.protein; import org.apache.log4j.Logger; import java.io.*; import java.sql.*; import java.util.*; /* * CVS information: * * $Revision: 1.3 $ * $Date: 2007/07/06 09:41:53 $ */ /** * This class can be used to generate a Modification instance from a code or title. <br> * Modification information is loaded from files or database, as specified in the two available * constructors for this Factory. * * @author Lennart Martens */ public class ModificationFactory { // Class specific log4j logger for ModificationFactory instances. static Logger logger = Logger.getLogger(ModificationFactory.class); /** * Constant for a key in the ModificationFactory.properties file. */ private static final String RDBMS = "RDBMS"; /** * Constant for a key in the ModificationFactory.properties file. */ private static final String RDBDRIVER = "RDBDRIVER"; /** * Constant for a key in the ModificationFactory.properties file. */ private static final String TABLE = "TABLE"; /** * Constant for a key in the ModificationFactory.properties file. */ private static final String USER = "USER"; /** * Constant for a key in the ModificationFactory.properties file. */ private static final String PASSWORD = "PASSWORD"; /** * Constant for a key in the ModificationFactory.properties file. */ private static final String MODFILE = "MODFILE"; /** * Constant for a key in the ModificationFactory.properties file. */ private static final String CODEFILE = "CODEFILE"; /** * This HashMap holds will hold all the data necessary to create a specific modification. */ private static HashMap allMods = null; /** * This HashMap maps a modification code to a modification title. */ private static HashMap codeToTitle = null; /** * This HashMap maps a modification title to a modification code. */ private static HashMap titleToCode = null; /** * This boolean is set to true when the factory is initialized. */ private static boolean iInitialized = false; /** * This Factory is fully static. */ private ModificationFactory() { } /** * This method will return a Modification instance from a title. * * @param aTitle String with the title for a modification. * @param aLocation int with the location for the modification. * @return Modification with the corresponding modification, or 'null' when the * modification was not found. */ public static com.compomics.util.interfaces.Modification getModification(String aTitle, int aLocation) { com.compomics.util.interfaces.Modification mod = null; // See if the factory has been initialized. checkInit(); // Find the title. if (allMods.containsKey(aTitle)) { ModificationTemplate template = (ModificationTemplate) allMods.get(aTitle); mod = new ModificationImplementation(template, aLocation); } return mod; } /** * This method takes a code and a residue (the residue for the N-terminus is NTERMINUS and for the * C-terminus CTERMINUS!) and converts this into a Modification instance if possible. * * @param aCode String with the code for the modification * @param aResidue String with the residue carrying the modification (the residue for the N-terminus * is defined in the constant NTERMINUS and for the C-terminus in the constant CTERMINUS!). * @param aLocation int with the location for the modification. * @return Modification with the corresponding modification, or 'null' when the * modification was not found. */ public static com.compomics.util.interfaces.Modification getModification(String aCode, String aResidue, int aLocation) { com.compomics.util.interfaces.Modification mod = null; // See if the factory has been initialized. checkInit(); // Try and find the key without forgetting about possible duplicates. Iterator it = codeToTitle.keySet().iterator(); while (it.hasNext()) { String code = (String) it.next(); if (code.startsWith(aCode) && ((code.length() - aCode.length()) == 0 || (code.length() - aCode.length()) == 1)) { String title = (String) codeToTitle.get(code); ModificationTemplate template = (ModificationTemplate) allMods.get(title); // If the allDetails Object[] is 'null', we have encountered a code that maps to a title // which is NOT present in the 'allMods' HashMap, and therefore is probably not present in the // 'modifications.txt'. if (template == null) { continue; } Collection residues = template.getResidues(); if (residues.contains(aResidue)) { if (mod == null) { mod = new ModificationImplementation(template, aLocation); } } } } return mod; } /** * Returns a String representation of the modifications. * * @return String with a String representation of the modification. */ public static String modificationsToString() { StringBuffer sb = new StringBuffer(); // See if the factory has been initialized. checkInit(); // Cycle all keys. Set keyset = allMods.keySet(); String[] keys = new String[allMods.size()]; keyset.toArray(keys); Arrays.sort(keys); for (int i = 0; i < keys.length; i++) { String title = keys[i]; ModificationTemplate template = (ModificationTemplate) allMods.get(title); boolean hidden = template.isArtifact(); sb.append("Title:" + title + "\n"); if (hidden) { sb.append("Hidden\n"); } keyset = (Set) template.getResidues(); String[] massKeys = new String[keyset.size()]; keyset.toArray(massKeys); Arrays.sort(massKeys); for (int j = 0; j < massKeys.length; j++) { String residue = massKeys[j]; if (residue.equals(com.compomics.util.interfaces.Modification.NTERMINUS)) { sb.append("Nterm:"); } else if (residue.equals(com.compomics.util.interfaces.Modification.CTERMINUS)) { sb.append("Cterm:"); } else { sb.append("Residues:" + residue); } sb.append(" " + template.getMonoisotopicMassDelta(residue) + " " + template.getAverageMassDelta(residue) + "\n"); } sb.append("*\n"); } return sb.toString(); } /** * Returns a String representation of the modification title to code mappings. * * @return String with a String representation of the code tot title mappings. */ public static String modificationConversionToString() { StringBuffer result = new StringBuffer(); // See if the factory has been initialized. checkInit(); // Compose the String. Set keySet = titleToCode.keySet(); String[] titles = new String[keySet.size()]; keySet.toArray(titles); Arrays.sort(titles); for (int i = 0; i < titles.length; i++) { String lTitle = titles[i]; // Only append those mappings that have a title linked to a // real ModificationTemplate. if (allMods.containsKey(lTitle)) { result.append(lTitle + "=" + titleToCode.get(lTitle) + "\n"); } } return result.toString(); } /** * This method can be used to refresh all data from the data store indicated in the ModificationFactory.properties * file. */ public static void reLoadAllData() { iInitialized = false; checkInit(); } /** * This method returns all modification titles known to the Factory. * * @return String[] with all the modification titles. */ public static String[] getAllModificationTitles() { checkInit(); String[] result = null; Set titles = allMods.keySet(); result = new String[titles.size()]; titles.toArray(result); return result; } /** * Try to load the specified file from an absolute name or, if that fails, the classpath. * If the file is found, it is parsed and the in-memory code to title mappings are initialized. * * @param aCodesFile String with the filename for the file to load the code tot title mappings from. * This name can be an absolute filename, or the name of a file in the classpath. */ private static void loadCodesFromFile(String aCodesFile) { codeToTitle = new HashMap(); titleToCode = new HashMap(); try { // Get an InputStream to the text file. InputStream in = null; // Try an absolute pathname. File temp = new File(aCodesFile); if (!temp.exists()) { // In getting here in = ModificationFactory.class.getClassLoader().getResourceAsStream(aCodesFile); if (in == null) { throw new IOException("Unable to load '" + aCodesFile + "' as an absolute path as well as from the classpath! Please check the filename!!"); } } else { in = new FileInputStream(temp); } InputStreamReader is = new InputStreamReader(in); BufferedReader br = new BufferedReader(is); String line = null; // The HashMap that holds all codes that have duplicates. HashMap duplicateCodes = new HashMap(); while ((line = br.readLine()) != null) { line = line.trim(); // Skip comment & blank lines. if (line.startsWith("#") || line.startsWith("!") || line.equals("")) { continue; } else { int location = line.indexOf("="); String title = line.substring(0, location).trim(); String code = line.substring(location + 1).trim(); Object previous = titleToCode.put(title, code); // Now first see if the code has already had duplicates. if (duplicateCodes.containsKey(code)) { int count = ((Integer) duplicateCodes.get(code)).intValue(); count++; codeToTitle.put(code + count, title); duplicateCodes.put(code, Integer.valueOf(count)); } else { previous = codeToTitle.put(code, title); // Here conflicts can arise. if (previous != null) { duplicateCodes.put(code, Integer.valueOf(1)); codeToTitle.put(code + "0", previous); codeToTitle.put(code + "1", title); codeToTitle.remove(code); } } } } if (in != null) { in.close(); } is.close(); br.close(); } catch (IOException ioe) { logger.error(ioe.getMessage(), ioe); } } /** * Try to load the specified file from an absolute name or, if that fails, the classpath. * If the file is found, it is parsed and the in-memory modification table is initialized. * * @param aModsFile String with the filename for the file to load the modifications from. * This name can be an absolute filename, or the name of a file in the classpath. */ private static void loadModificationsFromFile(String aModsFile) { allMods = new HashMap(); try { // Get an InputStream to the text file. InputStream in = null; // First see if we can load it from an absolute path. File temp = new File(aModsFile); if (!temp.exists()) { // In getting here in = ModificationFactory.class.getClassLoader().getResourceAsStream(aModsFile); if (in == null) { throw new IOException("Unable to load '" + aModsFile + "' as an absolute path as well as from the classpath! Please check the filename!!"); } } else { in = new FileInputStream(temp); } BufferedReader br = new BufferedReader(new InputStreamReader(in)); String line; while ((line = br.readLine()) != null) { // Trim the line. line = line.trim(); // Parse the 'modifications.txt' file. if (line.startsWith("Title:")) { // Read the title. String title = line.substring(6); // Read the next line. line = br.readLine().trim(); boolean hidden = false; HashMap massDeltas = new HashMap(); if (line.equals("Hidden")) { hidden = true; line = br.readLine(); } if (line.startsWith("Nterm:")) { String residue = com.compomics.util.interfaces.Modification.NTERMINUS; massDeltas.put(residue, parseMonoAndAverageMassDelta(line.substring(6))); } else if (line.startsWith("Cterm:")) { String residue = com.compomics.util.interfaces.Modification.CTERMINUS; massDeltas.put(residue, parseMonoAndAverageMassDelta(line.substring(6))); } else if (line.startsWith("Residues")) { while (line.startsWith("Residues")) { int colon = line.indexOf(":"); int first = line.indexOf(" "); String residue = line.substring(colon + 1, first).trim(); massDeltas.put(residue, parseMonoAndAverageMassDelta(line.substring(first + 1))); line = br.readLine(); } } // We should add the modification. allMods.put(title, new ModificationTemplate(title, (String) titleToCode.get(title), massDeltas, hidden)); } } br.close(); in.close(); } catch (IOException ioe) { logger.error(ioe.getMessage(), ioe); } } /** * This method will take a String of the form 'xxx.xxx xxx.xxxx' and return the * two space-separated decimal numbers in an array of doubles. <br /> * Specifically for the ModificationFactory class, the first element in the array * is the monoisotopic mass, the second is the average mass delta. * * @param aData String in the form of 'xxx.xxxxx xxx.xxxxxx' to parse the two doubles from. * @return double[] with the two numbers in the String (0:MONO, 1:AVG) */ private static double[] parseMonoAndAverageMassDelta(String aData) { int spaceLocation = aData.indexOf(" "); double mono = Double.parseDouble(aData.substring(0, spaceLocation).trim()); double avg = Double.parseDouble(aData.substring(spaceLocation).trim()); return new double[]{mono, avg}; } /** * This method loads and initiliazes the modification maps in memory. * * @param aModsFile String with the filename for the file to load the modifications from. * This name can be an absolute filename, or the name of a file in the classpath. * @param aCodesFile String with the filename for the file to load the code to title mappings from. * This name can be an absolute filename, or the name of a file in the classpath. */ private static void loadAllFromFiles(String aModsFile, String aCodesFile) { loadCodesFromFile(aCodesFile); loadModificationsFromFile(aModsFile); } /** * This method loads all modification from the specified RDBMS system. It expects the following columns to be present in the * specified table (please read the information below for a detailed description of the massdeltas columns): * <ul> * <li>title [read as String]</li> * <li>code [read as String]</li> * <li>artifact [read as boolean]</li> * <li>monoisotopicmassdeltas [read as String, with the following formatting: (RESIDUE1)_xxx.yyyy;(RESIDUE2)_aaa.bbb]</li> * <li>averagemassdeltas [read as String, with the following formatting: (RESIDUE1)_xxx.yyyy;(RESIDUE2)_aaa.bbb]</li> * </ul> * * Some detailed information about the massdeltas columns could be useful. <br /> * These fields contain data that represents which residue will suffer which mass delta for the specific modification. * These mass deltas can be measured both monoisotopically and averaged over the isotopes. In order to map these fields * correctly it is necessary to have a correspondence between the residues mentioned in the monoisotopic and average rows. * If a certain residue is only present in one of these columns, there will be trouble! * * @param aDB String with the jdbc connection String. * @param aDriver String with the fully qualified classname of the DB driver * @param aTable String with the table to perform the query on. * @param aConnectionProps Properties with all properties for the connection set. * @throws IOException wraps all exceptions that can occur here (mostly SQLExceptions, obviously). */ private static void loadAllFromRDBMS(String aDB, String aDriver, String aTable, Properties aConnectionProps) throws IOException { // Try to get a connection. Connection conn = null; Statement stat = null; ResultSet rs = null; try { // First get a hold of the driver. Driver driver = null; try { driver = (Driver) Class.forName(aDriver).newInstance(); } catch (ClassNotFoundException cnfe) { throw new SQLException("Unable to load driver '" + aDriver + "'! Are you sure it is in the classpath?"); } catch (IllegalAccessException iae) { throw new SQLException("The driver '" + aDriver + "' does not seem to have an accessible public constructor!"); } catch (InstantiationException ie) { throw new SQLException("The driver '" + aDriver + "' does not seem to have an accessible public constructor!"); } if (driver != null) { // Okay, driver loaded! Let's get connected. conn = driver.connect(aDB, aConnectionProps); // Connection made, construct query. // No PreparedStatement used as this query will not be executed frequently and as such // will only clutter the preparedstatement cache of the driver. stat = conn.createStatement(); // Execute the SQL. rs = stat.executeQuery("select title, code, artifact, monoisotopicmassdeltas, averagemassdeltas from " + aTable); // HashMap used for tracking duplicate codes. HashMap duplicateCodes = new HashMap(); allMods = new HashMap(); titleToCode = new HashMap(); codeToTitle = new HashMap(); // Cycle the results. while (rs.next()) { // In JDBC: column numbers start from '1'. String title = rs.getString(1).trim(); String code = rs.getString(2); boolean artifact = rs.getBoolean(3); String monoDeltas = rs.getString(4).trim(); String avgDeltas = rs.getString(5).trim(); // Parse the mono and avg mass deltas Strings. HashMap massDeltas = parseDeltasFromRDBMSStrings(monoDeltas, avgDeltas); // Now to construct a Modification objects and add it to the map. allMods.put(title, new ModificationTemplate(title, code, massDeltas, artifact)); // We mustn't forget the 'title to code' and 'code to title' mappings! // Title to code is quite easy: if (code != null) { // Note that we check whether the title is unique! Object found = titleToCode.put(title, code); if (found != null) { logger.error("Duplicate title for modification: " + found); } // The inverse mapping is somewhat more difficult, since code need not be unique. // If the code is already present, we need to do some complex stuff. // Now first see if the code has already had duplicates. if (duplicateCodes.containsKey(code)) { int count = ((Integer) duplicateCodes.get(code)).intValue(); count++; codeToTitle.put(code + count, title); duplicateCodes.put(code, Integer.valueOf(count)); } else { Object previous = codeToTitle.put(code, title); // Here conflicts can arise. if (previous != null) { duplicateCodes.put(code, Integer.valueOf(1)); codeToTitle.put(code + "0", previous); codeToTitle.put(code + "1", title); codeToTitle.remove(code); } else { } } } } // Closing everything down is done in the 'finally' section. } else { // That's odd: no exception and yet no driver either! throw new SQLException("Driver '" + aDriver + "' was not loaded correctly! Unfortunately, no further details are known."); } } catch (SQLException sqle) { throw new IOException(sqle.getMessage()); } finally { // Wrap things up nicely. if (rs != null) { try { rs.close(); } catch (Exception e) { // Too late to worry about that now. } } if (stat != null) { try { stat.close(); } catch (Exception e) { // Too late to worry about that now. } } if (conn != null) { try { conn.close(); } catch (Exception e) { // Too late to worry about that now. } } } } /** * This method can parse a HashMap with monoisotopic and average mass deltas from the * Strings as stored in the database. <br /> * Formatting of these Strings should be: <br /> * [residue1]_xx.yyyy;[residue2]_aa.bbbb <br /> * eg.: S_165.7654;T_153.9965 <br /> * <b>Please note</b> that every residue present in the monoisotopic list should also be present in the * average list! * * @param aMonoDeltas String with the monoistopic mass deltas, formatted as specified above. * @param aAvgDeltas String with the average mass deltas, formatted as specified above. * @return HashMap with the mass delta mappings. * @throws IOException when the parsing failed. */ private static HashMap parseDeltasFromRDBMSStrings(String aMonoDeltas, String aAvgDeltas) throws IOException { HashMap mappings = new HashMap(); // First the monoisotopic bits. // Start by splitting up the semicolon-delimited parts. StringTokenizer st = new StringTokenizer(aMonoDeltas, ";"); // This arraylist is used as a check on whether each residue, specified in the monoisotopic section, // is also present in the average section. We add all mono residues here, and later subtract those from the // average. If the size of this list is greater than 0 at the end, there was at leats one residue present // in the mono part that was NOT in the average part, and we even know which one! ArrayList keys = new ArrayList(10); while (st.hasMoreTokens()) { // This should yield something of the form [residueX]_xxx.yyyy. String resMassCombo = st.nextToken(); // Find the first underscore starting from the end (this way, one can use underscores in the // code, which is NOT recommended, by the way). int location = resMassCombo.lastIndexOf("_"); if (location < 0) { throw new IOException("The content of the monoisotopicmassdeltas row could not be parsed from (a String + '_' + a double) since the '_' is missing!"); } String residue = resMassCombo.substring(0, location); String stringMonoValue = resMassCombo.substring(location + 1); double mono = 0.0; try { mono = Double.parseDouble(stringMonoValue); } catch (NumberFormatException nfe) { throw new IOException("The content of the monoisotopicmassdeltas row could not be parsed from (a String + '_' + a double)!"); } mappings.put(residue, new Double(mono)); keys.add(residue); } // Okay, we now have all mappings for the monoisotopic stuff. // The average stuff should hold the same number of mappings. st = new StringTokenizer(aAvgDeltas, ";"); while (st.hasMoreTokens()) { // This should yield something of the form [residueX]_xxx.yyyy. String resMassCombo = st.nextToken(); // Find the first underscore starting from the end (this way, one can use underscores in the // code, which is NOT recommended, by the way). int location = resMassCombo.lastIndexOf("_"); if (location < 0) { throw new IOException("The content of the averagemassdeltas row could not be parsed from (a String + '_' + a double) since the '_' is missing!"); } String residue = resMassCombo.substring(0, location); String stringAvgValue = resMassCombo.substring(location + 1); double avg = 0.0; try { avg = Double.parseDouble(stringAvgValue); } catch (NumberFormatException nfe) { throw new IOException("The content of the averagemassdeltas row could not be parsed from (a String + '_' + a double)!"); } // See if this mapping is already there (it should be, if it's not, we throw an exception). Object temp = mappings.get(residue); if (temp == null) { throw new IOException("Residue '" + residue + "' was only present in the average mass delta mappings and NOT in the monoisotopic ones!"); } // Okay, temp is not 'null', so cast it, mold it, group with the average and store it again as a a double[]. double mono = ((Double) temp).doubleValue(); mappings.put(residue, new double[]{mono, avg}); keys.remove(residue); } // Check our keys arraylist. if (keys.size() > 0) { // This is not good. // Generate a nice report, though. Iterator iter = keys.iterator(); StringBuffer residues = new StringBuffer(); while (iter.hasNext()) { String residue = (String) iter.next(); residues.append(residue + " "); } throw new IOException("The following residues all had a monoisotopic mass delta mapping, yet no average mass dleta mapping: " + residues.toString() + "!"); } // Finis! return mappings; } /** * This method checks the 'iInitialized' boolean. When it is not set, it will * initialize the factory based on the input method set in the 'ModificationFactory.properties file'. <br /> * Note that RDBMS has precedence over file. */ private static void checkInit() { // See if we are initialized. If not, do the following: // 1. Find the properties file. // 2. Determine which resource (files or RDBMS) to use for retrieving data. // 3. Call the appropriate retrieve methods. // 4. If all goes well, set iInitialized to 'true' and be done with it. if (!iInitialized) { InputStream is = ModificationFactory.class.getClassLoader().getResourceAsStream("ModificationFactory.properties"); if (is == null) { loadAllFromFiles("modifications.txt", "modificationConversion.txt"); } else { // Okay, load the Properties. Properties props = new Properties(); try { props.load(is); // Okay, let's see what we should do. // If we find the RDBMS tag, we go with that, else try to find the MODFILE tag. if (props.containsKey(RDBMS)) { // Handle RDBMS. String db = props.getProperty(RDBMS); String driver = props.getProperty(RDBDRIVER); String table = props.getProperty(TABLE); String user = props.getProperty(USER); String password = props.getProperty(PASSWORD); // Start doing validations. // We need db, driver and table info. User and password are optional, // yet if either one is present, the other must be present as well. if (db == null || db.trim().equals("")) { throw new IOException(RDBMS + " key defined in 'ModificationFactory.properties', yet its value was 'null' or empty String!"); } else if (driver == null || driver.trim().equals("")) { throw new IOException(RDBMS + " key defined in 'ModificationFactory.properties', yet mandatory " + RDBDRIVER + " 'null' or empty String!"); } else if (table == null || table.trim().equals("")) { throw new IOException(RDBMS + " key defined in 'ModificationFactory.properties', yet mandatory " + TABLE + " 'null' or empty String!"); } // Now do the user and password. Properties dbProps = new Properties(); if (user != null && !user.trim().equals("")) { if (password == null || password.trim().equals("")) { throw new IOException(USER + " key defined in 'ModificationFactory.properties', yet mandatory " + PASSWORD + " 'null' or empty String!"); } else { // Some DB drivers like 'user', some like 'username'. dbProps.put("user", user.trim()); dbProps.put("username", user.trim()); dbProps.put("password", password.trim()); } } else if (password != null && !password.trim().equals("")) { throw new IOException(PASSWORD + " key defined in 'ModificationFactory.properties', yet mandatory " + USER + " 'null' or empty String!"); } // Okay, let 'er roll. loadAllFromRDBMS(db, driver, table, dbProps); } else if (props.containsKey(MODFILE)) { // Handle file-input. String mods = props.getProperty(MODFILE); String codes = props.getProperty(CODEFILE); // Check if anything sensible is contained in these. if (mods == null || mods.trim().equals("")) { throw new IOException("No " + RDBMS + " key defined in 'ModificationFactory.properties' and " + MODFILE + " was 'null' or empty String!"); } else if (codes == null || codes.trim().equals("")) { throw new IOException("No " + RDBMS + " key defined in 'ModificationFactory.properties' and mandatory " + CODEFILE + " was 'null' or empty String!"); } else { // Okay, passed the test. Go get 'em! loadAllFromFiles(mods, codes); } } iInitialized = true; } catch (IOException ioe) { logger.error(ioe.getMessage(), ioe); } } } } }