/*
 * Copyright (C) Lennart Martens
 *
 * Contact: lennart.martens AT UGent.be (' AT ' to be replaced with '@')
 */

/*
 * Created by IntelliJ IDEA.
 * User: Lennart
 * Date: 8-okt-02
 * Time: 18:06:48
 */
package com.compomics.util.io;

import org.apache.log4j.Logger;

import com.compomics.util.protein.Enzyme;
import com.compomics.util.protein.DualEnzyme;
import com.compomics.util.protein.RegExEnzyme;

import java.util.HashMap;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import java.io.*;
/*
 * CVS information:
 *
 * $Revision: 1.4 $
 * $Date: 2008/11/18 11:39:11 $
 */

/**
 * This class will load Enzyme properties from a Mascot
 * (<a href="http://www.matrixscience.com" target="_blank">www.matrixscience.com</a>)
 * formatted text file.
 * <p>
 * The reader recognises lines containing the keywords 'Title', 'Cleavage',
 * 'Restrict', 'Cterm' and 'Nterm' (case-insensitive); a line consisting of a
 * single '*' terminates an entry.
 *
 * @author Lennart Martens
 */
public class MascotEnzymeReader {

    // Class specific log4j logger for MascotEnzymeReader instances.
    Logger logger = Logger.getLogger(MascotEnzymeReader.class);

    /**
     * This HashMap will hold all the Enzyme entries we've found in the file.
     * The enzyme name doubles as the key.
     */
    private HashMap iEnzymes = null;

    /**
     * The constructor requires that you specify the file from which to load
     * the enzyme information.
     * Information is immediately loaded upon construction.
     *
     * @param   aEnzymeFile String with the filename of the mascot enzyme file.
     * @exception   IOException when the enzyme file could not be read.
     */
    public MascotEnzymeReader(String aEnzymeFile) throws IOException {
        this(new FileInputStream(aEnzymeFile));
    }

    /**
     * The constructor allows you to specify an inputstream from which to load
     * the enzyme information. <b>Note that the stream is closed after loading!!!</b>
     * The stream is closed even when loading fails.
     * Information is immediately loaded upon construction.
     *
     * @param   aEnzymeStream InputStream to the mascot enzyme file.
     * @exception   IOException when the enzyme file could not be read. The original
     *              exception is attached as the cause.
     */
    public MascotEnzymeReader(InputStream aEnzymeStream) throws IOException {
        try {
            BufferedReader lBr = new BufferedReader(new InputStreamReader(aEnzymeStream));
            iEnzymes = this.readAllEntries(lBr);
            // Closing the reader also closes the wrapped stream; a failure here is
            // still reported to the caller through the catch block below.
            lBr.close();
        } catch(IOException ioe) {
            IOException wrapped = new IOException("Unable to load Mascot enzyme file from stream: " + ioe.getMessage());
            // Keep the original exception (and its stack trace) reachable for callers.
            wrapped.initCause(ioe);
            throw wrapped;
        } finally {
            // Guarantees the stream is released on every exit path, including
            // unchecked exceptions raised while parsing.
            closeQuietly(aEnzymeStream);
        }
    }

    /**
     * Closes the specified stream, ignoring any IOException raised while doing so.
     *
     * @param   aStream InputStream to close, can be 'null'.
     */
    private static void closeQuietly(InputStream aStream) {
        if(aStream != null) {
            try {
                aStream.close();
            } catch(IOException ignored) {
                // Nothing useful can be done here; the outcome of the load
                // has already been decided by the time we get to closing.
            }
        }
    }

    /**
     * This method loads all entries from the Mascot enzymefile associated with the
     * BufferedReader.
     * <p>
     * An entry is stored when a '*' line is reached and a title, a cleavage and a
     * position (Cterm/Nterm) were collected for it. Incomplete entries are skipped
     * with a warning. The collected fields are cleared at every '*' line, so that
     * no information carries over from one entry into the next.
     *
     * @param   aBr BufferedReader to read the enzymefile from.
     * @return  HashMap with all the entries, and with the name for each entry
     *          doubling as the key for that entry.
     * @exception   IOException when the reading goes wrong.
     */
    private HashMap readAllEntries(BufferedReader aBr) throws IOException {
        HashMap entries = new HashMap();

        // The constituent parts of an enzyme.
        String title = null;
        String cleavage = null;
        String restrict = null;
        String position = null;

        String line = null;
        while((line = aBr.readLine()) != null) {
            line = line.trim();
            if(line.equals("")) {
                // Blank lines carry no information.
                continue;
            }
            if(line.equals("*")) {
                // End-of-entry marker.
                if(title != null && cleavage != null && position != null) {
                    entries.put(title, createEnzyme(title, cleavage, restrict, position));
                } else if(title != null || cleavage != null || restrict != null || position != null) {
                    logger.warn("Skipping incomplete enzyme entry (title: " + title + ", cleavage: " + cleavage + ", position: " + position + ").");
                }
                title = null;
                cleavage = null;
                restrict = null;
                position = null;
                continue;
            }
            // Fixed locale: under e.g. a Turkish default locale "title" would
            // upper-case to "TİTLE" and the keyword would never be recognised.
            String insensitive = line.toUpperCase(Locale.ENGLISH);
            if(insensitive.indexOf("TITLE") >= 0) {
                title = valueAfterColon(line);
            } else if(insensitive.indexOf("CLEAVAGE") >= 0) {
                cleavage = valueAfterColon(line);
            } else if(insensitive.indexOf("RESTRICT") >= 0) {
                restrict = valueAfterColon(line);
            } else if(insensitive.indexOf("CTERM") >= 0) {
                position = "Cterm";
            } else if(insensitive.indexOf("NTERM") >= 0) {
                position = "Nterm";
            }
        }
        return entries;
    }

    /**
     * Returns the trimmed part of the line that follows the first ':' character,
     * or the whole trimmed line when it does not contain a ':'.
     *
     * @param   aLine String with the line to extract the value from.
     * @return  String with the trimmed value.
     */
    private static String valueAfterColon(String aLine) {
        return aLine.substring(aLine.indexOf(':') + 1).trim();
    }

    /**
     * Builds the Enzyme instance for a completed entry. The kind of enzyme is
     * selected by the title prefix (regardless of case): 'dual' yields a DualEnzyme,
     * 'regex' a RegExEnzyme, and anything else a regular Enzyme.
     * <p>
     * Note that an invalid regular expression in a 'regex' entry makes
     * Pattern.compile throw an unchecked PatternSyntaxException, which is
     * deliberately not converted into a checked exception here.
     *
     * @param   aTitle String with the title of the enzyme (not 'null').
     * @param   aCleavage String with the cleavage definition (not 'null').
     * @param   aRestrict String with the restrictors, can be 'null'.
     * @param   aPosition String with the position ('Cterm' or 'Nterm').
     * @return  Enzyme with the enzyme instance for the entry.
     */
    private Enzyme createEnzyme(String aTitle, String aCleavage, String aRestrict, String aPosition) {
        String lowerTitle = aTitle.toLowerCase(Locale.ENGLISH);
        if(lowerTitle.startsWith("dual")) {
            // Process dual enzyme here.
            // The cleavables are separated in N-terms and C-terms by the 'X' character.
            StringTokenizer st = new StringTokenizer(aCleavage.toUpperCase(Locale.ENGLISH), "X");
            int tokenCount = st.countTokens();
            if(tokenCount != 2) {
                // The fragments complete the sentence to 'with more than one' or 'without the'.
                String error = null;
                if(tokenCount > 2) {
                    error = " more than one ";
                } else {
                    error = "out the ";
                }
                logger.error("Dual enzyme detected (title starts with 'dual', regardless of case) but with" + error + "'X' separator between N-terminal cleavables and C-terminal cleavables.\nTreating it as a regular enzyme.");
                return new Enzyme(aTitle, aCleavage, aRestrict, aPosition);
            }
            String ntermCleavage = st.nextToken().trim();
            String ctermCleavage = st.nextToken().trim();
            return new DualEnzyme(aTitle, ntermCleavage, ctermCleavage, aRestrict, aPosition);
        } else if(lowerTitle.startsWith("regex")) {
            // First check if we can compile the regular expression.
            // @ToDo: if we throw a checked exception here, it will have repercussions throughout the system!
            Pattern.compile(aCleavage, Pattern.CASE_INSENSITIVE);
            return new RegExEnzyme(aTitle, aCleavage, aRestrict, aPosition);
        } else {
            // Process regular enzyme here.
            return new Enzyme(aTitle, aCleavage, aRestrict, aPosition);
        }
    }

    /**
     * This method reports on all the known names for enzymes in this reader.
     *
     * @return  String[] with all the names.
     */
    public String[] getEnzymeNames() {
        Set s = this.iEnzymes.keySet();
        int liSize = s.size();
        String[] result = new String[liSize];
        s.toArray(result);

        return result;
    }

    /**
     * This method will return a <b>copy of</b> an Enzyme instance
     * for the given name, or 'null' if the enzyme was not found in the current list.
     *
     * @param   aTitle  String with the title (name) of the Enzyme to retrieve.
     * @return  Enzyme  when the specified enzyme was found, 'null' otherwise!
     */
    public Enzyme getEnzyme(String aTitle) {
        Enzyme e = (Enzyme)this.iEnzymes.get(aTitle);
        Enzyme result = null;
        if(e != null) {
            // Hand out a clone so callers cannot modify the cached instance.
            result = (Enzyme)e.clone();
        }
        return result;
    }
}