/*
 * Created on 08-Mar-2003
 *
 * To change this generated comment go to
 * Window>Preferences>Java>Code Generation>Code and Comments
 */
package org.genedb.web.mvc.controller.analysis;

import org.gmod.schema.mapped.Organism;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.servlet.ModelAndView;

import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.google.common.collect.Maps;

/**
 * Servlet which provides a motif search using a regular expression against a protein database.
 *
 * <p>The database is read as a FASTA-style flat file: a header line starting with
 * <code>&gt;</code> followed by one or more sequence lines. Each record's sequence lines are
 * concatenated and searched with a regular expression.
 *
 * <p>NOTE(review): the search is unfinished. The database path and the search pattern are
 * both hard-coded in {@link #runMainSearch(Organism, String, boolean, int, String, String, String)},
 * so the organism, the submitted pattern and the protein flag are currently ignored.
 *
 * <p>The controller holds no per-request state in instance fields, so a single instance can
 * serve several requests at once.
 *
 * @author Adrian Tivey (art)
 */
@Controller
@RequestMapping("/MotifSearch")
public class MotifSearchController {

    // private static final int MAX_RESULT_SIZE = 20000;

    /** Single-character shorthand codes mapped to the amino-acid character class they expand to. */
    private static final Map<Character, String> PROTEIN_GROUP_MAP;

    /** Single-character ambiguity codes mapped to the nucleotide character class they expand to. */
    private static final Map<Character, String> NUCLEOTIDE_GROUP_MAP;

    /** Matches one line at a time, excluding the line terminator. */
    private static final Pattern BY_LINE_PATTERN;

    static {
        PROTEIN_GROUP_MAP = new HashMap<Character, String>();
        PROTEIN_GROUP_MAP.put('B', "[AGS]"); //tiny
        PROTEIN_GROUP_MAP.put('Z', "[ACDEGHKNQRST]"); //turnlike
        PROTEIN_GROUP_MAP.put('0', "[DE]"); //acidic
        PROTEIN_GROUP_MAP.put('1', "[ST]"); //alcohol
        PROTEIN_GROUP_MAP.put('2', "[ILV]"); //aliphatic
        PROTEIN_GROUP_MAP.put('3', "[FHWY]"); //aromatic
        PROTEIN_GROUP_MAP.put('4', "[HKR]"); //basic
        PROTEIN_GROUP_MAP.put('5', "[DEHKR]"); //charged
        PROTEIN_GROUP_MAP.put('6', "[AFILMVWY]"); //hydrophobic
        PROTEIN_GROUP_MAP.put('7', "[DEHKNQR]"); //hydrophilic
        PROTEIN_GROUP_MAP.put('8', "[CDEHKNQRST]"); //polar
        PROTEIN_GROUP_MAP.put('9', "[ACDGNPSTV]"); //small

        NUCLEOTIDE_GROUP_MAP = new HashMap<Character, String>();
        NUCLEOTIDE_GROUP_MAP.put('Y', "[ct]"); // Pyrimidine (C & T)
        NUCLEOTIDE_GROUP_MAP.put('R', "[ag]"); // Purine
        NUCLEOTIDE_GROUP_MAP.put('W', "[at]"); // weak
        NUCLEOTIDE_GROUP_MAP.put('S', "[gc]"); // strong
        NUCLEOTIDE_GROUP_MAP.put('K', "[tg]"); // keto
        NUCLEOTIDE_GROUP_MAP.put('M', "[ca]"); // amino
        NUCLEOTIDE_GROUP_MAP.put('D', "[agt]"); // not C
        NUCLEOTIDE_GROUP_MAP.put('V', "[agc]"); // not T
        NUCLEOTIDE_GROUP_MAP.put('H', "[act]"); // not G
        NUCLEOTIDE_GROUP_MAP.put('B', "[gct]"); // not A

        BY_LINE_PATTERN = Pattern.compile("^.*$", Pattern.MULTILINE);
    }

    /**
     * Handles a GET request by running the motif search and exposing the matches to the view
     * under the model key <code>results</code>.
     *
     * <p>No view name is set, so view selection is left to the framework's defaults.
     *
     * @param msb the bound request parameters
     * @return a model-and-view carrying the list of matches
     * @throws Exception if the database cannot be read or is malformed
     */
    @RequestMapping(method = RequestMethod.GET)
    public ModelAndView onSubmit(MotifSearchBean msb) throws Exception {
        // String startString = req.getParameter("start");
        // if (startString == null) {
        //     startString = "0";
        // }
        // size or max hits
        int start = 0;

        List<MotifMatch> results =
                runMainSearch(msb.getOrganism(), msb.getPattern(), msb.isProtein(), start);

        // Return results to page
        // req.setAttribute("title", "Gene Results List");
        // The results must be attached to the returned object; a model built separately
        // and not passed on never reaches the view.
        ModelAndView mav = new ModelAndView();
        mav.addObject("results", results);
        return mav;
    }

    /**
     * Runs a search without any custom groups.
     *
     * @param org the organism whose database should be searched (currently unused)
     * @param patternString the motif pattern supplied by the user (currently unused)
     * @param protein whether the search is against protein sequence (currently unused)
     * @param start passed through to the search; currently has no effect
     * @return one entry per database record containing at least one hit
     * @throws IOException if the database file cannot be read
     */
    private List<MotifMatch> runMainSearch(Organism org, String patternString, boolean protein,
            int start) throws IOException {
        return runMainSearch(org, patternString, protein, start, null, null, null);
    }

    /**
     * Loads the database file and searches every record in it.
     *
     * @param org the organism whose database should be searched (currently unused)
     * @param patternString the motif pattern supplied by the user (currently unused)
     * @param protein whether the search is against protein sequence (currently unused)
     * @param start passed through to the search; currently has no effect
     * @param customGroup1 first user-defined group (currently unused)
     * @param customGroup2 second user-defined group (currently unused)
     * @param customGroup3 third user-defined group (currently unused)
     * @return one entry per database record containing at least one hit
     * @throws IOException if the database file cannot be read
     */
    private List<MotifMatch> runMainSearch(Organism org, String patternString, boolean protein,
            int start, String customGroup1, String customGroup2, String customGroup3)
            throws IOException {

        // Work out db given org
        String dbFileName = "/tmp/Pf_3D7.chromsomes.fa"; // FIXME
        CharSequence in = fromFile(dbFileName);

        // Validate custom groups
        // if (!validateCustomGroup(customGroup1)) {
        //
        // }

        // Pattern pattern = manipulateRegExp(patternString, customGroup1, customGroup2, customGroup3);
        // FIXME placeholder pattern until manipulateRegExp is implemented
        Pattern pattern = Pattern.compile("CAD");

        // Run search
        return runSearch(in, pattern, start);
    }

    /**
     * @return
     */
    //private static Pattern validatePattern(String patternString) {
    //    # Check search
    //    if ($syn !~ /^[A-Za-z0-9\.\+\?\{\}\,\[\]\*\^\$]+$/) {
    //    print qq(Your query contained invalid characters. Please alter your query and try again.);
    //    return Pattern.compile(patternString);
    //}

    /**
     * Walks the database line by line, joins each record's sequence lines and searches the
     * joined sequence.
     *
     * <p>All parsing state is kept in local variables so that concurrent calls cannot
     * interfere with one another.
     *
     * @param in the complete database contents
     * @param pattern the compiled motif to look for
     * @param start currently unused
     * @return one entry per record with at least one hit, in file order
     * @throws IllegalStateException if a non-header line appears before the first header
     */
    private List<MotifMatch> runSearch(CharSequence in, Pattern pattern, int start)
            throws IllegalStateException {
        List<MotifMatch> results = new ArrayList<MotifMatch>();
        Matcher lineMatcher = BY_LINE_PATTERN.matcher(in);

        int count = 0;
        String idLine = null; // header of the record being accumulated
        StringBuilder sequence = new StringBuilder();

        while (lineMatcher.find()) {
            String line = lineMatcher.group();
            if (line.startsWith(">")) {
                // A new header ends the previous record, if there was one.
                if (idLine != null) {
                    addIfMatched(results, sequence, idLine, pattern);
                }
                idLine = line;
                sequence.setLength(0);
                count++;
                System.err.println("" + count + idLine);
            } else if (idLine == null) {
                throw new IllegalStateException(
                        "db (flat-file) isn't correctly formatted. Expecting header but got:"
                        + line);
            } else {
                sequence.append(line);
            }
        }

        // The last record has no following header to trigger its search.
        if (idLine != null) {
            addIfMatched(results, sequence, idLine, pattern);
        }
        return results;
    }

    /** Searches one completed record and appends it to the results if it has any hit. */
    private void addIfMatched(List<MotifMatch> results, StringBuilder sequence, String idLine,
            Pattern pattern) {
        MotifMatch mm = runLineSearch(sequence.toString(), idLine, pattern);
        if (mm != null) {
            results.add(mm);
        }
    }

    /**
     * Finds every occurrence of the pattern in a single sequence.
     *
     * @param sequence the joined sequence of one record
     * @param idLine the header line of that record
     * @param pattern the compiled motif to look for
     * @return the hits with their start/end offsets, or <code>null</code> if there are none
     */
    private MotifMatch runLineSearch(String sequence, String idLine, Pattern pattern) {
        MotifMatch motifMatch = null;
        Matcher matcher = pattern.matcher(sequence);
        while (matcher.find()) {
            // Created lazily so that records without hits yield null
            if (motifMatch == null) {
                motifMatch = new MotifMatch(idLine, sequence);
            }
            motifMatch.addCoords(matcher.start(), matcher.end());
        }
        return motifMatch;
    }

//    private Pattern manipulateRegExp(String in, String cg1, String cg2, String cg3) {
//        StringBuffer pb = new StringBuffer();
////
//
//
//        int leftSquareBracket = -1;
//        int leftCurlyBracket = -1;
//
//
//        for (int i=0; i < in.length(); i++) {
//            char c = in.charAt(i);
//            switch (c) {
//            // Square brackets
//            case '[':
//                leftSquareBracket = i;
//                pb.append(c);
//                break;
//            case ']':
//                leftSquareBracket = -1;
//                pb.append(c);
//                break;
//
//            // Curly brackets
//            case '{':
//                leftCurlyBracket = i;
//                pb.append(c);
//                break;
//            case '}':
//                leftCurlyBracket = -1;
//                pb.append(c);
//                break;
//
//            // Special characters
//            case '.':
//            case '+':
//            case '?':
//            case ',':
//                pb.append(c);
//                break;
//
//            // Numbers
//            case '0':
//            case '1':
//            case '2':
//            case '3':
//            case '4':
//            case '5':
//            case '6':
//            case '7':
//            case '8':
//            case '9':
//                if (leftCurlyBracket != -1) {
//                    pb.append(c);
//                } else {
//                    pb.append(expandGroup(c));
//                }
//                break;
//
//            default:
//                pb.append(expandGroup(c));
//            }
//        }
//$syn =~ s|\{|_\{|g;
//$syn =~ s|\}|\}_|g;
//
//my $newExp = "";
//my @parts = split("_",$syn);
//foreach my $cur (@parts) {
//    if ($cur !~ "^\{") {
//        foreach my $lup (keys %mappings) {
//            $cur =~ s/$lup/$mappings{$lup}/g;
//        }
//    }
//    $newExp .= $cur;
//}
//
//#    $syn =~ m/(.*)/s;
//    $syn = $newExp;
//
//        return Pattern.compile(pb.toString());
//    }

    /**
     * Placeholder for expanding a shorthand group code into a character class.
     *
     * @param c the group code
     * @return always the empty string for now
     */
    public String expandGroup(char c) {
        return ""; // FIXME
    }

    /**
     * Converts the contents of a file into a CharSequence suitable for use by the regex
     * package.
     *
     * <p>The bytes are decoded as "8859_1" into a freshly allocated buffer, so the file can
     * be closed before returning.
     *
     * @param filename path of the file to read
     * @return the decoded contents of the file
     * @throws IOException if the file cannot be opened or read, or is too large to map
     */
    private static CharSequence fromFile(String filename) throws IOException {
        FileInputStream fis = new FileInputStream(filename);
        try {
            FileChannel fc = fis.getChannel();
            long size = fc.size();
            // A narrowing cast would silently wrap for very large files; fail clearly instead.
            if (size > Integer.MAX_VALUE) {
                throw new IOException("File is too large to search in memory: " + filename);
            }

            // Create a read-only mapping of the file and decode it
            ByteBuffer bbuf = fc.map(FileChannel.MapMode.READ_ONLY, 0, size);
            CharBuffer cbuf = Charset.forName("8859_1").newDecoder().decode(bbuf);
            return cbuf;
        } finally {
            // Closing the stream also closes its channel
            fis.close();
        }
    }

    /**
     * The hits found in one database record: its header line, its full sequence and the
     * start/end offsets of each hit.
     */
    private static final class MotifMatch {
        private final String idLine;
        private final String sequence;
        private final List<int[]> coords = new ArrayList<int[]>();

        /**
         * @param idLine the header line of the record
         * @param sequence the joined sequence of the record
         */
        public MotifMatch(String idLine, String sequence) {
            this.idLine = idLine;
            this.sequence = sequence;
        }

        /**
         * Records one hit.
         *
         * @param i start offset of the hit in the sequence (inclusive)
         * @param j end offset of the hit in the sequence (exclusive)
         */
        public void addCoords(int i, int j) {
            coords.add(new int[] {i, j});
        }

        /**
         * Returns the header line followed by the matched text of each hit, one per line.
         *
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {
            StringBuilder ret = new StringBuilder(idLine);
            ret.append('\n');
            for (int[] pair : coords) {
                ret.append("    ");
                ret.append(sequence.substring(pair[0], pair[1]));
                ret.append('\n');
            }
            return ret.toString();
        }
    }

    /**
     * Command-line entry point: runs a search and prints the number of matching records to
     * standard error.
     *
     * @param args the pattern is read from the second argument (index 1)
     * @throws IOException if the database file cannot be read
     */
    public static void main(String[] args) throws IOException {
        //String org=args[0];
        String pattern = args[1];

        MotifSearchController ms = new MotifSearchController();
        Organism org = null;
        List<MotifMatch> results = ms.runMainSearch(org, pattern, true, 0);
        System.err.println("Number of results: " + results.size());
        //for (MotifMatch match : results) {
            //System.err.println(match.idLine);
        //}
    }

}

/**
 * Form-backing bean holding the request parameters of a motif search.
 */
class MotifSearchBean {

    private boolean protein;
    private String pattern;
    // NOTE(review): extJ and extX look like user-defined extension groups - confirm
    private String extJ;
    private String extX;
    private Organism organism;
    private String dbName;

    public String getDbName() {
        return dbName;
    }

    public void setDbName(String dbName) {
        this.dbName = dbName;
    }

    public String getExtJ() {
        return this.extJ;
    }

    public void setExtJ(String extJ) {
        this.extJ = extJ;
    }

    public String getExtX() {
        return this.extX;
    }

    public void setExtX(String extX) {
        this.extX = extX;
    }

    public String getPattern() {
        return this.pattern;
    }

    public void setPattern(String pattern) {
        this.pattern = pattern;
    }

    public Organism getOrganism() {
        return this.organism;
    }

    public void setOrganism(Organism organism) {
        this.organism = organism;
    }

    public boolean isProtein() {
        return this.protein;
    }

    public void setProtein(boolean protein) {
        this.protein = protein;
    }

}