package org.seqcode.data.io.parsing; import java.lang.*; import java.io.*; import java.util.*; public class ParseTextMotifs { public static void main(String[] args) { try { ParseTextMotifs ptm = new ParseTextMotifs(new File(args[0])); for(ParsedMotif m : ptm.motifList) { System.out.println(m); } } catch(IOException ie) { ie.printStackTrace(System.err); } } private LinkedList<ParsedMotif> motifList; public ParseTextMotifs(File f) throws IOException { motifList = new LinkedList<ParsedMotif>(); BufferedReader br = new BufferedReader(new FileReader(f)); try { while(true) { ParsedMotif m = new ParsedMotif(br); motifList.addLast(m); } } catch(EOFException e) { for(ParsedMotif m : motifList) { System.out.println(m.toString()); } } br.close(); } public List<ParsedMotif> findMotifs(String name) { LinkedList<ParsedMotif> motifs = new LinkedList<ParsedMotif>(); for(ParsedMotif m : motifList) { if(m.getSource().indexOf(name) != -1) { motifs.addLast(m); } } return motifs; } public static class ParsedMotif { private String source; private double max_score; private Map<String,double[]> probs; private int cols; public String toString() { return "[" + source + "] : score (" + max_score + "/" + calcCutoff(0.6, true) + ") --> " + getMaxMotifString(); } public String getSource() { return source; } public double getMaxScore() { return max_score; } public int numCols() { return cols; } public double[] scoreDNASequence(String s) { double[] array = new double[s.length() - cols + 1]; for(int start = 0; start < s.length()-cols; start++) { array[start] = scoreDNAString(s, start); } return array; } public boolean containsMatch(String s) { double cutoff = calcCutoff(0.6, true); return containsMatch(s, cutoff); } public boolean containsMatch(String s, double score) { for(int start = 0; start < s.length()-cols; start++) { double v = scoreDNAString(s, start); if(v >= score) { return true; } } return false; } public double scoreDNAString(String s, int offset) { double sum = 0.0; for(int i = 0; i < cols; i++) { try { sum += probs.get(String.valueOf(Character.toUpperCase(s.charAt(offset + i))))[i]; } catch(NullPointerException npe) { System.err.println("NULL Value: " + s.charAt(offset + i)); System.err.print("Values ("); for(String k : probs.keySet()) { System.err.print(k + " "); } System.err.print(")\n"); } } return sum; } public double calcCutoff(double factor, boolean sum) { return factor * calcMaxScore(sum); } public double calcMaxScore(boolean sum) { double base; if(sum) { base = 0.0; } else { base = 1.0; } for(int i = 0; i < cols; i++) { double max = 0.0; String max_key = null; for(String key : probs.keySet()) { if(max_key == null || probs.get(key)[i] > max) { max_key = key; max = probs.get(key)[i]; } } if(sum) { base += max; } else { base *= max; } } return base; } public double calcMinScore(boolean sum) { double base; if(sum) { base = 0.0; } else { base = 1.0; } for(int i = 0; i < cols; i++) { double max = 0.0; String max_key = null; for(String key : probs.keySet()) { if(max_key == null || probs.get(key)[i] < max) { max_key = key; max = probs.get(key)[i]; } } if(sum) { base += max; } else { base *= max; } } return base; } public String getMaxMotifString() { StringBuilder sb = new StringBuilder(); for(int i = 0; i < cols; i++) { String maxString = null; double maxScore = 0.0; for(String key : probs.keySet()) { double s = probs.get(key)[i]; if(maxString == null || s > maxScore) { maxString = key; maxScore = s; } } sb.append(maxString); } return sb.toString(); } public ParsedMotif(BufferedReader br) throws IOException { String line = br.readLine(); if(line==null) { throw new EOFException(); } int firstColon = line.indexOf(":"); source = line.substring(firstColon+1, line.length()); line = br.readLine(); firstColon = line.indexOf(":"); max_score = Double.parseDouble(line.substring(firstColon+1, line.length()).trim()); line = br.readLine(); if(line.startsWith("#")) { line = line.substring(1, line.length()); } StringTokenizer st = new StringTokenizer(line); cols = st.countTokens(); probs = new HashMap<String,double[]>(); line = br.readLine(); if(line.startsWith("#")) { line = line.substring(1, line.length()); } st = new StringTokenizer(line); double[] array = new double[st.countTokens()-1]; String name = st.nextToken(); for(int i = 1; i < cols; i++) { array[i-1] = Double.parseDouble(st.nextToken()); } probs.put(name, array); line = br.readLine(); if(line.startsWith("#")) { line = line.substring(1, line.length()); } st = new StringTokenizer(line); array = new double[st.countTokens()-1]; name = st.nextToken(); for(int i = 1; i < cols; i++) { array[i-1] = Double.parseDouble(st.nextToken()); } probs.put(name, array); line = br.readLine(); if(line.startsWith("#")) { line = line.substring(1, line.length()); } st = new StringTokenizer(line); array = new double[st.countTokens()-1]; name = st.nextToken(); for(int i = 1; i < cols; i++) { array[i-1] = Double.parseDouble(st.nextToken()); } probs.put(name, array); line = br.readLine(); if(line.startsWith("#")) { line = line.substring(1, line.length()); } st = new StringTokenizer(line); array = new double[st.countTokens()-1]; name = st.nextToken(); for(int i = 1; i < cols; i++) { array[i-1] = Double.parseDouble(st.nextToken()); } probs.put(name, array); } } }