package org.genedb.querying.tmpquery; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.log4j.Logger; import org.apache.lucene.document.Document; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.regex.JakartaRegexpCapabilities; import org.apache.lucene.search.regex.JavaUtilRegexCapabilities; import org.apache.lucene.search.regex.RegexQuery; import org.genedb.querying.core.QueryException; import org.genedb.querying.core.QueryParam; import org.genedb.querying.core.LuceneQuery.Pager; public class MotifQuery extends OrganismLuceneQuery { private static Logger logger = Logger.getLogger(MotifQuery.class); private static Map<Character,String> PROTEIN_GROUP_MAP; static { PROTEIN_GROUP_MAP = new HashMap<Character, String>(); PROTEIN_GROUP_MAP.put('B', "[AGS]"); //tiny PROTEIN_GROUP_MAP.put('Z', "[ACDEGHKNQRST]"); //turnlike PROTEIN_GROUP_MAP.put('0', "[DE]"); //acidic PROTEIN_GROUP_MAP.put('1', "[ST]"); //alcohol PROTEIN_GROUP_MAP.put('2', "[ILV]"); //aliphatic PROTEIN_GROUP_MAP.put('3', "[FHWY]"); //aromatic PROTEIN_GROUP_MAP.put('4', "[HKR]"); //basic PROTEIN_GROUP_MAP.put('5', "[DEHKR]"); //charged PROTEIN_GROUP_MAP.put('6', "[AFILMVWY]"); //hydrophobic PROTEIN_GROUP_MAP.put('7', "[DEHKNQR]"); //hydrophilic PROTEIN_GROUP_MAP.put('8', "[CDEHKNQRST]"); //polar PROTEIN_GROUP_MAP.put('9', "[ACDGNPSTV]"); //small } @QueryParam( order=1, title="The search string" ) private String search = ""; private String actualSearch; private Pattern pattern; public void setSearch(String search) { this.search = search; } public String getSearch() { return search; } @Override public String getQueryName() { return "Motif"; } @Override protected void getQueryTermsWithoutOrganisms(List<Query> queries) { logger.info(search); actualSearch = new String(search); if(actualSearch.indexOf("{") == -1) { for (Entry<Character, String> entry : PROTEIN_GROUP_MAP.entrySet()) { logger.info(Character.toString(entry.getKey()) + " " + entry.getValue()); actualSearch = actualSearch.replaceAll(Character.toString(entry.getKey()), entry.getValue()); logger.info(actualSearch); } } pattern = Pattern.compile(actualSearch); // let's ignore any motifs smaller than 2 characters if (actualSearch.length() < 3) { throw new RuntimeException("Sorry, cannot handle motifs under 3 characters long."); } String starter = ".*"; // if (actualSearch.startsWith(".+") ) { // actualSearch.replaceFirst("\\.\\+", starter); // } // else if (actualSearch.startsWith(".*")) { // actualSearch.replaceFirst("\\.\\*", starter); // } // else if (! search.startsWith("^") && !search.startsWith(starter)) { actualSearch = starter + actualSearch; } logger.info(String.format("%s ----- > %s", search, actualSearch)); queries.add(new TermQuery(new Term("type.name", "polypeptide"))); RegexQuery r = new RegexQuery(new Term("sequenceResidues",actualSearch)); JavaUtilRegexCapabilities capabilites = new JavaUtilRegexCapabilities(); //JakartaRegexpCapabilities c = new JakartaRegexpCapabilities(); r.setRegexImplementation(capabilites); // logger.info("max results " + maxResults); logger.info(r.getRegexImplementation().getClass().toString()); queries.add(r); } @Override protected String getluceneIndexName() { return "org.gmod.schema.mapped.Feature"; } @Override public String getQueryDescription() { return "Searches for polypeptide residue patterns with a regular expression."; } @Override protected String[] getParamNames() { return new String[] {"search"}; } public Map<String,Object> prepareModelData() { Map<String,Object> map = new HashMap<String,Object>(); map.put("search", search); return map; } public class MotifResult { String match; String residues; int start; int end; String displayId; String pre; String post; public String getPre() { return pre; } public void setPre(String pre) { this.pre = pre; } public String getPost() { return post; } public void setPost(String post) { this.post = post; } public String getDisplayId() { return displayId; } public void setDisplayId(String displayId) { this.displayId = displayId; } public String getMatch() { return match; } public void setMatch(String match) { this.match = match; } public String getResidues() { return residues; } public void setResidues(String residues) { this.residues = residues; } public int getStart() { return start; } public void setStart(int start) { this.start = start; } public int getEnd() { return end; } public void setEnd(int end) { this.end = end; } public MotifResult(String displayId, String match, String residues, int start, int end, String pre, String post) { this.match = match; this.residues = residues; this.start = start; this.end = end; this.displayId = displayId; this.pre = pre; this.post = post; } } protected Pager<MotifResult> motifResultPager = new Pager<MotifResult>() { @Override public MotifResult convert(Document document) { String displayId = getGeneUniqueNameOrUniqueName(document); String residues = document.get("sequenceResidues"); logger.info(residues); Matcher m = pattern.matcher(residues); m.find(); String match = m.group(); int start = m.start(); int end = m.end(); logger.info(String.format("%s %d-%d", match, start, end)); String newResidues = residues.substring(0, start) + "*" + residues.substring(start, end) + "*" + residues.substring(end); String newResidues2 = residues.substring(0, start) + "*" + match + "*" + residues.substring(end); String newResidues3 = residues.substring(0, start) + "*" + actualSearch + "*" + residues.substring(end); logger.info(newResidues); logger.info(newResidues2); logger.info(newResidues3); String pre = residues.substring(0, start); int len = 30; if (pre.length() > len) { int i = pre.length() - len; pre = "..." + pre.substring(i); } String post = residues.substring(end); if (post.length() > len) { post = post.substring(0, len) + "..."; } return new MotifResult(displayId, match, residues, start, end, pre, post); } }; public Map<String,MotifResult> getMotifResults(int page, int length) throws QueryException { List<MotifResult> motifResults = motifResultPager.getResults(page, length); Map<String, MotifResult> map = new HashMap<String,MotifResult>(); for (MotifResult mr : motifResults) { map.put(mr.getDisplayId(), mr); } return map; } }