package org.genedb.querying.tmpquery;
import org.genedb.querying.core.QueryException;
import org.genedb.querying.core.QueryParam;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.WildcardQuery;
import org.springframework.util.StringUtils;
import org.springframework.validation.Errors;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
public class QuickSearchQuery extends OrganismLuceneQuery {
private static final long serialVersionUID = -3007330180211992013L;

/**
 * Class-level logger.
 * <p>
 * Held in a static field rather than a transient instance field: a transient
 * field initialised inline is not re-initialised when an instance is restored
 * through Java serialization, which would leave it {@code null} and make every
 * logging call fail with a NullPointerException.
 */
private static final Logger logger = Logger.getLogger(QuickSearchQuery.class);

/** Free text entered by the user; tokenised when the Lucene query is built. */
private String searchText;

/** When set, the search text is matched against the {@code productAlphanumeric} field. */
@QueryParam(order = 1, title = "Search gene products?")
private boolean product;

/** When set, the search text is matched against the {@code allNames} field. */
@QueryParam(order = 2, title = "Search gene names and synonyms?")
private boolean allNames;

/**
 * Selects which type restriction is added to the query:
 * {@code productiveTranscriptQuery} when set, {@code mRNAQuery} otherwise.
 */
@QueryParam(order = 3, title = "Include pseudogenes")
private boolean pseudogenes;
/**
 * Names the Lucene index this query is run against.
 *
 * @return the constant index name {@code org.gmod.schema.mapped.Feature}
 */
@Override
protected String getluceneIndexName() {
    final String indexName = "org.gmod.schema.mapped.Feature";
    return indexName;
}
/**
 * Supplies the human-readable description of this query.
 *
 * @return a fixed one-sentence description of the quick search
 */
@Override
public String getQueryDescription() {
    final String description =
            "Allows you to quickly search for genes by name (including synonyms) or function";
    return description;
}
/**
 * Lists the names of this query's parameters.
 *
 * @return a new array holding {@code searchText}, {@code product},
 *         {@code allNames} and {@code pseudogenes}, in that order
 */
@Override
protected String[] getParamNames() {
    String[] names = { "searchText", "product", "allNames", "pseudogenes" };
    return names;
}
/** Splits product search text on runs of non-word characters. */
private static final Pattern pattern = Pattern.compile("\\W+");

/**
 * Appends the Lucene queries for the quick search (without any organism
 * restriction) to {@code queries}.
 * <p>
 * Two queries are appended:
 * <ol>
 * <li>a {@link BooleanQuery} holding one optional ({@code SHOULD}) clause per
 * enabled search mode ({@code allNames} and/or {@code product}); within a mode
 * every token of the search text is required ({@code MUST});</li>
 * <li>a type restriction: {@code productiveTranscriptQuery} when
 * {@code pseudogenes} is set, otherwise {@code mRNAQuery}.</li>
 * </ol>
 * A {@code null} search text is treated as empty, and empty tokens are never
 * turned into clauses. A search mode that yields no tokens contributes no
 * clause at all.
 *
 * @param queries the list the generated queries are appended to
 */
@Override
protected void getQueryTermsWithoutOrganisms(
        List<org.apache.lucene.search.Query> queries) {
    // Guard against an unset search text instead of failing with an NPE.
    String text = (searchText == null) ? "" : searchText.trim();
    BooleanQuery bq = new BooleanQuery();

    if (allNames) {
        // For names searches, non-word characters are important components of
        // names (e.g. PC302054.00.0), so we can only split on whitespace. The
        // original note here says the analyzer used for the allNames field also
        // splits only on whitespace.
        // Splitting on whitespace *runs* keeps repeated spaces from producing
        // empty tokens; the text is already trimmed, so none can appear at the
        // edges either.
        String[] tokens = (text.length() == 0) ? new String[0] : text.split("\\s+");
        debugTokens("names search tokens: ", tokens);
        if (tokens.length == 1) {
            bq.add(newQuickSearchTokenQuery("allNames", tokens[0], true), Occur.SHOULD);
        } else if (tokens.length > 1) {
            bq.add(newAllTokensQuery("allNames", tokens, true), Occur.SHOULD);
        }
    }

    if (product) {
        // For product searches we split on all non-word characters (per the
        // original note, that is how the productAlphanumeric field is tokenised).
        String[] tokens = pattern.split(text);
        debugTokens("product search tokens: ", tokens);
        // No point in wildcard queries here.
        BooleanQuery productQuery = newAllTokensQuery("productAlphanumeric", tokens, false);
        if (productQuery != null) {
            bq.add(productQuery, Occur.SHOULD);
        }
    }

    queries.add(bq);

    // Add type restrictions
    if (pseudogenes) {
        queries.add(productiveTranscriptQuery);
    } else {
        queries.add(mRNAQuery);
    }
    // queries.add(isCurrentQuery);
}

/** Writes a heading followed by each token to the debug log. */
private void debugTokens(String heading, String[] tokens) {
    logger.debug(heading);
    for (String token : tokens) {
        logger.debug(token);
    }
}

/**
 * Builds the query for a single token: a wildcard query when wildcards are
 * allowed and the token contains {@code *}, otherwise an exact term query.
 * The token is lower-cased independently of the JVM default locale.
 */
private static org.apache.lucene.search.Query newQuickSearchTokenQuery(
        String field, String token, boolean allowWildcards) {
    Term term = new Term(field, token.toLowerCase(Locale.ROOT));
    if (allowWildcards && token.indexOf('*') != -1) {
        return new WildcardQuery(term);
    }
    return new TermQuery(term);
}

/**
 * Builds a query requiring every non-empty token to match in {@code field};
 * returns {@code null} when there is no non-empty token.
 */
private static BooleanQuery newAllTokensQuery(
        String field, String[] tokens, boolean allowWildcards) {
    BooleanQuery allTokens = new BooleanQuery();
    boolean anyClause = false;
    for (String token : tokens) {
        // Pattern.split yields a leading empty token when the text starts with
        // a delimiter; a required clause on an empty term could never match.
        if (token.length() == 0) {
            continue;
        }
        allTokens.add(newQuickSearchTokenQuery(field, token, allowWildcards), Occur.MUST);
        anyClause = true;
    }
    return anyClause ? allTokens : null;
}
// @Override
// protected void getQueryTerms(List<Query> queries) {
// getQueryTermsWithoutOrganisms(queries);
// }
// @Override
// public String getParseableDescription() {
// // TODO Auto-generated method stub
// return null;
// }
/**
* Get all the results for the quick query
*
* @return
* @throws QueryException
*/
// public QuickSearchQueryResults getReallyQuickSearchQueryResults(
// int maxResults) throws QueryException {
//
// QuickSearchQueryResults quickSearchQueryResults = new QuickSearchQueryResults();
// quickSearchQueryResults.setTotalHits(0);
// List<GeneSummary> geneSummaries = quickSearchQueryResults.getResults();
//
// if (searchText.length() == 0) {
// return quickSearchQueryResults;
// }
//
// try {
// // taxn name
// List<String> currentTaxonNames = null;
// if (taxons != null && taxons.getNodeCount() > 0) {
// currentTaxonNames = taxonNodeManager
// .getNamesListForTaxons(taxons);
// }
//
// TopDocs topDocs = lookupInLucene(maxResults);
// // int currentResult = 0;
// for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
//
// Document document = fetchDocument(scoreDoc.doc);
//
// // Get the current taxon name from document
// // String taxonName = document.get("organism.commonName");
//
// // boolean isNoTaxonMatch = currentTaxonNames != null &&
// // !currentTaxonNames.contains(taxonName);
// //
// // if (isNoTaxonMatch) {
// // continue;
// // }
//
// // only populate if we are under the max
// // if (currentResult < maxResults) {
// populateGeneSummaries(geneSummaries, document);
// // }
// //
// // we want the total number of hits, even if we don't return
// // them all
// // currentResult++;
//
// }
// Collections.sort(geneSummaries);
//
// logger.info("Total returned hits :" + geneSummaries.size());
// logger.info("Total unreturned hits :" + topDocs.totalHits);
//
// quickSearchQueryResults.setTotalHits(topDocs.totalHits);
//
// if (luceneIndex.getMaxResults() == geneSummaries.size()) {
// isActualResultSizeSameAsMax = true;
// }
//
// if (currentTaxonNames == null && geneSummaries.size() > 1) {
// quickSearchQueryResults
// .setQuickResultType(QuickResultType.ALL_ORGANISMS_IN_ALL_TAXONS);
//
// } else if (geneSummaries.size() == 1) {
// quickSearchQueryResults
// .setQuickResultType(QuickResultType.SINGLE_RESULT_IN_CURRENT_TAXON);
//
// } else if (geneSummaries.size() > 1) {
// quickSearchQueryResults
// .setQuickResultType(QuickResultType.MULTIPLE_RESULTS_IN_CURRENT_TAXON);
//
// } else {
// quickSearchQueryResults
// .setQuickResultType(QuickResultType.NO_EXACT_MATCH_IN_CURRENT_TAXON);
// }
//
// } catch (CorruptIndexException exp) {
// throw new QueryException(exp);
// } catch (IOException exp) {
// throw new QueryException(exp);
// }
// return quickSearchQueryResults;
// }
/**
 * Runs the query unpaged and summarises the hits per taxon.
 * <p>
 * Each hit is read from the index and counted under its
 * {@code organism.commonName}. Hits whose taxon is in the current selection
 * (or all hits, when no taxons are selected) go into the returned taxon group
 * and count towards the total; the rest are counted separately and only copied
 * into the taxon group when the selection itself produced no hits.
 *
 * @return the per-taxon hit counts, the total number of matched hits and the
 *         kind of result
 * @throws QueryException if reading from the Lucene index fails
 */
public QuickSearchQueryResults getQuickSearchQueryResults() throws QueryException {
    QuickSearchQueryResults summary = new QuickSearchQueryResults();
    TreeMap<String, Integer> matchedByTaxon = summary.getTaxonGroup();
    TreeMap<String, Integer> unmatchedByTaxon = new TreeMap<String, Integer>();

    try {
        // Names of the selected taxons; null means there is no taxon restriction.
        List<String> selectedTaxonNames = null;
        if (taxons != null && taxons.getNodeCount() > 0) {
            selectedTaxonNames = taxonNodeManager.getNamesListForTaxons(taxons);
        }

        TopDocs hits = lookupInLucene();
        int matched = 0;
        for (ScoreDoc hit : hits.scoreDocs) {
            Document hitDocument = fetchDocument(hit.doc);
            String taxonName = hitDocument.get("organism.commonName");
            boolean inSelection = selectedTaxonNames == null
                    || selectedTaxonNames.contains(taxonName);
            if (inSelection) {
                populateTaxonGroup(matchedByTaxon, taxonName);
                matched++;
            } else {
                populateTaxonGroup(unmatchedByTaxon, taxonName);
            }
        }

        logger.info("Total matched hits :" + matched);
        summary.setTotalHits(matched);
        if (luceneIndex.getMaxResults() == matched) {
            isActualResultSizeSameAsMax = true;
        }

        // Nothing matched in the current selection: show every other taxon
        // that did have a match instead.
        if (matched == 0 && matchedByTaxon.size() == 0 && unmatchedByTaxon.size() > 0) {
            matchedByTaxon.putAll(unmatchedByTaxon);
        }

        QuickResultType resultType;
        if (matched == 1) {
            resultType = QuickResultType.SINGLE_RESULT_IN_CURRENT_TAXON;
        } else if (matched > 1) {
            resultType = (selectedTaxonNames == null)
                    ? QuickResultType.ALL_ORGANISMS_IN_ALL_TAXONS
                    : QuickResultType.MULTIPLE_RESULTS_IN_CURRENT_TAXON;
        } else {
            resultType = QuickResultType.NO_EXACT_MATCH_IN_CURRENT_TAXON;
        }
        summary.setQuickResultType(resultType);
    } catch (CorruptIndexException exp) {
        throw new QueryException(exp);
    } catch (IOException exp) {
        throw new QueryException(exp);
    }
    return summary;
}
/**
* Get all the results for the quick query
*
* @return
* @throws QueryException
*/
/*public List getResults() throws QueryException {
quickSearchQueryResults = new QuickSearchQueryResults();
List<GeneSummary> geneSummaries = quickSearchQueryResults.getResults();
if (searchText.length() == 0) {
return geneSummaries;
}
TreeMap<String, Integer> taxonGroup = quickSearchQueryResults.getTaxonGroup();
TreeMap<String, Integer> tempTaxonGroup = new TreeMap<String, Integer>();
try {
// taxn name
List<String> currentTaxonNames = null;
if (taxons != null && taxons.getNodeCount() > 0) {
currentTaxonNames = taxonNodeManager
.getNamesListForTaxons(taxons);
}
TopDocs topDocs = lookupInLucene();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
Document document = fetchDocument(scoreDoc.doc);
// Get the current taxon name from document
String taxonName = document.get("organism.commonName");
boolean isNoTaxonMatch = currentTaxonNames != null
&& !currentTaxonNames.contains(taxonName);
if (taxons == null) {
// Categorise the taxons into size of hits
populateTaxonGroup(taxonGroup, taxonName);
populateGeneSummaries(geneSummaries, document);
} else if (isNoTaxonMatch) {
populateTaxonGroup(tempTaxonGroup, taxonName);
} else {
populateGeneSummaries(geneSummaries, document);
// Categrise the taxons into size of hits
populateTaxonGroup(taxonGroup, taxonName);
}
}
Collections.sort(geneSummaries);
logger.info("Total geneSummaries hits :" + geneSummaries.size());
quickSearchQueryResults.setTotalHits(geneSummaries.size());
if (luceneIndex.getMaxResults() == geneSummaries.size()) {
isActualResultSizeSameAsMax = true;
}
// If no matches are found for current taxon, display all other
// taxons with a match
if (geneSummaries.size() == 0 && taxonGroup.size() == 0 && tempTaxonGroup.size() > 0) {
taxonGroup.putAll(tempTaxonGroup);
}
if (currentTaxonNames == null && geneSummaries.size() > 1) {
quickSearchQueryResults
.setQuickResultType(QuickResultType.ALL_ORGANISMS_IN_ALL_TAXONS);
} else if (geneSummaries.size() == 1) {
quickSearchQueryResults
.setQuickResultType(QuickResultType.SINGLE_RESULT_IN_CURRENT_TAXON);
} else if (geneSummaries.size() > 1) {
quickSearchQueryResults
.setQuickResultType(QuickResultType.MULTIPLE_RESULTS_IN_CURRENT_TAXON);
} else {
quickSearchQueryResults
.setQuickResultType(QuickResultType.NO_EXACT_MATCH_IN_CURRENT_TAXON);
}
} catch (CorruptIndexException exp) {
throw new QueryException(exp);
} catch (IOException exp) {
throw new QueryException(exp);
}
return geneSummaries;
} */
/**
 * Adds one hit to the running count kept for a taxon.
 *
 * @param taxonGroup map from taxon name to number of hits seen so far; updated in place
 * @param taxonName  name of the taxon the hit belongs to; starts at 1 if not yet present
 */
private void populateTaxonGroup(TreeMap<String, Integer> taxonGroup,
        String taxonName) {
    Integer previousCount = taxonGroup.get(taxonName);
    int updatedCount = (previousCount == null) ? 1 : previousCount + 1;
    taxonGroup.put(taxonName, updatedCount);
}
// private void populateGeneSummaries(List<GeneSummary> geneSummaries,
// Document document) {
// // logger.debug(StringUtils.collectionToCommaDelimitedString(document.getFields()));
// GeneSummary gs = convertDocumentToReturnType(document);
// geneSummaries.add(gs);
// }
/**
 * Supplies extra model data for this query; there is none.
 *
 * @return an empty map
 */
@Override
public Map<String, Object> prepareModelData() {
    Map<String, Object> noModelData = Collections.emptyMap();
    return noModelData;
}
/**
 * Reports the ordering value of this query.
 *
 * @return always {@code 0}
 */
@Override
public int getOrder() {
    // TODO Auto-generated method stub
    final int order = 0;
    return order;
}
/**
 * Validation hook; currently performs no checks and records no errors.
 *
 * @param arg0 the object to validate (unused)
 * @param arg1 the error collector (unused)
 */
@Override
public void validate(Object arg0, Errors arg1) {
    // TODO Auto-generated method stub
    return;
}
/**
 * Mutable holder for the outcome of a quick search: per-taxon hit counts,
 * the total number of hits, the kind of result and, optionally, a gene id
 * for the single-result case.
 */
public class QuickSearchQueryResults {
    //private List<GeneSummary> results = new ArrayList<GeneSummary>();

    /** Hit counts keyed by taxon name; starts out empty. */
    private TreeMap<String, Integer> taxonGroup = new TreeMap<String, Integer>();

    /** Classification of the result; {@code null} until set. */
    private QuickResultType quickResultType;

    /** Gene id associated with a single result; {@code null} until set. */
    private String singleResultInTaxonGeneId;

    /** Number of hits; {@code 0} until set. */
    private int totalHits;

    // public List<GeneSummary> getResults() {
    // return results;
    // }
    //
    // public void setResults(List<GeneSummary> results) {
    // this.results = results;
    // }

    /** @return the per-taxon hit counts (the live map, not a copy) */
    public TreeMap<String, Integer> getTaxonGroup() {
        return this.taxonGroup;
    }

    /** @param taxonGroup the map to use for per-taxon hit counts */
    public void setTaxonGroup(TreeMap<String, Integer> taxonGroup) {
        this.taxonGroup = taxonGroup;
    }

    /** @return the classification of the result */
    public QuickResultType getQuickResultType() {
        return this.quickResultType;
    }

    /** @param quickResultType the classification of the result */
    public void setQuickResultType(QuickResultType quickResultType) {
        this.quickResultType = quickResultType;
    }

    /** @return the gene id recorded for a single result */
    public String getSingleResultInTaxonGeneId() {
        return this.singleResultInTaxonGeneId;
    }

    /** @param singleResultInTaxonGeneId the gene id to record for a single result */
    public void setSingleResultInTaxonGeneId(
            String singleResultInTaxonGeneId) {
        this.singleResultInTaxonGeneId = singleResultInTaxonGeneId;
    }

    /** @return the number of hits */
    public int getTotalHits() {
        return totalHits;
    }

    /** @param totalHits the number of hits */
    public void setTotalHits(int totalHits) {
        this.totalHits = totalHits;
    }
}
/**
 * Classifies the outcome of a quick search by how many hits there were and
 * whether a taxon restriction was in force.
 */
public enum QuickResultType {
    /** More than one hit, with no taxon restriction applied. */
    ALL_ORGANISMS_IN_ALL_TAXONS,
    /** No hit in the current taxon selection. */
    NO_EXACT_MATCH_IN_CURRENT_TAXON,
    /** Exactly one hit. */
    SINGLE_RESULT_IN_CURRENT_TAXON,
    /** More than one hit within the current taxon selection. */
    MULTIPLE_RESULTS_IN_CURRENT_TAXON
}
/**
 * Returns the search text, trimmed.
 * <p>
 * Note that this getter has a side effect: when the stored text has any
 * length, the trimmed value replaces it. A {@code null} or empty text is
 * returned unchanged.
 *
 * @return the (possibly trimmed) search text, or {@code null} if none was set
 */
public String getSearchText() {
    if (!StringUtils.hasLength(searchText)) {
        return searchText;
    }
    searchText = searchText.trim();
    return searchText;
}
/**
 * Stores the search text exactly as given (no trimming happens here).
 *
 * @param searchText the text to search for
 */
public void setSearchText(String searchText) {
    this.searchText = searchText;
}
/**
 * @return {@code true} if gene products are searched
 */
public boolean isProduct() {
    return this.product;
}
/**
 * @param product {@code true} to search gene products
 */
public void setProduct(boolean product) {
    this.product = product;
}
/**
 * @return {@code true} if gene names and synonyms are searched
 */
public boolean isAllNames() {
    return this.allNames;
}
/**
 * @param allNames {@code true} to search gene names and synonyms
 */
public void setAllNames(boolean allNames) {
    this.allNames = allNames;
}
/**
 * @return the value of the "Include pseudogenes" option
 */
public boolean isPseudogenes() {
    return this.pseudogenes;
}
/**
 * @param pseudogenes the value of the "Include pseudogenes" option
 */
public void setPseudogenes(boolean pseudogenes) {
    this.pseudogenes = pseudogenes;
}
/**
 * Supplies the display name of this query.
 *
 * @return the fixed name {@code Quick search}
 */
@Override
public String getQueryName() {
    final String queryName = "Quick search";
    return queryName;
}
}