package uk.ac.ebi.ep.util.query;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.log4j.Logger;
import uk.ac.ebi.biobabel.lucene.LuceneParser;
import uk.ac.ebi.ep.config.Domain;
import uk.ac.ebi.ep.config.SearchField;
import uk.ac.ebi.ep.search.model.SearchParams;
import uk.ac.ebi.util.result.DataTypeConverter;
/**
 * Static helpers that assemble Lucene query strings, as plain text, for the
 * EB-Eye and UniProt API searches.
 * <p>
 * None of the builders escape the values they are given: field names and
 * values are concatenated verbatim. Callers that need escaping must do it
 * beforehand (see {@link #escapeSpecies(Collection)}).
 */
public class LuceneQueryBuilder {

    /** Separator between a field name and its value. */
    public static final String LUCENE_EQUAL = ":";

    /** Quote character used to wrap exact-match values. */
    public static final String LUCENE_QUOTE = "\"";

    /** Enzyme filter appended to queries. For EBeye, EC must be in upper case. */
    public static final String ENZYME_FILTER = "EC:[1 TO 9]";

    /** Name of the id field used in Uniprot API queries. */
    public static final String UNIPROT_ID_FIELD = "id";

    /**
     * Suffix appended to a value to turn it into a prefix search. Note that
     * it includes an underscore, so <code>PDE7B</code> becomes
     * <code>PDE7B_*</code>.
     */
    public static final String LUCENE_WILDCARD = "_*";

    /** Enzyme filter used in Uniprot API queries (EC classes 1 to 6). */
    public static final String ENZYME_FILTER_UNIPROTAPI =
            "ec:(1.* OR 2.* OR 3.* OR 4.* OR 5.* OR 6.*)";

    /** Name of the species field used in Uniprot API queries. */
    public static final String UNIPROT_SPECIES_FIELD = "organism";

    /** Name of the species field for EBeye. */
    public static final String EBEYE_SPECIES_FIELD = "organism_scientific_name";

    public static final Logger LOGGER =
            Logger.getLogger(LuceneQueryBuilder.class);

    // Historical note: a query returning 133343 results did not pass the
    // load test (a MAX_RESULTS limit of 100000 was once considered here).

    /** Parser used to escape Lucene special characters. */
    public static LuceneParser luceneParser = new LuceneParser();

    /** Boolean operators, including their surrounding blanks. */
    private static final String AND = " AND ";
    private static final String OR = " OR ";

    /**
     * Creates a Lucene query which searches the text of the search
     * parameters in every search field configured for the domain. The fields
     * are OR'ed, eg.: <code>(id:"Sildenafil" OR name:"Sildenafil")</code>.
     * @param domain The Domain object that contains the search fields which
     *      are used to create the fields query.
     * @param searchParams The SearchParams object whose text
     *      ({@link SearchParams#getText()}) is the value searched in every
     *      field. Nothing else from it is used.
     * @return a Lucene query
     * @deprecated The class Domain from ep-domain is deprecated, as it refers
     *      to EB-Eye domains actually, so it belongs in ebeye-adapter. Use
     *      {@link #createFieldsQuery(java.util.List, String)} instead.
     */
    @Deprecated
    public static String createFieldsQuery(Domain domain, SearchParams searchParams) {
        List<SearchField> searchFields = domain.getSearchFieldList().getSearchField();
        List<String> fieldNames = new ArrayList<String>(searchFields.size());
        for (SearchField field : searchFields) {
            fieldNames.add(field.getId());
        }
        return createFieldsQuery(fieldNames, searchParams.getText());
    }

    /**
     * Builds a query made of the text of the search parameters AND'ed with
     * the {@link #ENZYME_FILTER}. Unlike
     * {@link #createFieldsQuery(uk.ac.ebi.ep.config.Domain, uk.ac.ebi.ep.search.model.SearchParams)},
     * the text is <b>not</b> restricted to the search fields of the domain:
     * the <code>domain</code> parameter is currently ignored.
     * @param domain not used.
     * @param searchParams the search parameters providing the text to search.
     * @return the text followed by <code> AND </code> and the enzyme filter.
     * @see #createQueryWithEnzymeFilter(String)
     * @deprecated The class Domain from ep-domain is deprecated, as it refers
     *      to EB-Eye domains actually, so it belongs in ebeye-adapter. Use
     *      {@link #createFieldsQueryWithEnzymeFilter(java.util.List, String)}
     *      instead.
     */
    @Deprecated
    public static String createFieldsQueryWithEnzymeFilter(Domain domain, SearchParams searchParams) {
        return createANDQuery(searchParams.getText(), ENZYME_FILTER);
    }

    /**
     * Builds a lucene query not restricted to any fields.
     * @param searchTerm the search term. Must not be <code>null</code>.
     * @return a lucene query with a filter for enzymes.
     * @since 1.0.6
     */
    public static String createQueryWithEnzymeFilter(String searchTerm) {
        return new StringBuilder(searchTerm)
                .append(AND).append(ENZYME_FILTER).toString();
    }

    /**
     * Builds a lucene query for several fields, AND'ed with the
     * {@link #ENZYME_FILTER}.
     * @param fieldNames names of the fields to be queried.
     * @param fieldValue value searched in those fields.
     * @return an OR'ed lucene query followed by the enzyme filter.
     * @since 1.0.6
     */
    public static String createFieldsQueryWithEnzymeFilter(
            List<String> fieldNames, String fieldValue) {
        return createANDQuery(
                createFieldsQuery(fieldNames, fieldValue), ENZYME_FILTER);
    }

    /**
     * Creates a Lucene query which searches the same quoted value in several
     * fields, eg.: <code>(id:"x" OR name:"x")</code>.
     * <p>
     * The group is always closed: an empty list of field names yields
     * <code>()</code>.
     * @param fieldNames The search field names to query.
     * @param fieldValue The value searched in every field.
     * @return an OR'ed Lucene query enclosed in parentheses.
     */
    public static String createFieldsQuery(List<String> fieldNames,
            String fieldValue) {
        StringBuilder query = new StringBuilder("(");
        boolean first = true;
        for (String field : fieldNames) {
            if (!first) {
                query.append(OR);
            }
            query.append(createFieldValueQuery(field, fieldValue));
            first = false;
        }
        // Closed outside the loop so that parentheses are balanced even when
        // there are no fields at all.
        return query.append(")").toString();
    }

    /**
     * Creates an exact-match query for one field, eg.: <code>id:"x"</code>.
     * @param field the field name.
     * @param value the value, which gets wrapped in double quotes as is.
     * @return the query <code>field:"value"</code>.
     */
    public static String createFieldValueQuery(String field, String value) {
        return new StringBuilder()
                .append(field)
                .append(LUCENE_EQUAL)
                .append(LUCENE_QUOTE).append(value).append(LUCENE_QUOTE)
                .toString();
    }

    /**
     * Creates a prefix query for one field, eg.: <code>id:x*</code>. Note
     * that the suffix is a plain asterisk, not {@link #LUCENE_WILDCARD}.
     * @param field the field name.
     * @param value the (unquoted) prefix to search.
     * @return the query <code>field:value*</code>.
     */
    public static String createWildcardFieldValueQuery(String field, String value) {
        return new StringBuilder()
                .append(field)
                .append(LUCENE_EQUAL)
                .append(value)
                .append("*")
                .toString();
    }

    /**
     * Restricts a query to a list of species, eg.:
     * <code>query AND organism:("Homo sapiens" OR "Mus musculus")</code>.
     * <p>
     * Species names are quoted but not escaped here.
     * @param simpleQuery the query to restrict.
     * @param speciesField the name of the field holding the species.
     * @param speciesList the accepted species. May be <code>null</code>.
     * @return the restricted query, or <code>simpleQuery</code> untouched if
     *      the list of species is <code>null</code> or empty.
     */
    public static String addSpeciesFilterQuery(String simpleQuery
            , String speciesField, Collection<String> speciesList) {
        if (speciesList == null || speciesList.isEmpty()) {
            return simpleQuery;
        }
        StringBuilder sb = new StringBuilder()
                .append(simpleQuery)
                .append(AND)
                .append(speciesField)
                .append(LUCENE_EQUAL)
                .append("(");
        boolean first = true;
        for (String species : speciesList) {
            if (!first) {
                sb.append(OR);
            }
            sb.append(LUCENE_QUOTE).append(species).append(LUCENE_QUOTE);
            first = false;
        }
        return sb.append(")").toString();
    }

    /**
     * Escapes the Lucene special characters of every species name, using
     * {@link #luceneParser}.
     * @param speciesFilter the species names. Must not be <code>null</code>.
     * @return a new list with the escaped names, in iteration order.
     */
    public static List<String> escapeSpecies(Collection<String> speciesFilter) {
        List<String> escapedList = new ArrayList<String>(speciesFilter.size());
        for (String species : speciesFilter) {
            String escapedSpecies = luceneParser.escapeLuceneSpecialChars(species);
            /* Work around to fix the bug in Uniprot API. Any species ended
             * with " / S288c" has 0 result.
             * Plain (non-regex) replacement: the searched text is a literal.
             */
            escapedList.add(escapedSpecies.replace(" / S288c", "*/*S288c"));
        }
        return escapedList;
    }

    /**
     * Create a list of query by id. A query is created for each id prefix,
     * eg.: <code>id:PDE7B_* AND ec:(1.* OR ...)</code>, optionally followed
     * by a species filter. Queries created by the method can only be used
     * for Uniprot API.
     * @param idPrefixes the id prefixes; each gets {@link #LUCENE_WILDCARD}
     *      appended.
     * @param speciesFilter species to restrict the queries to. They are
     *      escaped (see {@link #escapeSpecies(Collection)}) before use. May
     *      be <code>null</code> for no species filter.
     * @return one query per id prefix, in the same order.
     */
    public static List<String> createUniprotAPIQueryByIdPrefixes(
            List<String> idPrefixes, Collection<String> speciesFilter) {
        List<String> escapedSpeciesFilter = null;
        if (speciesFilter != null) {
            escapedSpeciesFilter = escapeSpecies(speciesFilter);
        }
        List<String> queryList = new ArrayList<String>(idPrefixes.size());
        for (String idPrefix : idPrefixes) {
            /* Adding the enzyme filter will make the result list from Uniprot
             * API smaller than the result list from Ebeye.
             */
            String idQuery = new StringBuilder()
                    .append(UNIPROT_ID_FIELD)
                    .append(LUCENE_EQUAL)
                    .append(idPrefix)
                    .append(LUCENE_WILDCARD)
                    .append(AND)
                    .append(ENZYME_FILTER_UNIPROTAPI)
                    .toString();
            queryList.add(addSpeciesFilterQuery(
                    idQuery, UNIPROT_SPECIES_FIELD, escapedSpeciesFilter));
        }
        return queryList;
    }

    /**
     * Creates a query matching any of several values in one field, eg.:
     * <code>id:(PDE7B_HUMAN OR PDE7B_MOUSE)</code>, or
     * <code>id:(PDE7B_* OR PDE5A_*)</code> with wildcard. Values are neither
     * quoted nor escaped.
     * @param fieldName the field name.
     * @param wildcard if <code>true</code>, {@link #LUCENE_WILDCARD} is
     *      appended to every value.
     * @param fieldValues the accepted values. Must not be <code>null</code>;
     *      an empty collection yields <code>fieldName:()</code>.
     * @return the OR'ed query.
     */
    public static String createQueryIN(
            String fieldName, boolean wildcard, Collection<String> fieldValues) {
        StringBuilder query = new StringBuilder()
                .append(fieldName)
                .append(LUCENE_EQUAL)
                .append("(");
        boolean first = true;
        for (String fieldValue : fieldValues) {
            if (!first) {
                query.append(OR);
            }
            query.append(fieldValue);
            if (wildcard) {
                query.append(LUCENE_WILDCARD);
            }
            first = false;
        }
        return query.append(")").toString();
    }

    /**
     * Create a list of Lucene queries IN (eg.:
     * <code>id:(PDE7B_HUMAN OR PDE7B_MOUSE) AND EC:[1 TO 9]</code>)
     * for a long list of field values. The list is divided into sub lists
     * by {@link DataTypeConverter#createSubLists} and a query is created for
     * each sub list. Every query includes the {@link #ENZYME_FILTER}.
     * @param queryField field to query.
     * @param fieldValues values to match.
     * @param wildcard use a wildcard? See
     *      {@link #createQueryIN(String, boolean, Collection)}.
     * @param subListSize size passed on to split the list of field values.
     * @return one query per sub list.
     */
    public static List<String> createQueriesIn(
            String queryField
            , List<String> fieldValues, boolean wildcard, int subListSize) {
        List<List<String>> subLists = DataTypeConverter
                .createSubLists(fieldValues, subListSize);
        List<String> queries = new ArrayList<String>();
        for (List<String> subList : subLists) {
            // Note: the enzyme filter will slow down the search.
            queries.add(createANDQuery(
                    createQueryIN(queryField, wildcard, subList),
                    ENZYME_FILTER));
        }
        return queries;
    }

    /**
     * Builds queries for the given field and field values, <i>adding an
     * enzyme filter</i>. Currently this is exactly the same as
     * {@link #createQueriesIn(String, List, boolean, int)}.
     * @param queryField field to query
     * @param fieldValues values to match
     * @param wildcard use a wildcard?
     * @param subListSize size of sublist to divide the original list of field
     *      values, if it is too long.
     * @return one query per sub list.
     */
    public static List<String> createEbeyeQueriesIn(String queryField,
            List<String> fieldValues, boolean wildcard, int subListSize) {
        return createQueriesIn(queryField, fieldValues, wildcard, subListSize);
    }

    /**
     * Concat a simple query with a filter query by AND condition.
     * @param query the query to filter.
     * @param filterQuery the filter.
     * @return <code>query AND filterQuery</code>. No parentheses are added.
     */
    public static String createANDQuery(String query, String filterQuery) {
        return new StringBuilder()
                .append(query)
                .append(AND)
                .append(filterQuery)
                .toString();
    }

    /**
     * Adds the same filter to every query, see
     * {@link #addSpeciesFilterQuery(String, String, Collection)}. Every
     * resulting query is logged at debug level.
     * @param queries the queries to filter.
     * @param filterFieldName the name of the field to filter by.
     * @param filterValues the accepted values for the filter field. If
     *      <code>null</code> or empty, queries are returned unchanged.
     * @return a new list with the filtered queries, in the same order.
     */
    public static List<String> addFilterQueriesAND(List<String> queries
            , String filterFieldName, List<String> filterValues) {
        List<String> queriesWithFilter = new ArrayList<String>(queries.size());
        for (String query : queries) {
            String filteredQuery =
                    addSpeciesFilterQuery(query, filterFieldName, filterValues);
            LOGGER.debug(filteredQuery);
            queriesWithFilter.add(filteredQuery);
        }
        return queriesWithFilter;
    }
}