/* * Copyright (C) 2014 University of Dundee & Open Microscopy Environment. * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ package ome.util.search; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Collections; import java.util.Date; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Utility class for building lucene queries<br> * <br> * Example:<br> * Fields to search for: 'name', 'description'<br> * Input query: a b AND c AND d f<br> * <br> * will be transformed to this lucene expression:<br> * name:a description:a name:f description:f ((name:b description:b) AND (name:c * description:c) AND (name:d description:d)) <br> * <br> * * @author Dominik Lindner      <a * href="mailto:d.lindner@dundee.ac.uk">d.lindner@dundee.ac.uk</a> * * @since 5.0 * * TODO: For versions > 5.0 this class can be moved from commons to the server */ public class LuceneQueryBuilder { private static final DateFormat DATEFORMAT = new SimpleDateFormat( "yyyyMMdd"); public static final String DATE_IMPORT = "details.creationEvent.time"; public static final String DATE_ACQUISITION = "acquisitionDate"; /** Wild cards we support */ private static final List<String> WILD_CARDS = new ArrayList<String>(); static { WILD_CARDS.add("*"); WILD_CARDS.add("?"); WILD_CARDS.add("~"); } /** Punctuation which will not be stripped */ private static final List<String> NO_BREAK = new ArrayList<String>(); static { NO_BREAK.add("_"); NO_BREAK.add("-"); NO_BREAK.add(":"); } /** * Builds a query with the provided input terms over the given fields * * @param fields * @param input * @return the query * @throws InvalidQueryException */ public static String buildLuceneQuery(List<String> fields, Date from, Date to, String dateType, String input) throws InvalidQueryException { StringBuilder result = new StringBuilder(); String basicQuery = buildLuceneQuery(fields, input); if (from == null && to == null) return basicQuery; if (basicQuery!=null && basicQuery.trim().length()>0) result.append("(" + basicQuery + ")"); else result.append(basicQuery); // Lucence date range TO is exclusive, so have to add a day to it String dateFrom = beginOfTime(); String dateTo = tomorrow(); if (from != null) dateFrom = DATEFORMAT.format(from); if (to != null) { dateTo = DATEFORMAT.format(addOneDay(to)); } if (result.length() > 0) result.append(" AND " + dateType + ":[" + dateFrom + " TO " + dateTo + "]"); else result.append(dateType + ":[" + dateFrom + " TO " + dateTo + "]"); return result.toString(); } /** * Builds a query with the provided input terms over the given fields * * @param fields * @param input * @return the query * @throws InvalidQueryException */ public static String buildLuceneQuery(List<String> fields, String input) throws InvalidQueryException { StringBuilder result = new StringBuilder(); input = replaceNonAlphaNummeric(input); List<String> terms = split(input); if (fields!=null && !fields.isEmpty()) { terms = attachFields(fields, terms); } terms = assembleAndClauses(terms); for (String term : terms) { if (result.length() > 0) result.append(" "); result.append(term); } return result.toString().trim(); } /** * Attaches the field names to the different terms; * * @param fields * @param terms * @return */ private static List<String> attachFields(List<String> fields, List<String> terms) { List<String> result = new ArrayList<String>(); for (String term : terms) { if (term.equals("AND")) { result.add(term); continue; } if((term.indexOf(':')) == -1) { // only add fields, if the term is not already in form 'foo:bar' // (i. e. it is not a MapAnnotation specific search term) String newTerm = ""; for (String field : fields) { if (newTerm.length() > 0) newTerm += " "; newTerm += field + ":" + term; } result.add(newTerm); } else { result.add(term); } } return result; } /** * Replaces non alpha-numeric characters (excluding underscore) with spaces * (which act like OR); will not replace any characters within quotes. * * @param s * @return */ private static String replaceNonAlphaNummeric(String s) { char[] result = new char[s.length()]; boolean insideQuotes = false; for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c == '"') { insideQuotes = !insideQuotes; result[i] = c; continue; } if (!insideQuotes && !Character.isLetterOrDigit(c) && !WILD_CARDS.contains("" + c) && !NO_BREAK.contains("" + c)) result[i] = ' '; else result[i] = c; } return new String(result); } /** * Checks if a String just contains a wildcard character only * * @param s * @return */ private static boolean isWildcardOnly(String s) { return s.matches("[\\*\\?\\~]+"); } /** * Reassembles the AND expressions, i. e. creates the single term "a and b" * from the terms "a" "and" "b" * * @param terms * @return */ private static List<String> assembleAndClauses(List<String> terms) throws InvalidQueryException { List<String> result = new ArrayList<String>(); if (terms==null || terms.isEmpty()) return Collections.emptyList(); if (terms.size() == 1) { if (terms.get(0).equals("AND")) throw new InvalidQueryException( "AND expression must be followed by a search term!"); else return Collections.singletonList(terms.get(0)); } // the AND terms gathered by now List<String> andTerms = new ArrayList<String>(); // flag to indicate that the current term is part of an AND expression boolean withinAndTerm = false; for (int i = 0; i < terms.size(); i++) { String term = terms.get(i); if (i < terms.size() - 1) { String next = terms.get(i + 1); if (next.equals("AND")) { // if next term is AND put this term to the end of the and // terms list and indicate that we are within an AND // expression andTerms.add(term); withinAndTerm = true; i++; } else { if (withinAndTerm) { // if we're still within the AND expression put it to // the end list and indicate that the end of this AND // expression is reached andTerms.add(term); withinAndTerm = false; } else { // end of AND reached or there was no AND expression at // all if (!andTerms.isEmpty()) { // if there was one, built the expression result.add(concatenateAndTerms(andTerms)); andTerms.clear(); } result.add(term); } } } else { // we reached the last search term if (withinAndTerm) { andTerms.add(term); withinAndTerm = false; } else { if (!term.equals("AND")) result.add(term); else throw new InvalidQueryException( "AND expression must be followed by a search term!"); } } } if (!andTerms.isEmpty()) result.add(concatenateAndTerms(andTerms)); return result; } /** * Just concatenates the Strings separated by AND * * @param terms * @return */ private static String concatenateAndTerms(List<String> terms) { String result = "("; for (String t : terms) { if (result.length() > 1) result += " AND "; result += "(" + t + ")"; } return result + ")"; } /** * Splits input string by whitespaces, taking quotes into account * * @param input * @return */ private static List<String> split(String input) { final String regex = "\"([^\"]*)\"|(\\S+)"; List<String> result = new ArrayList<String>(); Matcher m = Pattern.compile(regex).matcher(input); while (m.find()) { String s = m.group(1); if (s != null) { // don't touch quoted terms result.add("\"" + s.trim() + "\""); } else { s = m.group(2); if (!isWildcardOnly(s)) result.add(s.trim()); } } return result; } /** * Get tomorrow's date * * @return */ private static String tomorrow() { return DATEFORMAT.format(addOneDay(new Date())); } /** * Get the earliest possible date * * @return */ private static String beginOfTime() { return DATEFORMAT.format(new Date(0)); } /** * Adds a day to a given date * * @param date * @return */ private static Date addOneDay(Date date) { Calendar tmp = Calendar.getInstance(); tmp.setTime(date); tmp.add(Calendar.DAY_OF_MONTH, 1); return tmp.getTime(); } }