/**
 * SolrQueryBuilder.java
 * Author: Philip Kahle (philip.kahle@uibk.ac.at)
 *
 * This file is part of PrestoPRIME Preservation Platform (P4).
 *
 * Copyright (C) 2009-2012 University of Innsbruck, Austria
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package eu.prestoprime.search.util;

import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import eu.prestoprime.search.util.Schema.FieldType;

/**
 * Helper class for forming Solr query syntax from separate field-specific
 * query strings.
 *
 * @author Philip Kahle
 */
public class SolrQueryBuilder {

    private static final Logger LOGGER = LoggerFactory.getLogger(SolrQueryBuilder.class);

    /**
     * Runs of whitespace and of Solr's reserved characters
     * { +-&|!(){}[]^~*?:\ }. The quote character is also reserved but is
     * handled separately, because it delimits exact phrases.
     */
    private static final Pattern SPECIAL_CHARS = Pattern.compile("[\\s\\+\\-&|!(){}\\[\\]^~*?:\\\\]+");

    /**
     * Date range of the form {@code <from>#<to>} where each side is optional
     * and, if present, looks like {@code 2012-08-09T00:00:00Z}. Group 1 is the
     * lower bound, group 2 the upper bound.
     */
    private static final Pattern DATE_RANGE = Pattern.compile(
            "([0-9]{4}-[0-9]{2}-[0-9]{2}T00:00:00Z)?#([0-9]{4}-[0-9]{2}-[0-9]{2}T00:00:00Z)?");

    /**
     * Takes a map containing Solr field names and search terms from the user
     * interface and builds field-specific clauses of the form
     * {@code +(fieldname:query)}, joined by single spaces. User input may use
     * quotes for exact matches; single words are wrapped in asterisks so that
     * parts of words match as well.
     *
     * @param paramMap
     *            search fields mapped to the raw user input; entries with a
     *            null or empty value are skipped
     * @return a query in Solr syntax, or an empty string if no clause could be
     *         built (including when {@code paramMap} is null)
     */
    public static String buildQuery(Map<Schema.searchField, String> paramMap) {
        if (paramMap == null) {
            return "";
        }
        StringBuilder query = new StringBuilder();
        for (Entry<Schema.searchField, String> e : paramMap.entrySet()) {
            String value = e.getValue();
            if (value == null || value.isEmpty()) {
                continue;
            }
            Schema.searchField field = e.getKey();
            LOGGER.debug("At Field:{} | Value = {}", field, value);

            String clause = wrapFilter(buildFieldFilter(field, value));
            if (!clause.isEmpty()) {
                if (query.length() > 0) {
                    query.append(' ');
                }
                query.append(clause);
            }
        }
        LOGGER.debug("Built query: {}", query);
        return query.toString();
    }

    /**
     * Selects the filter builder matching the type of the given field.
     *
     * @param field
     *            the search field being queried
     * @param value
     *            non-empty user input for that field
     * @return the unwrapped filter, possibly empty
     */
    private static String buildFieldFilter(Schema.searchField field, String value) {
        String fieldName = field.getFieldName();
        if (FieldType.STRING.equals(field.getType())) {
            // String -> exact match. Keep all reserved characters but quote it.
            return buildEqualsFilter(fieldName, value);
        }
        if (FieldType.TDATE.equals(field.getType())) {
            return buildDateFilter(fieldName, value);
        }
        // Anything else is expected to be P4_TEXT: apply the complete filter logic.
        return buildP4Filter(fieldName, value);
    }

    /**
     * Builds a field-specific query for matching parts of words.
     *
     * @param fieldName
     *            name of the Solr field
     * @param value
     *            input string with whitespace separated query words
     * @return {@code fieldName:(*word1* *word2* ... )} with all words
     *         lower-cased, or an empty string for null/blank input
     */
    private static String buildContainsFilter(String fieldName, String value) {
        if (value == null || value.trim().isEmpty()) {
            return "";
        }
        StringBuilder terms = new StringBuilder();
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
        for (String word : value.trim().toLowerCase(Locale.ROOT).split("\\s+")) {
            if (word.isEmpty()) {
                continue;
            }
            if (terms.length() > 0) {
                terms.append(' ');
            }
            terms.append('*').append(word).append('*');
        }
        return fieldName + ":(" + terms + ")";
    }

    /**
     * Builds a field-specific query for matching an exact sequence of words.
     * Backslashes and quotes inside the value are escaped so that they are
     * matched literally and cannot terminate the phrase.
     *
     * @param fieldName
     *            name of the Solr field
     * @param value
     *            input string with whitespace separated query words
     * @return {@code fieldName:"word1 word2 ..."}, or an empty string for
     *         null/empty input
     */
    private static String buildEqualsFilter(String fieldName, String value) {
        if (value == null || value.isEmpty()) {
            return "";
        }
        // Escape the backslash first so the backslashes added for quotes are not doubled.
        String escaped = value.replace("\\", "\\\\").replace("\"", "\\\"");
        return fieldName + ":\"" + escaped + "\"";
    }

    /**
     * Builds a range query from a date range given as {@code <from>#<to>},
     * e.g. {@code 2012-08-09T00:00:00Z#2012-08-10T00:00:00Z}. A missing lower
     * bound becomes {@code *}, a missing upper bound becomes {@code NOW}.
     *
     * @param fieldName
     *            name of the Solr date field
     * @param value
     *            the date range string
     * @return {@code fieldName:[from TO to]}, or an empty string if the value
     *         is null, empty, malformed, or contains neither bound
     */
    private static String buildDateFilter(String fieldName, String value) {
        if (value == null || value.isEmpty()) {
            return "";
        }
        Matcher m = DATE_RANGE.matcher(value);
        if (!m.matches()) {
            LOGGER.warn("Ignoring date range with unexpected format: {}", value);
            return "";
        }
        String from = m.group(1);
        String to = m.group(2);
        if (from == null && to == null) {
            // A bare "#" matches the pattern but carries no restriction at all.
            LOGGER.warn("Ignoring date range without any bound: {}", value);
            return "";
        }
        String lower = from != null ? from : "*";
        String upper = to != null ? to : "NOW";
        return fieldName + ":[" + lower + " TO " + upper + "]";
    }

    /**
     * Determines the quoted parts of a query and builds exact queries for
     * them, plus one loosely matching query for all remaining words. The
     * resulting clauses are joined with {@code OR}. Reserved characters are
     * only kept inside quoted parts. In case of an odd number of quotes, all
     * quotes are dropped and the whole input is treated as loose words.
     *
     * @param fieldName
     *            name of the Solr field
     * @param value
     *            non-null user input
     * @return the combined filter, or an empty string if nothing usable
     *         remains
     */
    private static String buildP4Filter(String fieldName, String value) {
        int quotes = countQuotes(value);
        if (quotes % 2 != 0) {
            // Unbalanced quoting cannot be interpreted: drop the quotes and
            // fall back to a plain contains-filter.
            LOGGER.debug("Detected {} quotes. Building contains-filter...", quotes);
            return buildContainsFilter(fieldName, replaceSpecialChars(value.replace("\"", "")));
        }

        StringBuilder filter = new StringBuilder();
        StringBuilder loose = new StringBuilder();
        String rest = value;
        int open = rest.indexOf('"');
        while (open != -1) {
            // The quote count is even, so every opening quote has a closing one.
            int close = rest.indexOf('"', open + 1);
            if (open > 0) {
                loose.append(rest, 0, open).append(' ');
            }
            appendAlternative(filter, buildEqualsFilter(fieldName, rest.substring(open + 1, close)));
            rest = rest.substring(close + 1);
            open = rest.indexOf('"');
        }
        loose.append(rest);

        // Build the contains-filter from all remaining, non-quoted words.
        appendAlternative(filter, buildContainsFilter(fieldName, replaceSpecialChars(loose.toString())));
        return filter.toString();
    }

    /**
     * Appends a clause to the filter, separated by {@code " OR "} from any
     * clause already present. Empty clauses are ignored so that no dangling
     * operator is produced.
     */
    private static void appendAlternative(StringBuilder filter, String clause) {
        if (clause.isEmpty()) {
            return;
        }
        if (filter.length() > 0) {
            filter.append(" OR ");
        }
        filter.append(clause);
    }

    /** Counts the double quote characters in the given string. */
    private static int countQuotes(String value) {
        int count = 0;
        for (int i = value.indexOf('"'); i != -1; i = value.indexOf('"', i + 1)) {
            count++;
        }
        return count;
    }

    /**
     * Replaces each run of whitespace and reserved characters by a single
     * space and trims the result.
     */
    private static String replaceSpecialChars(String value) {
        return SPECIAL_CHARS.matcher(value).replaceAll(" ").trim();
    }

    /**
     * Marks a filter as mandatory.
     *
     * @param filter
     *            the filter to wrap, may be null
     * @return {@code +(filter)}, or an empty string for null/empty input
     */
    private static String wrapFilter(String filter) {
        if (filter == null || filter.isEmpty()) {
            return "";
        }
        return "+(" + filter + ")";
    }

    /**
     * Manual smoke test: prints the date filters built for a few sample
     * ranges.
     */
    public static void main(String[] schorsch) {
        String[] samples = {
                "2012-08-09T00:00:00Z#2012-08-10T00:00:00Z",
                "2012-08-09T00:00:00Z#",
                "#2012-08-10T00:00:00Z" };
        for (String sample : samples) {
            System.out.println(buildDateFilter("test", sample));
        }
    }
}