/**
 * Copyright (c) Codice Foundation
 * <p/>
 * This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser
 * General Public License as published by the Free Software Foundation, either version 3 of the
 * License, or any later version.
 * <p/>
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details. A copy of the GNU Lesser General Public License
 * is distributed along with this program and can be found at
 * <http://www.gnu.org/licenses/lgpl.html>.
 */
package ddf.catalog.pubsub.predicate;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.builder.ToStringBuilder;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.store.Directory;
import org.osgi.service.event.Event;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ddf.catalog.pubsub.criteria.contextual.ContextualEvaluationCriteria;
import ddf.catalog.pubsub.criteria.contextual.ContextualEvaluationCriteriaImpl;
import ddf.catalog.pubsub.criteria.contextual.ContextualEvaluator;
import ddf.catalog.pubsub.criteria.contextual.ContextualTokenizer;
import ddf.catalog.pubsub.internal.PubSubConstants;

/**
 * Predicate that evaluates a contextual (free-text) search phrase against the contextual data
 * carried by an {@link Event}.
 * <p/>
 * The search phrase handed to the constructor is normalized once via
 * {@link #normalizePhrase(String, boolean)}; {@link #matches(Event)} then builds a
 * {@link ContextualEvaluationCriteria} from it and delegates to {@link ContextualEvaluator}.
 */
public class ContextualPredicate implements Predicate {

    private static final Logger LOGGER = LoggerFactory.getLogger(ContextualPredicate.class);

    /** Splits a phrase on literal double quotes; compiled once instead of on every call. */
    private static final Pattern QUOTE_PATTERN = Pattern.compile("\"");

    private final String searchPhrase;

    private final boolean fuzzy;

    private final boolean caseSensitiveSearch;

    /** Defensive copy of the text paths, or {@code null} when none were supplied. */
    private final Collection<String> textPaths;

    /**
     * Creates a contextual predicate.
     *
     * @param searchPhrase        the raw search phrase; it is normalized before being stored
     * @param fuzzy               true if the search is fuzzy
     * @param caseSensitiveSearch true if the search is case sensitive
     * @param textPaths           optional text paths; copied when non-empty, otherwise ignored
     */
    public ContextualPredicate(String searchPhrase, boolean fuzzy, boolean caseSensitiveSearch,
            Collection<String> textPaths) {
        this.fuzzy = fuzzy;
        this.caseSensitiveSearch = caseSensitiveSearch;

        if (textPaths != null && !textPaths.isEmpty()) {
            LOGGER.debug("text paths size: {}", textPaths.size());
            this.textPaths = new ArrayList<String>(textPaths);
        } else {
            this.textPaths = null;
        }

        this.searchPhrase = normalizePhrase(searchPhrase, fuzzy);
    }

    /**
     * Indicates whether the given search phrase describes a contextual search.
     *
     * @param searchPhrase the search phrase, may be {@code null}
     * @return true if the phrase is neither {@code null} nor empty
     */
    public static boolean isContextual(String searchPhrase) {
        // Null-safe: a missing phrase simply is not contextual.
        return StringUtils.isNotEmpty(searchPhrase);
    }

    /**
     * Normalizes a search phrase for a Lucene query: boolean operators are upper-cased, special
     * characters are escaped and, for fuzzy searches, a tilde is appended to every word that is
     * outside of an exact (quoted) phrase.
     *
     * @param inputPhrase the input phrase, may be {@code null}
     * @param isFuzzy     true indicates the criteria is fuzzy
     * @return a search phrase aligned to Lucene syntax; the empty string for null/empty input
     */
    public static String normalizePhrase(String inputPhrase, boolean isFuzzy) {
        String phrase = "";

        if (inputPhrase != null && !inputPhrase.isEmpty()) {
            String trimmed = inputPhrase.trim();
            String[] parts = QUOTE_PATTERN.split(trimmed);
            LOGGER.debug("phrase = [{}] parts.length = {}", trimmed, parts.length);

            // if multiple parts found, then exact (quoted) phrases are present
            if (parts.length > 1) {
                phrase = normalizeQuotedParts(parts, isFuzzy);
            } else {
                LOGGER.debug("parts.length <= 1: phrase = {}", trimmed);
                phrase = escapeSpecialCharacters(normalizeBooleanOperators(trimmed));
                if (isFuzzy) {
                    phrase = fuzzifyWords(phrase);
                    LOGGER.debug("2. Fuzzy-fied phrase: {}", phrase);
                }
            }

            // Pass thru the last literal double quote. The split above drops it, so it has to be
            // restored here; the check runs on the trimmed input so that trailing whitespace
            // after the closing quote does not leave the exact phrase unterminated.
            if (trimmed.endsWith("\"")) {
                phrase = phrase + "\"";
            }
        }

        LOGGER.debug("Normalization complete. \nBefore: {}\nAfter: {}", inputPhrase, phrase);

        return phrase;
    }

    /**
     * Normalizes the segments obtained by splitting a phrase on double quotes and joins them
     * back together with double quotes.
     *
     * @param parts   the segments; odd indexes are the contents of exact (quoted) phrases, even
     *                indexes are individual words or operators. The array is modified in place.
     * @param isFuzzy true if words outside of quotes should be made fuzzy
     * @return the re-assembled phrase (without a trailing quote)
     */
    private static String normalizeQuotedParts(String[] parts, boolean isFuzzy) {
        for (int i = 0; i < parts.length; i++) {
            LOGGER.debug("parts[{}] = {}", i, parts[i]);

            if (i % 2 != 0) {
                // Inside quotes: an exact phrase, only escape it.
                parts[i] = escapeSpecialCharacters(parts[i]);
            } else if (parts[i].isEmpty()) {
                LOGGER.debug("part[{}] was empty", i);
            } else {
                String segment = escapeSpecialCharacters(normalizeBooleanOperators(parts[i]));
                if (isFuzzy) {
                    // Fuzzify word by word so that operators, parentheses and blank segments
                    // never receive a dangling tilde.
                    segment = fuzzifyWords(segment);
                    LOGGER.debug("Fuzzy Search adding a tilde: {}", segment);
                }
                parts[i] = segment;
            }
        }

        StringBuilder phraseBuilder = new StringBuilder();
        for (int i = 0; i < parts.length; i++) {
            if (i > 0) {
                phraseBuilder.append('"');
            }
            phraseBuilder.append(parts[i]);
        }
        return phraseBuilder.toString();
    }

    /**
     * Appends a tilde to every word of the text that is not a boolean operator and does not
     * already end with a tilde. Words are delimited by spaces and parentheses; the delimiters
     * are copied to the result unchanged.
     * <p/>
     * The text is rebuilt in a single pass rather than patched with regex replacements, so
     * repeated words, words contained in other words, and words containing backslashes or
     * dollar signs are all handled at their own position.
     *
     * @param text the already escaped text
     * @return the text with fuzzy markers added
     */
    private static String fuzzifyWords(String text) {
        StringBuilder result = new StringBuilder(text.length() + 8);
        int wordStart = 0;
        int length = text.length();

        for (int i = 0; i <= length; i++) {
            boolean atEnd = i == length;
            if (atEnd || isWordDelimiter(text.charAt(i))) {
                if (i > wordStart) {
                    String word = text.substring(wordStart, i);
                    result.append(word);
                    if (!isBooleanOperator(word) && !word.endsWith("~")) {
                        result.append('~');
                    }
                }
                if (!atEnd) {
                    result.append(text.charAt(i));
                }
                wordStart = i + 1;
            }
        }

        return result.toString();
    }

    /**
     * @param c the character to test
     * @return true if the character separates words for the purpose of fuzzy marking
     */
    private static boolean isWordDelimiter(char c) {
        return c == ' ' || c == '(' || c == ')';
    }

    /**
     * Escapes every special character (per {@link ContextualTokenizer#SPECIAL_CHARACTERS_SET})
     * of the trimmed phrase with a backslash. Asterisks are left alone and characters that are
     * already escaped are not escaped a second time.
     *
     * @param phrase the phrase to escape
     * @return the trimmed, escaped phrase
     */
    private static String escapeSpecialCharacters(String phrase) {
        char[] chars = phrase.trim().toCharArray();
        StringBuilder escaped = new StringBuilder(chars.length + 8);

        for (int i = 0; i < chars.length; i++) {
            char current = chars[i];
            boolean hasNext = i + 1 < chars.length;
            char next = hasNext ? chars[i + 1] : '\0';

            if (current == '\\' && hasNext && next != '\0'
                    && ContextualTokenizer.SPECIAL_CHARACTERS_SET.contains(next)) {
                // these two tokens constitute an escaped character,
                // so consume them together
                escaped.append(current).append(next);
                i++;
            } else if (current != '*'
                    && ContextualTokenizer.SPECIAL_CHARACTERS_SET.contains(current)) {
                // * is escaped by the subscription when not a wildcard;
                // every other unescaped special character is escaped here
                escaped.append('\\').append(current);
            } else {
                escaped.append(current);
            }
        }

        return escaped.toString();
    }

    /**
     * Normalize all Boolean operators in the phrase since Lucene grammar requires all boolean
     * operators to be uppercase. Only operators surrounded by single spaces are recognized.
     *
     * @param phrase the input phrase
     * @return the normalized phrase
     */
    private static String normalizeBooleanOperators(String phrase) {
        String normalized = phrase.replace(" not ", " NOT ");
        normalized = normalized.replace(" or ", " OR ");
        normalized = normalized.replace(" and ", " AND ");
        // Keep the surrounding spaces, otherwise "a & b" collapses into the single term "aANDb".
        normalized = normalized.replace(" & ", " AND ");
        normalized = normalized.replace(" | ", " OR ");
        return normalized;
    }

    /**
     * Tells whether a word is a boolean operator and therefore must not be made fuzzy.
     *
     * @param input the word to test
     * @return true if the trimmed word equals {@code not}, {@code and} or {@code or} (ignoring
     *         case) or starts with {@code &} or {@code |}
     */
    private static boolean isBooleanOperator(String input) {
        String token = input.trim();
        if (token.startsWith("&") || token.startsWith("|")) {
            return true;
        }
        // Exact match only: ordinary words such as "orange" or "android" are not operators.
        return "not".equalsIgnoreCase(token) || "and".equalsIgnoreCase(token)
                || "or".equalsIgnoreCase(token);
    }

    /**
     * Evaluates this predicate against the contextual data of an event.
     *
     * @param properties the event; its {@link PubSubConstants#HEADER_CONTEXTUAL_KEY} property is
     *                   read as a map holding the entries {@code METADATA} and
     *                   {@code DEFAULT_INDEX}
     * @return true if the event matches; false if it does not match, carries no contextual map,
     *         or the evaluation failed with an I/O or parse error (which is logged)
     */
    public boolean matches(Event properties) {
        String methodName = "matches";
        LOGGER.debug("ENTERING: {}", methodName);

        LOGGER.debug("Headers: {}", properties);

        @SuppressWarnings("unchecked")
        Map<String, Object> contextualMap = (Map<String, Object>) properties
                .getProperty(PubSubConstants.HEADER_CONTEXTUAL_KEY);

        if (contextualMap == null) {
            LOGGER.debug("No contextual metadata to search against.");
            return false;
        }

        String operation = (String) properties.getProperty(PubSubConstants.HEADER_OPERATION_KEY);
        LOGGER.debug("operation = {}", operation);
        String metadata = (String) contextualMap.get("METADATA");
        LOGGER.debug("metadata = [{}]", metadata);

        // If deleting a catalog entry and the entry's metadata is only the word "deleted" (i.e.,
        // the source is deleting the catalog entry and did not send any metadata with the delete
        // event), then cannot apply any contextual filtering - just send the event on to the
        // subscriber. Constants go first so a missing operation or metadata cannot throw.
        if (PubSubConstants.DELETE.equals(operation)
                && PubSubConstants.METADATA_DELETED.equals(metadata)) {
            LOGGER.debug(
                    "Detected a DELETE operation where metadata is just the word 'deleted', so send event on to subscriber");
            return true;
        }

        ContextualEvaluationCriteria cec;
        if (hasTextPaths()) {
            // The predicate specified one or more text paths, so pass the entry's metadata and
            // the text path(s) to the evaluation criteria (which will build a Lucene index on
            // the metadata using the text paths)
            LOGGER.debug("creating criteria with textPaths and metadata document");
            try {
                cec = new ContextualEvaluationCriteriaImpl(searchPhrase, fuzzy,
                        caseSensitiveSearch,
                        this.textPaths.toArray(new String[this.textPaths.size()]), metadata);
            } catch (IOException e) {
                LOGGER.error("IO exception during context evaluation", e);
                return false;
            }
        } else {
            // This predicate has no text paths specified, so can use default Lucene search
            // index, which indexed the entry's entire metadata per the default XPath expressions
            // in ContextualEvaluator, from the event's properties data
            LOGGER.debug("using default Lucene search index for metadata");
            cec = new ContextualEvaluationCriteriaImpl(searchPhrase, fuzzy, caseSensitiveSearch,
                    (Directory) contextualMap.get("DEFAULT_INDEX"));
        }

        boolean matched = false;
        try {
            matched = ContextualEvaluator.evaluate(cec);
        } catch (IOException e) {
            LOGGER.error("IO Exception evaluating context criteria", e);
        } catch (ParseException e) {
            LOGGER.error("Parse Exception evaluating context criteria", e);
        }

        LOGGER.debug("EXITING: {}", methodName);

        return matched;
    }

    /**
     * @return the normalized search phrase
     */
    public String getSearchPhrase() {
        return searchPhrase;
    }

    /**
     * @return true if the search is fuzzy
     */
    public boolean isFuzzy() {
        return fuzzy;
    }

    /**
     * @return true if the search is case sensitive
     */
    public boolean isCaseSensitive() {
        return caseSensitiveSearch;
    }

    /**
     * @return true if at least one text path was supplied
     */
    public boolean hasTextPaths() {
        return textPaths != null && !textPaths.isEmpty();
    }

    /**
     * @return the text paths, or {@code null} if none were supplied
     */
    public Collection<String> getTextPaths() {
        return textPaths;
    }

    @Override
    public String toString() {
        return ToStringBuilder.reflectionToString(this);
    }
}