/**
* Copyright (c) Codice Foundation
* <p/>
* This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License as published by the Free Software Foundation, either version 3 of the
* License, or any later version.
* <p/>
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details. A copy of the GNU Lesser General Public License
* is distributed along with this program and can be found at
* <http://www.gnu.org/licenses/lgpl.html>.
*/
package ddf.catalog.pubsub.predicate;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.builder.ToStringBuilder;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.store.Directory;
import org.osgi.service.event.Event;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ddf.catalog.pubsub.criteria.contextual.ContextualEvaluationCriteria;
import ddf.catalog.pubsub.criteria.contextual.ContextualEvaluationCriteriaImpl;
import ddf.catalog.pubsub.criteria.contextual.ContextualEvaluator;
import ddf.catalog.pubsub.criteria.contextual.ContextualTokenizer;
import ddf.catalog.pubsub.internal.PubSubConstants;
/**
 * Predicate that evaluates a contextual (free text) search phrase against the contextual
 * properties carried by a catalog {@link Event}.
 * <p>
 * The search phrase is normalized once, at construction time, by
 * {@link #normalizePhrase(String, boolean)}; {@link #matches(Event)} then hands the normalized
 * phrase to {@link ContextualEvaluator}.
 */
public class ContextualPredicate implements Predicate {
    private static final Logger LOGGER = LoggerFactory.getLogger(ContextualPredicate.class);

    /** Tokens (compared case-insensitively) that are boolean operators and never made fuzzy. */
    private static final String[] BOOLEAN_OPERATORS = {"not", "and", "or", "&", "|"};

    private final String searchPhrase;

    private final boolean fuzzy;

    private final boolean caseSensitiveSearch;

    /** Private copy of the text paths given to the constructor; {@code null} when none given. */
    private final Collection<String> textPaths;

    /**
     * Creates a contextual predicate.
     *
     * @param searchPhrase        the raw search phrase; it is normalized before being stored
     * @param fuzzy               true to request fuzzy matching of the phrase's terms
     * @param caseSensitiveSearch true to request a case-sensitive search
     * @param textPaths           optional text paths; copied when non-empty, otherwise the
     *                            predicate keeps no text paths
     */
    public ContextualPredicate(String searchPhrase, boolean fuzzy, boolean caseSensitiveSearch,
            Collection<String> textPaths) {
        this.fuzzy = fuzzy;
        this.caseSensitiveSearch = caseSensitiveSearch;
        if (textPaths != null && !textPaths.isEmpty()) {
            LOGGER.debug("text paths size: {}", textPaths.size());
            this.textPaths = new ArrayList<String>(textPaths);
        } else {
            this.textPaths = null;
        }
        this.searchPhrase = normalizePhrase(searchPhrase, fuzzy);
    }

    /**
     * Indicates whether a search phrase carries any contextual criteria.
     *
     * @param searchPhrase the phrase to test, may be null
     * @return true when the phrase is non-null and non-empty
     */
    public static boolean isContextual(String searchPhrase) {
        return searchPhrase != null && !searchPhrase.isEmpty();
    }

    /**
     * Normalizes a search phrase for a Lucene query
     * <p>
     * Text outside double quotes has its lower-case boolean operators upper-cased, its special
     * characters escaped and, for fuzzy criteria, a tilde appended to every term. Text inside
     * double quotes (an exact phrase) only has its special characters escaped.
     *
     * @param inputPhrase the input phrase, may be null
     * @param isFuzzy     true indicates the criteria is fuzzy
     * @return a search phrase aligned to Lucene syntax; the empty string for null or empty input
     */
    public static String normalizePhrase(String inputPhrase, boolean isFuzzy) {
        String phrase = "";
        if (inputPhrase != null && !inputPhrase.isEmpty()) {
            String trimmedInput = inputPhrase.trim();
            String[] parts = trimmedInput.split("\"");
            LOGGER.debug("phrase = [{}] parts.length = {}", trimmedInput, parts.length);

            // if multiple parts found, then exact (quoted) phrases are present
            if (parts.length > 1) {
                phrase = normalizeQuotedPhrase(parts, isFuzzy);
            } else {
                LOGGER.debug("parts.length <= 1: phrase = {}", trimmedInput);
                phrase = escapeSpecialCharacters(normalizeBooleanOperators(trimmedInput));
                if (isFuzzy) {
                    phrase = addFuzzyOperators(phrase);
                    LOGGER.debug("2. Fuzzy-fied phrase: {}", phrase);
                }
            }

            // Pass thru the last literal double quote. String.split() drops trailing empty
            // strings, so a closing quote at the very end is not restored when the parts are
            // re-joined. The trimmed phrase is tested so that trailing whitespace in the input
            // cannot hide the closing quote.
            if (trimmedInput.endsWith("\"")) {
                phrase = phrase + "\"";
            }
        }
        LOGGER.debug("Normalization complete. \nBefore: {}\nAfter: {}", inputPhrase, phrase);
        return phrase;
    }

    /**
     * Normalizes a phrase that was split on double quotes and joins the parts back together.
     * Odd-indexed parts were inside quotes (exact phrases); even-indexed parts are free words
     * and operators.
     *
     * @param parts   the phrase split on the double quote character
     * @param isFuzzy true to append the fuzzy operator to the free (unquoted) terms
     * @return the re-joined phrase, without a closing quote after the last part
     */
    private static String normalizeQuotedPhrase(String[] parts, boolean isFuzzy) {
        StringBuilder rebuilt = new StringBuilder();
        for (int i = 0; i < parts.length; i++) {
            String part = parts[i];
            LOGGER.debug("parts[{}] = {}", i, part);
            if (i % 2 != 0) {
                // exact (quoted) phrase: only escape, never add operators or tildes
                part = escapeSpecialCharacters(part);
            } else if (!part.isEmpty()) {
                part = escapeSpecialCharacters(normalizeBooleanOperators(part));
                if (isFuzzy) {
                    part = addFuzzyOperators(part);
                    LOGGER.debug("Fuzzy Search adding a tilde: {}", part);
                }
            } else {
                LOGGER.debug("part[{}] was empty", i);
            }
            rebuilt.append(part);
            if (i < parts.length - 1) {
                rebuilt.append('"');
            }
        }
        return rebuilt.toString();
    }

    /**
     * Appends the fuzzy operator ({@code ~}) to every term of an already escaped phrase.
     * <p>
     * Terms are delimited by spaces and parentheses. A backslash and the character following it
     * are kept together inside the current term, so an escaped parenthesis is not treated as a
     * delimiter. Boolean operators and terms that already end with a tilde are left unchanged.
     * The phrase is rebuilt left to right, so repeated words and words contained in earlier
     * words are each handled at their own position.
     *
     * @param phrase the escaped phrase
     * @return the phrase with fuzzy operators added
     */
    private static String addFuzzyOperators(String phrase) {
        StringBuilder result = new StringBuilder(phrase.length() + 8);
        StringBuilder term = new StringBuilder();
        for (int i = 0; i < phrase.length(); i++) {
            char current = phrase.charAt(i);
            if (current == '\\' && i + 1 < phrase.length()) {
                i = i + 1;
                term.append(current).append(phrase.charAt(i));
            } else if (current == ' ' || current == '(' || current == ')') {
                flushFuzzyTerm(term, result);
                result.append(current);
            } else {
                term.append(current);
            }
        }
        flushFuzzyTerm(term, result);
        // collapse doubled tildes that were already present in the phrase
        return result.toString().replace("~~", "~");
    }

    /**
     * Moves the pending term to the output, followed by a tilde unless the term is empty, is a
     * boolean operator or already ends with a tilde, and then clears the pending term.
     *
     * @param term the pending term; emptied by this call
     * @param out  the phrase being rebuilt
     */
    private static void flushFuzzyTerm(StringBuilder term, StringBuilder out) {
        if (term.length() == 0) {
            return;
        }
        String word = term.toString();
        out.append(word);
        if (!isBooleanOperator(word) && !word.endsWith("~")) {
            out.append('~');
        }
        term.setLength(0);
    }

    /**
     * Backslash-escapes every character of the trimmed phrase that is contained in
     * {@link ContextualTokenizer#SPECIAL_CHARACTERS_SET}.
     *
     * @param phrase the phrase to escape; it is trimmed first
     * @return the trimmed, escaped phrase
     */
    private static String escapeSpecialCharacters(String phrase) {
        StringBuilder sb = new StringBuilder();
        char[] chars = phrase.trim().toCharArray();
        for (int i = 0; i < chars.length; i++) {
            char currentChar = chars[i];
            boolean hasNext = i + 1 < chars.length;
            if (currentChar == '\\' && hasNext
                    && ContextualTokenizer.SPECIAL_CHARACTERS_SET.contains(chars[i + 1])) {
                // if the character has already been manually escaped, don't double escape:
                // these two tokens constitute an escaped character, so consume them together
                sb.append(currentChar);
                sb.append(chars[i + 1]);
                i = i + 1;
            } else if (currentChar != '*'
                    && ContextualTokenizer.SPECIAL_CHARACTERS_SET.contains(currentChar)) {
                // handle unescaped special characters;
                // * is escaped by the subscription when not a wildcard
                sb.append("\\");
                sb.append(currentChar);
            } else {
                sb.append(currentChar);
            }
        }
        return sb.toString();
    }

    /**
     * Normalize all Boolean operators in the phrase since Lucene grammar requires all boolean
     * operators to be uppercase.
     * <p>
     * Only operators surrounded by single spaces are rewritten. The symbolic forms {@code &}
     * and {@code |} become {@code AND} and {@code OR}; the surrounding spaces are kept so the
     * neighbouring terms are not fused with the operator into a single word.
     *
     * @param phrase the input phrase
     * @return the normalized phrase
     */
    private static String normalizeBooleanOperators(String phrase) {
        phrase = phrase.replace(" not ", " NOT ");
        phrase = phrase.replace(" or ", " OR ");
        phrase = phrase.replace(" and ", " AND ");
        phrase = phrase.replace(" & ", " AND ");
        phrase = phrase.replace(" | ", " OR ");
        return phrase;
    }

    /**
     * Tests whether a term is a boolean operator. The whole trimmed term must equal an operator,
     * ignoring case, so words that merely start with one (for example "orange") do not match.
     *
     * @param input the term to test
     * @return true when the term is one of {@code not}, {@code and}, {@code or}, {@code &},
     *         {@code |}
     */
    private static boolean isBooleanOperator(String input) {
        String candidate = input.trim();
        for (String operator : BOOLEAN_OPERATORS) {
            if (operator.equalsIgnoreCase(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Evaluates this predicate against the contextual properties of an event.
     *
     * @param properties the event to test
     * @return true when the event satisfies the search phrase, or when it is a delete whose
     *         metadata is only the "deleted" marker; false when there is nothing to search
     *         against, when there is no match, or when evaluation fails
     */
    public boolean matches(Event properties) {
        String methodName = "matches";
        LOGGER.debug("ENTERING: {}", methodName);
        LOGGER.debug("Headers: {}", properties);
        boolean matched = evaluate(properties);
        LOGGER.debug("EXITING: {}", methodName);
        return matched;
    }

    /**
     * Performs the evaluation for {@link #matches(Event)}.
     *
     * @param properties the event to test
     * @return the match result
     */
    private boolean evaluate(Event properties) {
        @SuppressWarnings("unchecked")
        Map<String, Object> contextualMap = (Map<String, Object>) properties
                .getProperty(PubSubConstants.HEADER_CONTEXTUAL_KEY);
        if (contextualMap == null) {
            LOGGER.debug("No contextual metadata to search against.");
            return false;
        }

        String operation = (String) properties.getProperty(PubSubConstants.HEADER_OPERATION_KEY);
        LOGGER.debug("operation = {}", operation);
        String metadata = (String) contextualMap.get("METADATA");
        LOGGER.debug("metadata = [{}]", metadata);

        // If deleting a catalog entry and the entry's metadata is only the word "deleted" (i.e.,
        // the source is deleting the catalog entry and did not send any metadata with the delete
        // event), then cannot apply any contextual filtering - just send the event on to the
        // subscriber. Either value may be absent from the event, hence the null checks.
        if (operation != null && operation.equals(PubSubConstants.DELETE) && metadata != null
                && metadata.equals(PubSubConstants.METADATA_DELETED)) {
            LOGGER.debug(
                    "Detected a DELETE operation where metadata is just the word 'deleted', so send event on to subscriber");
            return true;
        }

        ContextualEvaluationCriteria cec;
        if (hasTextPaths()) {
            // If predicate specified one or more text paths, then pass the entry's metadata and
            // the text path(s) to the evaluation criteria (which will build a Lucene index on
            // the metadata using the text paths)
            LOGGER.debug("creating criteria with textPaths and metadata document");
            try {
                cec = new ContextualEvaluationCriteriaImpl(searchPhrase, fuzzy,
                        caseSensitiveSearch,
                        this.textPaths.toArray(new String[this.textPaths.size()]), metadata);
            } catch (IOException e) {
                LOGGER.error("IO exception during context evaluation", e);
                return false;
            }
        } else {
            // This predicate has no text paths specified, so can use default Lucene search
            // index, which indexed the entry's entire metadata per the default XPath expressions
            // in ContextualEvaluator, from the event's properties data
            LOGGER.debug("using default Lucene search index for metadata");
            cec = new ContextualEvaluationCriteriaImpl(searchPhrase, fuzzy, caseSensitiveSearch,
                    (Directory) contextualMap.get("DEFAULT_INDEX"));
        }

        try {
            return ContextualEvaluator.evaluate(cec);
        } catch (IOException e) {
            LOGGER.error("IO Exception evaluating context criteria", e);
        } catch (ParseException e) {
            LOGGER.error("Parse Exception evaluating context criteria", e);
        }
        return false;
    }

    /**
     * @return the normalized search phrase
     */
    public String getSearchPhrase() {
        return searchPhrase;
    }

    /**
     * @return true when fuzzy matching was requested
     */
    public boolean isFuzzy() {
        return fuzzy;
    }

    /**
     * @return true when a case-sensitive search was requested
     */
    public boolean isCaseSensitive() {
        return caseSensitiveSearch;
    }

    /**
     * @return true when at least one text path was supplied
     */
    public boolean hasTextPaths() {
        return textPaths != null && !textPaths.isEmpty();
    }

    /**
     * @return this predicate's own collection of text paths (not a copy), or null when none
     *         were supplied
     */
    public Collection<String> getTextPaths() {
        return textPaths;
    }

    @Override
    public String toString() {
        return ToStringBuilder.reflectionToString(this);
    }
}