package org.jabref.model.search.rules;

import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.Keyword;
import org.jabref.search.SearchBaseVisitor;
import org.jabref.search.SearchLexer;
import org.jabref.search.SearchParser;

import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.misc.ParseCancellationException;
import org.antlr.v4.runtime.tree.ParseTree;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Search rule whose query must be an expression that is accepted by the Search.g4 grammar.
 * <p>
 * The query text is parsed into a {@link ParseTree}. The most recently parsed query and its tree are kept
 * in this instance, so parsing the same query again is skipped.
 */
public class GrammarBasedSearchRule implements SearchRule {

    private static final Log LOGGER = LogFactory.getLog(GrammarBasedSearchRule.class);

    private final boolean caseSensitiveSearch;
    private final boolean regExpSearch;

    /** Parse tree of {@link #query}; {@code null} until a query has been parsed successfully. */
    private ParseTree tree;
    /** Text of the query that produced {@link #tree}. */
    private String query;

    /**
     * Error listener that converts every reported syntax error into a {@link ParseCancellationException}.
     */
    public static class ThrowingErrorListener extends BaseErrorListener {

        public static final ThrowingErrorListener INSTANCE = new ThrowingErrorListener();

        /**
         * Always throws; the exception message has the form {@code "line <line>:<charPositionInLine> <msg>"}.
         *
         * @throws ParseCancellationException on every invocation
         */
        @Override
        public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line,
                                int charPositionInLine, String msg, RecognitionException e)
                throws ParseCancellationException {
            throw new ParseCancellationException("line " + line + ":" + charPositionInLine + " " + msg);
        }
    }

    /**
     * Creates a rule without parsing anything yet.
     *
     * @param caseSensitiveSearch whether field names and values are compared case sensitively
     * @param regExpSearch        whether field names and values in the query are treated as regular expressions
     */
    public GrammarBasedSearchRule(boolean caseSensitiveSearch, boolean regExpSearch) throws RecognitionException {
        this.caseSensitiveSearch = caseSensitiveSearch;
        this.regExpSearch = regExpSearch;
    }

    /**
     * Checks whether {@code query} can be parsed, using a throw-away rule instance.
     *
     * @return the result of {@link #validateSearchStrings(String)} on a fresh rule with the given flags
     */
    public static boolean isValid(boolean caseSensitive, boolean regExp, String query) {
        return new GrammarBasedSearchRule(caseSensitive, regExp).validateSearchStrings(query);
    }

    public boolean isCaseSensitiveSearch() {
        return this.caseSensitiveSearch;
    }

    public boolean isRegExpSearch() {
        return this.regExpSearch;
    }

    /**
     * @return the parse tree of the last successfully parsed query, or {@code null} if there is none
     */
    public ParseTree getTree() {
        return this.tree;
    }

    /**
     * @return the text of the last successfully parsed query, or {@code null} if there is none
     */
    public String getQuery() {
        return this.query;
    }

    /**
     * Parses {@code query} and stores the resulting tree, unless {@code query} equals the stored query.
     * On failure the previously stored tree and query are left untouched.
     *
     * @throws ParseCancellationException if the lexer or the parser reports an error
     */
    private void init(String query) throws ParseCancellationException {
        if (Objects.equals(this.query, query)) {
            return;
        }

        SearchLexer lexer = new SearchLexer(new ANTLRInputStream(query));
        // replace the registered listeners by one that throws instead of merely reporting
        lexer.removeErrorListeners();
        lexer.addErrorListener(ThrowingErrorListener.INSTANCE);

        SearchParser parser = new SearchParser(new CommonTokenStream(lexer));
        parser.removeErrorListeners();
        parser.addErrorListener(ThrowingErrorListener.INSTANCE);
        // ParseCancellationException on parse errors
        parser.setErrorHandler(new BailErrorStrategy());

        // assign the tree first so that a stored query always has its tree available
        tree = parser.start();
        this.query = query;
    }

    /**
     * Evaluates {@code query} against {@code bibEntry}.
     * <p>
     * The query is parsed here if it is not the one already stored, so the method also works without a
     * preceding {@link #validateSearchStrings(String)} call and never evaluates the tree of a different query.
     *
     * @return {@code true} if the entry matches; {@code false} if it does not match or if parsing or
     *         evaluation throws (the exception is logged at debug level)
     */
    @Override
    public boolean applyRule(String query, BibEntry bibEntry) {
        try {
            // no-op when the stored tree already belongs to this query
            init(query);
            return new BibtexSearchVisitor(caseSensitiveSearch, regExpSearch, bibEntry).visit(tree);
        } catch (Exception e) {
            LOGGER.debug("Search failed", e);
            return false;
        }
    }

    /**
     * Parses {@code query} and remembers the result for later {@link #applyRule(String, BibEntry)} calls.
     *
     * @return {@code true} if parsing raised no {@link ParseCancellationException}, {@code false} otherwise
     */
    @Override
    public boolean validateSearchStrings(String query) {
        try {
            init(query);
            return true;
        } catch (ParseCancellationException e) {
            LOGGER.debug("Search query invalid", e);
            return false;
        }
    }

    /**
     * The kinds of comparison a query can request between a field and a value.
     */
    public enum ComparisonOperator {
        EXACT, CONTAINS, DOES_NOT_CONTAIN;

        /**
         * Maps operator text to a constant: {@code CONTAINS} (any case) or {@code =} gives {@link #CONTAINS},
         * {@code MATCHES} (any case) or {@code ==} gives {@link #EXACT}, everything else gives
         * {@link #DOES_NOT_CONTAIN}.
         */
        public static ComparisonOperator build(String value) {
            if ("CONTAINS".equalsIgnoreCase(value) || "=".equals(value)) {
                return CONTAINS;
            } else if ("MATCHES".equalsIgnoreCase(value) || "==".equals(value)) {
                return EXACT;
            } else {
                return DOES_NOT_CONTAIN;
            }
        }
    }

    /**
     * Compares the fields of an entry selected by a field pattern against a value pattern.
     */
    public static class Comparator {

        private final ComparisonOperator operator;
        private final Pattern fieldPattern;
        private final Pattern valuePattern;

        /**
         * @param field         field selector; a regular expression if {@code regex} is set, a literal otherwise
         * @param value         value to look for; a regular expression if {@code regex} is set, a literal otherwise
         * @param operator      how a field value is compared against {@code value}
         * @param caseSensitive whether both patterns are case sensitive
         * @param regex         whether {@code field} and {@code value} are regular expressions
         * @throws java.util.regex.PatternSyntaxException if {@code regex} is set and an expression is malformed
         */
        public Comparator(String field, String value, ComparisonOperator operator, boolean caseSensitive, boolean regex) {
            this.operator = operator;

            // CASE_INSENSITIVE alone only folds US-ASCII; UNICODE_CASE extends it to e.g. umlauts
            int option = caseSensitive ? 0 : (Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
            // Pattern.quote also copes with literals that contain "\E"
            this.fieldPattern = Pattern.compile(regex ? field : Pattern.quote(field), option);
            this.valuePattern = Pattern.compile(regex ? value : Pattern.quote(value), option);
        }

        /**
         * Applies the comparison to {@code entry}.
         * <ul>
         * <li>If the field pattern matches {@link BibEntry#TYPE_HEADER}, the entry type is compared.</li>
         * <li>If it matches {@code anykeyword}, the result is whether any single keyword (split at
         * {@code ','}) satisfies the comparison.</li>
         * <li>Otherwise the entry's field names are used, restricted to those matching the field pattern
         * unless the pattern matches {@code anyfield}. The result is {@code true} as soon as one of their
         * LaTeX-free values satisfies the comparison.</li>
         * </ul>
         * If nothing matched, the result is {@code true} only when no field was selected at all and the
         * operator is {@link ComparisonOperator#DOES_NOT_CONTAIN}.
         */
        public boolean compare(BibEntry entry) {
            // special case for searching for entrytype=phdthesis
            if (fieldPattern.matcher(BibEntry.TYPE_HEADER).matches()) {
                return matchFieldValue(entry.getType());
            }

            // special case for searching a single keyword
            if (fieldPattern.matcher("anykeyword").matches()) {
                return entry.getKeywords(',').stream().map(Keyword::toString).anyMatch(this::matchFieldValue);
            }

            // specification of fieldsKeys to search is done in the search expression itself
            Set<String> fieldsKeys = entry.getFieldNames();

            // special case for searching allfields=cat and title=dog
            if (!fieldPattern.matcher("anyfield").matches()) {
                // keep only the requested fields
                fieldsKeys = fieldsKeys.stream().filter(matchFieldKey()).collect(Collectors.toSet());
            }

            for (String field : fieldsKeys) {
                Optional<String> fieldValue = entry.getLatexFreeField(field);
                if (fieldValue.isPresent() && matchFieldValue(fieldValue.get())) {
                    return true;
                }
            }

            // special case of asdf!=whatever and entry does not contain asdf
            return fieldsKeys.isEmpty() && (operator == ComparisonOperator.DOES_NOT_CONTAIN);
        }

        /**
         * @return a predicate that accepts the field names fully matched by the field pattern
         */
        private Predicate<String> matchFieldKey() {
            return s -> fieldPattern.matcher(s).matches();
        }

        /**
         * Tests {@code content} against the value pattern: {@code CONTAINS} looks for a match anywhere,
         * {@code EXACT} requires the whole content to match, {@code DOES_NOT_CONTAIN} requires that no
         * match exists anywhere.
         *
         * @throws IllegalStateException if the operator is none of the known constants
         */
        public boolean matchFieldValue(String content) {
            Matcher matcher = valuePattern.matcher(content);
            if (operator == ComparisonOperator.CONTAINS) {
                return matcher.find();
            } else if (operator == ComparisonOperator.EXACT) {
                return matcher.matches();
            } else if (operator == ComparisonOperator.DOES_NOT_CONTAIN) {
                return !matcher.find();
            } else {
                throw new IllegalStateException("MUST NOT HAPPEN");
            }
        }
    }

    /**
     * Evaluates a parsed search expression against one entry.
     * Search results in boolean. It may be later on converted to an int.
     */
    static class BibtexSearchVisitor extends SearchBaseVisitor<Boolean> {

        private final boolean caseSensitive;
        private final boolean regex;

        private final BibEntry entry;

        public BibtexSearchVisitor(boolean caseSensitive, boolean regex, BibEntry bibEntry) {
            this.caseSensitive = caseSensitive;
            this.regex = regex;
            this.entry = bibEntry;
        }

        /**
         * Runs a single field comparison against the entry of this visitor, using this visitor's flags.
         */
        public boolean comparison(String field, ComparisonOperator operator, String value) {
            return new Comparator(field, value, operator, caseSensitive, regex).compare(entry);
        }

        @Override
        public Boolean visitStart(SearchParser.StartContext ctx) {
            return visit(ctx.expression());
        }

        /**
         * Evaluates a comparison node. Without a left-hand field the right-hand text is handed to a
         * {@link ContainBasedSearchRule}; otherwise field, operator and value go to
         * {@link #comparison(String, ComparisonOperator, String)}.
         */
        @Override
        public Boolean visitComparison(SearchParser.ComparisonContext context) {
            // remove possible enclosing " symbols; the length check keeps a lone quote character intact
            String right = context.right.getText();
            if ((right.length() >= 2) && right.startsWith("\"") && right.endsWith("\"")) {
                right = right.substring(1, right.length() - 1);
            }

            if (context.left == null) {
                return new ContainBasedSearchRule(caseSensitive).applyRule(right, entry);
            }
            return comparison(context.left.getText(), ComparisonOperator.build(context.operator.getText()), right);
        }

        @Override
        public Boolean visitUnaryExpression(SearchParser.UnaryExpressionContext ctx) {
            return !visit(ctx.expression()); // negate
        }

        @Override
        public Boolean visitParenExpression(SearchParser.ParenExpressionContext ctx) {
            return visit(ctx.expression()); // ignore parenthesis
        }

        /**
         * Combines both operands with a short-circuiting AND if the operator text is {@code AND}
         * (any case), and with a short-circuiting OR for every other operator text.
         */
        @Override
        public Boolean visitBinaryExpression(SearchParser.BinaryExpressionContext ctx) {
            if ("AND".equalsIgnoreCase(ctx.operator.getText())) {
                return visit(ctx.left) && visit(ctx.right); // and
            } else {
                return visit(ctx.left) || visit(ctx.right); // or
            }
        }
    }
}