/**
 *
 */
package querqy.rewrite.commonrules;

import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;

import querqy.model.Clause.Occur;
import querqy.model.RawQuery;
import querqy.parser.QuerqyParser;
import querqy.parser.QuerqyParserFactory;
import querqy.rewrite.commonrules.model.BoostInstruction;
import querqy.rewrite.commonrules.model.BoostInstruction.BoostDirection;
import querqy.rewrite.commonrules.model.DecorateInstruction;
import querqy.rewrite.commonrules.model.DeleteInstruction;
import querqy.rewrite.commonrules.model.FilterInstruction;
import querqy.rewrite.commonrules.model.Input;
import querqy.rewrite.commonrules.model.PrefixTerm;
import querqy.rewrite.commonrules.model.SynonymInstruction;
import querqy.rewrite.commonrules.model.Term;

/**
 * Parses single lines of a rules definition into either an {@link Input}, an
 * instruction object or a {@link ValidationError}.
 *
 * <p>A line ending in {@code =>} is treated as an input definition. Any other
 * line is treated as an instruction that starts with one of the
 * {@code INSTR_*} keywords (matched case-insensitively).</p>
 *
 * @author René Kriegler, @renekrie
 *
 */
public class LineParser {

    public static final char BOUNDARY = '"';
    public static final char WILDCARD = '*';

    public static final String INSTR_BOOST_DOWN = "down";
    public static final String INSTR_BOOST_UP = "up";
    public static final String INSTR_DECORATE = "decorate";
    public static final String INSTR_DELETE = "delete";
    public static final String INSTR_FILTER = "filter";
    public static final String INSTR_SYNONYM = "synonym";

    static final char RAWQUERY = '*';

    /**
     * Parses one line.
     *
     * @param line the line to parse
     * @param previousInput the input that the instruction on this line belongs to; must be
     *        non-null for every line that is not itself an input definition
     * @param querqyParserFactory factory for the parser used for filter and boost queries that
     *        are not raw queries; may be {@code null}, in which case such queries yield a
     *        {@link ValidationError}
     * @return an {@link Input}, an instruction object, or a {@link ValidationError} describing
     *         why the line could not be parsed
     */
    public static Object parse(String line, Input previousInput, QuerqyParserFactory querqyParserFactory) {

        if (line.endsWith("=>")) {
            if (line.length() == 2) {
                return new ValidationError("Empty input");
            }
            return parseInput(line.substring(0, line.length() - 2));
        }

        if (previousInput == null) {
            return new ValidationError("Missing input for instruction");
        }

        // Locale.ROOT keeps keyword matching independent of the JVM default locale
        // (e.g. "FILTER" would not lower-case to "filter" under a Turkish locale).
        String lcLine = line.toLowerCase(Locale.ROOT);

        if (lcLine.startsWith(INSTR_DELETE)) {
            return parseDeleteInstruction(line, previousInput);
        }

        if (lcLine.startsWith(INSTR_FILTER)) {
            return parseFilterInstruction(line, querqyParserFactory);
        }

        if (lcLine.startsWith(INSTR_BOOST_DOWN)) {
            return parseBoostInstruction(line, lcLine, INSTR_BOOST_DOWN.length(), BoostDirection.DOWN,
                    querqyParserFactory);
        }

        if (lcLine.startsWith(INSTR_BOOST_UP)) {
            return parseBoostInstruction(line, lcLine, INSTR_BOOST_UP.length(), BoostDirection.UP,
                    querqyParserFactory);
        }

        if (lcLine.startsWith(INSTR_SYNONYM)) {
            return parseSynonymInstruction(line);
        }

        if (lcLine.startsWith(INSTR_DECORATE)) {
            return parseDecorateInstruction(line);
        }

        return new ValidationError("Cannot parse line: " + line);
    }

    /**
     * Parses a line starting with the delete keyword. Without terms (bare keyword or keyword
     * plus ':'), all input terms are deleted; otherwise each term to delete must match an
     * input term.
     */
    private static Object parseDeleteInstruction(String line, Input previousInput) {

        String instructionTerms = line.substring(INSTR_DELETE.length()).trim();

        if (instructionTerms.isEmpty()) {
            // bare keyword, possibly followed by whitespace only
            return new DeleteInstruction(previousInput.getInputTerms());
        }

        if (instructionTerms.charAt(0) != ':') {
            return new ValidationError("Cannot parse line: " + line);
        }

        if (instructionTerms.length() == 1) {
            return new DeleteInstruction(previousInput.getInputTerms());
        }

        instructionTerms = instructionTerms.substring(1).trim();
        Object expr = parseTermExpression(instructionTerms);
        if (expr instanceof ValidationError) {
            return new ValidationError("Cannot parse line: " + line + " : " + ((ValidationError) expr).getMessage());
        }

        @SuppressWarnings("unchecked")
        List<Term> deleteTerms = (List<Term>) expr;
        List<Term> inputTerms = previousInput.getInputTerms();
        for (Term term : deleteTerms) {
            if (term.findFirstMatch(inputTerms) == null) {
                return new ValidationError("Condition doesn't contain the term to delete: " + term);
            }
        }

        return new DeleteInstruction(deleteTerms);
    }

    /**
     * Parses a line starting with the filter keyword. The query after ':' is either a raw
     * query (prefixed with {@link #RAWQUERY}) or is handed to a parser created by the factory.
     */
    private static Object parseFilterInstruction(String line, QuerqyParserFactory querqyParserFactory) {

        String filterString = line.substring(INSTR_FILTER.length()).trim();

        // isEmpty() covers both the bare keyword and keyword + trailing whitespace
        if (filterString.isEmpty() || filterString.charAt(0) != ':') {
            return new ValidationError("Cannot parse line: " + line);
        }

        filterString = filterString.substring(1).trim();
        if (filterString.length() == 0) {
            return new ValidationError("Cannot parse line: " + line);
        }

        if (filterString.charAt(0) == RAWQUERY) {
            if (filterString.length() == 1) {
                return new ValidationError("Missing raw query after * in line: " + line);
            }
            String rawQuery = filterString.substring(1).trim();
            return new FilterInstruction(new RawQuery(null, rawQuery, Occur.MUST, false));
        }

        if (querqyParserFactory == null) {
            return new ValidationError("No querqy parser factory to parse filter query. Prefix '*' if you want to pass this line as a raw query String to your search engine. Line: " + line);
        }

        QuerqyParser parser = querqyParserFactory.createParser();
        return new FilterInstruction(parser.parse(filterString));
    }

    /**
     * Parses a line starting with the synonym keyword into a {@link SynonymInstruction} made
     * of the whitespace-separated terms after ':'.
     */
    private static Object parseSynonymInstruction(String line) {

        String synonymString = line.substring(INSTR_SYNONYM.length()).trim();

        if (synonymString.isEmpty()) {
            // bare keyword, possibly followed by whitespace only
            return new ValidationError("Cannot parse line: " + line);
        }

        if (synonymString.charAt(0) != ':') {
            return new ValidationError("Cannot parse line, ':' expetcted in " + line);
        }

        synonymString = synonymString.substring(1).trim();
        if (synonymString.length() == 0) {
            return new ValidationError("Cannot parse line: " + line);
        }

        List<Term> synonymTerms = new LinkedList<>();
        for (String token : synonymString.split("\\s+")) {
            if (token.length() > 0) {
                Term term;
                try {
                    term = parseTerm(token);
                } catch (IllegalArgumentException e) {
                    // report malformed terms like every other parse problem
                    return new ValidationError("Cannot parse line: " + line + " : " + e.getMessage());
                }
                if (term.getMaxPlaceHolderRef() > 1) {
                    return new ValidationError("Max. wild card reference is 1: " + line);
                }
                synonymTerms.add(term);
            }
        }

        if (synonymTerms.isEmpty()) {
            // should never happen
            return new ValidationError("Cannot parse line: " + line);
        }

        return new SynonymInstruction(synonymTerms);
    }

    /**
     * Parses a line starting with the decorate keyword.
     *
     * @param line the complete line, starting with the decorate keyword
     * @return a {@link DecorateInstruction} holding the trimmed value after ':', or a
     *         {@link ValidationError}
     */
    public static Object parseDecorateInstruction(String line) {

        if (line.length() == INSTR_DECORATE.length()) {
            return new ValidationError(INSTR_DECORATE + " requires a value");
        }

        String decValue = line.substring(INSTR_DECORATE.length()).trim();
        if (decValue.isEmpty()) {
            // keyword followed by whitespace only
            return new ValidationError(INSTR_DECORATE + " requires a value");
        }

        if (decValue.charAt(0) != ':') {
            return new ValidationError("Cannot parse line, ':' expetcted in " + line);
        }

        decValue = decValue.substring(1).trim();
        if (decValue.length() == 0) {
            return new ValidationError(INSTR_DECORATE + " requires a value");
        }

        return new DecorateInstruction(decValue);
    }

    /**
     * Parses a boost instruction of the form {@code <keyword>: query} or
     * {@code <keyword>(factor): query}. Without an explicit factor the boost is 1.
     *
     * @param line the complete line, starting with the boost keyword
     * @param lcLine the lower-cased line
     * @param lengthPredicate length of the boost keyword at the start of the line
     * @param direction boost direction to set on the resulting instruction
     * @param querqyParserFactory factory for the parser used for non-raw queries; may be
     *        {@code null}
     * @return a {@link BoostInstruction} or a {@link ValidationError}
     */
    public static Object parseBoostInstruction(String line, String lcLine, int lengthPredicate,
            BoostDirection direction, QuerqyParserFactory querqyParserFactory) {

        if (lcLine.length() == lengthPredicate) {
            return new ValidationError("Cannot parse line: " + line);
        }

        String boostLine = line.substring(lengthPredicate).trim();
        if (boostLine.isEmpty()) {
            // keyword followed by whitespace only
            return new ValidationError("Cannot parse line: " + line);
        }

        char ch = boostLine.charAt(0);
        if (ch == '(') {
            if (line.length() < 5) {
                return new ValidationError("Cannot parse line, expecting boost factor and ':' after '(' in " + line);
            }
        } else if (ch != ':') {
            return new ValidationError("Cannot parse line, '(' or ':' expected: " + line);
        }

        boostLine = boostLine.substring(1).trim();

        float boost = 1f;
        if (ch == '(') {
            int pos = boostLine.indexOf(')');
            if (pos < 1 || (pos == boostLine.length() - 1)) {
                return new ValidationError("Cannot parse line: " + line);
            }
            try {
                boost = Float.parseFloat(boostLine.substring(0, pos));
            } catch (NumberFormatException e) {
                // a malformed factor is a rule error, not a programming error
                return new ValidationError("Cannot parse boost factor in line: " + line);
            }
            boostLine = boostLine.substring(pos + 1).trim();
            if (boostLine.isEmpty() || boostLine.charAt(0) != ':') {
                return new ValidationError("Query expected: " + line);
            }
            boostLine = boostLine.substring(1).trim();
        }

        if (boostLine.length() == 0) {
            return new ValidationError("Query expected: " + line);
        }

        if (boostLine.charAt(0) == RAWQUERY) {
            if (boostLine.length() == 1) {
                return new ValidationError("Missing raw query after " + RAWQUERY + " in line: " + line);
            }
            String rawQuery = boostLine.substring(1).trim();
            return new BoostInstruction(new RawQuery(null, rawQuery, Occur.SHOULD, false), direction, boost);
        }

        if (querqyParserFactory == null) {
            return new ValidationError("No querqy parser factory to parse filter query. Prefix '" + RAWQUERY + "' you want to pass this line as a raw query String to your search engine. Line: " + line);
        }

        QuerqyParser parser = querqyParserFactory.createParser();
        return new BoostInstruction(parser.parse(boostLine), direction, boost);
    }

    /**
     * Parses an input definition (the part of a line before {@code =>}).
     *
     * <p>A leading and/or trailing {@link #BOUNDARY} character is stripped and recorded as a
     * left/right boundary requirement. A {@link #WILDCARD} is only accepted as the very last
     * character and not together with a right boundary.</p>
     *
     * @param s the input definition
     * @return an {@link Input} or a {@link ValidationError}
     */
    public static Object parseInput(String s) {

        boolean requiresLeftBoundary = false;
        boolean requiresRightBoundary = false;

        s = s.trim();
        if (s.length() > 0 && s.charAt(0) == BOUNDARY) {
            requiresLeftBoundary = true;
            s = s.substring(1).trim();
        }
        if (s.length() > 0 && s.charAt(s.length() - 1) == BOUNDARY) {
            requiresRightBoundary = true;
            s = s.substring(0, s.length() - 1).trim();
        }

        int pos = s.indexOf(WILDCARD);
        if (pos > -1) {
            if (pos < (s.length() - 1)) {
                return new ValidationError(WILDCARD + " is only allowed at the end of the input: " + s);
            } else if (requiresRightBoundary) {
                return new ValidationError(WILDCARD + " cannot be combined with right boundary");
            }
        }

        Object expr = parseTermExpression(s);
        if (expr instanceof ValidationError) {
            return expr;
        }

        @SuppressWarnings("unchecked")
        List<Term> terms = (List<Term>) expr;
        return new Input(terms, requiresLeftBoundary, requiresRightBoundary);
    }

    /**
     * Splits the expression at whitespace and parses each part as a term.
     *
     * @param s the term expression
     * @return a {@code List<Term>}, or a {@link ValidationError} if a term is malformed
     */
    static Object parseTermExpression(String s) {

        int len = s.length();

        if (len == 1) {
            char ch = s.charAt(0);
            if (ch == WILDCARD) {
                return new ValidationError("Missing prefix for wildcard " + WILDCARD);
            }
            Term term = new Term(new char[] {ch}, 0, 1, null);
            return Arrays.asList(term);
        }

        List<Term> terms = new LinkedList<>();
        try {
            for (String part : s.split("\\s+")) {
                if (part.length() > 0) {
                    terms.add(parseTerm(part));
                }
            }
        } catch (IllegalArgumentException e) {
            // keep error reporting consistent with the single-character case above
            return new ValidationError(e.getMessage());
        }

        return terms;
    }

    /**
     * Parses a single term, optionally prefixed with field names ({@code field:value} or
     * {@code {f1,f2}:value}). A value ending in {@link #WILDCARD} yields a {@link PrefixTerm}.
     *
     * @param s the term string
     * @return the parsed {@link Term}
     * @throws IllegalArgumentException if the term is empty or consists of the wildcard only
     */
    public static Term parseTerm(String s) {

        int len = s.length();

        if (len == 0) {
            throw new IllegalArgumentException("Empty term");
        }

        if (len == 1) {
            char ch = s.charAt(0);
            if (ch == WILDCARD) {
                throw new IllegalArgumentException("Missing prefix for wildcard " + WILDCARD);
            }
            return new Term(new char[] {ch}, 0, 1, null);
        }

        // a ':' that is neither the first nor the last character separates field names from the value
        int pos = s.indexOf(':');
        boolean fieldNamesPossible = (pos > 0 && pos < (len - 1));

        List<String> fieldNames = fieldNamesPossible ? parseFieldNames(s.substring(0, pos)) : null;
        String remaining = fieldNamesPossible ? s.substring(pos + 1).trim() : s;

        if (remaining.isEmpty()) {
            // only whitespace after the field name prefix
            throw new IllegalArgumentException("Empty term: " + s);
        }

        if (fieldNamesPossible && remaining.length() == 1 && remaining.charAt(0) == WILDCARD) {
            throw new IllegalArgumentException("Missing prefix for wildcard " + WILDCARD);
        }

        return (remaining.charAt(remaining.length() - 1) == WILDCARD)
                ? new PrefixTerm(remaining.toCharArray(), 0, remaining.length() - 1, fieldNames)
                : new Term(remaining.toCharArray(), 0, remaining.length(), fieldNames);
    }

    /**
     * Parses a field name prefix: either a single name or a comma-separated list of names
     * enclosed in curly braces. Blank entries in a list are skipped.
     *
     * @param s the field name prefix (without the trailing ':')
     * @return the field names
     */
    public static List<String> parseFieldNames(String s) {

        int len = s.length();

        if (len == 1) {
            return Arrays.asList(s);
        }

        List<String> result = new LinkedList<>();

        if (s.charAt(0) == '{' && s.charAt(len - 1) == '}') {
            if (len > 2) {
                String[] parts = s.substring(1, len - 1).split(",");
                for (String part : parts) {
                    part = part.trim();
                    if (part.length() > 0) {
                        result.add(part);
                    }
                }
            }
        } else {
            result.add(s);
        }

        return result;
    }

}