package folioxml.lucene.folioQueryParser;
import folioxml.core.InvalidMarkupException;
import folioxml.core.TokenInfo;
import folioxml.core.TokenUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
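/**
 * A single token of a Folio query. Tokens also act as nodes of the parsed query tree: a token may own
 * child tokens (and, for fields, header tokens).
 * <p>
 * NOTE(review): the remark below refers to angle brackets, which none of the patterns in this class
 * treat specially; it may have been carried over from another tokenizer - confirm before relying on it.
 * <p>
 * Opening angle brackets must be paired. Closing angle brackets cannot be present at all! (</example>)
 *
 * @author nathanael
 */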
public class QueryToken {

    /*
     * Reminder of the characters that have a special meaning in regular expressions and therefore need
     * escaping in the patterns below: the opening square bracket [, the backslash \, the caret ^,
     * the dollar sign $, the period or dot ., the vertical bar or pipe symbol |, the question mark ?,
     * the asterisk or star *, the plus sign +, the opening round bracket ( and the closing round
     * bracket ). And {}
     */

    /**
     * The kinds of token this class can represent. {@code None} means "not classified yet" and is the
     * initial value of {@link #type}.
     */
    public enum TokenType {
        None, OpenGroup, CloseGroup, OpenField, CloseField, Whitespace, Or, Xor, Not, FieldDelimiter, Term, TermSuffix
    }

    /** Matches one grouping character: {@code (}, {@code )}, {@code [} or {@code ]}. */
    public static Pattern rGrouping = Pattern.compile("^[\\(\\)\\[\\]]");

    /**
     * Matches {@code &}, the word {@code and} (any case) or a run of whitespace. All three are classified
     * as {@link TokenType#Whitespace}, i.e. "and" and plain whitespace are interchangeable.
     * <p>
     * NOTE(review): the keyword alternatives here and in {@link #rOr}, {@link #rXor} and {@link #rNot}
     * have no trailing word boundary, so on their own they also match the beginning of longer words
     * ("android", "order", "nothing"). Whether that matters depends on how the tokenizer (not in this
     * file) applies {@link #tokenPatterns} - confirm.
     */
    public static Pattern rWhitespace = Pattern.compile("^(?:\\&|and|\\s+)", Pattern.CASE_INSENSITIVE);

    /** Matches {@code |} or the word {@code or} (any case). */
    public static Pattern rOr = Pattern.compile("^(?:\\||or)", Pattern.CASE_INSENSITIVE);

    /** Matches {@code ~} or the word {@code xor} (any case). */
    public static Pattern rXor = Pattern.compile("^(?:\\~|xor)", Pattern.CASE_INSENSITIVE);

    /** Matches {@code ^} or the word {@code not} (any case). */
    public static Pattern rNot = Pattern.compile("^(?:\\^|not)", Pattern.CASE_INSENSITIVE);

    /** Matches one or two colons (the field delimiter). */
    public static Pattern rColon = Pattern.compile("^\\:{1,2}");

    /**
     * Matches a term, which is one of:
     * <ul>
     * <li>a lone comma;</li>
     * <li>a double-quoted string, in which a doubled double quote is accepted as content;</li>
     * <li>a single-quoted string, in which a doubled single quote is accepted as content;</li>
     * <li>an unquoted run of characters. Unquoted strings may contain apostrophes (and commas, slashes,
     * {@code @} and {@code #}), but they cannot start with them; operator, grouping, colon, backslash,
     * curly-brace and whitespace characters end the run.</li>
     * </ul>
     */
    public static Pattern rTerm = Pattern.compile("^(?:,|" +
            "\"(?:[^\"]++|\"\")++\"|" +
            "'(?:[^']++|'')++'|" +
            "[^'\",\\s~:/@#\\|\\^\\&\\[\\]\\(\\)\\\\\\{\\}][^\"\\s~:\\|\\^\\&\\[\\]\\(\\)\\\\\\{\\}]*+)");

    /** Matches a term suffix: one of {@code /}, {@code #}, {@code @} followed by digits and/or the letter s. */
    public static Pattern rTermSuffix = Pattern.compile("^[/#@][0-9Ss]+");

    /**
     * An array of the patterns we look for, in the correct order.
     * <p>
     * NOTE(review): {@link #rWhitespace} is listed twice. The second entry looks redundant, but the array
     * is public and may be indexed by other code, so it is left untouched.
     */
    public static Pattern[] tokenPatterns = new Pattern[]{rGrouping, rWhitespace, rNot, rOr, rXor, rColon, rWhitespace, rTermSuffix, rTerm};

    /** The kind of token; {@link TokenType#None} until classified. */
    public TokenType type = TokenType.None;

    /** The raw text of the token as matched in the query. */
    public String text = null;

    /** Optional token metadata; passed along to {@link InvalidMarkupException} when parsing fails. */
    public TokenInfo info = null;

    /** Field name assigned through {@link #setFieldNameRecursive(String)}; null until then. */
    public String fieldName = null;

    /** Child tokens; null until the first child is added. */
    public List<QueryToken> children;

    /** For a field token: the tokens that make up the field heading. Populated by {@link #createField()}. */
    public List<QueryToken> headers; //For a field

    /**
     * Creates a token and derives its type from the pattern that matched it.
     *
     * @param p    the pattern that produced the match; must be one of the static patterns of this class
     * @param text the matched text
     * @throws InvalidMarkupException if the pattern/text combination cannot be classified
     */
    public QueryToken(Pattern p, String text) throws InvalidMarkupException {
        this.text = text;
        if (p == rGrouping) {
            // Constant-first equals: a null text ends up unclassified (and is reported below)
            // instead of causing a NullPointerException.
            if ("(".equals(text)) type = TokenType.OpenGroup;
            if (")".equals(text)) type = TokenType.CloseGroup;
            if ("[".equals(text)) type = TokenType.OpenField;
            if ("]".equals(text)) type = TokenType.CloseField;
        }
        if (p == rWhitespace) type = TokenType.Whitespace;
        if (p == rOr) type = TokenType.Or;
        if (p == rXor) type = TokenType.Xor;
        if (p == rNot) type = TokenType.Not;
        if (p == rColon) type = TokenType.FieldDelimiter;
        if (p == rTerm) type = TokenType.Term;
        if (p == rTermSuffix) type = TokenType.TermSuffix;
        if (this.type == TokenType.None)
            throw new InvalidMarkupException("Invalid token, could not be classified: " + text);
    }

    /**
     * Creates a token with an explicitly chosen type. No validation is performed.
     *
     * @param type the token type
     * @param text the token text
     * @throws InvalidMarkupException never thrown by this constructor; declared so existing callers keep compiling
     */
    public QueryToken(TokenType type, String text) throws InvalidMarkupException {
        this.text = text;
        this.type = type;
    }

    /**
     * Sets {@link #fieldName} on this token and on every descendant reachable through {@link #children}.
     * Tokens in {@link #headers} are not visited.
     *
     * @param name the field name to assign
     */
    public void setFieldNameRecursive(String name) {
        fieldName = name;
        if (children == null) return;
        for (QueryToken child : children) {
            child.setFieldNameRecursive(name);
        }
    }

    /**
     * Appends a child token, creating the child list on first use.
     *
     * @param child the token to append
     */
    public void add(QueryToken child) {
        if (children == null) children = new ArrayList<QueryToken>();
        children.add(child);
    }

    /**
     * Turns the flat list of tokens in {@link #children} into a tree, in place.
     * <p>
     * Order of operations:
     * <ol>
     * <li>Term suffixes: each suffix adopts the term directly before it (exception if there is none).</li>
     * <li>Groups and fields: tokens between matching brackets become children of the opening bracket.</li>
     * <li>Whitespace tokens are dropped at every level.</li>
     * <li>Each NOT adopts the token that follows it.</li>
     * <li>Each OR / XOR adopts the tokens on either side of it. Both are handled in one left-to-right pass.</li>
     * </ol>
     * Tokens that are still siblings afterwards are simply left side by side; nothing in this class wraps
     * them in an explicit AND node (presumably the consumer treats siblings as an implicit AND - confirm).
     *
     * @throws InvalidMarkupException if the token sequence is not well formed
     */
    public void ParseChildrenIntoTree() throws InvalidMarkupException {
        applyTermSuffixes();
        createGroup(false);
        stripWhitespace();
        applyNot();
        applyOrs();
    }

    /**
     * Finds each term suffix and ensures the previous token is a term. Then it moves the term to be a
     * child of the term suffix. Only the direct children of this token are examined.
     *
     * @throws InvalidMarkupException if a suffix is the first child or does not directly follow a term
     */
    protected void applyTermSuffixes() throws InvalidMarkupException {
        if (children == null) return;
        for (int i = 0; i < children.size(); i++) {
            QueryToken suffix = children.get(i);
            if (suffix.type != TokenType.TermSuffix) continue;
            if (i == 0 || children.get(i - 1).type != TokenType.Term)
                throw new InvalidMarkupException("Term suffixes must be preceeded by a term", suffix.info);
            suffix.add(children.get(i - 1));
            children.remove(i - 1);
            i--; // the suffix moved one slot to the left; keep the index pointing at it
        }
    }

    /**
     * Behaves as if this token is a '(', and 'children' contains all the remaining tokens. Nested '(' and
     * '[' are built recursively (the latter through {@link #createField()}).
     * <p>
     * In strict mode the matching ')' is required: the tokens after it are returned and removed from
     * {@link #children}, together with the ')' itself. In non-strict mode (used for the implicit
     * top-level group) a missing ')' is tolerated, but an unmatched ')' is rejected. Previously it
     * silently cut the query short, because the caller discards the returned remainder.
     *
     * @param strict true when this token is a real '(' that must be closed
     * @return the tokens that follow the matching ')'; always empty in non-strict mode
     * @throws InvalidMarkupException on an unmatched ')' or ']', or, in strict mode, a missing ')'
     */
    protected List<QueryToken> createGroup(boolean strict) throws InvalidMarkupException {
        if (children == null) return new ArrayList<QueryToken>();
        for (int i = 0; i < children.size(); i++) {
            QueryToken t = children.get(i);
            if (t.type == TokenType.CloseGroup) {
                if (!strict)
                    throw new InvalidMarkupException("Found closing ')' without matching opening '('", t.info);
                return splitAtCloser(i);
            }
            if (t.type == TokenType.CloseField)
                throw new InvalidMarkupException("Found closing ']' without matching opening '['", t.info);
            if (t.type == TokenType.OpenGroup || t.type == TokenType.OpenField) { //Recursive
                nestFollowingTokens(i);
            }
        }
        if (strict) throw new InvalidMarkupException("Failed to find closing ')'", info);
        return new ArrayList<QueryToken>();
    }

    /**
     * Behaves as if this token is a '[', and 'children' contains all the remaining tokens. Splits the
     * content into {@link #headers} (the field heading) and {@link #children} (the field contents).
     *
     * @return the tokens that follow the matching ']'
     * @throws InvalidMarkupException on an unmatched ')' or a missing ']'
     */
    protected List<QueryToken> createField() throws InvalidMarkupException {
        if (children == null) return new ArrayList<QueryToken>();
        //Remove all leading whitespace from a field declaration.
        while (!children.isEmpty() && children.get(0).type == TokenType.Whitespace) children.remove(0);
        headers = new ArrayList<QueryToken>();
        //All child tokens prior to ':' are considered part of the heading, except [, ], (, )
        //If there is no ':', then they're all headers
        boolean foundDelimiter = false;
        //For popup and note searches, the field is predefined, we want everything to be contents.
        //(TokenUtils.fastMatches is defined elsewhere; presumably a regex match against the token text.)
        if (!children.isEmpty() && TokenUtils.fastMatches("popup|note", children.get(0).text)) {
            foundDelimiter = true;
            headers.add(children.get(0));
            children.remove(0);
        }
        for (int i = 0; i < children.size(); i++) {
            QueryToken t = children.get(i);
            if (t.type == TokenType.CloseField) {
                return splitAtCloser(i);
            } else if (t.type == TokenType.CloseGroup) {
                throw new InvalidMarkupException("Found closing ')' without matching opening '('", t.info);
            } else if (t.type == TokenType.OpenGroup || t.type == TokenType.OpenField) { //Recursive
                nestFollowingTokens(i);
            } else if (!foundDelimiter && t.type == TokenType.FieldDelimiter) {
                // The first delimiter ends the heading; the delimiter token itself is discarded.
                foundDelimiter = true;
                children.remove(i);
                i--;
            } else if (!foundDelimiter) {
                // Still in the heading: move the token from the contents to the headers.
                headers.add(t);
                children.remove(i);
                i--;
            }
        }
        throw new InvalidMarkupException("Failed to find closing ']'", info);
    }

    /**
     * Cuts {@link #children} at a closing bracket: everything before it stays, the bracket is dropped.
     *
     * @param closerIndex index of the closing bracket in {@link #children}
     * @return a new list holding the tokens that followed the closing bracket
     */
    private List<QueryToken> splitAtCloser(int closerIndex) {
        List<QueryToken> remainder = new ArrayList<QueryToken>(children.subList(closerIndex + 1, children.size()));
        children = new ArrayList<QueryToken>(children.subList(0, closerIndex));
        return remainder;
    }

    /**
     * Hands every token after the opening bracket at {@code openerIndex} to that bracket, lets it build
     * its own group or field, and re-attaches whatever it gives back right after it.
     *
     * @param openerIndex index of an OpenGroup or OpenField token in {@link #children}
     * @throws InvalidMarkupException if the nested group or field is malformed
     */
    private void nestFollowingTokens(int openerIndex) throws InvalidMarkupException {
        QueryToken opener = children.get(openerIndex);
        opener.children = new ArrayList<QueryToken>(children.subList(openerIndex + 1, children.size()));
        List<QueryToken> rebuilt = new ArrayList<QueryToken>(children.subList(0, openerIndex + 1));
        rebuilt.addAll(opener.type == TokenType.OpenGroup ? opener.createGroup(true) : opener.createField());
        children = rebuilt;
    }

    /**
     * Removes whitespace tokens from {@link #children}, recursively. Tokens in {@link #headers} are not
     * touched.
     *
     * @throws InvalidMarkupException declared for symmetry with the other passes; not thrown here
     */
    protected void stripWhitespace() throws InvalidMarkupException {
        if (children == null) return;
        for (int i = 0; i < children.size(); i++) {
            QueryToken child = children.get(i);
            if (child.type == TokenType.Whitespace) {
                children.remove(i);
                i--;
            } else {
                child.stripWhitespace(); //recursive.
            }
        }
    }

    /**
     * Makes every NOT token adopt the token that follows it, then recurses into each child.
     *
     * @throws InvalidMarkupException if a NOT has nothing after it
     */
    protected void applyNot() throws InvalidMarkupException {
        if (children == null) return;
        for (int i = 0; i < children.size(); i++) {
            QueryToken child = children.get(i);
            if (child.type == TokenType.Not) {
                if (i >= children.size() - 1)
                    throw new InvalidMarkupException("No term found for not operator.", child.info);
                child.add(children.get(i + 1));
                children.remove(i + 1);
            }
            // Recurse, including into a NOT that just received its operand. If that operand is itself a
            // NOT, it is now an only child with nothing after it, so the recursive call throws.
            child.applyNot(); //If the user double nots, this will cause an exception (good!).
        }
    }

    /**
     * Makes every OR / XOR token adopt its left and right neighbours. Descendants are processed first;
     * on each level the operators are applied left to right, so {@code a or b or c} nests as
     * {@code (a or b) or c}.
     *
     * @throws InvalidMarkupException if an operator is the first or last token of its level
     */
    protected void applyOrs() throws InvalidMarkupException {
        if (children == null) return;
        //grandchildren first
        for (QueryToken child : children) {
            child.applyOrs();
        }
        //Then we do the or stuff.
        for (int i = 0; i < children.size(); i++) {
            QueryToken operator = children.get(i);
            if (operator.type != TokenType.Or && operator.type != TokenType.Xor) continue;
            if (i == 0 || i >= children.size() - 1)
                throw new InvalidMarkupException("OR and XOR operators require at least two operands", operator.info);
            operator.add(children.get(i - 1));
            operator.add(children.get(i + 1));
            // Remove the right operand first so the left operand's index is still valid.
            children.remove(i + 1);
            children.remove(i - 1);
            i--; // the operator moved one slot to the left
        }
    }
}