// License: GPL. Copyright 2007 by Immanuel Scholz and others
package org.openstreetmap.josm.actions.search;
import static org.openstreetmap.josm.tools.I18n.marktr;
import static org.openstreetmap.josm.tools.I18n.tr;
import java.io.PushbackReader;
import java.io.StringReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.openstreetmap.josm.Main;
import org.openstreetmap.josm.actions.search.PushbackTokenizer.Range;
import org.openstreetmap.josm.actions.search.PushbackTokenizer.Token;
import org.openstreetmap.josm.data.osm.Node;
import org.openstreetmap.josm.data.osm.OsmPrimitive;
import org.openstreetmap.josm.data.osm.OsmUtils;
import org.openstreetmap.josm.data.osm.Relation;
import org.openstreetmap.josm.data.osm.RelationMember;
import org.openstreetmap.josm.data.osm.Way;
import org.openstreetmap.josm.tools.DateUtils;
/**
Implements a Google-like search.
<br>
Grammar:
<pre>
expression =
fact | expression
fact expression
fact
fact =
( expression )
-fact
term?
term=term
term:term
term
</pre>
@author Imi
*/
public class SearchCompiler {
/** Whether the matchers built by this compiler compare text case-sensitively. */
private final boolean caseSensitive;
/** Whether search terms are compiled as regular expressions. */
private final boolean regexSearch;
/**
 * Message template reported when a search regex fails to compile. It is
 * wrapped in marktr here and passed through tr(...) at the place where
 * the error is raised.
 */
private static final String rxErrorMsg = marktr("The regex \"{0}\" had a parse error at offset {1}, full error:\n\n{2}");
/** Source of the tokens of the search expression being parsed. */
private final PushbackTokenizer tokenizer;

/**
 * Creates a compiler for a single search expression.
 *
 * @param caseSensitive whether text comparisons should be case-sensitive
 * @param regexSearch whether search terms are regular expressions
 * @param tokenizer tokenizer delivering the tokens of the search expression
 */
public SearchCompiler(boolean caseSensitive, boolean regexSearch, PushbackTokenizer tokenizer) {
    this.caseSensitive = caseSensitive;
    this.regexSearch = regexSearch;
    this.tokenizer = tokenizer;
}
/**
 * Base type of every compiled search condition.
 */
public abstract static class Match {
    /**
     * Tests a primitive against this condition.
     *
     * @param osm the primitive to test
     * @return true if the primitive satisfies the condition
     */
    public abstract boolean match(OsmPrimitive osm);
}
/**
 * Condition that is satisfied by every primitive.
 */
public static class Always extends Match {
    /** Shared instance; declared final so that it cannot be swapped out by accident. */
    public static final Always INSTANCE = new Always();

    /**
     * @param osm the primitive to test (ignored)
     * @return always true
     */
    @Override
    public boolean match(OsmPrimitive osm) {
        return true;
    }
}
/**
 * Condition that is satisfied by no primitive.
 */
public static class Never extends Match {
    /**
     * @param osm the primitive to test (ignored)
     * @return always false
     */
    @Override public boolean match(OsmPrimitive osm) {
        return false;
    }
}
/**
 * Negation of another condition.
 */
private static class Not extends Match {
    /** The condition whose result is inverted. */
    private final Match match;

    public Not(Match match) {
        this.match = match;
    }

    /**
     * @return true exactly when the wrapped condition does not match
     */
    @Override
    public boolean match(OsmPrimitive osm) {
        boolean inner = match.match(osm);
        return !inner;
    }

    @Override
    public String toString() {
        return "!" + match;
    }
}
/**
 * Interprets the value of a tag as a boolean (the "term?" form of the grammar).
 */
private static class BooleanMatch extends Match {
    /** Key of the tag whose value is interpreted. */
    private final String key;
    /** Result used when OsmUtils.getOsmBoolean yields null for the tag value. */
    private final boolean defaultValue;

    public BooleanMatch(String key, boolean defaultValue) {
        this.key = key;
        this.defaultValue = defaultValue;
    }

    /**
     * @return the boolean reading of the tag value, or the default value
     *         when no boolean reading is available
     */
    @Override
    public boolean match(OsmPrimitive osm) {
        Boolean parsed = OsmUtils.getOsmBoolean(osm.get(key));
        return parsed != null ? parsed.booleanValue() : defaultValue;
    }
}
/**
 * Conjunction of two conditions. The right-hand side is only evaluated
 * when the left-hand side matches.
 */
private static class And extends Match {
    private Match lhs;
    private Match rhs;

    public And(Match lhs, Match rhs) {
        this.lhs = lhs;
        this.rhs = rhs;
    }

    @Override
    public boolean match(OsmPrimitive osm) {
        if (!lhs.match(osm))
            return false;
        return rhs.match(osm);
    }

    @Override
    public String toString() {
        return lhs + " && " + rhs;
    }
}
/**
 * Disjunction of two conditions. The right-hand side is only evaluated
 * when the left-hand side does not match.
 */
private static class Or extends Match {
    private Match lhs;
    private Match rhs;

    public Or(Match lhs, Match rhs) {
        this.lhs = lhs;
        this.rhs = rhs;
    }

    @Override
    public boolean match(OsmPrimitive osm) {
        if (lhs.match(osm))
            return true;
        return rhs.match(osm);
    }

    @Override
    public String toString() {
        return lhs + " || " + rhs;
    }
}
/**
 * Matches a primitive by its id. An id of 0 selects primitives that
 * report themselves as new.
 */
private static class Id extends Match {
    private long id;

    public Id(long id) {
        this.id = id;
    }

    @Override
    public boolean match(OsmPrimitive osm) {
        if (id == 0)
            return osm.isNew();
        return osm.getUniqueId() == id;
    }

    @Override
    public String toString() {
        return "id=" + id;
    }
}
/**
 * Matches primitives whose changeset id equals a given value.
 */
private static class ChangesetId extends Match {
    private long changesetid;

    public ChangesetId(long changesetid) {
        this.changesetid = changesetid;
    }

    @Override
    public boolean match(OsmPrimitive osm) {
        long actual = osm.getChangesetId();
        return actual == changesetid;
    }

    @Override
    public String toString() {
        return "changeset=" + changesetid;
    }
}
/**
 * Matches primitives whose version equals a given value.
 */
private static class Version extends Match {
    private long version;

    public Version(long version) {
        this.version = version;
    }

    @Override
    public boolean match(OsmPrimitive osm) {
        long actual = osm.getVersion();
        return actual == version;
    }

    @Override
    public String toString() {
        return "version=" + version;
    }
}
/**
 * The "key:value" search. In plain mode the tag stored under the key must
 * contain the value as a substring; in regex mode some tag must have a key
 * and a value in which the respective patterns are found.
 */
private static class KeyValue extends Match {
    private final String key;
    private final Pattern keyPattern;
    private final String value;
    private final Pattern valuePattern;
    private final boolean caseSensitive;

    /**
     * @param key the key to look for (a regex when regexSearch is set)
     * @param value the value to look for (a regex when regexSearch is set)
     * @param regexSearch whether key and value are regular expressions
     * @param caseSensitive whether comparisons are case-sensitive
     * @throws ParseError if one of the regular expressions cannot be compiled
     */
    public KeyValue(String key, String value, boolean regexSearch, boolean caseSensitive) throws ParseError {
        this.caseSensitive = caseSensitive;
        if (!regexSearch) {
            // Plain mode: a case-insensitive search looks up the lower-cased key.
            this.key = caseSensitive ? key : key.toLowerCase();
            this.value = value;
            this.keyPattern = null;
            this.valuePattern = null;
        } else {
            int flags = regexFlags(caseSensitive);
            Pattern compiledKey;
            Pattern compiledValue;
            try {
                compiledKey = Pattern.compile(key, flags);
                compiledValue = Pattern.compile(value, flags);
            } catch (PatternSyntaxException e) {
                throw new ParseError(tr(rxErrorMsg, e.getPattern(), e.getIndex(), e.getMessage()));
            }
            this.keyPattern = compiledKey;
            this.valuePattern = compiledValue;
            this.key = key;
            this.value = value;
        }
    }

    @Override
    public boolean match(OsmPrimitive osm) {
        if (keyPattern == null) {
            // Plain mode: direct lookup; "timestamp" is served from the
            // primitive's timestamp rather than from its tags.
            String actual;
            if (key.equals("timestamp")) {
                actual = DateUtils.fromDate(osm.getTimestamp());
            } else {
                actual = osm.get(key);
            }
            if (actual == null)
                return false;
            String haystack = caseSensitive ? actual : actual.toLowerCase();
            String needle = caseSensitive ? value : value.toLowerCase();
            // No Unicode normalization here: java.text.Normalizer is not Java 1.5.
            return haystack.contains(needle);
        }

        if (!osm.hasKeys())
            return false;
        // A regex cannot be looked up directly, so every key is tested against
        // the key pattern and only then its value against the value pattern.
        for (String candidate : osm.keySet()) {
            String candidateValue = osm.get(candidate);
            if (!keyPattern.matcher(candidate).find()) {
                continue;
            }
            if (valuePattern.matcher(candidateValue).find())
                return true;
        }
        return false;
    }

    @Override
    public String toString() {
        return key + "=" + value;
    }
}
public static class ExactKeyValue extends Match {
private enum Mode {
ANY, ANY_KEY, ANY_VALUE, EXACT, NONE, MISSING_KEY,
ANY_KEY_REGEXP, ANY_VALUE_REGEXP, EXACT_REGEXP, MISSING_KEY_REGEXP;
}
private final String key;
private final String value;
private final Pattern keyPattern;
private final Pattern valuePattern;
private final Mode mode;
public ExactKeyValue(boolean regexp, String key, String value) throws ParseError {
if (key == "")
throw new ParseError(tr("Key cannot be empty when tag operator is used. Sample use: key=value"));
this.key = key;
this.value = value == null?"":value;
if ("".equals(this.value) && "*".equals(key)) {
mode = Mode.NONE;
} else if ("".equals(this.value)) {
if (regexp) {
mode = Mode.MISSING_KEY_REGEXP;
} else {
mode = Mode.MISSING_KEY;
}
} else if ("*".equals(key) && "*".equals(this.value)) {
mode = Mode.ANY;
} else if ("*".equals(key)) {
if (regexp) {
mode = Mode.ANY_KEY_REGEXP;
} else {
mode = Mode.ANY_KEY;
}
} else if ("*".equals(this.value)) {
if (regexp) {
mode = Mode.ANY_VALUE_REGEXP;
} else {
mode = Mode.ANY_VALUE;
}
} else {
if (regexp) {
mode = Mode.EXACT_REGEXP;
} else {
mode = Mode.EXACT;
}
}
if (regexp && key.length() > 0 && !key.equals("*")) {
keyPattern = Pattern.compile(key);
} else {
keyPattern = null;
}
if (regexp && this.value.length() > 0 && !this.value.equals("*")) {
try {
valuePattern = Pattern.compile(this.value);
} catch (PatternSyntaxException e) {
throw new ParseError(tr("Pattern Syntax Error: Pattern {0} in {1} is illegal!", e.getPattern(), value));
}
} else {
valuePattern = null;
}
}
@Override
public boolean match(OsmPrimitive osm) {
if (!osm.hasKeys())
return mode == Mode.NONE;
switch (mode) {
case NONE:
return false;
case MISSING_KEY:
return osm.get(key) == null;
case ANY:
return true;
case ANY_VALUE:
return osm.get(key) != null;
case ANY_KEY:
for (String v:osm.getKeys().values()) {
if (v.equals(value))
return true;
}
return false;
case EXACT:
return value.equals(osm.get(key));
case ANY_KEY_REGEXP:
for (String v:osm.getKeys().values()) {
if (valuePattern.matcher(v).matches())
return true;
}
return false;
case ANY_VALUE_REGEXP:
case EXACT_REGEXP:
for (String key: osm.keySet()) {
if (keyPattern.matcher(key).matches()) {
if (mode == Mode.ANY_VALUE_REGEXP
|| valuePattern.matcher(osm.get(key)).matches())
return true;
}
}
return false;
case MISSING_KEY_REGEXP:
for (String k:osm.keySet()) {
if (keyPattern.matcher(k).matches())
return false;
}
return true;
}
throw new AssertionError("Missed state");
}
@Override
public String toString() {
return key + '=' + value;
}
}
/**
 * Free-text search: matches when the search term occurs in any key, in any
 * value or in the name of the user of the primitive.
 */
private static class Any extends Match {
    /** The search term; lower-cased for a plain, case-insensitive search. */
    private final String search;
    /** The compiled search term in regex mode, otherwise null. */
    private final Pattern searchRegex;
    private final boolean caseSensitive;

    /**
     * @param s the search term (a regex when regexSearch is set)
     * @param regexSearch whether the search term is a regular expression
     * @param caseSensitive whether comparisons are case-sensitive
     * @throws ParseError if the regular expression cannot be compiled
     */
    public Any(String s, boolean regexSearch, boolean caseSensitive) throws ParseError {
        this.caseSensitive = caseSensitive;
        if (regexSearch) {
            try {
                this.searchRegex = Pattern.compile(s, regexFlags(caseSensitive));
            } catch (PatternSyntaxException e) {
                throw new ParseError(tr(rxErrorMsg, e.getPattern(), e.getIndex(), e.getMessage()));
            }
            this.search = s;
        } else if (caseSensitive) {
            this.search = s;
            this.searchRegex = null;
        } else {
            this.search = s.toLowerCase();
            this.searchRegex = null;
        }
    }

    /**
     * @param osm the primitive to test
     * @return true if a key, a value or the user name contains the search
     *         term; a primitive without tags only matches an empty term
     */
    @Override public boolean match(OsmPrimitive osm) {
        if (!osm.hasKeys())
            return search.equals("");
        // No Unicode normalization here: java.text.Normalizer is not Java 1.5.
        for (String key: osm.keySet()) {
            String value = osm.get(key);
            if (contains(key) || contains(value))
                return true;
        }
        if (osm.getUser() != null) {
            // The user name goes through the same test as keys and values, so
            // that in regex mode it is matched with the compiled pattern and
            // not compared with the raw text of the regex.
            if (contains(osm.getUser().getName()))
                return true;
        }
        return false;
    }

    /** Tells whether the search term is found in the text, honouring regex mode and case sensitivity. */
    private boolean contains(String text) {
        if (searchRegex != null)
            return searchRegex.matcher(text).find();
        String haystack = caseSensitive ? text : text.toLowerCase();
        return haystack.indexOf(search) != -1;
    }

    @Override public String toString() {
        return search;
    }
}
/**
 * Matches primitives of one kind: node, way or relation.
 */
private static class ExactType extends Match {
    /** The primitive class selected by the type name. */
    private final Class<?> type;

    /**
     * @param type one of "node", "way" or "relation"
     * @throws ParseError if the type name is none of the three
     */
    public ExactType(String type) throws ParseError {
        Class<?> resolved;
        if ("node".equals(type)) {
            resolved = Node.class;
        } else if ("way".equals(type)) {
            resolved = Way.class;
        } else if ("relation".equals(type)) {
            resolved = Relation.class;
        } else {
            throw new ParseError(tr("Unknown primitive type: {0}. Allowed values are node, way or relation",
                    type));
        }
        this.type = resolved;
    }

    /**
     * @return true if the runtime class of the primitive is exactly the selected class
     */
    @Override
    public boolean match(OsmPrimitive osm) {
        Class<?> actual = osm.getClass();
        return actual == type;
    }

    @Override
    public String toString() {
        return "type=" + type;
    }
}
/**
 * Matches primitives by their user. The name "anonymous" selects the
 * primitives that have no user.
 */
private static class UserMatch extends Match {
    /** The user name to look for, or null when searching for anonymous primitives. */
    private final String user;

    /**
     * @param user the user name, or "anonymous" for primitives without a user
     */
    public UserMatch(String user) {
        if (user.equals("anonymous")) {
            this.user = null;
        } else {
            this.user = user;
        }
    }

    /**
     * @return for a primitive without a user, whether anonymous primitives
     *         were asked for; otherwise the result of hasName on its user
     */
    @Override public boolean match(OsmPrimitive osm) {
        if (osm.getUser() == null)
            return user == null;
        else
            return osm.getUser().hasName(user);
    }

    @Override public String toString() {
        // The parentheses are required: "+" binds tighter than "==", so without
        // them the concatenation is compared with null and the prefix is lost.
        return "user=" + (user == null ? "" : user);
    }
}
/**
 * Matches ways whose number of nodes lies within an inclusive range.
 * Primitives that are not ways never match.
 */
private static class NodeCountRange extends Match {
    private int minCount;
    private int maxCount;

    /**
     * The bounds may be given in either order; the smaller one becomes the
     * lower bound.
     */
    public NodeCountRange(int minCount, int maxCount) {
        this.minCount = Math.min(minCount, maxCount);
        this.maxCount = Math.max(minCount, maxCount);
    }

    @Override
    public boolean match(OsmPrimitive osm) {
        if (osm instanceof Way) {
            int nodes = ((Way) osm).getNodesCount();
            return minCount <= nodes && nodes <= maxCount;
        }
        return false;
    }

    @Override
    public String toString() {
        return "nodes=" + minCount + "-" + maxCount;
    }
}
/**
 * Matches primitives whose number of tags lies within an inclusive range.
 */
private static class TagCountRange extends Match {
    private int minCount;
    private int maxCount;

    /**
     * The bounds may be given in either order; the smaller one becomes the
     * lower bound.
     */
    public TagCountRange(int minCount, int maxCount) {
        this.minCount = Math.min(minCount, maxCount);
        this.maxCount = Math.max(minCount, maxCount);
    }

    @Override
    public boolean match(OsmPrimitive osm) {
        int tags = osm.getKeys().size();
        return minCount <= tags && tags <= maxCount;
    }

    @Override
    public String toString() {
        return "tags=" + minCount + "-" + maxCount;
    }
}
/**
 * Matches primitives that report themselves as modified or as new.
 */
private static class Modified extends Match {
    @Override
    public boolean match(OsmPrimitive osm) {
        if (osm.isModified())
            return true;
        return osm.isNew();
    }

    @Override
    public String toString() {
        return "modified";
    }
}
/**
 * Matches primitives that are selected in the current data set.
 */
private static class Selected extends Match {
    @Override
    public boolean match(OsmPrimitive osm) {
        boolean selected = Main.main.getCurrentDataSet().isSelected(osm);
        return selected;
    }

    @Override
    public String toString() {
        return "selected";
    }
}
/**
 * Matches primitives that report themselves as incomplete.
 */
private static class Incomplete extends Match {
    @Override
    public boolean match(OsmPrimitive osm) {
        boolean incomplete = osm.isIncomplete();
        return incomplete;
    }

    @Override
    public String toString() {
        return "incomplete";
    }
}
/**
 * Matches primitives that do not report themselves as tagged.
 */
private static class Untagged extends Match {
    @Override
    public boolean match(OsmPrimitive osm) {
        boolean tagged = osm.isTagged();
        return !tagged;
    }

    @Override
    public String toString() {
        return "untagged";
    }
}
/**
 * Matches ways having at least one node, and relations having at least one
 * member, that satisfies the given condition. Other primitives never match.
 */
private static class Parent extends Match {
    /** The condition a node or member has to satisfy; never null. */
    private final Match child;

    /**
     * @param m the condition for the nodes or members; null means the same
     *          as "parent()", i.e. every node or member qualifies
     */
    public Parent(Match m) {
        // Normalized once here rather than on every match() call, so that the
        // matcher stays immutable after construction.
        child = m == null ? Always.INSTANCE : m;
    }

    @Override public boolean match(OsmPrimitive osm) {
        if (osm instanceof Way) {
            for (Node n : ((Way)osm).getNodes()) {
                if (child.match(n))
                    return true;
            }
        } else if (osm instanceof Relation) {
            for (RelationMember member : ((Relation)osm).getMembers()) {
                if (child.match(member.getMember()))
                    return true;
            }
        }
        return false;
    }

    @Override public String toString() {return "parent(" + child + ")";}
}
/**
 * Matches primitives that have at least one referrer satisfying the given
 * condition.
 */
private static class Child extends Match {
    /** The condition a referrer has to satisfy; never null. */
    private final Match parent;

    /**
     * @param m the condition for the referrers; null means the same as
     *          "child()", i.e. every referrer qualifies
     */
    public Child(Match m) {
        // Reuse the shared instance instead of allocating a new one per search.
        parent = m == null ? Always.INSTANCE : m;
    }

    @Override public boolean match(OsmPrimitive osm) {
        // The first matching referrer settles the result.
        for (OsmPrimitive p : osm.getReferrers()) {
            if (parent.match(p))
                return true;
        }
        return false;
    }

    @Override public String toString() {return "child(" + parent + ")";}
}
/**
 * Signals that a search expression could not be compiled.
 */
public static class ParseError extends Exception {
    /**
     * @param msg the message describing the problem
     */
    public ParseError(String msg) {
        super(msg);
    }

    /**
     * Creates the error for a token that differs from the expected one.
     *
     * @param expected the token the parser required
     * @param found the token that was read instead
     */
    public ParseError(Token expected, Token found) {
        this(tr("Unexpected token. Expected {0}, found {1}", expected, found));
    }
}
/**
 * Compiles a search string into a matcher.
 *
 * @param searchStr the search expression
 * @param caseSensitive whether text comparisons are case-sensitive
 * @param regexSearch whether search terms are regular expressions
 * @return the matcher for the expression
 * @throws ParseError if the expression cannot be parsed
 */
public static Match compile(String searchStr, boolean caseSensitive, boolean regexSearch)
        throws ParseError {
    PushbackReader reader = new PushbackReader(new StringReader(searchStr));
    PushbackTokenizer tokens = new PushbackTokenizer(reader);
    SearchCompiler compiler = new SearchCompiler(caseSensitive, regexSearch, tokens);
    return compiler.parse();
}
/**
 * Parses the whole token stream.
 *
 * @return the matcher for the expression; a matcher accepting everything
 *         when the expression is empty
 * @throws ParseError if the expression is malformed or is followed by
 *         tokens that are not part of it
 */
public Match parse() throws ParseError {
    Match result = parseExpression();
    if (tokenizer.readIfEqual(Token.EOF)) {
        if (result != null)
            return result;
        return new Always();
    }
    throw new ParseError(tr("Unexpected token: {0}", tokenizer.nextToken()));
}
/**
 * Parses a sequence of factors. A factor followed by an OR token is
 * combined with the rest by Or; factors that simply follow each other are
 * combined by And.
 *
 * @return the matcher, or null if no factor could be read
 * @throws ParseError if the right-hand side of an OR is missing or a
 *         factor is malformed
 */
private Match parseExpression() throws ParseError {
    Match first = parseFactor();
    if (first == null)
        return null;
    if (tokenizer.readIfEqual(Token.OR)) {
        Match alternative = parseExpression(tr("Missing parameter for OR"));
        return new Or(first, alternative);
    }
    Match rest = parseExpression();
    if (rest != null)
        return new And(first, rest);
    return first;
}
/**
 * Parses an expression that must be present.
 *
 * @param errorMessage the message of the error raised when nothing could be read
 * @return the matcher, never null
 * @throws ParseError if no expression could be read
 */
private Match parseExpression(String errorMessage) throws ParseError {
    Match required = parseExpression();
    if (required != null)
        return required;
    throw new ParseError(errorMessage);
}
/**
 * Parses a single factor: a parenthesised expression, a negated factor or
 * a term starting with a KEY token.
 *
 * @return the matcher for the factor, or null if the next token starts
 *         none of these forms (an empty pair of parentheses also yields null)
 * @throws ParseError if the factor is malformed
 */
private Match parseFactor() throws ParseError {
if (tokenizer.readIfEqual(Token.LEFT_PARENT)) {
// "( expression )": the closing parenthesis is mandatory.
Match expression = parseExpression();
if (!tokenizer.readIfEqual(Token.RIGHT_PARENT))
throw new ParseError(Token.RIGHT_PARENT, tokenizer.nextToken());
return expression;
} else if (tokenizer.readIfEqual(Token.NOT))
// Negation requires a factor to negate.
return new Not(parseFactor(tr("Missing operator for NOT")));
else if (tokenizer.readIfEqual(Token.KEY)) {
String key = tokenizer.getText();
// "key=value": exact comparison of key and value.
if (tokenizer.readIfEqual(Token.EQUALS))
return new ExactKeyValue(regexSearch, key, tokenizer.readTextOrNumber());
else if (tokenizer.readIfEqual(Token.COLON)) {
// "key:value": a few keys have a special meaning; every other key
// is handed to parseKV.
if ("id".equals(key))
return new Id(tokenizer.readNumber(tr("Primitive id expected")));
else if ("tags".equals(key)) {
Range range = tokenizer.readRange(tr("Range of numbers expected"));
// The range bounds are narrowed to int for the count comparison.
return new TagCountRange((int)range.getStart(), (int)range.getEnd());
} else if ("nodes".equals(key)) {
Range range = tokenizer.readRange(tr("Range of numbers expected"));
return new NodeCountRange((int)range.getStart(), (int)range.getEnd());
} else if ("changeset".equals(key))
return new ChangesetId(tokenizer.readNumber(tr("Changeset id expected")));
else if ("version".equals(key))
return new Version(tokenizer.readNumber(tr("Version expected")));
else
return parseKV(key, tokenizer.readTextOrNumber());
} else if (tokenizer.readIfEqual(Token.QUESTION_MARK))
// "key?": boolean reading of the tag value, false when there is none.
return new BooleanMatch(key, false);
// A bare word: either one of the keywords below or a free-text search.
else if ("modified".equals(key))
return new Modified();
else if ("incomplete".equals(key))
return new Incomplete();
else if ("untagged".equals(key))
return new Untagged();
else if ("selected".equals(key))
return new Selected();
// "child" and "parent" take an optional factor; parseFactor() may return
// null here, which Child and Parent treat as "match everything".
else if ("child".equals(key))
return new Child(parseFactor());
else if ("parent".equals(key))
return new Parent(parseFactor());
else
return new Any(key, regexSearch, caseSensitive);
} else
// Not the start of a factor; the caller decides whether that is an error.
return null;
}
/**
 * Parses a factor that must be present.
 *
 * @param errorMessage the message of the error raised when nothing could be read
 * @return the matcher, never null
 * @throws ParseError if no factor could be read
 */
private Match parseFactor(String errorMessage) throws ParseError {
    Match required = parseFactor();
    if (required != null)
        return required;
    throw new ParseError(errorMessage);
}
/**
 * Creates the matcher for a "key:value" term. The keys "type" and "user"
 * select dedicated matchers, every other key a KeyValue search.
 *
 * @param key the key in front of the colon
 * @param value the value behind the colon; null is treated as empty
 * @return the matcher for the term
 * @throws ParseError if the value is not acceptable for the key
 */
private Match parseKV(String key, String value) throws ParseError {
    String text = value == null ? "" : value;
    if (key.equals("type")) {
        return new ExactType(text);
    }
    if (key.equals("user")) {
        return new UserMatch(text);
    }
    return new KeyValue(key, text, regexSearch, caseSensitive);
}
/**
 * Builds the flags used to compile search patterns.
 *
 * @param caseSensitive whether the pattern should be case-sensitive
 * @return the combination of Pattern flags
 */
private static int regexFlags(boolean caseSensitive) {
    // CANON_EQ: canonical Unicode equivalence, so that e.g. the two forms
    // "\u00e9gal" and "e\u0301gal" match each other no matter how the
    // character happened to be constructed.
    // DOTALL: "." matches any character including newline (/s in Perl).
    int flags = Pattern.CANON_EQ | Pattern.DOTALL;
    if (caseSensitive) {
        return flags;
    }
    // CASE_INSENSITIVE alone only folds US-ASCII, but the OSM data is in
    // Unicode; UNICODE_CASE makes the case folding Unicode-aware.
    return flags | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
}
}