package nl.knaw.huygens.alexandria.query;
/*
* #%L
* alexandria-service
* =======
* Copyright (C) 2015 - 2017 Huygens ING (KNAW)
* =======
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import javax.inject.Inject;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTree;
import org.apache.commons.lang3.StringUtils;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import nl.knaw.huygens.alexandria.antlr.AQLLexer;
import nl.knaw.huygens.alexandria.antlr.AQLParser;
import nl.knaw.huygens.alexandria.antlr.QueryErrorListener;
import nl.knaw.huygens.alexandria.api.model.AlexandriaState;
import nl.knaw.huygens.alexandria.api.model.search.AlexandriaQuery;
import nl.knaw.huygens.alexandria.api.model.search.QueryField;
import nl.knaw.huygens.alexandria.api.model.search.QueryFunction;
import nl.knaw.huygens.alexandria.endpoint.LocationBuilder;
import nl.knaw.huygens.alexandria.exception.BadRequestException;
import nl.knaw.huygens.alexandria.model.AlexandriaAnnotation;
import nl.knaw.huygens.alexandria.model.AlexandriaResource;
import nl.knaw.huygens.alexandria.storage.Storage;
import nl.knaw.huygens.alexandria.storage.frames.AlexandriaVF;
import nl.knaw.huygens.alexandria.storage.frames.AnnotationVF;
import nl.knaw.huygens.alexandria.storage.frames.ResourceVF;
import nl.knaw.huygens.alexandria.util.StreamUtil;
public class AlexandriaQueryParser {
static final String ALLOWED_FIELDS = ", available fields: " + Joiner.on(", ").join(QueryField.ALL_EXTERNAL_NAMES);
static final String ALLOWED_FUNCTIONS = ", available functions: " + Joiner.on(", ").join(QueryFunction.values());
private static LocationBuilder locationBuilder;
List<String> parseErrors = Lists.newArrayList();
@Inject
public AlexandriaQueryParser(final LocationBuilder locationBuilder) {
AlexandriaQueryParser.locationBuilder = locationBuilder;
}
public ParsedAlexandriaQuery parse(final AlexandriaQuery query) {
parseErrors.clear();
final ParsedAlexandriaQuery paq = new ParsedAlexandriaQuery()//
.setVFClass(parseFind(query.getFind()))//
.setResultComparator(parseSort(query.getSort()));
setFilter(paq, query.getWhere());
parseReturn(paq, query.getFields());
paq.setDistinct(query.isDistinct());
if (!parseErrors.isEmpty()) {
throw new AlexandriaQueryParseException(parseErrors);
}
return paq;
}
private void setFilter(final ParsedAlexandriaQuery paq, String where) {
final List<WhereToken> tokens = tokenize(where);
// add default stateToken unless there is a state clause in the where
addDefaultStateTokenWhenNeeded(tokens);
// any tokens with resource.id or subresource.id need to be filtered out and lead to an annotationVFFinder
List<WhereToken> resourceWhereTokens = filterResourceWhereTokens(tokens);
tokens.removeAll(resourceWhereTokens);
if (ResourceVF.class == paq.getVFClass()) {
paq.setResultStreamMapper(createResultStreamMapper(resourceWhereTokens));
} else {
if (!resourceWhereTokens.isEmpty()) {
Function<Storage, Stream<AnnotationVF>> annotationVFFinder = createAnnotationVFFinder(resourceWhereTokens);
if (annotationVFFinder != null) {
paq.setAnnotationVFFinder(annotationVFFinder);
}
}
// create a predicate for filtering the annotationVF stream based on the remaining tokens
paq.setPredicate(createPredicate(tokens));
}
}
private Function<Storage, Stream<Map<String, Object>>> createResultStreamMapper(List<WhereToken> resourceWhereTokens) {
// TODO extend use of resource queries, current implementation only for implementation of nla-264 case
return (storage) -> {
GraphTraversal<Vertex, Vertex> traversal = storage.getResourceVertexTraversal();
Optional<String> rootResourceUUID = resourceWhereTokens.stream()//
.filter(t -> t.getProperty().equals(QueryField.resource_id)//
&& t.getFunction().equals(QueryFunction.eq))//
.map(t -> t.getParameters().get(0))//
.map(String.class::cast)//
.findFirst();
if (rootResourceUUID.isPresent()) {
traversal = traversal.has(Storage.IDENTIFIER_PROPERTY, rootResourceUUID.get());
}
Optional<String> sub = resourceWhereTokens.stream()//
.filter(t -> t.getProperty().equals(QueryField.subresource_sub)//
&& t.getFunction().equals(QueryFunction.eq))//
.map(t -> t.getParameters().get(0))//
.map(String.class::cast)//
.findFirst();
if (sub.isPresent()) {
traversal = traversal//
.until(__.has(ResourceVF.Properties.CARGO, sub.get()))//
.repeat(__.in(ResourceVF.EdgeLabels.PART_OF));
}
return StreamUtil.stream(traversal)//
.map(v -> storage.frameVertex(v, ResourceVF.class))//
.map(this::toResultMap);
};
}
private Map<String, Object> toResultMap(ResourceVF rvf) {
Map<String, Object> map = new HashMap<>();
map.put(QueryField.subresource_id.externalName(), rvf.getUuid());
// map.put(QueryField.subresource_sub.externalName(), rvf.getCargo());
return map;
}
private void addDefaultStateTokenWhenNeeded(List<WhereToken> tokens) {
boolean addStateToken = tokens.stream()//
.noneMatch(token -> QueryField.state.equals(token.getProperty()));
if (addStateToken) {
WhereToken defaultStateToken = new WhereToken(//
QueryField.state, //
QueryFunction.eq, //
ImmutableList.of(AlexandriaState.CONFIRMED.name())//
);
tokens.add(defaultStateToken);
}
}
private List<WhereToken> filterResourceWhereTokens(List<WhereToken> tokens) {
return tokens.stream()//
.filter(WhereToken::hasResourceProperty)//
.collect(toList());
}
private Function<Storage, Stream<AnnotationVF>> createAnnotationVFFinder(List<WhereToken> resourceWhereTokens) {
// TODo: refactor
WhereToken resourceWhereToken = resourceWhereTokens.get(0);
if (resourceWhereTokens.size() == 1 && resourceWhereToken.getFunction().equals(QueryFunction.eq) && resourceWhereToken.getProperty().equals(QueryField.resource_id)) {
String uuid = (String) resourceWhereToken.getParameters().get(0);
return storage -> {
Optional<ResourceVF> optionalResource = storage.readVF(ResourceVF.class, UUID.fromString(uuid));
if (optionalResource.isPresent()) {
ResourceVF resourceVF = optionalResource.get();
Stream<AnnotationVF> resourceAnnotationsStream = resourceVF.getAnnotatedBy().stream();
Stream<AnnotationVF> subresourceAnnotationsStream = resourceVF.getSubResources().stream()//
.flatMap(rvf -> rvf.getAnnotatedBy().stream());
return Stream.concat(resourceAnnotationsStream, subresourceAnnotationsStream);
}
// Should return error, since no resource found with given uuid
return ImmutableList.<AnnotationVF> of().stream();
};
} else if (resourceWhereTokens.size() == 1 && resourceWhereToken.getFunction().equals(QueryFunction.eq) && resourceWhereToken.getProperty().equals(QueryField.resource_ref)) {
return storage -> {
Object cargo = resourceWhereToken.getParameters().get(0);
// Log.info("cargo={}", cargo);
List<ResourceVF> resourceVFs = storage.find(ResourceVF.class)//
.has(ResourceVF.Properties.CARGO, cargo)//
.toList();
List<UUID> resourceUUIDs = resourceVFs.stream()//
.map(ResourceVF::getUuid)//
.map(UUID::fromString)//
.collect(toList());
// Log.info("resourceUUIDs={}", resourceUUIDs);
return toAnnotationVFStream(resourceUUIDs, storage);
};
} else if (resourceWhereToken.getFunction().equals(QueryFunction.inSet)) {
List<UUID> uuidSet = resourceWhereToken.getParameters().stream()//
.map(String.class::cast)//
.map(UUID::fromString)//
.collect(toList());
return storage -> toAnnotationVFStream(uuidSet, storage);
}
return null;
}
private Stream<AnnotationVF> toAnnotationVFStream(List<UUID> uuidSet, Storage storage) {
List<AnnotationVF> annotationList = new ArrayList<>();
for (UUID uuid : uuidSet) {
Optional<ResourceVF> optionalResource = storage.readVF(ResourceVF.class, uuid);
optionalResource.ifPresent(resourceVF -> {
annotationList.addAll(resourceVF.getAnnotatedBy());
annotationList.addAll(resourceVF.getSubResources().stream()//
.map(ResourceVF::getAnnotatedBy)//
.flatMap(Collection::stream)//
.collect(toList()));
});
}
// Log.info("annotationList={}", annotationList);
return annotationList.stream();
}
private Class<? extends AlexandriaVF> parseFind(final String find) {
switch (find) {
case "annotation":
return AnnotationVF.class;
case "resource":
// parseErrors.add("find: type 'resource' not supported yet");
return ResourceVF.class;
default:
parseErrors.add("find: unknown type '" + find + "', should be 'annotation' or 'resource'");
return null;
}
}
List<WhereToken> tokenize(String whereString) {
// Log.info("whereString=<{}>", whereString);
if (StringUtils.isEmpty(whereString)) {
// parseErrors.add("empty or missing where");
return Lists.newArrayList();
}
QueryErrorListener errorListener = new QueryErrorListener();
CharStream stream = new ANTLRInputStream(whereString);
AQLLexer lex = new AQLLexer(stream);
lex.removeErrorListeners();
CommonTokenStream tokenStream = new CommonTokenStream(lex);
AQLParser parser = new AQLParser(tokenStream);
parser.removeErrorListeners();
parser.addErrorListener(errorListener);
parser.setBuildParseTree(true);
ParseTree tree = parser.root();
// Log.info("tree={}", tree.toStringTree(parser));
if (errorListener.heardErrors()) {
parseErrors.addAll(errorListener.getParseErrors().stream()//
.map(AlexandriaQueryParser::clarifyParseError)//
.collect(toList()));
return Lists.newArrayList();
}
QueryVisitor visitor = new QueryVisitor();
visitor.visit(tree);
parseErrors.addAll(errorListener.getParseErrors());
return visitor.getWhereTokens();
}
private static final String MISSING_FIELD_NAME = "missing FIELD_NAME";
private static final String MISSING_FUNCTION = "missing FUNCTION";
private static String clarifyParseError(String parseError) {
if (parseError.contains(MISSING_FIELD_NAME)) {
return parseError.replace(MISSING_FIELD_NAME, "missing or invalid field") + ALLOWED_FIELDS;
}
if (parseError.contains(MISSING_FUNCTION)) {
return parseError.replace(MISSING_FUNCTION, "missing or invalid function") + ALLOWED_FUNCTIONS;
}
return parseError;
}
private static Predicate<AnnotationVF> createPredicate(List<WhereToken> tokens) {
if (tokens.isEmpty()) {
return alwaysTrue();
}
return tokens.stream()//
.map(AlexandriaQueryParser::toPredicate)//
.reduce(alwaysTrue(), Predicate::and);
}
static Set<String> ALL_STATES = Arrays.stream(AlexandriaState.values()).map(AlexandriaState::name).collect(toSet());
static Predicate<AnnotationVF> toPredicate(WhereToken whereToken) {
Function<AnnotationVF, Object> getter = QueryFieldGetters.get(whereToken.getProperty());
// eq
if (QueryFunction.eq.equals(whereToken.getFunction())) {
checkForValidStateParameter(whereToken);
Object eqValue = whereToken.getParameters().get(0);
return avf -> getter.apply(avf).equals(eqValue);
}
// match
if (QueryFunction.match.equals(whereToken.getFunction())) {
// TODO: catch errors
String matchValue = (String) whereToken.getParameters().get(0);
Pattern p = Pattern.compile(matchValue);
return (AnnotationVF avf) -> {
String propertyValue = (String) getter.apply(avf);
Matcher matcher = p.matcher(propertyValue);
return matcher.matches();
};
}
// inSet
if (QueryFunction.inSet.equals(whereToken.getFunction())) {
checkForValidStateParameter(whereToken);
List<Object> possibleValues = whereToken.getParameters();
return (AnnotationVF avf) -> {
Object propertyValue = getter.apply(avf);
return possibleValues.contains(propertyValue);
};
}
// inRange
if (QueryFunction.inRange.equals(whereToken.getFunction())) {
List<Object> rangeLimits = whereToken.getParameters();
Object lowerLimit = rangeLimits.get(0);
Object upperLimit = rangeLimits.get(1);
return (AnnotationVF avf) -> {
Object propertyValue = getter.apply(avf);
if (propertyValue instanceof String) {
return ((String) propertyValue).compareTo((String) lowerLimit) >= 0//
&& ((String) propertyValue).compareTo((String) upperLimit) <= 0;
}
//
return !(propertyValue instanceof Long) || ((Long) propertyValue).compareTo((Long) lowerLimit) >= 0//
&& ((Long) propertyValue).compareTo((Long) upperLimit) <= 0;
};
}
return alwaysTrue();
}
static final Predicate<String> INVALID_STATEVALUE_PREDICATE = stateValue -> !(stateValue instanceof String && ALL_STATES.contains(stateValue));
private static void checkForValidStateParameter(WhereToken whereToken) {
if (QueryField.state.equals(whereToken.getProperty())) {
List<String> invalidValues = whereToken.getParameters().stream()//
.map(String.class::cast).filter(INVALID_STATEVALUE_PREDICATE)//
.collect(toList());
if (!invalidValues.isEmpty()) {
String message = ((invalidValues.size() == 1)//
? invalidValues.get(0) + " is not a valid value"//
: Joiner.on(", ").join(invalidValues) + " are not valid values")//
+ " for " + QueryField.state.externalName();
throw new BadRequestException(message);
}
}
}
private static Predicate<AnnotationVF> alwaysTrue() {
return x -> true;
}
static String getAnnotationURL(final AnnotationVF avf) {
return locationBuilder.locationOf(AlexandriaAnnotation.class, avf.getUuid()).toString();
}
static String getAnnotationId(final AnnotationVF avf) {
// for deprecated annotations, remove the revision from the id.
return avf.getUuid().replaceFirst("\\..*$", "");
}
static String getResourceURL(final AnnotationVF avf) {
return id2url(avf.getResourceId());
}
static String getResourceRef(final AnnotationVF avf) {
return avf.getResource().getCargo();
}
static String getSubResourceURL(final AnnotationVF avf) {
return id2url(avf.getSubResourceId());
}
static String getSubResourceSub(final AnnotationVF avf) {
ResourceVF subResource = avf.getSubResource();
return (subResource != null) ? subResource.getCargo() : ":null";
}
private static String id2url(String resourceId) {
if (StringUtils.isNotEmpty(resourceId) && !AnnotationVF.NO_VALUE.equals(resourceId)) {
return locationBuilder.locationOf(AlexandriaResource.class, resourceId).toString();
}
return ":null";
}
private Comparator<AnnotationVF> parseSort(final String sortString) {
// TODO: cache resultcomparator?
final List<SortToken> sortTokens = parseSortString(sortString);
if (sortTokens == null) {
// there were parse errors
return null;
}
List<Ordering<AnnotationVF>> orderings = sortTokens.stream()//
.map(AlexandriaQueryParser::ordering)//
.collect(toList());
Ordering<AnnotationVF> order = orderings.remove(0);
for (final Ordering<AnnotationVF> suborder : orderings) {
order = order.compound(suborder);
}
return order;
}
private static Ordering<AnnotationVF> ordering(SortToken token) {
boolean ascending = token.isAscending();
Function<AnnotationVF, Object> function = QueryFieldGetters.get(token.getField());
return new Ordering<AnnotationVF>() {
@SuppressWarnings("unchecked")
@Override
public int compare(final AnnotationVF left, final AnnotationVF right) {
return ascending//
? ((Comparable<Object>) function.apply(left)).compareTo(function.apply(right))//
: ((Comparable<Object>) function.apply(right)).compareTo(function.apply(left));
}
};
}
private List<SortToken> parseSortString(final String sortString) {
List<String> sortTokenStrings = splitToList(sortString);
List<String> sortParseErrors = sortTokenStrings.stream()//
.map(AlexandriaQueryParser::extractExternalName)//
.filter(externalName -> !QueryField.ALL_EXTERNAL_NAMES.contains(externalName))//
.map(invalidFieldName -> "sort: unknown field: " + invalidFieldName + ALLOWED_FIELDS)//
.collect(toList());
if (!sortParseErrors.isEmpty()) {
parseErrors.addAll(sortParseErrors);
return null;
}
return sortTokenStrings.stream()//
.map(AlexandriaQueryParser::sortToken)//
.collect(toList());
}
static SortToken sortToken(final String f) {
boolean ascending = !f.startsWith("-");
String externalName = extractExternalName(f);
return new SortToken()//
.setAscending(ascending)//
.setField(QueryField.fromExternalName(externalName));
}
private static String extractExternalName(final String sortParameter) {
return sortParameter.replaceFirst("^[\\-\\+]", "");
}
private void parseReturn(final ParsedAlexandriaQuery paq, final String fieldString) {
final List<String> fields = splitToList(fieldString);
final List<String> listFields = extractListfields(fieldString);
final List<String> allowedFields = QueryField.ALL_EXTERNAL_NAMES;
final List<String> unknownFields = Lists.newArrayList(fields);
unknownFields.removeAll(allowedFields);
if (!unknownFields.isEmpty()) {
parseErrors.add("return: unknown field(s) " + Joiner.on(", ").join(unknownFields) + ALLOWED_FIELDS);
} else {
paq.setReturnFields(fields);
paq.setFieldsToGroup(listFields);
final Function<AnnotationVF, Map<String, Object>> mapper = avf -> fields.stream()//
.collect(toMap(Function.identity(), f -> QueryFieldGetters.get(QueryField.fromExternalName(f)).apply(avf)));
// TODO: cache resultmapper?
paq.setResultMapper(mapper);
}
}
static final Pattern LISTPATTERN = Pattern.compile("list\\((.*)\\)");
private List<String> extractListfields(String fieldString) {
Matcher matcher = LISTPATTERN.matcher(fieldString);
String listFields = matcher.find() ? matcher.group(1) : "";
return splitToList(listFields);
}
private static List<String> splitToList(final String fieldString) {
return Splitter.on(",")//
.trimResults()//
.omitEmptyStrings()//
.splitToList(fieldString.replace("list(", "").replace(")", ""));
}
}