/*
 * Copyright 2012 SFB 632.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package annis.sqlgen;

import annis.CommonHelper;
import annis.model.QueryNode;
import annis.ql.parser.QueryData;
import annis.service.objects.Match;
import annis.service.objects.MatchGroup;
import static annis.sqlgen.AbstractSqlGenerator.TABSTOP;
import static annis.sqlgen.SqlConstraints.sqlString;
import static annis.sqlgen.TableAccessStrategy.CORPUS_TABLE;
import static annis.sqlgen.TableAccessStrategy.NODE_TABLE;
import annis.sqlgen.extensions.AnnotateQueryData;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.escape.Escaper;
import com.google.common.escape.Escapers;
import java.net.URI;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;

/**
 * Generates a WITH clause sql statement for a list of salt ids.
 *
 * Salt ids are simple URIs and are defined like this:
 *
 * <p>{@code salt:/corp1/corp2/doc1#node}</p>.
 *
 * The leading / of the URI is a must, // would cause an error, because
 * authorities are currently not supported.
 *
 * @author Benjamin Weißenfels <b.pixeldrama@gmail.com>
 * @author Thomas Krause <krauseto@hu-berlin.de>
 */
public class GraphWithClauseGenerator extends CommonAnnotateWithClauseGenerator
{

  /** Escapes commas inside a single element of the generated array literal. */
  private static final Escaper ARRAY_ELEM_ESC =
    Escapers.builder().addEscape(',', "\\,").build();

  /**
   * Builds the column list of the sub-select for one matched node.
   *
   * The columns are emitted as {@code n}, {@code nodeNr}, {@code id},
   * {@code text}, {@code min}, {@code max} and {@code corpus}. When no
   * segmentation layer is set on the annotate query data, the left value is
   * subtracted from the left token column and the right value is added to
   * the right token column.
   *
   * @param tas strategy used to resolve table and column names
   * @param annotateQueryData source of the segmentation layer and the
   *                          left/right values
   * @param match number of the match, emitted as column {@code n}
   * @param nodeNr number of the node inside the match, also used as suffix
   *               of the node table alias
   * @param indent indentation put in front of the second output line
   * @return the column list without the leading {@code SELECT} keyword
   */
  private String selectForNode(
    TableAccessStrategy tas, AnnotateQueryData annotateQueryData,
    int match, int nodeNr, String indent)
  {
    // every column is read from the node table alias of this node number
    String nodePrefix = tas.tableName(NODE_TABLE) + nodeNr + ".";
    boolean noSegmentation = annotateQueryData.getSegmentationLayer() == null;

    StringBuilder sb = new StringBuilder();

    sb.append(match).append(" AS n, ");
    sb.append(nodeNr).append(" AS nodeNr,\n").append(indent);

    sb.append(nodePrefix).append(tas.columnName(NODE_TABLE, "id"))
      .append(" AS id, ");

    sb.append(nodePrefix).append(tas.columnName(NODE_TABLE, "text_ref"))
      .append(" AS text, ");

    sb.append(nodePrefix).append(tas.columnName(NODE_TABLE, "left_token"));
    if (noSegmentation)
    {
      sb.append(" - ").append(annotateQueryData.getLeft());
    }
    sb.append(" AS min, ");

    sb.append(nodePrefix).append(tas.columnName(NODE_TABLE, "right_token"));
    if (noSegmentation)
    {
      sb.append(" + ").append(annotateQueryData.getRight());
    }
    sb.append(" AS max, ");

    sb.append(nodePrefix).append(tas.columnName(NODE_TABLE, "corpus_ref"))
      .append(" AS corpus");

    return sb.toString();
  }

  /**
   * Builds the FROM part of the sub-select for one matched node.
   *
   * The result joins the SQL returned by
   * {@link SelectedFactsFromClauseGenerator#selectedFactsSQL} and the corpus
   * table, both aliased with the table name followed by the node number.
   *
   * @param tas strategy used to resolve table names
   * @param indent indentation put in front of the generated line
   * @param nodeNr number of the node, used as alias suffix
   * @param corpusList corpus ids handed on to the facts SQL generator
   * @return the table list without the leading {@code FROM} keyword
   */
  private String fromForNode(
    TableAccessStrategy tas, String indent, int nodeNr, List<Long> corpusList)
  {
    String factsSQL =
      SelectedFactsFromClauseGenerator.selectedFactsSQL(corpusList, indent);
    String corpusTable = tas.tableName(CORPUS_TABLE);

    StringBuilder sb = new StringBuilder();
    sb.append(indent)
      .append(factsSQL).append(" AS ")
      .append(tas.tableName(NODE_TABLE)).append(nodeNr).append(", ")
      .append(corpusTable).append(" AS ")
      .append(corpusTable).append(nodeNr);

    return sb.toString();
  }

  /**
   * Builds the WHERE conditions of the sub-select for one matched node.
   *
   * @param uri Salt id of the node; its path selects the corpus/document and
   *            its fragment the node
   * @param tas strategy used to resolve table names
   * @param corpusList ids listed in the {@code toplevel_corpus IN (...)}
   *                   condition
   * @param indent indentation put in front of every generated line
   * @param nodeNr number of the node, used as alias suffix
   * @return the conditions without the leading {@code WHERE} keyword
   */
  private String whereForNode(URI uri, TableAccessStrategy tas,
    List<Long> corpusList, String indent, int nodeNr)
  {
    String nodeAlias = tas.tableName(NODE_TABLE) + nodeNr;
    String corpusAlias = tas.tableName(CORPUS_TABLE) + nodeNr;

    // The fragment is caller supplied and already percent-decoded by
    // java.net.URI, so it may contain a single quote. Double it, otherwise
    // the quote would terminate the SQL string literal early.
    String nodeID = StringUtils.replace(generateNodeID(uri), "'", "''");

    StringBuilder sb = new StringBuilder();

    // check for corpus/document by its path
    sb.append(indent)
      .append(corpusAlias).append(".path_name = ")
      .append(generatePathName(uri)).append(" AND\n");

    // join the found corpus/document to the facts table
    sb.append(indent)
      .append(nodeAlias).append(".corpus_ref = ")
      .append(corpusAlias).append(".id AND\n");

    // filter the node with the right name
    sb.append(indent)
      .append(nodeAlias).append(".salt_id = ")
      .append("'").append(nodeID).append("'").append(" AND\n");

    // use the toplevel partitioning
    sb.append(indent)
      .append(nodeAlias).append(".toplevel_corpus IN ( ")
      .append(StringUtils.join(corpusList, ",")).append(") ");

    return sb.toString();
  }

  /**
   * Assembles the complete {@code SELECT ... FROM ... WHERE ... LIMIT 1}
   * statement for one node of one match.
   *
   * @param match number of the match
   * @param nodeNr number of the node inside the match
   * @param uri Salt id of the node
   * @param tas strategy used to resolve table and column names
   * @param annoQueryData annotate settings handed on to the SELECT part
   * @param corpusList corpus ids handed on to the FROM and WHERE parts
   * @param indent indentation of the keywords; the clause bodies are
   *               indented one {@code TABSTOP} further
   * @return the statement, terminated by a line break
   */
  private String subselectForMatch(int match, int nodeNr, URI uri,
    TableAccessStrategy tas, AnnotateQueryData annoQueryData,
    List<Long> corpusList, String indent)
  {
    String inner = indent + TABSTOP;

    StringBuilder sb = new StringBuilder();

    sb.append(indent).append("SELECT ")
      .append(selectForNode(tas, annoQueryData, match, nodeNr, inner))
      .append("\n");

    sb.append(indent).append("FROM\n")
      .append(fromForNode(tas, inner, nodeNr, corpusList))
      .append("\n");

    sb.append(indent).append("WHERE\n")
      .append(whereForNode(uri, tas, corpusList, inner, nodeNr))
      .append("\n");

    sb.append(indent).append("LIMIT 1\n");

    return sb.toString();
  }

  /**
   * Creates the {@code matches} WITH clause: one sub-select per Salt id of
   * every match in the first {@link MatchGroup} extension of the query data,
   * combined with {@code UNION ALL}.
   *
   * If the query data has no {@link AnnotateQueryData} extension,
   * {@code new AnnotateQueryData(5, 5)} is used instead.
   *
   * @param queryData source of the extensions and of the corpus list
   * @param alternative not used by this implementation
   * @param indent indentation of the generated clause
   * @return a list containing the single generated clause
   * @throws IllegalArgumentException if the query data contains no
   *                                  {@link MatchGroup} extension
   */
  @Override
  protected List<String> getMatchesWithClause(QueryData queryData,
    List<QueryNode> alternative, String indent)
  {
    TableAccessStrategy tas = createTableAccessStrategy();

    List<AnnotateQueryData> extensions =
      queryData.getExtensions(AnnotateQueryData.class);
    AnnotateQueryData annotateQueryData = extensions.isEmpty()
      ? new AnnotateQueryData(5, 5)
      : extensions.get(0);

    List<MatchGroup> listOfSaltURIs = queryData.getExtensions(MatchGroup.class);
    // only work with the first element
    Validate.isTrue(!listOfSaltURIs.isEmpty(),
      "query data must contain a MatchGroup extension");
    MatchGroup groupSet = listOfSaltURIs.get(0);

    List<String> subselects = new LinkedList<>();
    String indent2 = indent + TABSTOP;

    int matchNr = 1;
    for (Match match : groupSet.getMatches())
    {
      int nodeNr = 1;
      for (URI uri : match.getSaltIDs())
      {
        String sub = indent2 + "(\n"
          + subselectForMatch(matchNr, nodeNr, uri, tas, annotateQueryData,
            queryData.getCorpusList(), indent2)
          + indent2 + ")";
        // prepend: the sub-selects end up in reverse iteration order
        subselects.add(0, sub);
        nodeNr++;
      }
      matchNr++;
    }

    String result = indent + "matches AS\n"
      + indent + "(\n"
      + Joiner.on("\n" + indent2 + "UNION ALL\n").join(subselects)
      + "\n" + indent + ")";

    return Lists.newArrayList(result);
  }

  /**
   * Converts the path of a Salt id into a quoted array literal of the form
   * <code>'{doc1, corp2, corp1}'</code>.
   *
   * The path elements are listed in reverse order and commas inside an
   * element are escaped with a backslash. Quoting is delegated to
   * {@code sqlString}.
   *
   * @param uri Salt id whose corpus path is converted
   * @return the value compared against the {@code path_name} column
   */
  private String generatePathName(URI uri)
  {
    List<String> path = CommonHelper.getCorpusPath(uri);
    Collections.reverse(path);

    List<String> escapedPath = new LinkedList<>();
    for (String p : path)
    {
      escapedPath.add(ARRAY_ELEM_ESC.escape(p));
    }

    StringBuilder sb = new StringBuilder();
    sb.append("{");
    Joiner.on(", ").appendTo(sb, escapedPath);
    sb.append("}");

    return sqlString(sb.toString());
  }

  /**
   * Extracts the node name from a Salt id.
   *
   * @param uri Salt id of the node
   * @return the decoded fragment of the URI, {@code null} if the URI has no
   *         fragment
   */
  private String generateNodeID(URI uri)
  {
    return uri.getFragment();
  }
}