/* * Copyright 2009-2011 Collaborative Research Centre SFB 632 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package annis.sqlgen; import annis.dao.objects.AnnotatedMatch; import annis.dao.objects.AnnotatedSpan; import annis.model.QueryNode; import annis.ql.parser.QueryData; import static annis.sqlgen.TableAccessStrategy.CORPUS_ANNOTATION_TABLE; import static annis.sqlgen.TableAccessStrategy.NODE_TABLE; import static annis.sqlgen.TableAccessStrategy.TEXT_TABLE; import java.sql.Array; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Types; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Validate; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.dao.DataAccessException; /** * * @author thomas */ public class MatrixSqlGenerator extends AbstractSqlGenerator implements SelectClauseSqlGenerator<QueryData>, FromClauseSqlGenerator<QueryData>, WhereClauseSqlGenerator<QueryData>, GroupByClauseSqlGenerator<QueryData>, OrderByClauseSqlGenerator<QueryData>, SqlGeneratorAndExtractor<QueryData, List<AnnotatedMatch>> { private final Logger log = LoggerFactory.getLogger(MatrixSqlGenerator.class); @Deprecated private String matchedNodesViewName; private SolutionSqlGenerator solutionSqlGenerator; private AnnotatedSpanExtractor spanExtractor; /** * Create a solution key to be used inside a single call to * {@code extractData}. * * This method must be overridden in child classes or by Spring. */ protected SolutionKey<?> createSolutionKey() { throw new UnsupportedOperationException( "BUG: This method needs to be overwritten by ancestors or through Spring"); } @Override public List<AnnotatedMatch> extractData(ResultSet resultSet) throws SQLException, DataAccessException { List<AnnotatedMatch> matches = new ArrayList<>(); Map<List<Long>, AnnotatedSpan[]> matchesByGroup = new HashMap<>(); int rowNum = 0; while (resultSet.next()) { long id = resultSet.getLong("id"); // create key Array sqlKey = resultSet.getArray("key"); Validate.isTrue(!resultSet.wasNull(), "Match group identifier must not be null"); Validate.isTrue(sqlKey.getBaseType() == Types.BIGINT, "Key in database must be from the type \"bigint\" but was \"" + sqlKey. getBaseTypeName() + "\""); Long[] keyArray = (Long[]) sqlKey.getArray(); int matchWidth = keyArray.length; List<Long> key = Arrays.asList(keyArray); if (!matchesByGroup.containsKey(key)) { matchesByGroup.put(key, new AnnotatedSpan[matchWidth]); } AnnotatedSpan span = spanExtractor.mapRow(resultSet, rowNum); // set annotation spans for *all* positions of the id // (node could have matched several times) for (int posInMatch = 0; posInMatch < key.size(); posInMatch++) { if (key.get(posInMatch) == id) { matchesByGroup.get(key)[posInMatch] = span; } } rowNum++; } // end for each row for (AnnotatedSpan[] match : matchesByGroup.values()) { matches.add(new AnnotatedMatch(Arrays.asList(match))); } return matches; } @Override public String selectClause(QueryData queryData, List<QueryNode> alternative, String indent) { StringBuilder sb = new StringBuilder(); sb.append("\n"); TableAccessStrategy tables = tables(null); SolutionKey<?> key = createSolutionKey(); // TableAccessStrategy tas = createTableAccessStrategy(); List<String> keyColumns = key.generateOuterQueryColumns(tables, alternative.size()); // key sb.append(indent).append(TABSTOP); sb.append(StringUtils.join(keyColumns, ", ")); sb.append(",\n"); // fields List<MatrixQueryData> matrixExtList = queryData.getExtensions( MatrixQueryData.class); sb.append(indent).append(TABSTOP); sb.append(StringUtils.join(getSelectFields(tables, matrixExtList.isEmpty() ? null : matrixExtList.get(0)), ",\n" + indent + TABSTOP)); sb.append("\n"); return sb.toString(); } protected List<String> getSelectFields(TableAccessStrategy tas, MatrixQueryData matrixExt) { List<String> result = new LinkedList<>(); result.add(selectIdString(tas)); result.add(selectSpanString(tas)); result.add(selectAnnotationsString(tas)); if (matrixExt != null && matrixExt.getMetaKeys() != null && !matrixExt. getMetaKeys().isEmpty()) { result.add(selectMetadataString(tas)); } return result; } protected String selectIdString(TableAccessStrategy tas) { return tas.aliasedColumn(NODE_TABLE, "id") + " AS id"; } protected String selectSpanString(TableAccessStrategy tas) { return "min(substr(" + tas.aliasedColumn(TEXT_TABLE, "text") + ", " + tas.aliasedColumn(NODE_TABLE, "left") + " + 1, " + tas.aliasedColumn(NODE_TABLE, "right") + " - " + tas.aliasedColumn(NODE_TABLE, "left") + ")) AS span"; } protected String selectMetadataString(TableAccessStrategy tas) { return "array_agg(DISTINCT coalesce(" + tas.aliasedColumn(CORPUS_ANNOTATION_TABLE, "namespace") + " || ':', ':') || " + tas.aliasedColumn(CORPUS_ANNOTATION_TABLE, "name") + " || ':' || " + tas.aliasedColumn(CORPUS_ANNOTATION_TABLE, "value") + ") AS metadata"; } @Override public String fromClause(QueryData queryData, List<QueryNode> alternative, String indent) { StringBuilder sb = new StringBuilder(); sb.append(indent).append("(\n"); sb.append(indent); sb.append(solutionSqlGenerator.toSql(queryData, indent + TABSTOP)); sb.append(indent).append(") AS solutions,\n"); String factsSQL = SelectedFactsFromClauseGenerator.selectedFactsSQL( queryData.getCorpusList(), indent); sb.append(indent).append(TABSTOP); sb.append(factsSQL).append(" AS facts"); sb.append("\n"); TableAccessStrategy tables = tables(null); addFromOuterJoins(sb, queryData, tables, indent, alternative); sb.append(",\n"); sb.append(indent).append(TABSTOP); sb.append(TEXT_TABLE); return sb.toString(); } protected void addFromOuterJoins(StringBuilder sb, QueryData queryData, TableAccessStrategy tas, String indent, List<QueryNode> alternative) { List<MatrixQueryData> matrixExtList = queryData.getExtensions( MatrixQueryData.class); // restrict to certain annnotation or metadata keys if wanted if (!matrixExtList.isEmpty()) { MatrixQueryData matrixExt = matrixExtList.get(0); if (matrixExt.getMetaKeys() != null && !matrixExt.getMetaKeys().isEmpty()) { sb.append(indent).append(TABSTOP); sb.append("LEFT OUTER JOIN "); sb.append(CORPUS_ANNOTATION_TABLE); sb.append(" ON ("); sb.append(tas.aliasedColumn(CORPUS_ANNOTATION_TABLE, "corpus_ref")); sb.append(" = "); sb.append(tas.aliasedColumn(NODE_TABLE, "corpus_ref")); Set<String> conditions = new TreeSet<>(); addAnnoSelectionCondition(conditions, matrixExt.getMetaKeys(), CORPUS_ANNOTATION_TABLE, tas); sb.append(" AND ").append(StringUtils.join(conditions, " AND ")); sb.append(")\n"); } } List<Long> corpusList = queryData.getCorpusList(); String factsSQL = SelectedFactsFromClauseGenerator.selectedFactsSQL( queryData.getCorpusList(), indent); String corpusListString = StringUtils. join(corpusList, ", "); if(corpusList.isEmpty()) { corpusListString = "NULL"; } sb.append(indent).append(TABSTOP); sb.append("LEFT OUTER JOIN ").append(factsSQL).append(" AS node_anno") .append(" ON (") .append(tas.aliasedColumn(NODE_TABLE, "id")).append( " = node_anno.id AND ") .append("node_anno.n_na_sample IS TRUE AND ") .append(tas.aliasedColumn(NODE_TABLE, "toplevel_corpus")).append( " = node_anno.toplevel_corpus AND ") .append("node_anno.toplevel_corpus IN (").append(corpusListString) .append("))"); sb.append("\n"); } @Override public Set<String> whereConditions(QueryData queryData, List<QueryNode> alternative, String indent) { Set<String> conditions = new HashSet<>(); StringBuilder sb = new StringBuilder(); TableAccessStrategy tables = tables(null); // corpus selection List<Long> corpusList = queryData.getCorpusList(); if (corpusList != null && !corpusList.isEmpty()) { sb.append(tables.aliasedColumn(NODE_TABLE, "toplevel_corpus")); sb.append(" IN ("); sb.append(StringUtils.join(corpusList, ", ")); sb.append(")"); conditions.add(sb.toString()); } // text table table joining (FIXME: why not in from clause) sb.setLength(0); sb.append(tables.aliasedColumn(TEXT_TABLE, "id")); sb.append(" = "); sb.append(tables.aliasedColumn(NODE_TABLE, "text_ref")); conditions.add(sb.toString()); conditions.add(tables.aliasedColumn(TEXT_TABLE, "corpus_ref") + " = " + tables.aliasedColumn(NODE_TABLE, "corpus_ref")); conditions.add(tables.aliasedColumn(NODE_TABLE, "toplevel_corpus") + " = " + "solutions.toplevel_corpus"); // nodes selected by id sb.setLength(0); sb.append("(\n"); sb.append(indent).append(TABSTOP).append(TABSTOP); List<String> ors = new ArrayList<>(); for (int i = 1; i <= queryData.getMaxWidth(); ++i) { ors.add( tables.aliasedColumn(NODE_TABLE, "id") + " = solutions.id" + String. valueOf(i)); } sb.append(StringUtils.join(ors, " OR\n" + indent + TABSTOP + TABSTOP)); sb.append("\n"); sb.append(indent).append(TABSTOP); sb.append(")"); conditions.add(sb.toString()); return conditions; } private void addAnnoSelectionCondition(Set<String> conditions, List<MatrixQueryData.QName> selected, String tableName, TableAccessStrategy tables) { List<String> orConditions = new LinkedList<>(); Iterator<MatrixQueryData.QName> itMatrix = selected.iterator(); while (itMatrix.hasNext()) { MatrixQueryData.QName qname = itMatrix.next(); if (qname.name != null && qname.namespace != null) { orConditions.add( tables.aliasedColumn(tableName, "name") + " = " + SqlConstraints.sqlString(qname.name) + " AND " + tables.aliasedColumn(tableName, "namespace") + " = " + SqlConstraints.sqlString(qname.namespace)); } else if (qname.namespace != null) { orConditions.add(tables.aliasedColumn(tableName, "namespace") + " = " + SqlConstraints.sqlString(qname.namespace)); } else if (qname.name != null) { orConditions.add(tables.aliasedColumn(tableName, "name") + " = " + SqlConstraints.sqlString(qname.name)); } } if (orConditions.isEmpty()) { // add an always false condition on corpus_annotation conditions.add(tables.aliasedColumn(tableName, "name") + " IS NULL"); } else { conditions.add("(" + StringUtils.join(orConditions, " OR ") + ")"); } } @Override public String groupByAttributes(QueryData queryData, List<QueryNode> alternative) { TableAccessStrategy tas = tables(null); return "key, " + tas.aliasedColumn(NODE_TABLE, "corpus_ref") + ", " + tas.aliasedColumn(NODE_TABLE, "text_ref") + ", " + tas.aliasedColumn(NODE_TABLE, "token_index") + ", " + tas.aliasedColumn(NODE_TABLE, "id"); } protected String selectAnnotationsString(TableAccessStrategy tas) { return "array_agg(DISTINCT node_anno.node_qannotext) AS annotations"; } @Override public String orderByClause(QueryData queryData, List<QueryNode> alternative, String indent) { TableAccessStrategy tas = tables(null); return "key, " + tas.aliasedColumn(NODE_TABLE, "corpus_ref") + ", " + tas.aliasedColumn(NODE_TABLE, "text_ref") + ", " + tas.aliasedColumn(NODE_TABLE, "token_index") + ", " + tas.aliasedColumn(NODE_TABLE, "id"); } public String getMatchedNodesViewName() { return matchedNodesViewName; } public void setMatchedNodesViewName(String matchedNodesViewName) { this.matchedNodesViewName = matchedNodesViewName; } public SolutionSqlGenerator getSolutionSqlGenerator() { return solutionSqlGenerator; } public void setSolutionSqlGenerator(SolutionSqlGenerator solutionSqlGenerator) { this.solutionSqlGenerator = solutionSqlGenerator; } public AnnotatedSpanExtractor getSpanExtractor() { return spanExtractor; } public void setSpanExtractor(AnnotatedSpanExtractor spanExtractor) { this.spanExtractor = spanExtractor; } }