/*
* Copyright 2012 SFB 632.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package annis.sqlgen;
import annis.model.QueryNode;
import annis.ql.parser.QueryData;
import static annis.sqlgen.AbstractSqlGenerator.TABSTOP;
import static annis.sqlgen.SqlConstraints.sqlString;
import static annis.sqlgen.TableAccessStrategy.NODE_TABLE;
import annis.sqlgen.extensions.AnnotateQueryData;
import com.google.common.collect.Lists;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import org.apache.commons.lang3.StringUtils;
/**
*
* @author Thomas Krause <krauseto@hu-berlin.de>
*/
public class CommonAnnotateWithClauseGenerator
extends TableAccessStrategyFactory
implements WithClauseSqlGenerator<QueryData>
{
private AnnotateInnerQuerySqlGenerator innerQuerySqlGenerator;
private IslandsPolicy islandsPolicy;
/**
* Create a solution key to be used inside a single call to
* {@code extractData}.
*
* This method must be overridden in child classes or by Spring.
*/
protected SolutionKey<?> createSolutionKey()
{
throw new UnsupportedOperationException(
"BUG: This method needs to be overwritten by ancestors or through Spring");
}
@Override
public List<String> withClauses(QueryData queryData,
List<QueryNode> alternative, String indent)
{
List<Long> corpusList = queryData.getCorpusList();
HashMap<Long, Properties> corpusProperties =
queryData.getCorpusConfiguration();
IslandsPolicy.IslandPolicies policy = getIslandsPolicy().getMostRestrictivePolicy(corpusList,
corpusProperties);
List<String> result = new LinkedList<>();
List<AnnotateQueryData> ext = queryData.getExtensions(
AnnotateQueryData.class);
if (!ext.isEmpty())
{
AnnotateQueryData annoQueryData = ext.get(0);
if (annoQueryData.getSegmentationLayer() == null)
{
// token index based method
// first get the raw matches
result.addAll(getMatchesWithClause(queryData, alternative, indent));
result.add(getKeyWithClause(indent));
// break the columns down in a way that every matched node has it's own
// row
result.add(getSolutionFromMatchesWithClause(policy, "matches", indent));
}
else
{
// segmentation layer based method
result.addAll(getMatchesWithClause(queryData, alternative, indent));
result.add(getKeyWithClause(indent));
result.add(getNearestSeqWithClause(queryData, annoQueryData, alternative,
"matches", indent));
result.add(getSolutionFromNearestSegWithClause(queryData, annoQueryData,
alternative, policy, "nearestseg", indent));
}
}
return result;
}
/**
* Uses the inner SQL generator and provides an ordered and limited view on
* the matches with a match number.
* @param queryData
* @param alternative
* @param indent
* @return
*/
protected List<String> getMatchesWithClause(QueryData queryData, List<QueryNode> alternative, String indent)
{
String indent2= indent + TABSTOP;
String indent3 = indent2 + TABSTOP;
StringBuilder sbRaw = new StringBuilder();
sbRaw.append(indent).append("matchesRaw AS\n");
sbRaw.append(indent).append("(\n");
sbRaw.append(indent).append(getInnerQuerySqlGenerator().toSql(queryData, indent2));
sbRaw.append("\n").append(indent).append(")");
StringBuilder sbMatches = new StringBuilder();
sbMatches.append(indent).append("matches AS\n");
sbMatches.append(indent).append("(\n");
int numOfNodes = queryData.getMaxWidth();
for(int i=1; i <= numOfNodes; i++)
{
sbMatches.append(indent2)
.append("SELECT\n");
sbMatches.append(indent3).append("n").append(" AS n,\n");
sbMatches.append(indent3).append(i).append(" AS nodeNr,\n");
sbMatches.append(indent3).append("id").append(i).append(" AS id,\n");
sbMatches.append(indent3).append("text").append(i).append(" AS \"text\",\n");
sbMatches.append(indent3).append("min").append(i).append(" AS min,\n");
sbMatches.append(indent3).append("max").append(i).append(" AS max,\n");
sbMatches.append(indent3).append("corpus").append(i).append(" AS corpus\n");
sbMatches.append(indent2).append("FROM matchesRaw\n");
if(i < numOfNodes)
{
sbMatches.append(indent2).append("\n").append(indent2).append("UNION ALL\n\n");
}
}
sbMatches.append("\n").append(indent).append(")");
return Lists.newArrayList(sbRaw.toString(), sbMatches.toString());
}
protected String getKeyWithClause(String indent)
{
String indent2 = indent + TABSTOP;
return indent + "keys AS (\n" +
indent2 + "SELECT n, array_agg(id ORDER BY nodenr ASC) AS \"key\" FROM matches\n" +
indent2 + "GROUP BY n\n" +
indent + ")";
}
/**
* Breaks down the matches table, so that each node of each match has it's own
* row.
* @param islandPolicy
* @param islandPolicies
* @param matchesName
* @param indent
* @return
*/
protected String getSolutionFromMatchesWithClause(
IslandsPolicy.IslandPolicies islandPolicy,
String matchesName,
String indent)
{
String indent2 = indent + TABSTOP;
StringBuilder sb = new StringBuilder();
sb.append(indent).append("solutions AS\n");
sb.append(indent).append("(\n");
sb.append(indent2).append("SELECT ");
if(islandPolicy == IslandsPolicy.IslandPolicies.none)
{
sb.append("min(keys.key) AS key, ")
.append(matchesName).append(".n AS n, ")
.append(matchesName).append(".text, ")
.append(matchesName).append(".corpus, ");
sb.append("min(").append(matchesName).append(".min").append("), ");
sb.append("max(").append(matchesName).append(".max").append(")");
}
else if(islandPolicy == IslandsPolicy.IslandPolicies.context)
{
sb.append("keys.key AS key, ")
.append(matchesName).append(".n AS n, ")
.append(matchesName).append(".text, ")
.append(matchesName).append(".corpus, ");
sb.append(matchesName).append(".min, ");
sb.append(matchesName).append(".max");
}
else
{
throw new UnsupportedOperationException(
"No implementation for island policy "
+ islandsPolicy.toString());
}
sb.append("\n").append(indent2);
sb.append("FROM ").append(matchesName).append(", keys\n").append(indent2);
sb.append("WHERE keys.n = ").append(matchesName).append(".n\n");
if(islandPolicy == IslandsPolicy.IslandPolicies.none)
{
sb.append(indent2)
.append("GROUP BY ")
.append(matchesName).append(".n, ")
.append(matchesName).append(".text, ")
.append(matchesName).append(".corpus").append("\n");
}
sb.append(indent).append(")");
return sb.toString();
}
protected String getNearestSeqWithClause(
QueryData queryData, AnnotateQueryData annoQueryData,
List<QueryNode> alternative, String matchesName, String indent)
{
String indent2 = indent + TABSTOP;
String indent3 = indent2 + TABSTOP;
// use copy constructor in order not to mess up the global TableAccessStrategy bean
TableAccessStrategy tas = new TableAccessStrategy(createTableAccessStrategy());
tas.addTableAlias("solutions", matchesName);
StringBuilder sb = new StringBuilder();
sb.append(indent).append("nearestseg AS\n");
sb.append(indent).append("(\n");
sb.append(indent2).append("SELECT\n");
sb.append(indent3);
sb.append("matches.nodeNr AS nodeNr");
sb.append(", matches.n AS n");
sb.append(", matches.id AS id,\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "seg_index")).append(" - ").append(annoQueryData.
getLeft()).append(" AS \"min\",\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE,
"seg_index")).append(" + ").append(annoQueryData.getRight()).append(
" AS \"max\",\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "text_ref")).append(
" AS \"text\", \n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "corpus_ref")).append(
" AS \"corpus\", \n");
String distLeft = "min"
+ " - "
+ tas.aliasedColumn(NODE_TABLE, "left_token");
String distRight = tas.aliasedColumn(NODE_TABLE, "right_token")
+ " - max" ;
// create ordered window partition
// values are ordered by their distance to the min or max token index
// NULLIF( dist+1, -abs(dist+1) will give negative entries NULL which means
// their are put last in the ordered list
// +1 is there to ensure that positive equal values (thus dist=0) are not ignored
sb.append(indent3).append("row_number() OVER (PARTITION BY ")
.append(tas.aliasedColumn(NODE_TABLE, "corpus_ref")).append(", ")
.append(tas.aliasedColumn(NODE_TABLE, "text_ref")).append(", ")
.append(matchesName).append(".n").append(", ")
.append(matchesName).append(".nodeNr")
.append(" ORDER BY NULLIF(")
.append(distLeft).append("+ 1, -abs(").append(distLeft)
.append(" + 1)) ASC) AS rank_left,\n");
sb.append(indent3).append("row_number() OVER (PARTITION BY ")
.append(tas.aliasedColumn(NODE_TABLE, "corpus_ref")).append(", ")
.append(tas.aliasedColumn(NODE_TABLE, "text_ref")).append(", ")
.append(matchesName).append(".n").append(", ")
.append(matchesName).append(".nodeNr")
.append(" ORDER BY NULLIF(")
.append(distRight).append(" + 1, -abs(").append(distRight)
.append(" + 1)) ASC) AS rank_right\n");
sb.append(indent2).append("FROM ").append(tas.tableName(NODE_TABLE)).append(
", matches\n");
sb.append(indent2).append("WHERE\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "toplevel_corpus")).
append(" IN (").append(StringUtils.join(queryData.getCorpusList(), ",")).
append(") AND\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "n_sample")).append(
" IS TRUE AND\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "seg_name"))
.append(" = ").append(sqlString(annoQueryData.getSegmentationLayer()))
.append(" AND\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "text_ref"))
.append(" = matches.text").append(" AND\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "corpus_ref"))
.append(" = matches.corpus").append("\n");
sb.append(indent).append(")");
return sb.toString();
}
/**
* Get solution from a covered segmentation using a WITH clause.
*
* @param queryData
* @param annoQueryData
* @param alternative
* @param islandsPolicy
* @param coveredName
* @param indent
* @return
*
*/
protected String getSolutionFromNearestSegWithClause(
QueryData queryData, AnnotateQueryData annoQueryData,
List<QueryNode> alternative, IslandsPolicy.IslandPolicies islandsPolicy,
String coveredName, String indent)
{
TableAccessStrategy tas = createTableAccessStrategy();
String indent2 = indent + TABSTOP;
String indent3 = indent2 + TABSTOP;
List<Long> corpusList = queryData.getCorpusList();
StringBuilder sb = new StringBuilder();
sb.append(indent).append("solutions AS\n");
sb.append(indent).append("(\n");
sb.append(indent2).append("SELECT DISTINCT ");
sb.append("min(").append("keys.key) AS key, ")
.append(coveredName).append(".n AS n, ")
.append("min(").append(tas.aliasedColumn(NODE_TABLE, "left_token")).append(") AS \"min\", ")
.append("max(").append(tas.aliasedColumn(NODE_TABLE, "right_token")).append(") AS \"max\", ")
.append(tas.aliasedColumn(NODE_TABLE, "text_ref")).append(" AS \"text\", ")
.append(tas.aliasedColumn(NODE_TABLE, "corpus_ref")).append(" AS \"corpus\"\n");
sb.append(indent2).append("FROM ")
.append(coveredName).append(", ")
.append(tas.tableName(NODE_TABLE)).append(", ")
.append("keys")
.append("\n");
sb.append(indent2).append("WHERE\n");
sb.append(indent3).append("keys.n = ").append(coveredName).append(".n AND\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "toplevel_corpus")).
append(" IN (").append(StringUtils.join(corpusList, ",")).append(") AND\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "n_sample")).append(
" IS TRUE AND\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "seg_name")).append(
" = ")
.append(sqlString(annoQueryData.getSegmentationLayer())).append(" AND\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "text_ref")).append(
" = ").append(coveredName).append(".\"text\" AND\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "corpus_ref")).append(
" = ").append(coveredName).append(".\"corpus\" AND\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "seg_index")).append(
" <= ").append(coveredName).append(".\"max\" AND\n");
sb.append(indent3).append(tas.aliasedColumn(NODE_TABLE, "seg_index")).append(
" >= ").append(coveredName).append(".\"min\" AND\n");
sb.append(indent3).append("(").append(coveredName)
.append(".rank_left = 1 OR ").append(coveredName)
.append(".rank_right = 1)\n");
if (islandsPolicy == IslandsPolicy.IslandPolicies.none)
{
sb.append(indent2).append("GROUP BY ")
.append(tas.aliasedColumn(NODE_TABLE, "corpus_ref")).append(", ")
.append(tas.aliasedColumn(NODE_TABLE, "text_ref")).append(", ")
.append(coveredName).append(".n\n");
}
else if(islandsPolicy == IslandsPolicy.IslandPolicies.context)
{
sb.append(indent2).append("GROUP BY ")
.append(tas.aliasedColumn(NODE_TABLE, "corpus_ref")).append(", ")
.append(tas.aliasedColumn(NODE_TABLE, "text_ref")).append(", ")
.append(coveredName).append(".n, ")
.append(coveredName).append(".nodeNr\n");
}
else
{
throw new UnsupportedOperationException(
"No implementation for island policy "
+ islandsPolicy.toString());
}
sb.append(indent).append(")\n");
return sb.toString();
}
public static String overlapForOneRange(String indent,
String rangeMin, String rangeMax, String textRef, String corpus_ref, TableAccessStrategy tables)
{
StringBuilder sb = new StringBuilder();
sb.append(indent).append("(");
sb.append(tables.aliasedColumn(NODE_TABLE, "left_token")).append(" <= ").append(rangeMax)
.append(" AND ")
.append(tables.aliasedColumn(NODE_TABLE, "right_token")).append(" >= ").append(rangeMin)
.append(" AND ")
.append(tables.aliasedColumn(NODE_TABLE, "text_ref")).append(" = ").append(textRef)
.append(" AND ")
.append(tables.aliasedColumn(NODE_TABLE, "corpus_ref")).append(" = ").append(corpus_ref);
sb.append(")");
return sb.toString();
}
public AnnotateInnerQuerySqlGenerator getInnerQuerySqlGenerator()
{
return innerQuerySqlGenerator;
}
public void setInnerQuerySqlGenerator(
AnnotateInnerQuerySqlGenerator innerQuerySqlGenerator)
{
this.innerQuerySqlGenerator = innerQuerySqlGenerator;
}
/**
* @return the islandsPolicy
*/
public IslandsPolicy getIslandsPolicy()
{
return islandsPolicy;
}
/**
* @param islandsPolicy the islandsPolicy to set
*/
public void setIslandsPolicy(IslandsPolicy islandsPolicy)
{
this.islandsPolicy = islandsPolicy;
}
}