/*******************************************************************************
* Copyright (c) 2004, 2007 IBM Corporation and Cambridge Semantics Incorporated.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* File: $Source: /cvsroot/slrp/glitter/com.ibm.adtech.glitter/src/com/ibm/adtech/glitter/query/Projection.java,v $
* Created by: Lee Feigenbaum (<a href="mailto:feigenbl@us.ibm.com">feigenbl@us.ibm.com</a>)
* Created on: 10/23/06
* Revision: $Id: Projection.java 164 2007-07-31 14:11:09Z mroy $
*
* Contributors: IBM Corporation - initial API and implementation
* Cambridge Semantics Incorporated - Fork to Anzo
*******************************************************************************/
package org.openanzo.glitter.query;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import org.apache.commons.collections15.CollectionUtils;
import org.apache.commons.collections15.Transformer;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.builder.HashCodeBuilder;
import org.openanzo.analysis.RequestAnalysis;
import org.openanzo.exceptions.ExceptionConstants;
import org.openanzo.exceptions.LogUtils;
import org.openanzo.glitter.exception.ExpressionEvaluationException;
import org.openanzo.glitter.exception.GlitterRuntimeException;
import org.openanzo.glitter.exception.UnnamedProjectionException;
import org.openanzo.glitter.expression.AggregateFunction;
import org.openanzo.glitter.query.QueryController.QueryStringPrintOptions;
import org.openanzo.glitter.syntax.abstrakt.Expression;
import org.openanzo.glitter.syntax.abstrakt.FunctionCall;
import org.openanzo.glitter.syntax.abstrakt.SimpleExpression;
import org.openanzo.rdf.URI;
import org.openanzo.rdf.Value;
import org.openanzo.rdf.Variable;
import org.openanzo.rdf.utils.PrettyPrinter;
/**
* {@link Projection} handles the <tt>SELECT</tt> SPARQL query form, in which the result set is returned after being projected out to only the selected
* variables.
*
*
* @author lee &lt;lee@cambridgesemantics.com&gt;
*
*/
public class Projection implements QueryResultForm {
// The next two lists are parallel: projectedExpressions.get(i) is projected under the name projectedAs.get(i).
// Both lists are the instances handed to the constructor (no defensive copy is made).
private final List<Expression> projectedExpressions;
private final List<Variable> projectedAs;
// GROUP BY variables; never null, because the constructor substitutes an empty list for a null argument
private final List<Variable> groupByVars;
// maps every output variable to the expression it names; populated by initialize()
private final Map<Variable, Expression> aliasMap;
// true for SELECT *; within this class it is only consulted when printing the projection
private final boolean isSelectStar;
// true for SELECT DISTINCT
private final boolean distinct;
// true for SELECT REDUCED
private final boolean reduced;
// true when GROUP BY variables are present or a projected expression calls an aggregate function; set by initialize()
private boolean isAggregate;
/**
 * Creates a projection. The lists handed in are stored as-is (no copies are made), so the caller gives up ownership
 * of them.
 *
 * @param expressions
 *            The expressions being selected; should be filled in even if isSelectStar is true
 * @param variables
 *            The output variable names, parallel to <tt>expressions</tt>
 * @param groupByVars
 *            The GROUP BY variables; <tt>null</tt> is treated as an empty list. When non-empty, solutions are
 *            generated once per unique set of values identified by these variables. An aggregate function call in
 *            <tt>expressions</tt> also makes this an aggregate projection; with no GROUP BY variables all
 *            solutions then form one single group.
 * @param isSelectStar
 *            If <tt>true</tt>, this is a <tt>SELECT *</tt> projection; used for serialization.
 * @param distinct
 *            If <tt>true</tt>, return only distinct resultset rows
 * @param reduced
 *            If <tt>true</tt> (and <tt>distinct</tt> is <tt>false</tt>), indicates that the implementation is free
 *            to return anywhere between 1 and the standard (algebra-defined) cardinality of each solution.
 * @throws UnnamedProjectionException
 *             if a projected expression has no output variable (too few names, or a <tt>null</tt> name)
 */
public Projection(List<Expression> expressions, List<Variable> variables, List<Variable> groupByVars, boolean isSelectStar, boolean distinct, boolean reduced) throws UnnamedProjectionException {
    this.isSelectStar = isSelectStar;
    this.distinct = distinct;
    this.reduced = reduced;
    this.projectedExpressions = expressions;
    this.projectedAs = variables;
    if (groupByVars == null) {
        this.groupByVars = new ArrayList<Variable>();
    } else {
        this.groupByVars = groupByVars;
    }
    this.aliasMap = new HashMap<Variable, Expression>();
    // must run last: it reads the fields assigned above
    initialize();
}
/**
 * Derives the aggregate flag, validates that expressions and output names line up, and fills the alias map.
 *
 * @throws UnnamedProjectionException
 *             if there are fewer names than expressions, or one of the names is <tt>null</tt>
 */
private void initialize() throws UnnamedProjectionException {
    // GROUP BY always implies aggregation; otherwise a single aggregate call anywhere in the projected
    // expressions means all solutions are treated as one big group
    if (!this.groupByVars.isEmpty() || projectsAggregateFunction()) {
        this.isAggregate = true;
    }

    // every projected expression needs exactly one output name
    final int nameCount = this.projectedAs.size();
    final int expressionCount = this.projectedExpressions.size();
    if (nameCount < expressionCount) {
        // the first expression without a name is the offender
        throw new UnnamedProjectionException(this.projectedExpressions.get(nameCount));
    }
    if (nameCount > expressionCount) {
        throw new GlitterRuntimeException(ExceptionConstants.GLITTER.MORE_NAMES);
    }
    final int unnamed = this.projectedAs.indexOf(null);
    if (unnamed != -1) {
        throw new UnnamedProjectionException(this.projectedExpressions.get(unnamed));
    }

    for (int idx = 0; idx < this.projectedAs.size(); idx++) {
        this.aliasMap.put(this.projectedAs.get(idx), this.projectedExpressions.get(idx));
    }
}

/**
 * Walks the projected expressions, descending into function-call arguments, looking for an aggregate function.
 *
 * @return <tt>true</tt> as soon as a call to an {@link AggregateFunction} is found
 */
private boolean projectsAggregateFunction() {
    LinkedList<Expression> pending = new LinkedList<Expression>(this.projectedExpressions);
    while (!pending.isEmpty()) {
        Expression candidate = pending.removeFirst();
        if (!(candidate instanceof FunctionCall)) {
            continue;
        }
        FunctionCall call = (FunctionCall) candidate;
        if (call.getFunction() instanceof AggregateFunction) {
            return true;
        }
        // not an aggregate itself, but one of its arguments might be
        pending.addAll(call.getArguments());
    }
    return false;
}
/**
 * Reports the aggregate flag computed when this projection was constructed.
 *
 * @return <tt>true</tt> if GROUP BY variables were supplied or a projected expression calls an aggregate function
 */
public boolean isAggregateProjection() {
    return isAggregate;
}
/**
 * Reports whether this is a <tt>DISTINCT</tt> projection.
 *
 * @return <tt>true</tt> if duplicate result rows are eliminated
 */
public boolean isDistinct() {
    return distinct;
}
/**
 * Reports whether this is a <tt>REDUCED</tt> projection.
 *
 * @return <tt>true</tt> if the query was declared <tt>REDUCED</tt>
 */
public boolean isReduced() {
    return reduced;
}
/**
 * Lists the result columns of this projection.
 *
 * @return A fresh copy of the output variables, in projection order; changes to it do not affect this projection.
 */
public List<Variable> getResultVariables() {
    List<Variable> columns = new ArrayList<Variable>(projectedAs);
    return columns;
}
/**
 * Lists the expressions projected from this query.
 *
 * @return A fresh copy of the projected expressions, in projection order; changes to it do not affect this
 *         projection.
 */
public List<Expression> getProjectedExpressions() {
    List<Expression> copy = new ArrayList<Expression>(projectedExpressions);
    return copy;
}
/**
 * Collects the variables referenced by any projected expression. The set is rebuilt on every call.
 *
 * @return The variables that contribute to the projected results.
 */
public Set<Variable> getReferencedVariables() {
    Set<Variable> referenced = new HashSet<Variable>();
    for (Expression projected : projectedExpressions) {
        referenced.addAll(projected.getReferencedVariables());
    }
    return referenced;
}
/**
 * A projection reports no bindable variables.
 *
 * @return always an empty, immutable collection
 */
public Collection<Variable> getBindableVariables() {
    return Collections.<Variable> emptyList();
}
/**
 * Collects the URIs referenced by any projected expression. The collection is rebuilt on every call.
 *
 * @return The distinct URIs mentioned in the projected expressions.
 */
public Collection<URI> getReferencedURIs() {
    Set<URI> referenced = new HashSet<URI>();
    for (Expression projected : projectedExpressions) {
        referenced.addAll(projected.getReferencedURIs());
    }
    return referenced;
}
/**
 * Lists the GROUP BY variables.
 *
 * @return A fresh copy of the group by variables (empty if there are none); changes to it do not affect this
 *         projection.
 */
public List<Variable> getGroupByVariables() {
    List<Variable> copy = new ArrayList<Variable>(groupByVars);
    return copy;
}
/**
 * Produces the final result object for this query form by delegating to the result-set serialization.
 *
 * @param results
 *            The refined {@link SolutionSet}
 * @return The serialized results; for a projection this is the solution set itself
 */
public Object serializeResults(SolutionSet results) {
    SolutionSet serialized = serializeResultsAsResultSet(results);
    return serialized;
}
/**
 * Identity serialization: a {@link Projection} hands back the solutions it was given.
 *
 * @param solutions
 *            The refined {@link SolutionSet}
 * @return The very same {@link SolutionSet} instance
 */
private SolutionSet serializeResultsAsResultSet(SolutionSet solutions) {
    return solutions;
}
/**
 * Evaluates the projected expressions against the query solutions, before any ordering is applied.
 * <p>
 * For an aggregate projection the solutions are first partitioned into groups and one solution is produced per
 * group. Otherwise one solution is produced per input solution, consisting of a copy of the input bindings plus the
 * projected values. When analysis debug logging is enabled, the projection and timing information are logged.
 *
 * @param results
 *            The solutions to project
 * @return A new list holding the projected solutions
 */
public SolutionSet refineSolutionsBeforeOrdering(SolutionSet results) {
    boolean analysisEnabled = RequestAnalysis.getAnalysisLogger().isDebugEnabled();
    long start = 0;
    if (analysisEnabled) {
        start = System.currentTimeMillis();
        StringBuilder description = new StringBuilder();
        this.prettyPrint(description);
        RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_Projection_refiningSolutions] [{}] {}", description.toString(), results.size());
    }

    SolutionList projection;
    if (isAggregateProjection()) {
        projection = projectGroups(groupSolutions(results));
    } else {
        projection = projectSolutions(results);
    }

    if (analysisEnabled) {
        // read the clock once so that both log lines report the same duration
        long elapsed = System.currentTimeMillis() - start;
        RequestAnalysis.getAnalysisLogger().debug(LogUtils.GLITTER_MARKER, "[glitter_Projection_evaluatedAndProjectedSolutions] {}:{}", projection.size(), elapsed);
        RequestAnalysis.getAnalysisLogger().debug(LogUtils.TIMING_MARKER, "glitter_Projection_evaluatedAndProjectedSolutions,{},{}", elapsed, projection.size());
    }
    return projection;
}

/**
 * Partitions the solutions into groups keyed by their GROUP BY values. Without GROUP BY variables all solutions
 * form a single group. Solutions for which the group key cannot be evaluated are left out.
 */
private Map<GroupKey, SolutionSet> groupSolutions(SolutionSet results) {
    Map<GroupKey, SolutionSet> groups = new HashMap<GroupKey, SolutionSet>();
    if (this.groupByVars.isEmpty()) {
        groups.put(new GroupKey(), results);
        return groups;
    }
    for (PatternSolution solution : results) {
        GroupKey key;
        try {
            key = new GroupKey(solution);
        } catch (ExpressionEvaluationException eee) {
            // a grouping expression failed for this solution, so it belongs to no group
            continue;
        }
        SolutionSet members = groups.get(key);
        if (members == null) {
            members = new SolutionList();
            groups.put(key, members);
        }
        members.add(solution);
    }
    return groups;
}

/**
 * Produces one solution per group by evaluating every projected expression against the group. An expression that
 * fails to evaluate simply leaves its variable unbound.
 */
private SolutionList projectGroups(Map<GroupKey, SolutionSet> groups) {
    SolutionList projection = new SolutionList();
    for (Entry<GroupKey, SolutionSet> group : groups.entrySet()) {
        SolutionSet members = group.getValue();
        PatternSolutionImpl aggregate = group.getKey().aggregateSolution;
        // the first member stands in for the group when non-aggregate expressions are evaluated;
        // an empty group has no representative
        PatternSolution representative = members.size() > 0 ? members.get(0) : null;
        for (int i = 0; i < this.projectedExpressions.size(); i++) {
            Expression e = this.projectedExpressions.get(i);
            Variable var = this.projectedAs.get(i);
            Value val;
            try {
                val = e.evaluate(representative, members);
            } catch (ExpressionEvaluationException eee) {
                val = null;
            }
            if (val != null)
                aggregate.setBinding(var, val);
        }
        // @@ if we add a HAVING clause, this is where those expressions are checked to be true
        // before including this aggregate solution
        projection.add(aggregate);
    }
    return projection;
}

/**
 * Produces one solution per input solution: a copy of the input bindings extended with the value of every projected
 * expression. A solution is dropped entirely if any projected expression fails to evaluate against it.
 */
private SolutionList projectSolutions(SolutionSet results) {
    SolutionList projection = new SolutionList();
    for (PatternSolution solution : results) {
        PatternSolutionImpl projected = new PatternSolutionImpl(solution);
        boolean keepSolution = true;
        for (int i = 0; i < this.projectedExpressions.size(); i++) {
            Expression e = this.projectedExpressions.get(i);
            Variable var = this.projectedAs.get(i);
            Value val;
            try {
                val = e.evaluate(solution, null);
            } catch (ExpressionEvaluationException eee) {
                keepSolution = false;
                break;
            }
            if (val != null)
                projected.setBinding(var, val);
        }
        if (keepSolution)
            projection.add(projected);
    }
    return projection;
}
/**
 * Restricts every solution to the projected variables and, for a <tt>DISTINCT</tt> projection, removes duplicate
 * rows. The incoming order of the solutions is preserved by the restriction step.
 *
 * @param solutions
 *            The (possibly ordered) solutions to restrict
 * @param sortedByConditions
 *            The ordering conditions the solutions were sorted by; passed on to the duplicate elimination
 * @return A new list holding the restricted (and possibly de-duplicated) solutions
 */
public SolutionSet refineSolutionsAfterOrdering(SolutionSet solutions, List<OrderingCondition> sortedByConditions) {
    SolutionList projection = new SolutionList();
    for (PatternSolution solution : solutions) {
        PatternSolutionImpl restricted = new PatternSolutionImpl();
        for (Variable v : this.projectedAs) {
            Value val = solution.getBinding(v);
            if (val != null)
                restricted.setBinding(v, val);
        }
        // Comparing the binding count against the number of projected variables is not enough: a solution can
        // have that many bindings while one of them belongs to a non-projected variable (and a projected one is
        // unbound). The original solution is therefore only reused when every one of its bindings survived the
        // restriction. NOTE(review): this assumes size() reports the number of bound variables - confirm.
        if (restricted.size() == solution.size()) {
            projection.add(solution);
        } else {
            projection.add(restricted);
        }
    }
    if (this.distinct) {
        // the solutions as we were given them may already be ordered - duplicate elimination has to
        // preserve that order
        projection = projectDistinctSolutions(projection, this.projectedExpressions, sortedByConditions);
    }
    return projection;
}
/**
 * Removes duplicate solutions.
 * <p>
 * Three strategies are used, from cheapest to most expensive:
 * <ul>
 * <li>If the solutions were sorted by conditions that cover <em>every</em> projected expression before any other
 * condition, equal solutions are adjacent and one pass suffices.</li>
 * <li>If no ordering has to be preserved (<tt>sortedByConditions</tt> is <tt>null</tt>), the solutions are sorted
 * first and then de-duplicated in one pass; the result is in sorted order.</li>
 * <li>Otherwise every solution is compared against all later ones; of a set of duplicates the last occurrence is
 * kept.</li>
 * </ul>
 *
 * @param projection
 *            The solutions to de-duplicate
 * @param projectedExpressions
 *            The expressions that were projected; may be <tt>null</tt>
 * @param sortedByConditions
 *            The conditions the solutions are already sorted by; <tt>null</tt> if no order needs to be preserved
 * @return distinct solutions list
 */
public static SolutionList projectDistinctSolutions(SolutionSet projection, List<Expression> projectedExpressions, List<OrderingCondition> sortedByConditions) {
    SolutionList distinctProjection = new SolutionList();
    PatternSolution[] solutions = projection.toArray(new PatternSolution[0]);
    if (solutions.length == 1) {
        distinctProjection.add(solutions[0]);
        return distinctProjection;
    }

    boolean solutionsAreSorted = false;
    if (projectedExpressions != null && sortedByConditions != null) {
        // we only care that the solutions are sorted if the sorting conditions lead to a total order
        // on the projected solutions - this is the case if all the projected expressions are involved
        // in the sorting, and no other sorting condition pre-empts them.
        // Track WHICH projected expressions are covered rather than counting matching conditions: a
        // condition that is repeated (ORDER BY ?a ?a) must not be mistaken for covering two expressions.
        boolean[] covered = new boolean[projectedExpressions.size()];
        for (OrderingCondition oc : sortedByConditions) {
            Object condition = oc.getCondition();
            boolean matched = false;
            for (int i = 0; i < covered.length; i++) {
                Object projected = projectedExpressions.get(i);
                if (condition == null ? projected == null : condition.equals(projected)) {
                    covered[i] = true;
                    matched = true;
                }
            }
            if (!matched)
                break;
        }
        solutionsAreSorted = true;
        for (int i = 0; i < covered.length; i++) {
            if (!covered[i]) {
                solutionsAreSorted = false;
                break;
            }
        }
    }

    // If you sort the solutions then you can shortcut a lot of comparisons:
    // a.) you only compare up until the other solution compares greater than you
    // But we can only do this if we don't need to maintain the order of a pre-projection sort!
    if (!solutionsAreSorted && sortedByConditions == null) {
        Arrays.sort(solutions, 0, solutions.length);
        solutionsAreSorted = true;
    }

    if (solutionsAreSorted) {
        // equal solutions are adjacent, so comparing against the last kept solution is enough
        PatternSolution current = null;
        for (int i = 0; i < solutions.length; i++) {
            if (current == null || !current.equals(solutions[i])) {
                distinctProjection.add(solutions[i]);
                current = solutions[i];
            }
        }
    } else {
        // this is the slow (n^2) way of finding dupes; the array still holds the original order here
        for (int i = 0; i < solutions.length; i++) {
            boolean dupeLater = false;
            for (int j = i + 1; j < solutions.length; j++) {
                if (PatternSolutionImpl.containMatchingBindings(solutions[j], solutions[i])) {
                    dupeLater = true;
                    break;
                }
            }
            if (!dupeLater)
                distinctProjection.add(solutions[i]);
        }
    }
    return distinctProjection;
}
/**
 * Renders this projection as a <tt>SELECT</tt> clause. Plain variables are printed as-is, any other expression as
 * <tt>(expression AS variable)</tt>.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("SELECT");
    if (this.distinct) {
        text.append(" DISTINCT");
    }
    if (this.reduced) {
        text.append(" REDUCED");
    }
    if (this.isSelectStar) {
        text.append(" *");
        return text.toString();
    }
    for (int idx = 0; idx < this.projectedExpressions.size(); idx++) {
        Expression expression = this.projectedExpressions.get(idx);
        Variable name = this.projectedAs.get(idx);
        if (expressionWrapsVariable(expression, name)) {
            text.append(" ").append(name);
        } else {
            text.append(" (").append(expression).append(" AS ").append(name).append(")");
        }
    }
    return text.toString();
}
/**
 * Tells whether an expression is nothing more than the given variable, i.e. needs no <tt>AS</tt> alias when
 * printed.
 *
 * @param e
 *            The projected expression
 * @param v
 *            The variable it is projected as
 * @return <tt>true</tt> if <tt>e</tt> is a {@link SimpleExpression} whose term equals <tt>v</tt>
 */
private boolean expressionWrapsVariable(Expression e, Variable v) {
    if (!(e instanceof SimpleExpression)) {
        return false;
    }
    SimpleExpression simple = (SimpleExpression) e;
    return simple.getTerm().equals(v);
}
/**
 * Appends a functional-style description of this projection, of the form
 * <tt>Projection([DISTINCT, ][REDUCED, ]columns[, GroupBy(...)])</tt>.
 *
 * @param buffer
 *            The buffer to append to
 */
public void prettyPrint(StringBuilder buffer) {
    buffer.append("Projection(");
    if (isDistinct()) {
        buffer.append("DISTINCT, ");
    }
    if (isReduced()) {
        buffer.append("REDUCED, ");
    }
    if (!this.isSelectStar) {
        for (int idx = 0; idx < this.projectedExpressions.size(); idx++) {
            Expression expression = this.projectedExpressions.get(idx);
            Variable name = this.projectedAs.get(idx);
            if (idx != 0) {
                buffer.append(", ");
            }
            if (!expressionWrapsVariable(expression, name)) {
                buffer.append("ProjectAs(");
                buffer.append(PrettyPrinter.print(expression));
                buffer.append(", ");
                buffer.append(PrettyPrinter.print(name));
                buffer.append(")");
            } else {
                buffer.append(PrettyPrinter.print(name));
            }
        }
    } else {
        buffer.append("*");
    }
    boolean hasGroupBy = groupByVars != null && !groupByVars.isEmpty();
    if (hasGroupBy) {
        // renders each GROUP BY variable with the pretty printer
        Transformer<Variable, String> printer = new Transformer<Variable, String>() {
            public String transform(Variable input) {
                return PrettyPrinter.print(input);
            }
        };
        buffer.append(", ");
        buffer.append("GroupBy(");
        buffer.append(StringUtils.join(CollectionUtils.collect(this.groupByVars, printer), ", "));
        buffer.append(")");
    }
    buffer.append(")");
}
/**
 * Appends the <tt>SELECT</tt> clause of the query string for this projection.
 *
 * @param printFlags
 *            Options controlling how components are printed
 * @param indentLevel
 *            The current indentation level, handed on to nested expressions
 * @param uri2prefix
 *            Map handed on to the component printers (URI to prefix, judging by its name)
 * @param s
 *            The buffer to append to
 */
public void prettyPrintQueryPart(EnumSet<QueryStringPrintOptions> printFlags, int indentLevel, Map<String, String> uri2prefix, StringBuilder s) {
    s.append("SELECT");
    if (isDistinct()) {
        s.append(" DISTINCT");
    }
    if (isReduced()) {
        s.append(" REDUCED");
    }
    if (this.isSelectStar) {
        s.append(" *");
        return;
    }
    for (int idx = 0; idx < this.projectedExpressions.size(); idx++) {
        Expression expression = this.projectedExpressions.get(idx);
        Variable name = this.projectedAs.get(idx);
        s.append(" ");
        if (expressionWrapsVariable(expression, name)) {
            // a plain variable needs no alias
            QueryController.printTriplePatternComponent(name, printFlags, uri2prefix, s);
            continue;
        }
        s.append("(");
        expression.prettyPrintQueryPart(printFlags, indentLevel, uri2prefix, s);
        s.append(" AS ");
        QueryController.printTriplePatternComponent(name, printFlags, uri2prefix, s);
        s.append(")");
    }
}
/**
 * Appends the <tt>GROUP BY</tt> clause of the query string; appends nothing when there are no GROUP BY variables.
 *
 * @param printFlags
 *            Options controlling how components are printed
 * @param indentLevel
 *            The current indentation level (not used by this method)
 * @param uri2prefix
 *            Map handed on to the component printer (URI to prefix, judging by its name)
 * @param s
 *            The buffer to append to
 */
protected void prettyPrintGroupByQueryPart(EnumSet<QueryStringPrintOptions> printFlags, int indentLevel, Map<String, String> uri2prefix, StringBuilder s) {
    if (groupByVars == null || groupByVars.isEmpty()) {
        return;
    }
    s.append("GROUP BY");
    for (Variable groupVar : this.groupByVars) {
        s.append(" ");
        QueryController.printTriplePatternComponent(groupVar, printFlags, uri2prefix, s);
    }
}
/**
 * Hash key identifying one group of solutions of an aggregate projection. Two keys are equal when they hold equal
 * values for every GROUP BY variable. The key also carries the single aggregate solution produced for its group.
 * <p>
 * The grouping values are evaluated once, at construction, and both {@link #hashCode()} and
 * {@link #equals(Object)} work on those same values. This matters when grouping by an alias: the alias is not bound
 * in the raw solution, so the values have to come from evaluating the aliased expression.
 */
private class GroupKey {
    // true for the key of the single group that holds all solutions (no GROUP BY variables)
    private final boolean groupsAllSolutions;
    // the grouping values, parallel to groupByVars; an entry is null when the variable has no value
    private final List<Value> keyValues;
    private final int hashCode;
    // each group has a single (aggregate/grouped) solution, which is maintained in the GroupKey
    private final PatternSolutionImpl aggregateSolution = new PatternSolutionImpl();

    /**
     * Creates the key for the group of all solutions.
     */
    public GroupKey() {
        this.groupsAllSolutions = true;
        this.keyValues = Collections.emptyList();
        this.hashCode = 0;
    }

    /**
     * Creates the key of the group the given solution belongs to. Evaluating an aliased grouping expression may
     * fail, in which case the exception raised by the expression propagates to the caller.
     *
     * @param bindings
     *            The solution whose GROUP BY values define the group
     */
    protected GroupKey(final PatternSolution bindings) {
        this.groupsAllSolutions = false;
        List<Value> values = new ArrayList<Value>(Projection.this.groupByVars.size());
        HashCodeBuilder builder = new HashCodeBuilder();
        for (Variable var : Projection.this.groupByVars) {
            // check if we're grouping by an alias, in which case we actually need to evaluate
            // the associated expression to find the grouping value here
            Expression e = Projection.this.aliasMap.get(var);
            Value val = null;
            if (e != null)
                val = e.evaluate(bindings, null);
            else
                val = bindings.getBinding(var);
            values.add(val);
            if (val != null) {
                builder.append(val.hashCode());
            }
        }
        this.keyValues = values;
        this.hashCode = builder.toHashCode();
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (!(obj instanceof GroupKey))
            return false;
        GroupKey other = (GroupKey) obj;
        // the all-solutions key only equals another all-solutions key; this is decided by an explicit flag
        // rather than by the hash value, because a regular key may legitimately hash to 0
        if (this.groupsAllSolutions || other.groupsAllSolutions)
            return this.groupsAllSolutions && other.groupsAllSolutions;
        // element-wise comparison; two missing (null) values count as equal
        return this.keyValues.equals(other.keyValues);
    }

    @Override
    public int hashCode() {
        return hashCode;
    }
}
}