/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on April 15, 2012
*/
package com.bigdata.rdf.sparql.ast;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IVariable;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVCache;
import com.bigdata.rdf.internal.VTE;
import com.bigdata.rdf.internal.impl.TermId;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* Class populates an {@link ISolutionSetStats} object from a stream of
* solutions. The summary is available from {@link #getStats()} once the source
* solutions have been fully consumed.
*
* TODO Compute the distinct values for each variable for which we have a
* binding, or at least the #of such values and offer a method to obtain the
* distinct values which could cache them. We can do the exact #of distinct
* values trivially for small solution sets. For very large solution sets this
* is more expensive and approximate techniques for obtaining the distinct set
* only when it is likely to be small would be appropriate.
* <p>
* Note that this must correctly handle {@link TermId#mockIV(VTE)}s.
* <p>
* Or compute a bloom filter for a statistical summary.
*
* @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/490"> Mock IV /
* TermId hashCode()/equals() problems</a>
*/
public class SolutionSetStatserator implements
        ICloseableIterator<IBindingSet[]> {

    /**
     * The source iterator. Each chunk of solutions drawn from it is passed
     * through {@link #filter(IBindingSet[])} before being handed to the
     * caller.
     */
    private final Iterator<IBindingSet[]> src;

    /**
     * <code>true</code> until {@link #close()} has been invoked.
     */
    private boolean open = true;

    /**
     * The compiled statistics. This is <code>null</code> until
     * {@link #hasNext()} has observed that the source iterator is exhausted.
     */
    private ISolutionSetStats compiledStats;

    /**
     * The #of solutions.
     */
    protected long nsolutions = 0;

    /**
     * The set of variables observed across all solutions.
     */
    protected final Set<IVariable<?>> usedVars = new HashSet<IVariable<?>>();

    /**
     * The set of variables which are NOT bound in at least one solution (e.g.,
     * MAYBE bound semantics).
     */
    protected final Set<IVariable<?>> notAlwaysBound = new HashSet<IVariable<?>>();

    /**
     * The set of variables whose {@link IVCache} association is NOT set in at
     * least one solution in which the variable is bound.
     */
    protected final Set<IVariable<?>> notMaterialized = new HashSet<IVariable<?>>();

    /**
     * A map from the variable to the first bound value for that variable. This
     * is used to identify variables which are effective constants (they are
     * bound in the first solution and in each solution thereafter and always to
     * the same value).
     */
    protected final Map<IVariable<?>, IConstant<?>> firstBoundValue = new HashMap<IVariable<?>, IConstant<?>>();

    /**
     * The set of variables which have been proven to not be effective
     * constants. In order to be an effective constant, the variable must be
     * bound in all solutions and it must be bound to the same value in each
     * solution.
     */
    protected final Set<IVariable<?>> notConstant = new HashSet<IVariable<?>>();

    /**
     * Scratch set holding the variables which are bound in the solution
     * currently being examined by {@link #filter(IBindingSet[])}. It is
     * cleared at the start of each solution.
     */
    protected final Set<IVariable<?>> currentVars = new HashSet<IVariable<?>>();

    /**
     * Scratch set holding the variables which have been observed in some
     * solution but which are not bound in the solution currently being
     * examined by {@link #filter(IBindingSet[])}. It is cleared for each
     * solution.
     */
    protected final Set<IVariable<?>> notBoundThisSolution = new HashSet<IVariable<?>>();

    /**
     * Convenience method. The binding sets are wrapped as an iterator, fully
     * drained through a {@link SolutionSetStatserator} and the resulting
     * statistics are returned.
     *
     * @param bindingSets
     *            The source solutions.
     *
     * @return The computed statistics.
     */
    static public ISolutionSetStats get(final IBindingSet[] bindingSets) {

        final SolutionSetStatserator itr = new SolutionSetStatserator(
                BOpUtility.asIterator(bindingSets));

        try {

            // Drain the source so the statistics get compiled.
            while (itr.hasNext()) {

                itr.next();

            }

            return itr.getStats();

        } finally {

            itr.close();

        }

    }

    /**
     * @param src
     *            The source iterator visiting chunks of solutions.
     *
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     */
    public SolutionSetStatserator(final Iterator<IBindingSet[]> src) {

        if (src == null)
            throw new IllegalArgumentException("src is null");

        this.src = src;

    }

    /**
     * Compute incremental statistics from an observed chunk of solutions.
     *
     * @param a
     *            The chunk of solutions.
     *
     * @throws IllegalArgumentException
     *             if any element of the chunk is <code>null</code>.
     */
    protected void filter(final IBindingSet[] a) {

        for (IBindingSet bset : a) {

            if (bset == null)
                throw new IllegalArgumentException(
                        "null solution in chunk");

            nsolutions++;

            // Collect all variables used in this solution.
            currentVars.clear();
            {

                @SuppressWarnings("rawtypes")
                final Iterator<IVariable> vitr = bset.vars();

                while (vitr.hasNext()) {

                    final IVariable<?> v = vitr.next();

                    if (usedVars.add(v) && nsolutions > 1) {

                        /*
                         * This variable was not used in solutions prior to this
                         * one.
                         */
                        notAlwaysBound.add(v);

                    }

                    currentVars.add(v);

                    // Look for the bound value for this variable.
                    @SuppressWarnings("unchecked")
                    final IConstant<IV<?, ?>> c = bset.get(v);

                    if (nsolutions == 1) {

                        /*
                         * Record the binding for each variable in the first
                         * solution. This is used to identify variables which
                         * are effective constants (they are bound to the same
                         * value in all solutions).
                         */
                        firstBoundValue.put(v, c);

                    } else {

                        /*
                         * Look at the first bound value for this variable. If
                         * it was not bound or if the variable was not bound to
                         * the same constant, then this variable is not an
                         * effective constant for this set of solutions.
                         */
                        if (!notConstant.contains(v)) {

                            final IConstant<?> c2 = firstBoundValue.get(v);

                            if (c2 == null || !c2.equals(c)) {

                                // Proven not a constant.
                                notConstant.add(v);

                            }

                        }

                    }

                    /*
                     * Check for a variable which has a bound value but the
                     * bound value is not materialized.
                     */
                    if (!notMaterialized.contains(v)) {

                        if (c != null) {

                            /*
                             * Note: ClassCastException if bound value is not an
                             * IV.
                             */
                            final IV<?, ?> iv = c.get();

                            if (!iv.hasValue()) {

                                notMaterialized.add(v);

                            }

                        }

                    }

                }

            }

            /*
             * Figure out which observed variables were not bound in this
             * solution and add them to the set of variables which are not
             * always bound. We also do this for possible constants.
             *
             * Note: Every variable placed into [currentVars] above was also
             * added to [usedVars], so [currentVars] is a subset of [usedVars].
             * When the two sets have the same size they are the same set and
             * there is nothing to do, which avoids copying [usedVars] for
             * each solution in the common case where every variable is bound.
             */
            notBoundThisSolution.clear();

            if (currentVars.size() != usedVars.size()) {

                notBoundThisSolution.addAll(usedVars);

                notBoundThisSolution.removeAll(currentVars);

                notAlwaysBound.addAll(notBoundThisSolution);

                notConstant.addAll(notBoundThisSolution);

            }

        }

    }

    /**
     * Compile the statistics collected from the observed solutions.
     *
     * @return The compiled statistics.
     */
    protected ISolutionSetStats compile() {

        // Figure out which variables were bound in every solution.
        final Set<IVariable<?>> alwaysBound = new HashSet<IVariable<?>>(
                usedVars);

        alwaysBound.removeAll(notAlwaysBound);

        /*
         * Figure out which variables were always materialized when they were
         * bound.
         */
        final Set<IVariable<?>> materialized = new HashSet<IVariable<?>>(
                usedVars);

        materialized.removeAll(notMaterialized);

        /*
         * Figure out which variables were effective constants. We start with
         * the bindings for the first solution and then remove any entry where
         * we have proven that the variable is not always bound to the same
         * constant.
         */
        final Map<IVariable<?>, IConstant<?>> constants = new HashMap<IVariable<?>, IConstant<?>>(
                firstBoundValue);

        for (IVariable<?> v : notConstant) {

            constants.remove(v);

        }

        /*
         * Expose immutable versions of these collections.
         *
         * Note: [usedVars] and [notAlwaysBound] are passed by reference, not
         * copied. Any immutability is presumably imposed by
         * CompiledSolutionSetStats (not verifiable from this file).
         */
        return new CompiledSolutionSetStats(//
                nsolutions,//
                usedVars,//
                alwaysBound,//
                notAlwaysBound,//
                materialized,//
                constants//
        );

    }

    /**
     * Return the compiled statistics.
     *
     * @return The compiled statistics.
     *
     * @throws IllegalStateException
     *             if the statistics have not yet been compiled (they are
     *             automatically compiled when the source iterator has been
     *             fully consumed). This includes the case where this iterator
     *             was closed before the source was exhausted.
     */
    public ISolutionSetStats getStats() {

        if (compiledStats == null) {

            throw new IllegalStateException(
                    "Statistics are not available until the source iterator has been fully consumed");

        }

        return compiledStats;

    }

    /*
     * Closeable iterator pattern.
     */

    /**
     * Close this iterator. If the source iterator is an
     * {@link ICloseableIterator} then it is closed as well. Only the first
     * invocation has any effect.
     * <p>
     * Note: The statistics are NOT compiled by this method. If the iterator is
     * closed before {@link #hasNext()} has observed that the source is
     * exhausted, then {@link #getStats()} will throw an
     * {@link IllegalStateException}.
     */
    @Override
    public void close() {

        if (open) {

            open = false;

            if (src instanceof ICloseableIterator) {

                ((ICloseableIterator<?>) src).close();

            }

        }

    }

    /**
     * Return <code>true</code> iff this iterator is open and the source has
     * another chunk of solutions. The first time the source is observed to be
     * exhausted while this iterator is still open, this iterator is closed
     * and the statistics are compiled.
     */
    @Override
    public boolean hasNext() {

        if (open && !src.hasNext()) {

            // Close this iterator.
            close();

            // Compile the statistics and expose via getStats()
            compiledStats = compile();

            return false;

        }

        return open;

    }

    /**
     * Return the next chunk of solutions from the source after folding it
     * into the incremental statistics. The chunk itself is returned
     * unchanged.
     *
     * @throws NoSuchElementException
     *             if {@link #hasNext()} reports <code>false</code>.
     */
    @Override
    public IBindingSet[] next() {

        if (!hasNext())
            throw new NoSuchElementException();

        final IBindingSet[] a = src.next();

        filter(a);

        return a;

    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException
     *             always.
     */
    @Override
    public void remove() {

        throw new UnsupportedOperationException();

    }

}