/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Apr 26, 2013
*/
package com.bigdata.bop.join;
import java.util.Arrays;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.log4j.Logger;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.bop.engine.BOpStats;
import com.bigdata.relation.accesspath.IBuffer;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* Utility class for imposing a DISTINCT filter on {@link IBindingSet}. This
* class is thread-safe. It is based on a {@link ConcurrentHashMap}.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
*/
public class JVMDistinctFilter implements IDistinctFilter {
private static final Logger log = Logger.getLogger(JVMDistinctFilter.class);
/**
* Wrapper used for the as bound solutions in the {@link ConcurrentHashMap}.
*/
private static class Solution {
private final int hash;
private final IConstant<?>[] vals;
public Solution(final IConstant<?>[] vals) {
this.vals = vals;
this.hash = java.util.Arrays.hashCode(vals);
}
@Override
public int hashCode() {
return hash;
}
@Override
public boolean equals(final Object o) {
if (this == o)
return true;
if (!(o instanceof Solution)) {
return false;
}
final Solution t = (Solution) o;
if (vals.length != t.vals.length)
return false;
for (int i = 0; i < vals.length; i++) {
// @todo verify that this allows for nulls with a unit test.
if (vals[i] == t.vals[i])
continue;
if (vals[i] == null)
return false;
if (!vals[i].equals(t.vals[i]))
return false;
}
return true;
}
}
/**
* The variables used to impose a distinct constraint.
*/
private final IVariable<?>[] vars;
/**
* A concurrent map whose keys are the bindings on the specified variables
* (the keys and the values are the same since the map implementation does
* not allow <code>null</code> values).
* <p>
* Note: The map is shared state and can not be discarded or cleared until
* the last invocation!!!
*/
private final ConcurrentHashMap<Solution, Solution> map;
/**
*
* @param vars
* The set of variables on which the DISTINCT filter will be
* imposed. Only these variables will be present in the
* "accepted" solutions. Any variable bindings not specified in
* this array will be dropped.
* @param initialCapacity
* @param loadFactor
* @param concurrencyLevel
*/
public JVMDistinctFilter(final IVariable<?>[] vars,
final int initialCapacity, final float loadFactor,
final int concurrencyLevel) {
if (vars == null)
throw new IllegalArgumentException();
this.vars = vars;
this.map = new ConcurrentHashMap<Solution, Solution>(initialCapacity,
loadFactor, concurrencyLevel);
}
/* (non-Javadoc)
* @see com.bigdata.bop.join.IDistinctFilter#clear()
*/
@Override
public void release() {
map.clear();
}
@Override
public IVariable<?>[] getProjectedVars() {
return vars;
}
/**
* If the bindings are distinct for the configured variables then return
* those bindings.
*
* @param bset
* The binding set to be filtered.
*
* @return The distinct as bound values -or- <code>null</code> if the
* binding set duplicates a solution which was already accepted.
*/
private IConstant<?>[] _accept(final IBindingSet bset) {
final IConstant<?>[] r = new IConstant<?>[vars.length];
for (int i = 0; i < vars.length; i++) {
/*
* Note: This allows null's.
*
* @todo write a unit test when some variables are not bound.
*/
r[i] = bset.get(vars[i]);
}
final Solution s = new Solution(r);
if (log.isTraceEnabled())
log.trace("considering: " + Arrays.toString(r));
final boolean distinct = map.putIfAbsent(s, s) == null;
if (distinct && log.isDebugEnabled())
log.debug("accepted: " + Arrays.toString(r));
return distinct ? r : null;
}
/* (non-Javadoc)
* @see com.bigdata.bop.join.IDistinctFilter#accept(com.bigdata.bop.IBindingSet)
*/
@Override
public IBindingSet accept(final IBindingSet bset) {
final IConstant<?>[] vals = _accept(bset);
if (vals == null) {
/*
* This is a duplicate solution.
*/
return null;
}
/*
* This is a distinct solution. Copy only the variables used to select
* distinct solutions into a new binding set and add that to the set of
* [accepted] binding sets which will be emitted by this operator.
*/
final ListBindingSet tmp = new ListBindingSet();
for (int i = 0; i < vars.length; i++) {
if (vals[i] != null)
tmp.set(vars[i], vals[i]);
}
return tmp;
}
@Override
public long filterSolutions(final ICloseableIterator<IBindingSet[]> itr,
final BOpStats stats, final IBuffer<IBindingSet> sink) {
long n = 0L;
while (itr.hasNext()) {
final IBindingSet[] a = itr.next();
stats.chunksIn.increment();
stats.unitsIn.add(a.length);
for (IBindingSet bset : a) {
/*
* Test to see if this solution is distinct from those already
* seen.
*/
if ((bset = accept(bset)) == null) {
// Drop duplicate solution.
continue;
}
/*
* This is a distinct solution.
*/
sink.add(bset);
n++;
}
} // next chunk.
return n;
}
}