/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Feb 23, 2011
*/
package com.bigdata.bop.joinGraph.rto;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.log4j.Logger;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.engine.IChunkMessage;
import com.bigdata.rwstore.sector.IMemoryManager;
/**
* Base class for a sample taken from a vertex (access path) or edge (cutoff
* join).
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*
* TODO Large samples should be buffered on the {@link IMemoryManager}
* so they do not pose a burden on the heap. This will require us to
* manage the allocation contexts so we can release samples in a timely
* manner once they are no longer used and always release samples by
* the time the RTO is finished. [There is an additional twist if we
* have fully materialized some part of the join since we no longer
* need to evaluate that path segment. If the RTO can interleave query
* evaluation with exploration then we can take advantage of these
* materialized solutions.]
*/
public abstract class SampleBase {

    /*
     * Note: "transient" was removed from this declaration. The modifier is
     * only meaningful for instance fields during serialization; on a static
     * field it has no effect.
     */
    private static final Logger log = Logger
            .getLogger(SampleBase.class);

    /**
     * The total estimated cardinality of the underlying access path (for a
     * vertex) or the join path segment (for a cutoff join).
     *
     * TODO When using a non-perfect index, the estimated cardinality is only
     * part of the cost. The #of tuples scanned is also important. Even when
     * scanning and filtering in key order this could trigger random IOs unless
     * the file has index order (an IndexSegment file has index order but a
     * BTree on a journal does not).
     */
    public final long estCard;

    /**
     * The limit used to produce the {@link #getSample() sample}.
     */
    public final int limit;

    /**
     * Indicates whether the estimate is exact, an upper bound, or a lower
     * bound.
     *
     * TODO When the input to a cutoff join is {@link EstimateEnum#Exact}, we
     * could run the join against the sample rather than the disk by wrapping
     * the sample as an inline access path.
     *
     * TODO This field should be used to avoid needless re-computation of a join
     * whose exact solution is already known. We already do this within the
     * runtime optimizer. To go further than that we need to do the partial
     * evaluation of the join graph.
     */
    public final EstimateEnum estimateEnum;

    /**
     * Return <code>true</code> iff this sample is the fully materialized
     * solution for the vertex or join path segment.
     */
    public boolean isExact() {

        return estimateEnum == EstimateEnum.Exact;

    }

    /**
     * Return <code>true</code> iff this sample has cardinality underflow (the
     * sample is empty). Cardinality underflow occurs when the sampling process
     * was unable to find any solutions. Underflow is typically addressed by
     * increasing the sample size, but sometimes underflow indicates that an
     * access path (if it has filters) or a join may not have any solutions in
     * the data.
     */
    public boolean isUnderflow() {

        return estimateEnum == EstimateEnum.Underflow;

    }

    /**
     * The sampled solutions. Cleared (set to <code>null</code>) by
     * {@link #releaseSample()} once the sample is no longer needed.
     */
    private final AtomicReference<IBindingSet[]> sampleRef = new AtomicReference<IBindingSet[]>();

    /**
     * The sampled solution set.
     *
     * @return The sampled solution set -or- <code>null</code> if it has been
     *         released.
     *
     *         TODO Wrap up as an {@link IChunkMessage} so we can store this on
     *         the native heap?
     */
    public IBindingSet[] getSample() {

        return sampleRef.get();

    }

    /**
     * Release the sampled solution set. This is idempotent: only the first
     * call observes (and clears) a non-<code>null</code> sample.
     *
     * FIXME RTO : MEMORY MANAGER : release.
     */
    void releaseSample() {

        // Atomically clear; log only on the transition from non-null to null.
        if (sampleRef.getAndSet(null) != null && log.isTraceEnabled()) {

            log.trace("Released sample: " + this);

        }

    }

    /**
     *
     * @param estimatedCardinality
     *            The estimated cardinality (must be non-negative).
     * @param limit
     *            The cutoff limit used to make that cardinality estimate (must
     *            be positive).
     * @param estimateEnum
     *            Type safe enumeration indication various edge conditions which
     *            can arise when making a cardinality estimate (required).
     * @param sample
     *            The sample (required).
     *
     * @throws IllegalArgumentException
     *             if any argument is out of range or <code>null</code>.
     */
    public SampleBase(//
            final long estimatedCardinality,//
            final int limit,//
            final EstimateEnum estimateEnum,//
            final IBindingSet[] sample//
    ) {

        // Messages identify the offending argument to simplify debugging.
        if (estimatedCardinality < 0L)
            throw new IllegalArgumentException("estimatedCardinality="
                    + estimatedCardinality);

        if (limit <= 0)
            throw new IllegalArgumentException("limit=" + limit);

        if (estimateEnum == null)
            throw new IllegalArgumentException("estimateEnum");

        if (sample == null)
            throw new IllegalArgumentException("sample");

        this.estCard = estimatedCardinality;

        this.limit = limit;

        this.estimateEnum = estimateEnum;

        this.sampleRef.set(sample);

    }

    /**
     * Hook for extending {@link #toString()}.
     *
     * @param sb
     *            The buffer into which the implementation can write additional
     *            information.
     */
    protected void toString(final StringBuilder sb) {

        // NOP

    }

    @Override
    public String toString() {

        final StringBuilder sb = new StringBuilder();

        // Chained appends: avoid String concatenation inside append() calls,
        // which would otherwise build throwaway intermediate strings.
        sb.append(getClass().getSimpleName());
        sb.append("{estCard=").append(estCard);
        sb.append(",limit=").append(limit);
        sb.append(",estimateEnum=").append(estimateEnum);
        {
            final IBindingSet[] tmp = sampleRef.get();
            sb.append(",sampleSize=");
            if (tmp != null)
                sb.append(tmp.length);
            else
                sb.append("N/A");
        }

        toString(sb); // allow extension by derived classes.

        sb.append("}");

        return sb.toString();

    }

}