package com.bigdata.bop.join;

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;

import junit.framework.TestCase2;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpEvaluationContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.NamedSolutionSetRefUtility;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.Var;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.bop.controller.INamedSolutionSetRef;
import com.bigdata.bop.engine.AbstractQueryEngineTestCase;
import com.bigdata.bop.engine.IRunningQuery;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.bop.join.AbstractHashJoinUtilityTestCase.JoinSetup;
import com.bigdata.bop.solutions.SolutionSetStream;
import com.bigdata.htree.HTree;
import com.bigdata.journal.BufferMode;
import com.bigdata.journal.Journal;
import com.bigdata.rdf.internal.IV;
import com.bigdata.stream.Stream.StreamIndexMetadata;

import cutthecrap.utils.striterators.ICloseableIterator;
import cutthecrap.utils.striterators.IStriterator;
import cutthecrap.utils.striterators.Resolver;
import cutthecrap.utils.striterators.Striterator;

/**
 * Abstract test suite for {@link HashIndexOp} implementations.
 * <p>
 * Concrete subclasses supply the operator implementations under test through
 * {@link #newHashIndexOp(String, BOp[], NV...)} and
 * {@link #newSolutionSetHashJoinOp(BOp[], NV...)}. Each test runs against a
 * fresh {@link Journal} and {@link QueryEngine} created in {@link #setUp()}
 * and released in {@link #tearDown()}.
 * 
 * TODO Test variant with non-empty join vars.
 * 
 * TODO Test variant with SELECT projects only the selected variables.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
abstract public class HashIndexOpTestCase extends TestCase2 {

    public HashIndexOpTestCase() {
    }

    public HashIndexOpTestCase(String name) {
        super(name);
    }

    /**
     * Returns the inherited properties overlaid with
     * {@link BufferMode#MemStore} as the journal buffer mode.
     */
    @Override
    public Properties getProperties() {

        final Properties p = new Properties(super.getProperties());

        p.setProperty(Journal.Options.BUFFER_MODE,
                BufferMode.MemStore.toString());

        return p;

    }

    /** The backing journal for the current test (created in setUp). */
    private Journal jnl;

    /** The query engine for the current test (created in setUp). */
    protected QueryEngine queryEngine;

    /**
     * Creates the journal and starts a query engine on top of it.
     */
    @Override
    public void setUp() throws Exception {

        jnl = new Journal(getProperties());

        queryEngine = new QueryEngine(jnl);

        queryEngine.init();

    }

    /**
     * Shuts down the query engine and destroys the journal.
     * <p>
     * The journal is destroyed in a <code>finally</code> clause so that it is
     * still released when the query engine shutdown throws an exception.
     */
    @Override
    public void tearDown() throws Exception {

        try {

            if (queryEngine != null) {
                queryEngine.shutdownNow();
                queryEngine = null;
            }

        } finally {

            if (jnl != null) {
                jnl.destroy();
                jnl = null;
            }

        }

    }

    /**
     * Factory for the {@link HashIndexOp} implementations.
     * 
     * @param namespace
     *            The namespace of the lexicon relation (required by the ivCache
     *            for the {@link HTree} variants of the {@link HashIndexOp}).
     * @param args
     *            The operator arguments.
     * @param anns
     *            The operator annotations.
     */
    abstract protected HashIndexOp newHashIndexOp(final String namespace,
            final BOp[] args, final NV... anns);

    /**
     * Factory for the {@link SolutionSetHashJoinOp} implementation.
     * 
     * @param args
     *            The operator arguments.
     * @param anns
     *            The operator annotations.
     */
    abstract protected SolutionSetHashJoinOp newSolutionSetHashJoinOp(
            final BOp[] args, final NV... anns);

    /**
     * Combines the two arrays, appending the contents of the 2nd array to the
     * contents of the first array.
     * 
     * @param a
     *            The first array (may be <code>null</code>).
     * @param b
     *            The second array (may be <code>null</code>).
     * 
     * @return The other array when one argument is <code>null</code> (hence
     *         <code>null</code> when both are <code>null</code>); otherwise a
     *         new array having the component type of <i>a</i> and holding the
     *         elements of <i>a</i> followed by the elements of <i>b</i>.
     */
    @SuppressWarnings("unchecked")
    protected static <T> T[] concat(final T[] a, final T[] b) {

        // Note: when both are null this returns [b], which is null.
        if (a == null)
            return b;

        if (b == null)
            return a;

        final T[] c = (T[]) java.lang.reflect.Array.newInstance(a.getClass()
                .getComponentType(), a.length + b.length);

        System.arraycopy(a, 0, c, 0, a.length);

        System.arraycopy(b, 0, c, a.length, b.length);

        return c;

    }

    /**
     * Builds the two source solutions which are fed into the queries of this
     * test suite: <code>{x=leon}</code> and <code>{x=mary, y=john}</code> (in
     * that order).
     * 
     * @param setup
     *            Provides the constants bound by the solutions.
     */
    @SuppressWarnings("rawtypes")
    private static IBindingSet[] newSourceSolutions(final JoinSetup setup) {

        final IVariable<?> x = Var.var("x");
        final IVariable<?> y = Var.var("y");

        final IBindingSet first = new ListBindingSet();
        first.set(x, new Constant<IV>(setup.leon));

        final IBindingSet second = new ListBindingSet();
        second.set(x, new Constant<IV>(setup.mary));
        second.set(y, new Constant<IV>(setup.john));

        return new IBindingSet[] { first, second };

    }

    /**
     * Runs a {@link HashIndexOp} followed immediately by a
     * {@link SolutionSetHashJoinOp} against the same named solution set and
     * verifies that the source solutions are reported as the query result (in
     * any order).
     * 
     * @param joinType
     *            The value of the {@link HashIndexOp.Annotations#JOIN_TYPE}
     *            annotation for the {@link HashIndexOp}.
     */
    @SuppressWarnings("rawtypes")
    private void doHashIndexThenJoinTest(final JoinTypeEnum joinType)
            throws Exception {

        final JoinSetup setup = new JoinSetup(getName());

        final UUID queryId = UUID.randomUUID();

        final String solutionSetName = "set1";

        final IVariable[] joinVars = new IVariable[] {};

        // Note: typed local so the null is passed as the annotation value.
        final IVariable[] selectVars = null;

        final INamedSolutionSetRef namedSolutionSet = NamedSolutionSetRefUtility
                .newInstance(queryId, solutionSetName, joinVars);

        // Operator which builds the hash index.
        final HashIndexOp indexOp = newHashIndexOp(setup.namespace,
                BOp.NOARGS,
                new NV(BOp.Annotations.BOP_ID, 1),
                new NV(BOp.Annotations.EVALUATION_CONTEXT,
                        BOpEvaluationContext.CONTROLLER),
                new NV(PipelineOp.Annotations.MAX_PARALLEL, 1),
                new NV(PipelineOp.Annotations.LAST_PASS, true),
                new NV(HashIndexOp.Annotations.JOIN_TYPE, joinType),
                new NV(HashIndexOp.Annotations.JOIN_VARS, joinVars),
                new NV(HashIndexOp.Annotations.SELECT, selectVars),
                new NV(HashIndexOp.Annotations.NAMED_SET_REF,
                        namedSolutionSet),
                new NV(IPredicate.Annotations.RELATION_NAME, "kb"));

        /*
         * Operator which joins against the hash index. Note: JOIN_VARS and
         * SELECT are not specified on this operator; it is only given the
         * reference to the named solution set.
         */
        final SolutionSetHashJoinOp joinOp = newSolutionSetHashJoinOp(
                new BOp[] { indexOp },
                new NV(BOp.Annotations.BOP_ID, 2),
                new NV(BOp.Annotations.EVALUATION_CONTEXT,
                        BOpEvaluationContext.CONTROLLER),
                new NV(PipelineOp.Annotations.MAX_PARALLEL, 1),
                new NV(SolutionSetHashJoinOp.Annotations.RELEASE, true),
                new NV(SolutionSetHashJoinOp.Annotations.LAST_PASS, true),
                new NV(SolutionSetHashJoinOp.Annotations.NAMED_SET_REF,
                        namedSolutionSet));

        final PipelineOp query = joinOp;

        final IVariable<?> x = Var.var("x");
        final IVariable<?> y = Var.var("y");

        /*
         * The expected solutions. These are built independently of the source
         * solutions, but they carry the same bindings.
         */
        final IBindingSet[] expected = new IBindingSet[] {
                new ListBindingSet(
                        new IVariable[] { x },
                        new IConstant[] { new Constant<IV>(setup.leon) }),
                new ListBindingSet(
                        new IVariable[] { x, y },
                        new IConstant[] { new Constant<IV>(setup.mary),
                                new Constant<IV>(setup.john) }),
        };

        final IRunningQuery runningQuery = queryEngine.eval(queryId, query,
                null/* queryAttributes */, newSourceSolutions(setup));

        // verify solutions.
        AbstractQueryEngineTestCase.assertSameSolutionsAnyOrder(expected,
                runningQuery);

    }

    /**
     * A simple test of a {@link HashIndexOp} followed by a
     * {@link SolutionSetHashJoinOp}. In practice we should never follow the
     * {@link HashIndexOp} immediately with a {@link SolutionSetHashJoinOp} as
     * this is basically a complex NOP. However, this does provide a simple test
     * of the most basic mechanisms for those two operators.
     */
    public void test_hashIndexOp_01() throws Exception {

        doHashIndexThenJoinTest(JoinTypeEnum.Normal);

    }

    /**
     * Unit test of variant with an OPTIONAL join.
     * <p>
     * Note: Since there are no intervening joins or filters, this produces the
     * same output as the unit test above. However, in this case the joinSet
     * will have been created by the {@link HashIndexOp} and utilized by the
     * {@link SolutionSetHashJoinOp}.
     */
    public void test_hashIndexOp_02() throws Exception {

        doHashIndexThenJoinTest(JoinTypeEnum.Optional);

    }

    /**
     * Test variant where the index is built from a {@link SolutionSetStream}
     * available as an attribute to the {@link IRunningQuery}.
     * 
     * FIXME Also test with {@link IHashJoinUtility} query attribute as the
     * <em>source</em> (rather than a {@link SolutionSetStream}) and an
     * {@link IPredicate}.
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public void test_hashIndexOp_buildFromSolutionSet() throws Exception {

        final JoinSetup setup = new JoinSetup(getName());

        final UUID queryId = UUID.randomUUID();

        final String solutionSetName = "set1";

        final IVariable[] joinVars = new IVariable[] {};

        // Note: typed local so the null is passed as the annotation value.
        final IVariable[] selectVars = null;

        // Identifies the index to be generated.
        final INamedSolutionSetRef namedSolutionSet = NamedSolutionSetRefUtility
                .newInstance(queryId, solutionSetName, joinVars);

        // Identifies the stream from which the index will be built.
        final INamedSolutionSetRef namedSolutionSetSource = NamedSolutionSetRefUtility
                .newInstance(queryId, solutionSetName + "source", joinVars);

        // The Stream will be attached to the query attributes
        final Map<Object, Object> queryAttributes = new LinkedHashMap<Object, Object>();

        // The solutions to index.
        final IBindingSet[] solutionsToIndex = setup.getLeft1().toArray(
                new IBindingSet[setup.getLeft1().size()]);

        final SolutionSetStream stream;
        {

            final StreamIndexMetadata metadata = new StreamIndexMetadata(
                    UUID.randomUUID());

            // Create stream.
            stream = SolutionSetStream.create(jnl, metadata);

            /*
             * Populate the stream.
             */
            {

                final IStriterator itr = new Striterator(Arrays.asList(
                        solutionsToIndex).iterator());

                // wrap each bindingSet as an IBindingSet[].
                itr.addFilter(new Resolver() {

                    private static final long serialVersionUID = 1L;

                    @Override
                    protected Object resolve(Object obj) {
                        return new IBindingSet[] { (IBindingSet) obj };
                    }

                });

                stream.put((ICloseableIterator<IBindingSet[]>) itr);

            }

            // Checkpoint.
            stream.writeCheckpoint2();

            // Attach to query attributes (make visible to the query).
            queryAttributes.put(namedSolutionSetSource, stream);

        }

        // Add operator to build the hash index.
        final HashIndexOp indexOp = newHashIndexOp(setup.namespace,
                BOp.NOARGS,
                new NV(BOp.Annotations.BOP_ID, 1),
                new NV(BOp.Annotations.EVALUATION_CONTEXT,
                        BOpEvaluationContext.CONTROLLER),
                new NV(PipelineOp.Annotations.MAX_PARALLEL, 1),
                new NV(PipelineOp.Annotations.LAST_PASS, true),
                new NV(HashIndexOp.Annotations.JOIN_TYPE,
                        JoinTypeEnum.Optional),
                new NV(HashIndexOp.Annotations.JOIN_VARS, joinVars),
                new NV(HashIndexOp.Annotations.SELECT, selectVars),
                new NV(HashIndexOp.Annotations.NAMED_SET_REF,
                        namedSolutionSet),
                new NV(HashIndexOp.Annotations.NAMED_SET_SOURCE_REF,
                        namedSolutionSetSource),
                new NV(IPredicate.Annotations.RELATION_NAME, "kb"));

        /*
         * Add operator to validate the hash index build.
         * 
         * Note: PIPELINED is false. Presumably this is what makes the
         * operator satisfy the at-once evaluation check in the
         * ValidateIndexOp constructor (confirm against PipelineOp).
         */
        final ValidateIndexOp validateOp = new ValidateIndexOp(
                new BOp[] { indexOp },
                NV.asMap(new NV[] {
                        new NV(BOp.Annotations.BOP_ID, 2),
                        new NV(BOp.Annotations.EVALUATION_CONTEXT,
                                BOpEvaluationContext.CONTROLLER),
                        new NV(PipelineOp.Annotations.PIPELINED, false),
                        new NV(ValidateIndexOp.Annotations.EXPECTED_SOLUTIONS,
                                solutionsToIndex),
                        new NV(ValidateIndexOp.Annotations.NAMED_SET_SOURCE_REF,
                                namedSolutionSetSource),
                        new NV(ValidateIndexOp.Annotations.NAMED_SET_REF,
                                namedSolutionSet),
                }));

        final PipelineOp query = validateOp;

        // the source solutions fed into the query.
        final IBindingSet[] bindingSets = newSourceSolutions(setup);

        // the expected solutions (same as the source solutions).
        final IBindingSet[] expected = bindingSets;

        // run the query. it will build the index as a side-effect.
        final IRunningQuery runningQuery = queryEngine.eval(queryId, query,
                queryAttributes, bindingSets);

        // verify solutions.
        AbstractQueryEngineTestCase.assertSameSolutionsAnyOrder(expected,
                runningQuery);

    }

    /**
     * Operator is used to validate the {@link HashIndexOp} by verifying the
     * contents of the generated index before the life cycle of the index is
     * ended (when the {@link IRunningQuery} ends, the memory manager associated
     * with the query is cleared and the index data will no longer be valid).
     */
    private static class ValidateIndexOp extends PipelineOp {

        private static final long serialVersionUID = 1L;

        interface Annotations extends PipelineOp.Annotations {

            /**
             * Identifies the solutions that should have been indexed by the
             * {@link HashIndexOp}.
             */
            String NAMED_SET_SOURCE_REF = HashIndexOp.Annotations.NAMED_SET_SOURCE_REF;

            /**
             * Identifies the index that is the output of the
             * {@link HashIndexOp}. This is the index that we will validate.
             */
            String NAMED_SET_REF = HashIndexOp.Annotations.NAMED_SET_REF;

            /**
             * The expected solutions that should be reported by a scan of the
             * generated index.
             */
            String EXPECTED_SOLUTIONS = ValidateIndexOp.class.getName()
                    + ".expectedSolutions";

        }

        /**
         * @param args
         *            The operator arguments.
         * @param annotations
         *            The operator annotations.
         */
        public ValidateIndexOp(final BOp[] args,
                final Map<String, Object> annotations) {

            super(args, annotations);

            /*
             * This is not strictly necessary, but it does simplify validation
             * if we can assume that there will be exactly one invocation of
             * this operator.
             */
            assertTrue(isAtOnceEvaluation());

        }

        /**
         * Copy constructor.
         */
        public ValidateIndexOp(ValidateIndexOp op) {

            super(op);

        }

        /**
         * Wraps a {@link ChunkTask} for this operator and the given context
         * in a {@link FutureTask}.
         */
        @Override
        public FutureTask<Void> eval(final BOpContext<IBindingSet> context) {

            return new FutureTask<Void>(new ChunkTask(this, context));

        }

        /**
         * Task which validates the generated index and then passes the source
         * solutions through to the sink.
         */
        private static class ChunkTask implements Callable<Void> {

            private final ValidateIndexOp op;

            private final BOpContext<IBindingSet> context;

            public ChunkTask(final ValidateIndexOp op,
                    final BOpContext<IBindingSet> context) {

                this.op = op;

                this.context = context;

            }

            /**
             * We need to verify that the named solution set was created and
             * attached to the query and that the correct solutions were placed
             * into the index.
             */
            @Override
            public Void call() throws Exception {

                // The solutions that should be in the generated index.
                final IBindingSet[] expectedSolutions = (IBindingSet[]) op
                        .getRequiredProperty(Annotations.EXPECTED_SOLUTIONS);

                // Metadata to identify the generated solution set.
                final INamedSolutionSetRef namedSetRef = (INamedSolutionSetRef) op
                        .getRequiredProperty(Annotations.NAMED_SET_REF);

                // Metadata to identify the source solution set.
                final INamedSolutionSetRef namedSetSourceRef = (INamedSolutionSetRef) op
                        .getRequiredProperty(Annotations.NAMED_SET_SOURCE_REF);

                // The actual query attributes from the query.
                final IQueryAttributes queryAttributes2 = context
                        .getQueryAttributes();

                // verify that the generated solution set exists.
                assertNotNull("Generated index not found: " + namedSetRef,
                        queryAttributes2.get(namedSetRef));

                // verify that the source solution set exists.
                assertNotNull("Source not found: " + namedSetSourceRef,
                        queryAttributes2.get(namedSetSourceRef));

                /*
                 * Verify that the generated index has the correct solutions.
                 */
                final IHashJoinUtility tmp = (IHashJoinUtility) queryAttributes2
                        .get(namedSetRef);

                // Verify index scan against expected solutions.
                AbstractQueryEngineTestCase.assertSameSolutionsAnyOrder(
                        expectedSolutions, tmp.indexScan());

                /*
                 * Copy the source solutions to the sink.
                 */
                BOpUtility.copy(context.getSource(), context.getSink(),
                        null/* sink2 */, null/* mergeSolution */,
                        null/* selectVars */, null/* constraints */,
                        context.getStats());

                // Flush the sink.
                context.getSink().flush();

                return null;

            }

        }

    }

}