/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Nov 3, 2011 */ package com.bigdata.bop.rdf.join; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.FutureTask; import org.apache.log4j.Logger; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpContext; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.engine.BOpStats; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVCache; import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.store.BigdataBindingSetResolverator; import com.bigdata.relation.accesspath.IBlockingBuffer; import cutthecrap.utils.striterators.ICloseableIterator; /** * A vectored materialization operator based on pretty much the same logic as * {@link BigdataBindingSetResolverator}. However, this class caches the * resolved {@link BigdataValue} reference on the {@link IV} while the * {@link BigdataBindingSetResolverator} replaces the {@link IV} in the solution * with the {@link BigdataValue}. Also, this class does not filter out variables * which are not being materialized. * * @see ChunkedMaterializationIterator * @see BigdataBindingSetResolverator */ public class ChunkedMaterializationOp extends PipelineOp { private final static Logger log = Logger .getLogger(ChunkedMaterializationOp.class); private static final long serialVersionUID = 1L; public interface Annotations extends PipelineOp.Annotations { /** * The {@link IVariable}[] identifying the variables to be materialized. * When <code>null</code> or not specified, ALL variables will be * materialized. This may not be an empty array as that would imply that * there is no need to use this operator. */ String VARS = ChunkedMaterializationOp.class.getName()+".vars"; String RELATION_NAME = Predicate.Annotations.RELATION_NAME; String TIMESTAMP = Predicate.Annotations.TIMESTAMP; /** * If true, materialize inline values in addition to term IDs. */ String MATERIALIZE_INLINE_IVS = ChunkedMaterializationOp.class.getName()+".materializeAll"; /** * Default materialize all is false. */ boolean DEFAULT_MATERIALIZE_INLINE_IVS = false; } /** * @param args * @param annotations */ public ChunkedMaterializationOp(final BOp[] args, final Map<String, Object> annotations) { super(args, annotations); final IVariable<?>[] vars = getVars(); if (vars != null && vars.length == 0) throw new IllegalArgumentException(); getRequiredProperty(Annotations.RELATION_NAME); getRequiredProperty(Annotations.TIMESTAMP); } /** * @param op */ public ChunkedMaterializationOp(final ChunkedMaterializationOp op) { super(op); } public ChunkedMaterializationOp(final BOp[] args, final NV... annotations) { this(args, NV.asMap(annotations)); } /** * * @param vars * The variables to be materialized. Materialization is only * attempted for those variables which are actually bound in * given solution. * @param namespace * The namespace of the {@link LexiconRelation}. * @param timestamp * The timestamp against which to read. */ public ChunkedMaterializationOp(final BOp[] args, final IVariable<?>[] vars, final String namespace, final long timestamp) { this(args, // new NV(Annotations.VARS, vars),// new NV(Annotations.RELATION_NAME, new String[] { namespace }), // new NV(Annotations.TIMESTAMP, timestamp) // ); } /** * Return the variables to be materialized. * * @return The variables to be materialized -or- <code>null</code> iff all * variables should be materialized. * * @see Annotations#VARS */ public IVariable<?>[] getVars() { return (IVariable<?>[]) getProperty(Annotations.VARS); } /** * When <code>true</code>, inline {@link IV}s are also materialized. * * @see Annotations#MATERIALIZE_INLINE_IVS */ public boolean materializeInlineIVs() { return getProperty(Annotations.MATERIALIZE_INLINE_IVS, Annotations.DEFAULT_MATERIALIZE_INLINE_IVS); } @Override public FutureTask<Void> eval(final BOpContext<IBindingSet> context) { return new FutureTask<Void>(new ChunkTask(this, context)); } /** * Task executing on the node. */ static private class ChunkTask implements Callable<Void> { private final BOpContext<IBindingSet> context; /** * The variables to be materialized. */ private final IVariable<?>[] vars; private final String namespace; private final long timestamp; private final boolean materializeInlineIVs; ChunkTask(final ChunkedMaterializationOp op, final BOpContext<IBindingSet> context ) { this.context = context; this.vars = op.getVars(); namespace = ((String[]) op.getProperty(Annotations.RELATION_NAME))[0]; timestamp = (Long) op.getProperty(Annotations.TIMESTAMP); materializeInlineIVs = op.materializeInlineIVs(); } @Override public Void call() throws Exception { final BOpStats stats = context.getStats(); final ICloseableIterator<IBindingSet[]> itr = context .getSource(); final IBlockingBuffer<IBindingSet[]> sink = context.getSink(); try { final LexiconRelation lex = (LexiconRelation) context .getResource(namespace, timestamp); while (itr.hasNext()) { final IBindingSet[] a = itr.next(); stats.chunksIn.increment(); stats.unitsIn.add(a.length); final IBindingSet[] aOut = resolveChunk(vars, lex, a, materializeInlineIVs); sink.add(aOut); } sink.flush(); // done. return null; } finally { sink.close(); } } } // ChunkTask /** * Resolve a chunk of {@link IBindingSet}s into a chunk of * {@link IBindingSet}s in which {@link IV}s have been resolved to * {@link BigdataValue}s. * * @param required * The variable(s) to be materialized or <code>null</code> to * materialize all variable bindings. * @param lex * The lexicon reference. * @param chunkIn * The chunk of solutions whose variables will be materialized. * * @return a new binding set in which the chunks have been resolved */ static IBindingSet[] resolveChunk(final IVariable<?>[] required, final LexiconRelation lex,// final IBindingSet[] chunkIn,// final boolean materializeInlineIVs) { if (log.isInfoEnabled()) log.info("Fetched chunk: size=" + chunkIn.length + ", chunk=" + Arrays.toString(chunkIn)); /* * Create a collection of the distinct term identifiers used in this * chunk. */ /* * Estimate the capacity of the hash map based on the #of variables to * materialize per solution and the #of solutions. */ final int initialCapacity = required == null ? chunkIn.length : ((required.length == 0) ? 1 : chunkIn.length * required.length); /** * In the following map we store, for each IV, the constant that was * associated with this IV; we later use these constants canonically. * -> see https://jira.blazegraph.com/browse/BLZG-1591 */ final Map<IV<?, ?>, IConstant<?>> idToConstMap = new HashMap<IV<?, ?>, IConstant<?>>(initialCapacity); for (IBindingSet solution : chunkIn) { final IBindingSet bindingSet = solution; // System.err.println(solution); assert bindingSet != null; if (required == null) { // Materialize all variable bindings. @SuppressWarnings("rawtypes") final Iterator<Map.Entry<IVariable, IConstant>> itr = bindingSet .iterator(); while (itr.hasNext()) { @SuppressWarnings("rawtypes") final Map.Entry<IVariable, IConstant> entry = itr.next(); final IV<?, ?> iv = (IV<?, ?>) entry.getValue().get(); if (iv == null) { throw new RuntimeException("NULL? : var=" + entry.getKey() + ", " + bindingSet); } if (iv.needsMaterialization() || materializeInlineIVs) { if (!idToConstMap.containsKey(iv)) { idToConstMap.put(iv, entry.getValue()); } } // handleIV(iv, ids, materializeInlineIVs); } } else { // Materialize the specified variable bindings. for (IVariable<?> v : required) { final IConstant<?> c = bindingSet.get(v); if (c == null) { continue; } final IV<?, ?> iv = (IV<?, ?>) c.get(); if (iv == null) { throw new RuntimeException("NULL? : var=" + v + ", " + bindingSet); } if (iv.needsMaterialization() || materializeInlineIVs) { if (!idToConstMap.containsKey(iv)) { idToConstMap.put(iv, c); } } // handleIV(iv, ids, materializeInlineIVs); } } } // System.err.println("resolving: " + // Arrays.toString(ids.toArray())); if (log.isInfoEnabled()) log.info("Resolving " + idToConstMap.keySet().size() + " IVs, required=" + Arrays.toString(required)); // batch resolve term identifiers to terms; as a side-effect, this sets the cache // on the IVs that we pass in final Map<IV<?, ?>, BigdataValue> terms = lex.getTerms(idToConstMap.keySet()); /* * Resolve the duplicates */ final IBindingSet[] chunkOut = new IBindingSet[chunkIn.length]; for (int i=0; i<chunkIn.length; i++) { chunkOut[i] = getBindingSet(required, chunkIn[i], terms, idToConstMap); } return chunkOut; } // /** // * Either add the IV to the list if it needs materialization, or else // * delegate to {@link #handleSid(SidIV, Collection, boolean)} if it's a // * SidIV. // */ // static private void handleIV(final IV<?, ?> iv, // final Collection<IV<?, ?>> ids, // final boolean materializeInlineIVs) { // // if (iv instanceof SidIV) { // // handleSid((SidIV<?>) iv, ids, materializeInlineIVs); // // } else if (iv.needsMaterialization() || materializeInlineIVs) { // // ids.add(iv); // // } // // } // // /** // * Sids need to be handled specially because their individual ISPO // * components might need materialization. // */ // static private void handleSid(final SidIV<?> sid, // final Collection<IV<?, ?>> ids, // final boolean materializeInlineIVs) { // // final ISPO spo = sid.getInlineValue(); // // System.err.println("handling a sid"); // System.err.println("adding s: " + spo.s()); // System.err.println("adding p: " + spo.p()); // System.err.println("adding o: " + spo.o()); // // handleIV(spo.s(), ids, materializeInlineIVs); // // handleIV(spo.p(), ids, materializeInlineIVs); // // handleIV(spo.o(), ids, materializeInlineIVs); // // if (spo.c() != null) { // // handleIV(spo.c(), ids, materializeInlineIVs); // // } // // } /** * Resolve the term identifiers in the {@link IBindingSet} using the map * populated when we fetched the current chunk. * * @param required * The variables to be resolved -or- <code>null</code> if all * variables should have been resolved. * @param bindingSetIn * A solution whose {@link IV}s will be resolved to the * corresponding {@link BigdataValue}s in the caller's * <code>terms</code> map. The {@link IVCache} associations are * set as a side-effect. * @param terms * A map from {@link IV}s to {@link BigdataValue}s. * * @param idsToConstMap mapping from IVs to the constant value containing the IV; * this map will be used to replace the binding set values * for inline IVs * * @throws IllegalStateException * if the {@link IBindingSet} was not materialized with the * {@link IBindingSet}. */ static private IBindingSet getBindingSet(// final IVariable<?>[] required, final IBindingSet bindingSetIn, final Map<IV<?, ?>, BigdataValue> terms, final Map<IV<?, ?>, IConstant<?>> idsToConstMap) { if (bindingSetIn == null) throw new IllegalArgumentException(); if (terms == null) throw new IllegalArgumentException(); if (idsToConstMap == null) throw new IllegalArgumentException(); final IBindingSet bindingSetOut = bindingSetIn.clone(); if (required != null) { /* * Only the specified variables. */ for (IVariable<?> var : required) { @SuppressWarnings("unchecked") final IConstant<IV<?, ?>> c = bindingSetOut.get(var); if (c == null) { // Variable is not bound in this solution. continue; } final IV<?, ?> iv = (IV<?, ?>) c.get(); if (iv == null) { continue; } /** * As per https://jira.blazegraph.com/browse/BLZG-1591, we distinguish * between inline IVs (which have already been resolved as a side effect * of the preceding getTerms() call) and for which we thus can substitute * in a canonical version of the constant from which it was derived * and non-inline IVs (the old code path) for which we conditionally * set the IV cache. */ if (iv.isInline()) { final IConstant<?> cVal = idsToConstMap.get(iv); if (cVal == null) { if (iv.needsMaterialization()) { // Not found in dictionary. This is an error. throw new RuntimeException("Could not resolve: iv=" + iv); } // else NOP - Value is not required. } else { bindingSetOut.set(var, cVal); } } else { final BigdataValue value = terms.get(iv); conditionallySetIVCache(iv,value); } } } else { /* * Everything in the binding set. */ @SuppressWarnings("rawtypes") final Iterator<Map.Entry<IVariable, IConstant>> itr = bindingSetOut .iterator(); while (itr.hasNext()) { @SuppressWarnings("rawtypes") final Map.Entry<IVariable, IConstant> entry = itr.next(); final Object boundValue = entry.getValue().get(); if (!(boundValue instanceof IV)) { continue; } final IV<?, ?> iv = (IV<?, ?>) boundValue; final BigdataValue value = terms.get(iv); /** * As per https://jira.blazegraph.com/browse/BLZG-1591, we distinguish * between inline IVs (which have already been resolved as a side effect * of the preceding getTerms() call) and for which we thus can substitute * in a canonical version of the constant from which it was derived * and non-inline IVs (the old code path) for which we conditionally * set the IV cache. */ if (iv.isInline()) { final IConstant<?> cVal = idsToConstMap.get(iv); if (cVal == null) { if (iv.needsMaterialization()) { // Not found in dictionary. This is an error. throw new RuntimeException("Could not resolve: iv=" + iv); } // else NOP - Value is not required. } else { bindingSetOut.set(entry.getKey(), idsToConstMap.get(iv)); } } else { conditionallySetIVCache(iv,value); } } } return bindingSetOut; } /** * If the {@link BigdataValue} is non-null, then set it on the * {@link IVCache} interface. * * @param iv * The {@link IV} * @param value * The {@link BigdataValue} for that {@link IV} (from the * dictionary). * * @throws RuntimeException * If the {@link BigdataValue} is null (could not be discovered * in the dictionary) and the {@link IV} requires * materialization ({@link IV#needsMaterialization() is * <code>true</code>). * * @see #1028 (xsd:boolean materialization issue) */ private static void conditionallySetIVCache(IV<?, ?> iv, BigdataValue value) { if (value == null) { if (iv.needsMaterialization()) { // Not found in dictionary. This is an error. throw new RuntimeException("Could not resolve: iv=" + iv); } // else NOP - Value is not required. } else { /* * Value was found in the dictionary, so replace the binding. * * FIXME This probably needs to strip out the BigdataSail#NULL_GRAPH * since that should not become bound. */ ((IV) iv).setValue(value); } } }