/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Feb 16, 2012
 */

package com.bigdata.rdf.internal.encoder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;

import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.Var;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.htree.HTree;
import com.bigdata.io.DataInputBuffer;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVCache;
import com.bigdata.rdf.internal.IVUtility;
import com.bigdata.rdf.internal.impl.TermId;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueFactoryImpl;
import com.bigdata.rdf.model.BigdataValueSerializer;
import com.bigdata.util.BytesUtil;

/**
 * Decoder for {@link IVSolutionSetEncoder}.
 * <p>
 * The decoder is stateful: the schema, the {@link IVCache} associations, the
 * stream version and the lexicon namespace are all accumulated from the
 * solutions decoded so far. Solutions must therefore be decoded in the order
 * in which they were encoded, and {@link #release()} must be invoked before an
 * instance is reused for another stream. The internal collections are plain
 * (unsynchronized) <code>java.util</code> collections.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public class IVSolutionSetDecoder implements IBindingSetDecoder {

    /**
     * The schema provides the order in which the {@link IV}[] for solutions
     * stored in the hash index are encoded in the {@link HTree}. {@link IV} s
     * which are not bound are modeled by a {@link TermId#NullIV}.
     * <p>
     * Note: In order to be able to encode/decode the schema based on the lazy
     * identification of the variables which appear in solutions the
     * {@link HTree} must store variable length {@link IV}[]s since new
     * variables may be discovered at any point.
     */
    private final LinkedHashSet<IVariable<?>> schema;

    /**
     * An extensible random access list. The order of the items in the list is
     * the order in which they are entered into the {@link #schema}. This is
     * used to interpret the bitmap for the variables which are bound in a
     * solution.
     */
    private final ArrayList<IVariable<?>> schemaIndex;

    /**
     * The observed {@link IVCache} associations.
     */
    private final Map<IV<?, ?>, BigdataValue> cache;

    /**
     * Used to de-serialize the {@link BigdataValue}s.
     */
    private final StringBuilder tmp;

    /**
     * The #of solutions decoded to date.
     */
    private int nsolutions = 0;

    /**
     * The version number. The versions are declared by
     * {@link IVSolutionSetEncoder}. They are read from the first solution in a
     * stream. The value is <code>-1</code> until a supported version has been
     * read.
     */
    private int version = -1;

    /*
     * Discovered dynamically.
     */

    /**
     * The namespace of the lexicon relation. The namespace is discovered
     * dynamically when we read the first record with an {@link IVCache}
     * association. It will be <code>null</code> until then.
     */
    private String namespace;

    /**
     * Used to de-serialize the {@link BigdataValue}s for {@link IVCache}
     * associations. This is initialized if and when we discover the
     * {@link #namespace}.
     */
    private BigdataValueSerializer<BigdataValue> valueSer;

    /**
     * Reports the namespace, the schema, the size of the value cache and the
     * number of solutions decoded so far.
     */
    @Override
    public String toString() {

        final StringBuilder sb = new StringBuilder();
        sb.append(super.toString());
        sb.append("{namespace=").append(namespace);
        sb.append(",schema=").append(schema); // Not thread-safe.
        sb.append(",cacheSize=").append(cache.size()); // Not thread-safe.
        sb.append(",nsolutions=").append(nsolutions);
        sb.append("}");
        return sb.toString();

    }

    /**
     * Creates a decoder with an empty schema and an empty value cache.
     */
    public IVSolutionSetDecoder() {

        // The ordered set of variables for which bindings have been observed.
        this.schema = new LinkedHashSet<IVariable<?>>();

        // The same variables, in declaration order, addressable by index.
        this.schemaIndex = new ArrayList<IVariable<?>>();

        // The IV -> BigdataValue cache
        this.cache = new HashMap<IV<?, ?>, BigdataValue>();

        this.tmp = new StringBuilder();

    }

    /**
     * {@inheritDoc}
     * <p>
     * Note: Solutions MUST be decoded in the encode order because the schema
     * (the set of variables for which bindings have been observed) is assembled
     * incrementally from the decoded solutions and the encoding is sensitive to
     * the order in which the variables are first observed. Also, the presence
     * of the version field in the first solution makes it impossible to
     * re-process a stream of solutions with the same decoder.
     * <p>
     * Note: This implementation currently ignores
     * <code>resolveCachedValues</code>; cached values are always resolved on
     * the decoded solution.
     */
    @Override
    public IBindingSet decodeSolution(final byte[] data, int off, int len,
            final boolean resolveCachedValues) {

        // Note: close() is NOT required for DataInputBuffer.
        final DataInputBuffer in = new DataInputBuffer(data, off, len);

        return decodeSolution(in, resolveCachedValues);

    }

    /**
     * Stream oriented decode.
     * 
     * @param in
     *            The source data.
     * @param resolveCachedValues
     *            <code>true</code> if {@link IVCache} associations should be
     *            resolved as the solutions are decoded. (Currently ignored by
     *            this implementation: the associations are always resolved.)
     * 
     * @return The next decoded solution.
     * 
     * @throws RuntimeException
     *             if the version field of the first solution is not a known
     *             version, or if the source data can not be read.
     */
    public IBindingSet decodeSolution(final DataInputBuffer in,
            final boolean resolveCachedValues) {

        if (version == -1) {

            /*
             * The version is validated BEFORE it is stored. If an unsupported
             * value were left in the field, the next call would skip this
             * check and decode the stream as if the version were known.
             */
            final int v;
            try {
                v = in.unpackInt();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }

            switch (v) {
            case IVSolutionSetEncoder.VERSION0:
                break;
            default:
                throw new RuntimeException("Unknown version: " + v);
            }

            version = v;

        }

        final IBindingSet bset = _decodeSolution(in, resolveCachedValues);

        nsolutions++;

        return bset;

    }

    /**
     * Decodes one solution record (the version field, if any, has already been
     * consumed by the caller).
     * 
     * @param in
     *            The source data, positioned at the start of the record.
     * @param resolveCachedValuesIsIgnored
     *            Not consulted. The cached values are always resolved.
     * 
     * @return The decoded solution.
     * 
     * @throws IllegalStateException
     *             if the record re-declares a variable which is already in the
     *             schema.
     * @throws RuntimeException
     *             if a bit map does not agree with the declared counts, or if
     *             the source data can not be read.
     */
    private IBindingSet _decodeSolution(final DataInputBuffer in,
            final boolean resolveCachedValuesIsIgnored) {

        final byte[] data = in.getBuffer();
        final int off = in.getOrigin();

        try {

            final IBindingSet bset = new ListBindingSet();

            /*
             * Record layout (in read order):
             * 
             * nbound
             * 
             * -- nothing else is read when nbound == 0 --
             * 
             * nvars
             * ncached
             * namespace (only on the first record with ncached > 0)
             * var[0]...var[nvars-1]
             * bitmap-for-bound-variables
             * bitmap-for-IV-with-cached-Values (only when ncached > 0)
             * IV[0] ... IV[nbound-1]
             * Value[0] ... Value[ncached-1]
             */

            // #of bindings in this record.
            final int numBindings = in.unpackInt();

            if (numBindings == 0) {
                // Empty solution. The remaining fields are not read.
                return bset;
            }

            // #of variables declared for the first time by this record.
            final int newVars = in.unpackInt();

            // #of new IVCache associations encoded in this record.
            final int newCached = in.unpackInt();

            if (newCached > 0 && namespace == null) {
                /*
                 * This is where we discover the namespace for the serialized
                 * BigdataValue objects.
                 */
                namespace = in.readUTF2();
                valueSer = BigdataValueFactoryImpl.getInstance(namespace)
                        .getValueSerializer();
            }

            // read newly declared variable names and add them to the schema.
            for (int i = 0; i < newVars; i++) {
                final IVariable<?> var = Var.var(in.readUTF2());
                if (schema.add(var)) {
                    schemaIndex.add(var);
                } else {
                    // The variable was already declared.
                    throw new IllegalStateException("Already declared: "
                            + var.getName());
                }
            }

            // #of variables declared so far across decoded solutions.
            final int schemaSize = schema.size();

            // The bit index into the byte[] of the variable bit map.
            final int offsetVarBits = (off + ((int) in.position())) << 3;

            // #of bytes required for the bit flags (one per declared var to
            // date). Note: numBindings is known to be non-zero here.
            final int nbytesVarBits = BytesUtil.bitFlagByteLength(schemaSize);

            // Skip over the variable bit map.
            in.skipBytes(nbytesVarBits);

            // The bit index into the byte[] of the cached Value bit map.
            final int offsetCacheValueBits = (off + ((int) in.position())) << 3;

            // #of bytes required for the bit flags (one per binding in this
            // record, but only if there are some IVCache values in this
            // record).
            final int nbytesCacheValueBits = newCached == 0 ? 0 : BytesUtil
                    .bitFlagByteLength(numBindings);

            // Skip over the cache bit map.
            in.skipBytes(nbytesCacheValueBits);

            /*
             * Decode the IV[].
             * 
             * The IV[] is dense. There are [numBindings] values in the IV[].
             * The variable for each binding is obtained by finding the next
             * non-zero bit in the variable bit map (which must sum to
             * numBindings).
             * 
             * The [ivs] List is maintained iff we need to index into the IVs by
             * their decode order. That is only necessary when there are new
             * IVCache associations in this record (newCached>0).
             */
            final List<IV<?, ?>> ivs;
            if (newCached > 0)
                ivs = new ArrayList<IV<?, ?>>(numBindings);
            else
                ivs = Collections.emptyList();

            // Guard retained from the original code: a non-positive count is
            // skipped rather than decoded.
            if (numBindings > 0) {
                int chksum = 0;
                long bitIndex = offsetVarBits;
                // The variable bit map ends where the cache bit map begins.
                final long maxBitIndex = offsetCacheValueBits;
                int i = 0;
                // Absolute offset into data[] of the next IV to decode.
                int ivoff = (int) (off + in.position());
                while (i < schemaSize) {
                    if (bitIndex >= maxBitIndex)
                        break;
                    final boolean isSet = BytesUtil.getBit(data, bitIndex++);
                    if (isSet) {
                        /*
                         * Decode the IV for this variable.
                         * 
                         * Note: A "mock" IV will be decoded into a non-null
                         * TermId.
                         */
                        chksum++;
                        final IVariable<?> var = schemaIndex.get(i);
                        final IV<?, ?> iv = IVUtility.decodeFromOffset(data,
                                ivoff, false/* nullIsNullRef */);
                        bset.set(var, new Constant<IV<?, ?>>(iv));
                        if (newCached > 0) {
                            ivs.add(iv);
                        }
                        // Keep the stream position in step with ivoff.
                        final int byteLength = iv.byteLength();
                        ivoff += byteLength;
                        in.skipBytes(byteLength);
                    }
                    i++;
                }
                if (chksum != numBindings) {
                    throw new RuntimeException("Bad bit sum: chksum=" + chksum
                            + ", expected=" + numBindings);
                }
            }

            /*
             * Decode any cached BigdataValue objects and associate them with
             * the correct IVs in the IVCache mapping. The bit map has
             * [numBindings] bits. Each bit indicates whether or not there is a
             * cached Value inline for the corresponding binding (decoded
             * above).
             */
            if (newCached > 0) {
                int chksum = 0;
                long bitIndex = offsetCacheValueBits;
                final long maxBitIndex = bitIndex + (nbytesCacheValueBits << 3);
                int i = 0;
                while (i < numBindings) {
                    if (bitIndex >= maxBitIndex)
                        break;
                    final boolean isSet = BytesUtil.getBit(data, bitIndex++);
                    if (isSet) {
                        /*
                         * Decode the cached Value for the IV.
                         */
                        chksum++;
                        final IV<?, ?> iv = ivs.get(i);
                        final BigdataValue value = valueSer
                                .deserialize(in, tmp);
                        if (!iv.isNullIV()) {
                            cache.put(iv, value);
                        } else {
                            /*
                             * We must attach the IVCache association for a
                             * mockIV since it is NOT possible to recover that
                             * association from the cache.
                             * 
                             * Note: This is because TermId.equals() considers
                             * TermIds to be equals() if they have termId=0L and
                             * ignores the cached Value unless BOTH TermIds have
                             * the value set. When we decode a mock IV the Value
                             * is not set so we can not resolve it against a
                             * hash map. Hence we MUST set it on the IV
                             * immediately while we can still maintain the
                             * correlation between the IV and the cached Value.
                             */
                            @SuppressWarnings({ "rawtypes", "unchecked" })
                            final IV mockIV = iv;
                            mockIV.setValue(value);
                        }
                    }
                    i++;
                }
                if (chksum != newCached) {
                    throw new RuntimeException("Bad bit sum: chksum=" + chksum
                            + ", expected=" + newCached);
                }
            }

            // Always resolved, regardless of the caller's flag.
            if (numBindings > 0)
                resolveCachedValues(bset);

            return bset;

        } catch (IOException ex) {

            throw new RuntimeException(ex);

        }

    }

    /**
     * {@inheritDoc}
     * <p>
     * For each binding whose {@link IV} has an entry in the decoder's cache,
     * the cached {@link BigdataValue} is set on that {@link IV}. Bindings
     * without a cache entry are left unchanged.
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    @Override
    public void resolveCachedValues(final IBindingSet bset) {

        final Iterator<Map.Entry<IVariable, IConstant>> itr = bset.iterator();

        while (itr.hasNext()) {

            final Map.Entry<IVariable, IConstant> e = itr.next();

            final IConstant c = e.getValue();

            final IV iv = (IV) c.get();

            final BigdataValue val = cache.get(iv);

            if (val != null) {

                iv.setValue(val);

            }

        }

    }

    /**
     * {@inheritDoc}
     * <p>
     * Clears the cache and the schema and resets the version, the solution
     * counter and the namespace, returning the decoder to its initial state.
     */
    @Override
    public void release() {

        cache.clear();
        schema.clear();
        schemaIndex.clear();
        tmp.setLength(0);
        version = -1;
        nsolutions = 0;
        namespace = null;
        valueSer = null;

    }

    /**
     * {@inheritDoc}
     * <p>
     * Always returns <code>true</code>.
     */
    @Override
    public boolean isValueCache() {

        return true;

    }

}