/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Jan 25, 2008
 */

package com.bigdata.rdf.spo;

import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;

import org.apache.log4j.Logger;

import com.bigdata.btree.IIndex;
import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedure;
import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedureConstructor;
import com.bigdata.btree.proc.IParallelizableIndexProcedure;
import com.bigdata.btree.proc.IResultHandler;
import com.bigdata.btree.proc.LongAggregator;
import com.bigdata.btree.raba.IRaba;
import com.bigdata.btree.raba.codec.IRabaCoder;
import com.bigdata.rdf.model.StatementEnum;
import com.bigdata.relation.IMutableRelationIndexWriteProcedure;
import com.bigdata.util.BytesUtil;

/**
 * Procedure for batch insert on a single statement index (or index partition).
 * <p>
 * The key for each statement encodes the {s:p:o} of the statement in the order
 * that is appropriate for the index (SPO, POS, OSP, etc). The key is written
 * unchanged on the index.
 * <p>
 * The value for each statement is a byte that encodes the {@link StatementEnum}
 * and also encodes whether or not the "override" flag is set (see
 * {@link StatementEnum#MASK_OVERRIDE}) - followed by 8 bytes representing the
 * statement identifier IFF statement identifiers are enabled AND the
 * {@link StatementEnum} is {@link StatementEnum#Explicit}. The value requires
 * interpretation to determine the byte[] that will be written as the value on
 * the index - see the code for more details.
 * <p>
 * NOTE(review): {@link #applyOnce(IIndex, IRaba, IRaba)} asserts that each
 * value is exactly one byte long, so the trailing statement identifier
 * described above does not appear to be accepted by the current
 * implementation - confirm and update this description accordingly.
 * <p>
 * Note: This needs to be a custom batch operation using a conditional insert so
 * that we do not write on the index when the data would not be changed and to
 * handle the override flag and the optional statement identifier correctly.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public class SPOIndexWriteProc extends AbstractKeyArrayIndexProcedure<Object>
        implements IParallelizableIndexProcedure<Object>,
        IMutableRelationIndexWriteProcedure<Object> {

    private static final Logger log = Logger
            .getLogger(SPOIndexWriteProc.class);

    /** Logger INFO level, sampled once when the instance is created. */
    private final transient boolean INFO = log.isInfoEnabled();

    /** Logger DEBUG level, sampled once when the instance is created. */
    private final transient boolean DEBUG = log.isDebugEnabled();

    private static final long serialVersionUID = 3969394126242598370L;

    /**
     * When <code>true</code> the procedure reports which tuples were modified
     * rather than just the mutation count. The field is transient; it is
     * carried explicitly by {@link #writeMetadata(ObjectOutput)} and
     * {@link #readMetadata(ObjectInput)}.
     */
    private transient boolean reportMutation;

    /**
     * This procedure writes on the index.
     *
     * @return <code>false</code>, always.
     */
    @Override
    public final boolean isReadOnly() {

        return false;

    }

    /**
     * De-serialization constructor.
     */
    public SPOIndexWriteProc() {

    }

    /**
     * Creates a procedure for the given slice of keys and values.
     *
     * @param keySer
     *            The coder for the keys (passed to the super class).
     * @param valSer
     *            The coder for the values (passed to the super class).
     * @param fromIndex
     *            Passed to the super class.
     * @param toIndex
     *            Passed to the super class.
     * @param keys
     *            The keys.
     * @param vals
     *            The values (required; checked by assertion only).
     * @param reportMutation
     *            <code>true</code> iff the procedure should report which
     *            statements were modified rather than the mutation count.
     */
    protected SPOIndexWriteProc(final IRabaCoder keySer,
            final IRabaCoder valSer, final int fromIndex, final int toIndex,
            final byte[][] keys, final byte[][] vals,
            final boolean reportMutation) {

        super(keySer, valSer, fromIndex, toIndex, keys, vals);

        assert vals != null;

        this.reportMutation = reportMutation;

    }

    /**
     * Factory for {@link SPOIndexWriteProc} instances. Use one of the two
     * shared instances, {@link #REPORT_MUTATION} or {@link #INSTANCE}.
     */
    public static class IndexWriteProcConstructor extends
            AbstractKeyArrayIndexProcedureConstructor<SPOIndexWriteProc> {

        final boolean reportMutation;

        /**
         * Instance reports back which statements were modified (inserted into
         * the index or updated on the index). The return value of the procedure
         * is a {@link ResultBitBuffer}. That buffer is constructed with the
         * number of flags which {@link ModifiedEnum#toBooleans} reported as
         * <code>true</code>.
         */
        public static final IndexWriteProcConstructor REPORT_MUTATION = new IndexWriteProcConstructor(
                true/* reportMutation */);

        /**
         * Instance does not report back which statements were modified
         * (inserted into the index or updated on the index). The return value
         * of the procedure is a {@link Long} mutation count.
         */
        public static final IndexWriteProcConstructor INSTANCE = new IndexWriteProcConstructor(
                false/* reportMutation */);

        private IndexWriteProcConstructor(final boolean reportMutation) {

            this.reportMutation = reportMutation;

        }

        /**
         * Values are required.
         */
        @Override
        public final boolean sendValues() {

            return true;

        }

        /**
         * Creates a procedure which inherits the <code>reportMutation</code>
         * mode of this constructor object.
         */
        @Override
        public SPOIndexWriteProc newInstance(final IRabaCoder keySer,
                final IRabaCoder valSer, final int fromIndex,
                final int toIndex, final byte[][] keys, final byte[][] vals) {

            return new SPOIndexWriteProc(keySer, valSer, fromIndex, toIndex,
                    keys, vals, reportMutation);

        }

    }

    /**
     * Conditionally writes each (key, value) pair on the index. A tuple is only
     * written when the statement is not yet in the index or when its statement
     * type has to change.
     *
     * @param ndx
     *            The index (or index partition) to be written on. Its tuple
     *            serializer is cast to {@link SPOTupleSerializer}.
     * @param keys
     *            The statement keys.
     * @param vals
     *            The proposed values, one per key. Each is asserted to be a
     *            single byte.
     *
     * @return The #of statements actually written on the index as a
     *         {@link Long} -or- a {@link ResultBitBuffer} IFF
     *         <code>reportMutation := true</code>.
     */
    @Override
    public Object applyOnce(final IIndex ndx, final IRaba keys,
            final IRaba vals) {

        // #of statements actually written on the index partition.
        int writeCount = 0;

        final int n = keys.size();

        final SPOTupleSerializer tupleSer = (SPOTupleSerializer) ndx
                .getIndexMetadata().getTupleSerializer();

        /*
         * true iff INFO logging is enabled and this is the primary (SPO/SPOC)
         * index. The key order is not consulted unless INFO is enabled.
         */
        final boolean isPrimaryIndex = INFO
                && tupleSer.getKeyOrder().isPrimaryIndex();

        // true iff individual writes are to be logged (loop invariant).
        final boolean logWrites = isPrimaryIndex && DEBUG;

        // Array used to report which statements were modified by this operation.
        final ModifiedEnum[] modified = reportMutation ? new ModifiedEnum[n]
                : null;

        if (reportMutation) {

            for (int i = 0; i < n; i++) {

                modified[i] = ModifiedEnum.NONE;

            }

        }

        for (int i = 0; i < n; i++) {

            // the key encodes the {s:p:o} of the statement.
            final byte[] key = keys.get(i);

            assert key != null;

            /*
             * The value is a single byte (asserted below). The statement type,
             * the override flag and the user flag are all decoded from that
             * byte.
             */
            final byte[] val = vals.get(i);

            assert val != null;
            assert val.length == 1;

            // figure out if the override bit is set.
            final boolean override = StatementEnum.isOverride(val[0]);

            final boolean userFlag = StatementEnum.isUserFlag(val[0]);

            /*
             * Decode the new (proposed) statement type (override bit is
             * masked off).
             */
            final StatementEnum newType = StatementEnum.decode(val[0]);

            /*
             * The current value for the statement in this index partition (or
             * null iff the stmt is not asserted).
             *
             * @todo reuse Tuple for lookup to reduce byte[] allocation.
             */
            final byte[] oldval = ndx.lookup(key);

            /*
             * The following reconciles the old and new statement type, which
             * is interpreted in the light of whether or not the override flag
             * was set. The override flag is never written on the index.
             */
            if (oldval == null) {

                /*
                 * Statement is NOT pre-existing.
                 */

                ndx.insert(key, tupleSer.serializeVal(false/* override */,
                        userFlag, newType));

                if (logWrites) {
                    log.debug("new SPO: key=" + BytesUtil.toString(key));
                }

                writeCount++;

                if (reportMutation) {
                    modified[i] = ModifiedEnum.INSERTED;
                }

            } else {

                /*
                 * Statement is pre-existing.
                 */

                // old statement type.
                final StatementEnum oldType = StatementEnum.deserialize(oldval);

                if (oldType == StatementEnum.History
                        || newType == StatementEnum.History) {

                    /*
                     * Either side is a History tuple: write iff the type
                     * changes. A change to History is reported as REMOVED and
                     * a change away from History is reported as INSERTED.
                     */
                    if (oldType != newType) {

                        ndx.insert(key, tupleSer.serializeVal(
                                false/* override */, userFlag, newType));

                        if (logWrites) {
                            log.debug("Changing statement type: key="
                                    + BytesUtil.toString(key) + ", oldType="
                                    + oldType + ", newType=" + newType);
                        }

                        writeCount++;

                        if (reportMutation) {
                            modified[i] = newType == StatementEnum.History
                                    ? ModifiedEnum.REMOVED
                                    : ModifiedEnum.INSERTED;
                        }

                    }

                } else if (override) {

                    if (oldType != newType) {

                        /*
                         * We are downgrading a statement from explicit to
                         * inferred during TM.
                         */

                        assert newType != StatementEnum.Explicit;

                        ndx.insert(key, tupleSer.serializeVal(
                                false/* override */, userFlag, newType));

                        if (logWrites) {
                            log.debug("Downgrading SPO: key="
                                    + BytesUtil.toString(key) + ", oldType="
                                    + oldType + ", newType=" + newType);
                        }

                        writeCount++;

                        if (reportMutation) {
                            modified[i] = ModifiedEnum.UPDATED;
                        }

                    }

                } else {

                    /*
                     * No override: the statement type may only be raised to
                     * the max of the old and the proposed type.
                     */
                    final StatementEnum maxType = StatementEnum.max(oldType,
                            newType);

                    if (oldType != maxType) {

                        ndx.insert(key, tupleSer.serializeVal(
                                false/* override */, userFlag, maxType));

                        if (logWrites) {
                            log.debug("Changing statement type: key="
                                    + BytesUtil.toString(key) + ", oldType="
                                    + oldType + ", newType=" + newType
                                    + ", maxType=" + maxType);
                        }

                        writeCount++;

                        if (reportMutation) {
                            modified[i] = ModifiedEnum.UPDATED;
                        }

                    }

                }

            }

        }

        // Note: isPrimaryIndex is only true when INFO is enabled.
        if (isPrimaryIndex) {
            log.info("Wrote " + writeCount + " SPOs on ndx="
                    + ndx.getIndexMetadata().getName());
        }

        if (reportMutation) {

            final boolean[] b = ModifiedEnum.toBooleans(modified, n);

            // #of flags that are set.
            int onCount = 0;

            for (int i = 0; i < b.length; i++) {

                if (b[i]) {
                    onCount++;
                }

            }

            return new ResultBitBuffer(b.length, b, onCount);

        }

        return Long.valueOf(writeCount);

    }

    /**
     * Writes the metadata of the super class followed by the
     * <code>reportMutation</code> flag.
     */
    @Override
    protected void writeMetadata(final ObjectOutput out) throws IOException {

        super.writeMetadata(out);

        out.writeBoolean(reportMutation);

    }

    /**
     * Reads the metadata of the super class followed by the
     * <code>reportMutation</code> flag (the order used by
     * {@link #writeMetadata(ObjectOutput)}).
     */
    @Override
    protected void readMetadata(final ObjectInput in) throws IOException,
            ClassNotFoundException {

        super.readMetadata(in);

        reportMutation = in.readBoolean();

    }

    /**
     * Returns a {@link ResultBitBufferHandler} when mutations are reported and
     * a {@link LongAggregator} otherwise, matching the two result types of
     * {@link #applyOnce(IIndex, IRaba, IRaba)}.
     * <p>
     * NOTE(review): the meaning of the <code>multiplier</code> argument is
     * defined by {@link ResultBitBufferHandler} - confirm that <code>2</code>
     * is still the intended value.
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    @Override
    protected IResultHandler<Object, Object> newAggregator() {

        if (reportMutation) {

            return (IResultHandler) new ResultBitBufferHandler(getKeys()
                    .size(), 2/* multiplier */);

        }

        return (IResultHandler) new LongAggregator();

    }

}