/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on July 31, 2015
*/
package com.bigdata.rdf.internal.impl.extensions;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Properties;
import java.util.Set;
import org.openrdf.model.Literal;
import org.openrdf.model.Value;
import com.bigdata.btree.keys.DefaultKeyBuilderFactory;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.IKeyBuilderFactory;
import com.bigdata.rdf.internal.IDatatypeURIResolver;
import com.bigdata.rdf.internal.IExtension;
import com.bigdata.rdf.internal.impl.literal.AbstractLiteralIV;
import com.bigdata.rdf.internal.impl.literal.LiteralExtensionIV;
import com.bigdata.rdf.internal.impl.literal.XSDIntegerIV;
import com.bigdata.rdf.model.BigdataLiteral;
import com.bigdata.rdf.model.BigdataURI;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueFactory;
import com.bigdata.service.geospatial.GeoSpatialDatatypeConfiguration;
import com.bigdata.service.geospatial.GeoSpatialDatatypeFieldConfiguration;
import com.bigdata.service.geospatial.IGeoSpatialLiteralSerializer;
/**
* Special encoding for GeoSpatial datatypes. We encode literals of the form
* <int_i>#...#<int_n> as BigInteger using the xsd:integer type. The conversion
* into BigInteger is based on a calculation of the z-order string for the n
* components.
*
* The code to create a literal is, e.g.:
* <code>"2#4"^^<http://www.bigdata.com/rdf/geospatial#geoSpatialLiteral></code>
*
* The two components are first broken down into long integers, namely
* - 2 -> 00000000 00000000 00000010
* - 4 -> 00000000 00000000 00000100
*
* The z-order encoding of these two strings is then
* 00000000 00000000 00000000 00000000 00000000 00000000 00011000
*
* Interpreted as BigInteger, this is the value 24, which is stored as integer
* literal in the database. The asValue method reverts this (lossless) encoding.
*
* @author <a href="mailto:ms@metaphacts.com">Michael Schmidt</a>
* @version $Id$
*/
public class GeoSpatialLiteralExtension<V extends BigdataValue> implements IExtension<V> {
// Number of bytes assumed per dimension (Double.SIZE / 8 = 8) when sizing the
// padded byte array that is rebuilt from a BigInteger.
private static final int BASE_SIZE = Double.SIZE / 8;
// Serializer used to split a literal string into components and to merge
// components back into a literal string.
private final IGeoSpatialLiteralSerializer litSerializer;
// The datatype URI handled by this extension, resolved from the configuration's URI.
private final BigdataURI datatype;
// Configuration describing the datatype (number of dimensions, per-field
// value type, multiplier and optional minimum value).
private final GeoSpatialDatatypeConfiguration datatypeConfig;
// Factory for thread local key builder
private final IKeyBuilderFactory kbfactory;
/**
 * Sets up an extension instance for the geospatial datatype described by the
 * given configuration.
 *
 * @param resolver
 *          resolver used to turn the URI from {@code config.getUri()} into
 *          the datatype URI handled by this extension
 * @param config
 *          the datatype configuration; it also supplies the literal
 *          serializer used by this extension
 */
public GeoSpatialLiteralExtension(
        final IDatatypeURIResolver resolver, final GeoSpatialDatatypeConfiguration config) {

    // the key builder factory is independent of the arguments
    this.kbfactory = new DefaultKeyBuilderFactory(new Properties());

    this.datatypeConfig = config;
    this.datatype = resolver.resolve(config.getUri());
    this.litSerializer = config.getLiteralSerializer();
}
/**
 * Obtains a key builder from the key builder factory of this extension.
 * Callers in this class reset the builder before using it.
 */
private IKeyBuilder getKeyBuilder() {
    final IKeyBuilder keyBuilder = kbfactory.getKeyBuilder();
    return keyBuilder;
}
/**
 * Returns the datatypes handled by this extension, i.e. a newly created set
 * containing only the configured geospatial datatype URI.
 */
@Override
public Set<BigdataURI> getDatatypes() {
    final Set<BigdataURI> handled = new LinkedHashSet<BigdataURI>();
    handled.add(datatype);
    return handled;
}
/****************************************************************************
* DIRECTION "DOWN":
* -----------------
* The following methods implement either the full or parts of the "down"
* direction. For the down direction, the conversion works as follows:
*
* A.) We get as value a literal such as "2.54#3.21"^^geo:geoSpatialLiteral
*
* B.) The literal is split into its components.
*
* C.) The components are mapped to long values (either trivially, if they
* represent long values according to the schema, or based on a
* precision); for instance, assuming precision=2 in the schema for the
* two components, the literal above would be converted into [254,321]
*
* D.) Next, from these long components, we compute the z-order string:
* D1.) If specified in the schema, we apply a range shift based on the
* minimum value known to show up in the data. Assuming, e.g., our
* minimum value is zero, the components are shifted as
* [Long.MIN+254, Long.MIN+321].
* D2.) We compute the z-order string by mixing up the components bit
* representation. For instance (not matching the values from
* the example above), if component one has bit representation
* 0011 and component two has bit representation 0110, we mix the
* bits as 00101101, where position 0,2,4,6 represent the second
* string, position 1,3,5,7 represent the first string.
*
* E.) We pad a 0 byte to the z-order string, to make sure that the
* BigInteger constructor (which expects a two's complement), does
* not destroy order.
*
* F.) The 0 byte padded string is converted into a BigInteger
*
* G.) The BigInteger is converted to an XSDIntegerIV
**************************************************************************/
/**
 * Creates an IV for the given value, which must be a {@link Literal}. The
 * string value of the literal is split into its components by the literal
 * serializer and the result is handed over to {@link #createIV(Object[])}.
 * Implements transformation A->G.
 *
 * @param value
 *          the value to encode
 * @return the IV representing the value
 * @throws IllegalArgumentException
 *           if the value is not a literal
 */
@SuppressWarnings("rawtypes")
@Override
public LiteralExtensionIV createIV(final Value value) {
    if (!(value instanceof Literal)) {
        throw new IllegalArgumentException("Value not a literal");
    }
    // split the value into its components, then delegate
    final Object[] components = litSerializer.toComponents(value.stringValue());
    return createIV(components);
}
/**
 * Creates an IV from a literal that has already been split into its
 * components. Implements transformation B->G.
 *
 * @param components
 *          the components of the literal, one per dimension
 * @return an extension IV wrapping an xsd:integer that carries the z-order
 *         value of the components
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public LiteralExtensionIV createIV(Object[] components) {

    // B->C->D: components to long values, long values to z-order byte[]
    final byte[] zOrder =
        toZOrderByteArray(componentsAsLongArr(components, datatypeConfig), datatypeConfig);

    // D->E->F: the leading zero byte makes the array a valid (non-negative)
    // two's complement input for the BigInteger constructor
    final BigInteger encoded = new BigInteger(padLeadingZero(zOrder));

    // F->G: wrap the big integer into an xsd:integer
    final AbstractLiteralIV delegate = new XSDIntegerIV(encoded);
    return new LiteralExtensionIV(delegate, datatype.getIV());
}
/**
 * Creates the two's complement z-order byte[] (i.e., the z-order byte[] with
 * a leading zero byte) from a component array.
 * Implements transformation B->E.
 *
 * @param components
 *          the components of the literal, one per dimension
 * @return the zero-padded z-order byte array
 */
public byte[] toZOrderByteArray(Object[] components) {

    // B->C: components as long values
    final long[] longs = componentsAsLongArr(components, datatypeConfig);

    // C->D: z-order byte[]; D->E: pad the leading zero byte
    return padLeadingZero(toZOrderByteArray(longs, datatypeConfig));
}
/**
 * Creates an IV from a z-order byte array. The leading zero byte is padded
 * by this method, so the array is expected in its unpadded form.
 *
 * Implements transformation D->G.
 *
 * @param zOrderByteArray
 *          the z-order byte array
 * @return an extension IV wrapping an xsd:integer that carries the z-order
 *         value
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public LiteralExtensionIV createIVFromZOrderByteArray(final byte[] zOrderByteArray) {

    // D->E->F: pad the zero byte, then interpret as two's complement value
    final BigInteger encoded = new BigInteger(padLeadingZero(zOrderByteArray));

    // F->G: wrap the big integer into an xsd:integer
    final AbstractLiteralIV delegate = new XSDIntegerIV(encoded);
    return new LiteralExtensionIV(delegate, datatype.getIV());
}
/**
 * Convert the components into a long array. The array is passed as an
 * Object[], in order to allow for unparsed strings as well as numeric objects
 * (anything whose {@code toString()} can be parsed) as input. The array must
 * have the same size as the number of dimensions, otherwise an
 * {@link InvalidGeoSpatialLiteralError} is thrown.
 *
 * For each component, the value type of the corresponding field in the
 * passed datatype configuration decides how it is converted:
 * DOUBLE components are parsed as {@link BigDecimal}, multiplied with the
 * field's multiplier, and the fractional part of the product is discarded;
 * LONG components are parsed as {@link BigInteger} and multiplied with the
 * field's multiplier.
 *
 * A product that does not fit into a 64-bit long is rejected instead of
 * being silently reduced to its low-order 64 bits, as the latter would
 * encode a different value than the one given in the literal.
 *
 * Implements step B->C.
 *
 * @param components
 *          the components of the literal, one per dimension
 * @param datatypeConfig
 *          the configuration describing the fields of the datatype
 * @return the components as long values
 * @throws InvalidGeoSpatialLiteralError
 *           if the number of components is wrong, or if a component cannot
 *           be parsed, has an unsupported value type, or is out of range
 */
public final long[] componentsAsLongArr(
        final Object[] components, final GeoSpatialDatatypeConfiguration datatypeConfig) {

    final int numDimensions = datatypeConfig.getNumDimensions();
    final long[] ret = new long[numDimensions];

    if (numDimensions != components.length) {
        throw new InvalidGeoSpatialLiteralError(
            "Literal value has wrong format. Expected " + numDimensions
            + " components for datatype, but literal has " + components.length + " components.");
    }

    try {

        for (int i = 0; i < components.length; i++) {

            final Object component = components[i];
            final GeoSpatialDatatypeFieldConfiguration fieldConfig = datatypeConfig.getFields().get(i);

            switch (fieldConfig.getValueType()) {
            case DOUBLE:
            {
                final BigDecimal precisionAdjustment = BigDecimal.valueOf(fieldConfig.getMultiplier());
                final BigDecimal componentAsBigDecimal =
                    component instanceof BigDecimal ?
                    (BigDecimal) component : new BigDecimal(component.toString());

                final BigDecimal x = precisionAdjustment.multiply(componentAsBigDecimal);

                // toBigInteger() discards the fractional part, exactly as
                // longValue() does, but leaves the range check to us
                ret[i] = toLongChecked(x.toBigInteger());
                break;
            }
            case LONG:
            {
                final BigInteger precisionAdjustment = BigInteger.valueOf(fieldConfig.getMultiplier());
                final BigInteger componentAsBigInteger =
                    component instanceof BigInteger ?
                    (BigInteger) component : new BigInteger(component.toString());

                final BigInteger x = precisionAdjustment.multiply(componentAsBigInteger);
                ret[i] = toLongChecked(x);
                break;
            }
            default:
                throw new IllegalArgumentException("Invalid field configuration: value type not supported.");
            }
        }

    } catch (Exception e) {

        // any parsing, configuration or range problem is reported as an invalid
        // literal; only the message of the original exception is carried over
        throw new InvalidGeoSpatialLiteralError(e.getMessage());
    }

    return ret;
}

/**
 * Returns the given value as long, rejecting values outside of the range
 * [Long.MIN_VALUE, Long.MAX_VALUE].
 */
private static long toLongChecked(final BigInteger value) {
    // bitLength() excludes the sign bit, so every long has a bit length <= 63
    if (value.bitLength() > 63) {
        throw new ArithmeticException(
            "Component value " + value + " does not fit into a 64-bit long.");
    }
    return value.longValue();
}
/**
 * Converts a long array representing the components to a z-order byte array.
 * For every field that has a minimum value configured, the component is
 * range shifted before it is appended to the key builder.
 *
 * Implements step C->D.
 *
 * @param componentsAsLongArr
 *          the components as long values, one per dimension
 * @param datatypeConfig
 *          the configuration describing the fields of the datatype
 * @return the z-order byte array computed by the key builder
 */
public byte[] toZOrderByteArray(
        final long[] componentsAsLongArr, final GeoSpatialDatatypeConfiguration datatypeConfig) {

    final IKeyBuilder keyBuilder = getKeyBuilder();
    keyBuilder.reset();

    for (int dim = 0; dim < componentsAsLongArr.length; dim++) {

        // kept as primitive long, so that the long variant of append is used
        long encoded = componentsAsLongArr[dim];

        // shift the component by the configured range, if any
        final Long lowerBound = datatypeConfig.getFields().get(dim).getMinValue();
        if (lowerBound != null) {
            encoded = encodeRangeShift(encoded, lowerBound);
        }

        keyBuilder.append(encoded);
    }

    return keyBuilder.toZOrder(datatypeConfig.getNumDimensions());
}
/**
 * Shifts a value relative to the given minimum value, such that the minimum
 * value itself is mapped to {@link Long#MIN_VALUE}, the next value to
 * {@code Long.MIN_VALUE + 1}, and so on.
 *
 * Implements steps C->D1.
 *
 * @param val
 *          the value to shift
 * @param minValue
 *          the minimum value of the range, or null if no range shift is set
 * @return the shifted value, or the value itself if no minimum value is given
 * @throws RuntimeException
 *           if the value is smaller than the minimum value
 */
protected Long encodeRangeShift(final Long val, final Long minValue) {

    // no range shift configured: value is passed through
    if (minValue == null) {
        return val;
    }

    if (val < minValue) {
        throw new RuntimeException("Illegal range shift -- datatype violation.");
    }

    final long offsetFromMin = val - minValue;
    return Long.MIN_VALUE + offsetFromMin;
}
/**
 * Returns a copy of the byte array with one additional zero byte in front.
 * With the zero byte in front, the array denotes a non-negative number when
 * read as two's complement, so it can be passed to the {@link BigInteger}
 * constructor without the first data bit being taken as sign bit. Order
 * between z-order strings is not harmed as long as the padding is applied
 * consistently.
 *
 * Implements step D->E.
 *
 * @param arr
 *          the byte array to pad
 * @return a new array of length {@code arr.length + 1} starting with a zero
 *         byte, followed by the bytes of the input
 */
public byte[] padLeadingZero(byte[] arr) {
    // new arrays are zero-initialised, so index 0 already holds the padding
    final byte[] padded = new byte[arr.length + 1];
    System.arraycopy(arr, 0, padded, 1, arr.length);
    return padded;
}
/****************************************************************************
* DIRECTION "UP":
* -----------------
* The following methods implement either the full or parts of the "up"
* direction (which is the inverse of the down direction discussed in detail
* above).
***************************************************************************/
/**
 * Decodes an IV created by this extension back into a literal of the
 * configured geospatial datatype.
 *
 * Implements transformation G->A.
 *
 * @param iv
 *          the IV to decode
 * @param vf
 *          the value factory used to create the literal
 * @return the literal carrying the serialized components
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public V asValue(final LiteralExtensionIV iv, final BigdataValueFactory vf) {

    // G->C: long values encoded in the IV; C->B: component array
    final Object[] components = longArrAsComponentArr(asLongArray(iv));

    // merge the components into the literal string
    final String label = litSerializer.fromComponents(components);

    // B->A: literal carrying the component string
    return (V) vf.createLiteral(label, datatype);
}
/**
 * Decodes the long values of the components that are represented through
 * the xsd:integer wrapped by the given IV.
 *
 * Implements transformation G->C.
 *
 * @param iv
 *          the IV to decode; its extension IV must equal the IV of the
 *          datatype of this extension
 * @return the components as long values
 * @throws IllegalArgumentException
 *           if the IV belongs to a different datatype
 */
@SuppressWarnings("rawtypes")
public long[] asLongArray(final LiteralExtensionIV iv) {

    final boolean sameDatatype = datatype.getIV().equals(iv.getExtensionIV());
    if (!sameDatatype) {
        throw new IllegalArgumentException("unrecognized datatype");
    }

    final BigInteger encoded = iv.getDelegate().integerValue();

    // F->D: big integer to z-order byte[]; D->C: original long values
    return fromZOrderByteArray(toZOrderByteArray(encoded));
}
/**
 * Converts an IV into its component array, by first decoding the long
 * values of the components and then converting them into components.
 *
 * @param iv
 *          the IV to decode
 * @return the component array
 */
@SuppressWarnings("rawtypes")
public Object[] toComponentArray(LiteralExtensionIV iv) {
    return longArrAsComponentArr(asLongArray(iv));
}
/**
 * Decodes a BigInteger into a zOrder byte[] (without leading zero). The
 * resulting array always has {@code numDimensions * BASE_SIZE} bytes.
 *
 * Implements transformation F->D.
 *
 * @param bigInt
 *          the big integer carrying the z-order value
 * @return the z-order byte array without the leading zero byte
 */
public byte[] toZOrderByteArray(final BigInteger bigInt) {

    final int numDimensions = datatypeConfig.getNumDimensions();

    // F->E: the minimal two's complement representation of the value
    final byte[] minimal = bigInt.toByteArray();

    // The minimal representation omits leading zero bytes, so it is copied
    // right-aligned into an array of the full size (all dimensions plus the
    // padded zero byte); the bytes in front stay zero.
    final int paddedLength = numDimensions * BASE_SIZE + 1;
    final byte[] padded = new byte[paddedLength];
    System.arraycopy(minimal, 0, padded, paddedLength - minimal.length, minimal.length);

    // E->D: drop the leading zero byte
    return unpadLeadingZero(padded);
}
/**
 * Converts an IV to a zOrder byte array (without leading zero).
 * Entry point for query service, somewhat outside the pipeline described above.
 *
 * @param literalIV
 *          the IV carrying the z-order value; must be an {@link XSDIntegerIV}
 * @return the z-order byte array without the leading zero byte
 * @throws RuntimeException
 *           if the IV is not an {@link XSDIntegerIV}
 */
public byte[] toZOrderByteArray(AbstractLiteralIV<BigdataLiteral, ?> literalIV) {
    if (literalIV instanceof XSDIntegerIV) {
        return toZOrderByteArray(literalIV.integerValue());
    }
    throw new RuntimeException("zOrder value IV must be XSDInteger");
}
/**
 * Converts a z-order byte array to a long array representing the components.
 * For every field that has a minimum value configured, the range shift
 * applied during encoding is reverted.
 *
 * Implements transformation D2 -> C.
 *
 * @param byteArr
 *          the z-order byte array (without leading zero byte)
 * @return the components as long values
 */
public long[] fromZOrderByteArray(final byte[] byteArr) {

    final IKeyBuilder keyBuilder = getKeyBuilder();
    keyBuilder.reset();
    for (final byte b : byteArr) {
        keyBuilder.append(b);
    }

    final long[] decoded = keyBuilder.fromZOrder(datatypeConfig.getNumDimensions());

    // revert range shift
    for (int dim = 0; dim < decoded.length; dim++) {
        final Long lowerBound = datatypeConfig.getFields().get(dim).getMinValue();
        if (lowerBound == null) {
            continue;
        }
        decoded[dim] = decodeRangeShift(decoded[dim], lowerBound);
    }

    return decoded;
}
/**
 * Inverts the {@link #encodeRangeShift(Long, Long)} operation, i.e. maps
 * {@link Long#MIN_VALUE} back to the minimum value, and so on.
 * Implements steps D1->C.
 *
 * @param val
 *          the range shifted value
 * @param minValue
 *          the minimum value of the range, or null if no range shift is set
 * @return the original value, or the value itself if no minimum value is given
 */
protected Long decodeRangeShift(final Long val, final Long minValue) {

    // no range shift configured: value is passed through
    if (minValue == null) {
        return val;
    }

    final long offsetFromMin = val - Long.MIN_VALUE;
    return offsetFromMin + minValue;
}
/**
 * Converts a long[] reflecting the long values of the individual components
 * back into a component array representing the literal. Depending on the
 * value type of the corresponding field, a long value is divided by the
 * field's multiplier either as double (DOUBLE) or as long (LONG).
 *
 * Implements step C->B.
 *
 * @param arr
 *          the components as long values, one per dimension
 * @return the component array, holding a Double for each DOUBLE field and
 *         a Long for each LONG field
 * @throws IllegalArgumentException
 *           if the array length differs from the number of dimensions
 */
public final Object[] longArrAsComponentArr(final long[] arr) {

    final int numDimensions = datatypeConfig.getNumDimensions();
    if (arr.length != numDimensions) {
        throw new IllegalArgumentException(
            "Encoding has wrong format. Expected " + numDimensions
            + " components for datatype.");
    }

    final Object[] components = new Object[arr.length];
    for (int dim = 0; dim < arr.length; dim++) {

        final GeoSpatialDatatypeFieldConfiguration field = datatypeConfig.getFields().get(dim);
        final double multiplier = field.getMultiplier();
        final long encoded = arr[dim];

        switch (field.getValueType()) {
        case DOUBLE:
            // floating point division, result is boxed as Double
            components[dim] = (double) encoded / multiplier;
            break;
        case LONG:
            // integer division, result is boxed as Long
            components[dim] = encoded / (long) multiplier;
            break;
        default:
            throw new RuntimeException("Uncovered decoding case. Please fix code.");
        }
    }

    return components;
}
/**
 * Reverts method {@link #padLeadingZero(byte[])}, i.e. returns a copy of
 * the byte array without its first byte.
 *
 * Implements step E->D.
 *
 * @param arr
 *          the padded byte array; must contain at least one byte
 * @return a new array of length {@code arr.length - 1} holding all bytes of
 *         the input except for the first one
 */
public byte[] unpadLeadingZero(byte[] arr) {
    final byte[] unpadded = new byte[arr.length - 1];
    System.arraycopy(arr, 1, unpadded, 0, unpadded.length);
    return unpadded;
}
/**
 * Returns the number of dimensions of the literal, as given by the datatype
 * configuration of this extension.
 *
 * @return the number of dimensions
 */
public int getNumDimensions() {
    final int numDimensions = datatypeConfig.getNumDimensions();
    return numDimensions;
}
/**
 * Returns the datatype configuration this extension was set up with.
 *
 * @return the datatype configuration
 */
public GeoSpatialDatatypeConfiguration getDatatypeConfig() {
    return this.datatypeConfig;
}
}