package mil.nga.giat.geowave.core.index.sfc.xz; import java.math.BigInteger; import java.nio.ByteBuffer; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import mil.nga.giat.geowave.core.index.ByteArrayId; import mil.nga.giat.geowave.core.index.ByteArrayRange; import mil.nga.giat.geowave.core.index.ByteArrayRange.MergeOperation; import mil.nga.giat.geowave.core.index.ByteArrayUtils; import mil.nga.giat.geowave.core.index.PersistenceUtils; import mil.nga.giat.geowave.core.index.sfc.RangeDecomposition; import mil.nga.giat.geowave.core.index.sfc.SFCDimensionDefinition; import mil.nga.giat.geowave.core.index.sfc.SpaceFillingCurve; import mil.nga.giat.geowave.core.index.sfc.data.BasicNumericDataset; import mil.nga.giat.geowave.core.index.sfc.data.MultiDimensionalNumericData; import mil.nga.giat.geowave.core.index.sfc.data.NumericData; public class XZOrderSFC implements SpaceFillingCurve { private final static Logger LOGGER = LoggerFactory.getLogger(XZOrderSFC.class); private static double LOG_POINT_FIVE = Math.log(0.5); // the initial level of 2^dim tree private XElement[] LevelOneElements; // indicator that we have searched a full level of the 2^dim tree private XElement LevelTerminator; // TODO magic number; have to determine most appropriate value? private static int g = 12; private SFCDimensionDefinition[] dimensionDefs; private int dimensionCount; private int nthPowerOfTwo; public XZOrderSFC() {} public XZOrderSFC( SFCDimensionDefinition[] dimensionDefs ) { this.dimensionDefs = dimensionDefs; init(); } private void init() { dimensionCount = dimensionDefs.length; nthPowerOfTwo = (int) Math.pow( 2, dimensionCount); double[] mins = new double[dimensionCount]; Arrays.fill( mins, 0.0); double[] maxes = new double[dimensionCount]; Arrays.fill( maxes, 1.0); double[] negativeOnes = new double[dimensionCount]; Arrays.fill( negativeOnes, -1.0); LevelOneElements = new XElement( mins, maxes, 1.0).children(); LevelTerminator = new XElement( negativeOnes, negativeOnes, 0.0); } @Override public byte[] getId( double[] values ) { if (values.length == dimensionCount) { // We have a point, not a bounding box int boxCount = 0; double[] boxedValues = new double[dimensionCount * 2]; for (int i = 0; i < dimensionCount; i++) { boxedValues[boxCount++] = values[i]; boxedValues[boxCount++] = values[i]; } values = boxedValues; } if (values.length != dimensionCount * 2) { LOGGER.error("Point or bounding box value count does not match number of indexed dimensions."); return null; } normalize(values); // calculate the length of the sequence code (section 4.1 of XZ-Ordering // paper) double maxDim = 0.0; for (int i = 0; (i + 1) < values.length; i++) { maxDim = Math.max( maxDim, Math.abs(values[i] - values[++i])); } // l1 (el-one) is a bit confusing to read, but corresponds with the // paper's definitions int l1 = (int) Math.floor(Math.log(maxDim) / LOG_POINT_FIVE); // the length will either be (l1) or (l1 + 1) int length = g; if (l1 < g) { double w2 = Math.pow( 0.5, l1 + 1); // width of an element at // resolution l2 (l1 + 1) length = l1 + 1; for (int i = 0; (i + 1) < values.length; i++) { if (!predicate( values[i], values[++i], w2)) { length = l1; break; } } } double[] minValues = new double[values.length / 2]; for (int i = 0; (i + 1) < values.length; i += 2) { minValues[i / 2] = values[i]; } return sequenceCode( minValues, length); } // predicate for checking how many axis the polygon intersects // math.floor(min / w2) * w2 == start of cell containing min private boolean predicate( double min, double max, double w2 ) { return max <= (Math.floor(min / w2) * w2) + (2 * w2); } /** * Normalize user space values to [0,1] */ private void normalize( double[] values ) { for (int i = 0; i < values.length; i++) { values[i] = dimensionDefs[i / 2].normalize(values[i]); } } private byte[] sequenceCode( double[] minValues, int length ) { double[] minsPerDimension = new double[dimensionCount]; Arrays.fill( minsPerDimension, 0.0); double[] maxesPerDimension = new double[dimensionCount]; Arrays.fill( maxesPerDimension, 1.0); long cs = 0L; for (int i = 0; i < length; i++) { double[] centers = new double[dimensionCount]; for (int j = 0; j < dimensionCount; j++) { centers[j] = (minsPerDimension[j] + maxesPerDimension[j]) / 2.0; } BitSet bits = new BitSet( dimensionCount); for (int j = dimensionCount - 1; j >= 0; j--) { if (minValues[j] >= centers[j]) { bits.set(j); } } long bTerm = 0L; long[] longs = bits.toLongArray(); if (longs.length > 0) { bTerm = longs[0]; } cs += 1L + bTerm * (((long) (Math.pow( nthPowerOfTwo, g - i))) - 1L) / ((long) nthPowerOfTwo - 1); for (int j = 0; j < dimensionCount; j++) { if (minValues[j] < centers[j]) { maxesPerDimension[j] = centers[j]; } else { minsPerDimension[j] = centers[j]; } } } return ByteArrayUtils.longToByteArray(cs); } /** * An extended Z curve element. Bounds refer to the non-extended z element * for simplicity of calculation. * * An extended Z element refers to a normal Z curve element that has its * upper bounds expanded by double its dimensions. By convention, an element * is always an n-cube. */ private static class XElement { private final double[] minsPerDimension; private final double[] maxesPerDimension; private double length; private final Double[] extendedBounds; private XElement[] children; private final int dimensionCount; private final int nthPowerOfTwo; public XElement( double[] minsPerDimension, double[] maxesPerDimension, double length ) { this.minsPerDimension = minsPerDimension; this.maxesPerDimension = maxesPerDimension; this.length = length; dimensionCount = minsPerDimension.length; nthPowerOfTwo = (int) Math.pow( 2, dimensionCount); extendedBounds = new Double[dimensionCount]; } public XElement( XElement xElement ) { this( Arrays.copyOf( xElement.minsPerDimension, xElement.minsPerDimension.length), Arrays.copyOf( xElement.maxesPerDimension, xElement.maxesPerDimension.length), xElement.length); } // lazy-evaluated extended bounds public double getExtendedBound( int dimension ) { if (extendedBounds[dimension] == null) { extendedBounds[dimension] = maxesPerDimension[dimension] + length; } return extendedBounds[dimension]; } public boolean isContained( final double[] windowMins, final double[] windowMaxes ) { for (int i = 0; i < dimensionCount; i++) { if (windowMins[i] > minsPerDimension[i] || windowMaxes[i] < getExtendedBound(i)) { return false; } } return true; } public boolean overlaps( final double[] windowMins, final double[] windowMaxes ) { for (int i = 0; i < dimensionCount; i++) { if (windowMaxes[i] < minsPerDimension[i] || windowMins[i] > getExtendedBound(i)) { return false; } } return true; } public XElement[] children() { if (children == null) { double[] centers = new double[dimensionCount]; for (int i = 0; i < dimensionCount; i++) { centers[i] = (minsPerDimension[i] + maxesPerDimension[i]) / 2.0; } double len = length / 2.0; children = new XElement[nthPowerOfTwo]; for (int i = 0; i < children.length; i++) { XElement child = new XElement( this); child.length = len; String binaryString = Integer.toBinaryString(i); // pad or trim binary as necessary to match dimensionality // of curve int paddingCount = binaryString.length() - dimensionCount; if (paddingCount > 0) { binaryString = binaryString.substring(paddingCount); } else { while (paddingCount < 0) { binaryString = "0" + binaryString; paddingCount++; } } for (int j = 1; j <= dimensionCount; j++) { if (binaryString.charAt(j - 1) == '1') { child.minsPerDimension[dimensionCount - j] = centers[dimensionCount - j]; } else { child.maxesPerDimension[dimensionCount - j] = centers[dimensionCount - j]; } } children[i] = child; } } return children; } } @Override public RangeDecomposition decomposeRangeFully( MultiDimensionalNumericData query ) { return decomposeRange( query, true, -1); } @Override public RangeDecomposition decomposeRange( MultiDimensionalNumericData query, boolean overInclusiveOnEdge, int maxRanges ) { // normalize query values double[] queryMins = query.getMinValuesPerDimension(); double[] queryMaxes = query.getMaxValuesPerDimension(); for (int i = 0; i < dimensionCount; i++) { queryMins[i] = dimensionDefs[i].normalize(queryMins[i]); queryMaxes[i] = dimensionDefs[i].normalize(queryMaxes[i]); } // stores our results - initial size of 100 in general saves us some // re-allocation ArrayList<ByteArrayRange> ranges = new ArrayList<ByteArrayRange>( 100); // values remaining to process - initial size of 100 in general saves us // some re-allocation ArrayDeque<XElement> remaining = new ArrayDeque<XElement>( 100); // initial level for (XElement levelOneEl : LevelOneElements) { remaining.add(levelOneEl); } remaining.add(LevelTerminator); // level of recursion short level = 1; while (level < g && !remaining.isEmpty() && (maxRanges < 1 || ranges.size() < maxRanges)) { XElement next = remaining.poll(); if (next.equals(LevelTerminator)) { // we've fully processed a level, increment our state if (!remaining.isEmpty()) { level = (short) (level + 1); remaining.add(LevelTerminator); } } else { checkValue( next, level, queryMins, queryMaxes, ranges, remaining); } } // bottom out and get all the ranges that partially overlapped but we // didn't fully process while (!remaining.isEmpty()) { XElement next = remaining.poll(); if (next.equals(LevelTerminator)) { level = (short) (level + 1); } else { ByteArrayRange range = sequenceInterval( next.minsPerDimension, level, false); ranges.add(range); } } // we've got all our ranges - now reduce them down by merging // overlapping values // note: we don't bother reducing the ranges as in the XZ paper, as // accumulo handles lots of ranges fairly well ArrayList<ByteArrayRange> result = (ArrayList<ByteArrayRange>) ByteArrayRange.mergeIntersections( ranges, MergeOperation.UNION); return new RangeDecomposition( result.toArray(new ByteArrayRange[result.size()])); } // checks a single value and either: // eliminates it as out of bounds // adds it to our results as fully matching, or // adds it to our results as partial matching and queues up it's children // for further processing private void checkValue( XElement value, Short level, double[] queryMins, double[] queryMaxes, ArrayList<ByteArrayRange> ranges, ArrayDeque<XElement> remaining ) { if (value.isContained( queryMins, queryMaxes)) { // whole range matches, happy day ByteArrayRange range = sequenceInterval( value.minsPerDimension, level, false); ranges.add(range); } else if (value.overlaps( queryMins, queryMaxes)) { // some portion of this range is excluded // add the partial match and queue up each sub-range for processing ByteArrayRange range = sequenceInterval( value.minsPerDimension, level, true); ranges.add(range); for (XElement child : value.children()) { remaining.add(child); } } } /** * Computes an interval of sequence codes for a given point - for polygons * this is the lower-left corner. * * @param minsPerDimension * normalized min values [0,1] per dimension * @param length * length of the sequence code that will used as the basis for * this interval * @param partial * true if the element partially intersects the query window, * false if it is fully contained * @return */ private ByteArrayRange sequenceInterval( double[] minsPerDimension, short length, boolean partial ) { byte[] min = sequenceCode( minsPerDimension, length); // if a partial match, we just use the single sequence code as an // interval // if a full match, we have to match all sequence codes starting with // the single sequence code byte[] max; if (partial) { max = min; } else { // from lemma 3 in the XZ-Ordering paper max = ByteArrayUtils.longToByteArray(ByteArrayUtils.byteArrayToLong(min) + (((long) (Math.pow( nthPowerOfTwo, g - length + 1))) - 1L) / ((long) (nthPowerOfTwo - 1))); } return new ByteArrayRange( new ByteArrayId( min), new ByteArrayId( max)); } @Override public byte[] toBinary() { final List<byte[]> dimensionDefBinaries = new ArrayList<byte[]>( dimensionDefs.length); int bufferLength = 4; for (final SFCDimensionDefinition sfcDimension : dimensionDefs) { final byte[] sfcDimensionBinary = PersistenceUtils.toBinary(sfcDimension); bufferLength += (sfcDimensionBinary.length + 4); dimensionDefBinaries.add(sfcDimensionBinary); } final ByteBuffer buf = ByteBuffer.allocate(bufferLength); buf.putInt(dimensionDefs.length); for (final byte[] dimensionDefBinary : dimensionDefBinaries) { buf.putInt(dimensionDefBinary.length); buf.put(dimensionDefBinary); } return buf.array(); } @Override public void fromBinary( final byte[] bytes ) { final ByteBuffer buf = ByteBuffer.wrap(bytes); final int numDimensions = buf.getInt(); dimensionDefs = new SFCDimensionDefinition[numDimensions]; for (int i = 0; i < numDimensions; i++) { final byte[] dim = new byte[buf.getInt()]; buf.get(dim); dimensionDefs[i] = PersistenceUtils.fromBinary( dim, SFCDimensionDefinition.class); } init(); } @Override public double[] getInsertionIdRangePerDimension() { double normalizedSize = Math.pow( 0.5, g); double[] rangesPerDimension = new double[dimensionCount]; for (int i = 0; i < dimensionCount; i++) { rangesPerDimension[i] = dimensionDefs[i].denormalize(normalizedSize); } return rangesPerDimension; } @Override public BigInteger getEstimatedIdCount( MultiDimensionalNumericData data ) { // TODO Replace hard-coded value with real implementation? return BigInteger.ONE; } // TODO Backwords (sfc-space to user-space) conversion?? @Override public MultiDimensionalNumericData getRanges( byte[] id ) { // use max range per dimension for now // to avoid false negatives NumericData[] dataPerDimension = new NumericData[dimensionCount]; int i = 0; for (SFCDimensionDefinition dim : dimensionDefs) { dataPerDimension[i++] = dim.getFullRange(); } return new BasicNumericDataset( dataPerDimension); } @Override public long[] getCoordinates( byte[] id ) { return null; } @Override public long[] normalizeRange( double minValue, double maxValue, int dimension ) { // TODO Auto-generated method stub return null; } }