package mil.nga.giat.geowave.core.index.sfc.tiered;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;
import com.google.common.collect.ImmutableBiMap;
import com.google.common.collect.ImmutableBiMap.Builder;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.index.ByteArrayRange;
import mil.nga.giat.geowave.core.index.ByteArrayUtils;
import mil.nga.giat.geowave.core.index.CoordinateRange;
import mil.nga.giat.geowave.core.index.FloatCompareUtils;
import mil.nga.giat.geowave.core.index.HierarchicalNumericIndexStrategy;
import mil.nga.giat.geowave.core.index.IndexMetaData;
import mil.nga.giat.geowave.core.index.Mergeable;
import mil.nga.giat.geowave.core.index.MultiDimensionalCoordinateRanges;
import mil.nga.giat.geowave.core.index.MultiDimensionalCoordinates;
import mil.nga.giat.geowave.core.index.PersistenceUtils;
import mil.nga.giat.geowave.core.index.StringUtils;
import mil.nga.giat.geowave.core.index.dimension.NumericDimensionDefinition;
import mil.nga.giat.geowave.core.index.dimension.bin.BinRange;
import mil.nga.giat.geowave.core.index.sfc.RangeDecomposition;
import mil.nga.giat.geowave.core.index.sfc.SpaceFillingCurve;
import mil.nga.giat.geowave.core.index.sfc.binned.BinnedSFCUtils;
import mil.nga.giat.geowave.core.index.sfc.data.BinnedNumericDataset;
import mil.nga.giat.geowave.core.index.sfc.data.MultiDimensionalNumericData;
/**
* This class uses multiple SpaceFillingCurve objects, one per tier, to
* represent a single cohesive index strategy with multiple precisions
*
*/
public class TieredSFCIndexStrategy implements
HierarchicalNumericIndexStrategy
{
private final static Logger LOGGER = LoggerFactory.getLogger(TieredSFCIndexStrategy.class);
private final static int DEFAULT_MAX_ESTIMATED_DUPLICATE_IDS_PER_DIMENSION = 2;
protected static final int DEFAULT_MAX_RANGES = -1;
private SpaceFillingCurve[] orderedSfcs;
private ImmutableBiMap<Integer, Byte> orderedSfcIndexToTierId;
private NumericDimensionDefinition[] baseDefinitions;
private long maxEstimatedDuplicateIdsPerDimension;
private final Map<Integer, BigInteger> maxEstimatedDuplicatesPerDimensionalExtent = new HashMap<>();
protected TieredSFCIndexStrategy() {}
/**
* Constructor used to create a Tiered Index Strategy.
*
* @param baseDefinitions
* the dimension definitions of the space filling curve
* @param orderedSfcs
* the space filling curve used to create the strategy
*/
public TieredSFCIndexStrategy(
final NumericDimensionDefinition[] baseDefinitions,
final SpaceFillingCurve[] orderedSfcs,
final ImmutableBiMap<Integer, Byte> orderedSfcIndexToTierId ) {
this(
baseDefinitions,
orderedSfcs,
orderedSfcIndexToTierId,
DEFAULT_MAX_ESTIMATED_DUPLICATE_IDS_PER_DIMENSION);
}
/**
* Constructor used to create a Tiered Index Strategy.
*/
public TieredSFCIndexStrategy(
final NumericDimensionDefinition[] baseDefinitions,
final SpaceFillingCurve[] orderedSfcs,
final ImmutableBiMap<Integer, Byte> orderedSfcIndexToTierId,
final long maxEstimatedDuplicateIdsPerDimension ) {
this.orderedSfcs = orderedSfcs;
this.baseDefinitions = baseDefinitions;
this.orderedSfcIndexToTierId = orderedSfcIndexToTierId;
this.maxEstimatedDuplicateIdsPerDimension = maxEstimatedDuplicateIdsPerDimension;
initDuplicateIdLookup();
}
private void initDuplicateIdLookup() {
for (int i = 0; i <= baseDefinitions.length; i++) {
final long maxEstimatedDuplicateIds = (long) Math.pow(
maxEstimatedDuplicateIdsPerDimension,
i);
maxEstimatedDuplicatesPerDimensionalExtent.put(
i,
BigInteger.valueOf(maxEstimatedDuplicateIds));
}
}
@Override
public List<ByteArrayRange> getQueryRanges(
final MultiDimensionalNumericData indexedRange,
final int maxRangeDecomposition,
final IndexMetaData... hints ) {
// TODO don't just pass max ranges along to the SFC, take tiering and
// binning into account to limit the number of ranges correctly
final List<ByteArrayRange> queryRanges = new ArrayList<ByteArrayRange>();
final BinnedNumericDataset[] binnedQueries = BinnedNumericDataset.applyBins(
indexedRange,
baseDefinitions);
final TierIndexMetaData metaData = ((hints.length > 0) && (hints[0] != null) && (hints[0] instanceof TierIndexMetaData)) ? (TierIndexMetaData) hints[0]
: null;
for (int sfcIndex = orderedSfcs.length - 1; sfcIndex >= 0; sfcIndex--) {
if ((metaData != null) && (metaData.tierCounts[sfcIndex] == 0)) {
continue;
}
final SpaceFillingCurve sfc = orderedSfcs[sfcIndex];
final Byte tier = orderedSfcIndexToTierId.get(sfcIndex);
queryRanges.addAll(BinnedSFCUtils.getQueryRanges(
binnedQueries,
sfc,
maxRangeDecomposition, // for now we're doing this
// per SFC/tier rather than
// dividing by the tiers
tier));
}
return queryRanges;
}
/**
* Returns a list of query ranges for an specified numeric range.
*
* @param indexedRange
* defines the numeric range for the query
* @return a List of query ranges
*/
@Override
public List<ByteArrayRange> getQueryRanges(
final MultiDimensionalNumericData indexedRange,
final IndexMetaData... hints ) {
return getQueryRanges(
indexedRange,
DEFAULT_MAX_RANGES,
hints);
}
/**
* Returns a list of id's for insertion.
*
* @param indexedData
* defines the numeric data to be indexed
* @return a List of insertion ID's
*/
@Override
public List<ByteArrayId> getInsertionIds(
final MultiDimensionalNumericData indexedData ) {
return internalGetInsertionIds(
indexedData,
maxEstimatedDuplicatesPerDimensionalExtent.get(getRanges(indexedData)));
}
private static int getRanges(
final MultiDimensionalNumericData indexedData ) {
final double[] mins = indexedData.getMinValuesPerDimension();
final double[] maxes = indexedData.getMaxValuesPerDimension();
int ranges = 0;
for (int d = 0; d < mins.length; d++) {
if (!FloatCompareUtils.checkDoublesEqual(
mins[d],
maxes[d])) {
ranges++;
}
}
return ranges;
}
@Override
public List<ByteArrayId> getInsertionIds(
final MultiDimensionalNumericData indexedData,
final int maxDuplicateInsertionIdsPerDimension ) {
return internalGetInsertionIds(
indexedData,
BigInteger.valueOf(maxDuplicateInsertionIdsPerDimension));
}
private List<ByteArrayId> internalGetInsertionIds(
final MultiDimensionalNumericData indexedData,
final BigInteger maxDuplicateInsertionIds ) {
final BinnedNumericDataset[] ranges = BinnedNumericDataset.applyBins(
indexedData,
baseDefinitions);
// place each of these indices into a single row ID at a tier that will
// fit its min and max
final List<ByteArrayId> rowIds = new ArrayList<ByteArrayId>(
ranges.length);
for (final BinnedNumericDataset range : ranges) {
rowIds.addAll(getRowIds(
range,
maxDuplicateInsertionIds));
}
return rowIds;
}
@Override
public MultiDimensionalCoordinates getCoordinatesPerDimension(
final ByteArrayId insertionId ) {
final byte[] rowId = insertionId.getBytes();
if (rowId.length > 0) {
final Integer orderedSfcIndex = orderedSfcIndexToTierId.inverse().get(
rowId[0]);
return new MultiDimensionalCoordinates(
new byte[] {
rowId[0]
},
BinnedSFCUtils.getCoordinatesForId(
rowId,
baseDefinitions,
orderedSfcs[orderedSfcIndex]));
}
else {
LOGGER.warn("Row must at least contain a byte for tier");
}
return null;
}
@Override
public MultiDimensionalNumericData getRangeForId(
final ByteArrayId insertionId ) {
final byte[] rowId = insertionId.getBytes();
if (rowId.length > 0) {
final Integer orderedSfcIndex = orderedSfcIndexToTierId.inverse().get(
rowId[0]);
return BinnedSFCUtils.getRangeForId(
rowId,
baseDefinitions,
orderedSfcs[orderedSfcIndex]);
}
else {
LOGGER.warn("Row must at least contain a byte for tier");
}
return null;
}
@Override
public MultiDimensionalCoordinateRanges[] getCoordinateRangesPerDimension(
final MultiDimensionalNumericData dataRange,
final IndexMetaData... hints ) {
final List<MultiDimensionalCoordinateRanges> coordRanges = new ArrayList<MultiDimensionalCoordinateRanges>();
final BinRange[][] binRangesPerDimension = BinnedNumericDataset.getBinnedRangesPerDimension(
dataRange,
baseDefinitions);
final TierIndexMetaData metaData = ((hints.length > 0) && (hints[0] != null) && (hints[0] instanceof TierIndexMetaData)) ? (TierIndexMetaData) hints[0]
: null;
for (int sfcIndex = orderedSfcs.length - 1; sfcIndex >= 0; sfcIndex--) {
if ((metaData != null) && (metaData.tierCounts[sfcIndex] == 0)) {
continue;
}
final SpaceFillingCurve sfc = orderedSfcs[sfcIndex];
final Byte tier = orderedSfcIndexToTierId.get(sfcIndex);
coordRanges.add(BinnedSFCUtils.getCoordinateRanges(
binRangesPerDimension,
sfc,
baseDefinitions.length,
tier));
}
return coordRanges.toArray(new MultiDimensionalCoordinateRanges[] {});
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = (prime * result) + Arrays.hashCode(baseDefinitions);
result = (prime * result)
+ (int) (maxEstimatedDuplicateIdsPerDimension ^ (maxEstimatedDuplicateIdsPerDimension >>> 32));
result = (prime * result) + ((orderedSfcIndexToTierId == null) ? 0 : orderedSfcIndexToTierId.hashCode());
result = (prime * result) + Arrays.hashCode(orderedSfcs);
return result;
}
@Override
public boolean equals(
final Object obj ) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
final TieredSFCIndexStrategy other = (TieredSFCIndexStrategy) obj;
if (!Arrays.equals(
baseDefinitions,
other.baseDefinitions)) {
return false;
}
if (maxEstimatedDuplicateIdsPerDimension != other.maxEstimatedDuplicateIdsPerDimension) {
return false;
}
if (orderedSfcIndexToTierId == null) {
if (other.orderedSfcIndexToTierId != null) {
return false;
}
}
else if (!orderedSfcIndexToTierId.equals(other.orderedSfcIndexToTierId)) {
return false;
}
if (!Arrays.equals(
orderedSfcs,
other.orderedSfcs)) {
return false;
}
return true;
}
@Override
public String getId() {
return StringUtils.intToString(hashCode());
}
@Override
public NumericDimensionDefinition[] getOrderedDimensionDefinitions() {
return baseDefinitions;
}
public boolean tierExists(
Byte tierId ) {
return orderedSfcIndexToTierId.containsValue(tierId);
}
synchronized private List<ByteArrayId> getRowIds(
final BinnedNumericDataset index,
final BigInteger maxEstimatedDuplicateIds ) {
// most times this should be a single row ID, but if the lowest
// precision tier does not have a single SFC value for this data, it
// will be multiple row IDs
// what tier does this entry belong in?
for (int sfcIndex = orderedSfcs.length - 1; sfcIndex >= 0; sfcIndex--) {
final SpaceFillingCurve sfc = orderedSfcs[sfcIndex];
// loop through space filling curves and stop when both the min and
// max of the ranges fit the same row ID
final byte tierId = orderedSfcIndexToTierId.get(sfcIndex);
final List<ByteArrayId> rowIdsAtTier = getRowIdsAtTier(
index,
tierId,
sfc,
maxEstimatedDuplicateIds,
sfcIndex);
if (rowIdsAtTier != null) {
return rowIdsAtTier;
}
}
// this should never happen because of the check for tier 0
return new ArrayList<ByteArrayId>();
}
protected static List<ByteArrayId> getRowIdsAtTier(
final BinnedNumericDataset index,
final byte tierId,
final SpaceFillingCurve sfc,
final BigInteger maxEstimatedDuplicateIds,
final int sfcIndex ) {
final BigInteger rowCount = sfc.getEstimatedIdCount(index);
ByteArrayId singleId = BinnedSFCUtils.getSingleBinnedRowId(
rowCount,
tierId,
index,
sfc);
if (singleId != null) {
return Collections.singletonList(singleId);
}
if ((maxEstimatedDuplicateIds == null) || (rowCount.compareTo(maxEstimatedDuplicateIds) <= 0)
|| (sfcIndex == 0)) {
return decomposeRangesForEntry(
index,
tierId,
sfc);
}
return null;
}
protected static List<ByteArrayId> decomposeRangesForEntry(
final BinnedNumericDataset index,
final byte tierId,
final SpaceFillingCurve sfc ) {
final List<ByteArrayId> retVal = new ArrayList<ByteArrayId>();
final byte[] tierAndBinId = ByteArrayUtils.combineArrays(
new byte[] {
tierId
},
index.getBinId());
final RangeDecomposition rangeDecomp = sfc.decomposeRange(
index,
false,
DEFAULT_MAX_RANGES);
// this range does not fit into a single row ID at the lowest
// tier, decompose it
for (final ByteArrayRange range : rangeDecomp.getRanges()) {
final byte[] currentRowId = Arrays.copyOf(
range.getStart().getBytes(),
range.getStart().getBytes().length);
retVal.add(new ByteArrayId(
ByteArrayUtils.combineArrays(
tierAndBinId,
currentRowId)));
while (!Arrays.equals(
currentRowId,
range.getEnd().getBytes())) {
// increment until we reach the end row ID
boolean overflow = !ByteArrayUtils.increment(currentRowId);
if (!overflow) {
retVal.add(new ByteArrayId(
ByteArrayUtils.combineArrays(
tierAndBinId,
currentRowId)));
}
else {
// the increment caused an overflow which shouldn't
// ever happen assuming the start row ID is less
// than the end row ID
LOGGER
.warn("Row IDs overflowed when ingesting data; start of range decomposition must be less than or equal to end of range. This may be because the start of the decomposed range is higher than the end of the range.");
overflow = true;
break;
}
}
}
return retVal;
}
@Override
public byte[] toBinary() {
int byteBufferLength = 20 + (2 * orderedSfcIndexToTierId.size());
final List<byte[]> orderedSfcBinaries = new ArrayList<byte[]>(
orderedSfcs.length);
final List<byte[]> dimensionBinaries = new ArrayList<byte[]>(
baseDefinitions.length);
for (final SpaceFillingCurve sfc : orderedSfcs) {
final byte[] sfcBinary = PersistenceUtils.toBinary(sfc);
byteBufferLength += (4 + sfcBinary.length);
orderedSfcBinaries.add(sfcBinary);
}
for (final NumericDimensionDefinition dimension : baseDefinitions) {
final byte[] dimensionBinary = PersistenceUtils.toBinary(dimension);
byteBufferLength += (4 + dimensionBinary.length);
dimensionBinaries.add(dimensionBinary);
}
final ByteBuffer buf = ByteBuffer.allocate(byteBufferLength);
buf.putInt(orderedSfcs.length);
buf.putInt(baseDefinitions.length);
buf.putInt(orderedSfcIndexToTierId.size());
buf.putLong(maxEstimatedDuplicateIdsPerDimension);
for (final byte[] sfcBinary : orderedSfcBinaries) {
buf.putInt(sfcBinary.length);
buf.put(sfcBinary);
}
for (final byte[] dimensionBinary : dimensionBinaries) {
buf.putInt(dimensionBinary.length);
buf.put(dimensionBinary);
}
for (final Entry<Integer, Byte> entry : orderedSfcIndexToTierId.entrySet()) {
buf.put(entry.getKey().byteValue());
buf.put(entry.getValue());
}
return buf.array();
}
@Override
public void fromBinary(
final byte[] bytes ) {
final ByteBuffer buf = ByteBuffer.wrap(bytes);
final int numSfcs = buf.getInt();
final int numDimensions = buf.getInt();
final int mappingSize = buf.getInt();
maxEstimatedDuplicateIdsPerDimension = buf.getLong();
orderedSfcs = new SpaceFillingCurve[numSfcs];
baseDefinitions = new NumericDimensionDefinition[numDimensions];
for (int i = 0; i < numSfcs; i++) {
final byte[] sfc = new byte[buf.getInt()];
buf.get(sfc);
orderedSfcs[i] = PersistenceUtils.fromBinary(
sfc,
SpaceFillingCurve.class);
}
for (int i = 0; i < numDimensions; i++) {
final byte[] dim = new byte[buf.getInt()];
buf.get(dim);
baseDefinitions[i] = PersistenceUtils.fromBinary(
dim,
NumericDimensionDefinition.class);
}
final Builder<Integer, Byte> bimapBuilder = ImmutableBiMap.builder();
for (int i = 0; i < mappingSize; i++) {
bimapBuilder.put(
Byte.valueOf(
buf.get()).intValue(),
buf.get());
}
orderedSfcIndexToTierId = bimapBuilder.build();
initDuplicateIdLookup();
}
@Override
public SubStrategy[] getSubStrategies() {
final SubStrategy[] subStrategies = new SubStrategy[orderedSfcs.length];
for (int sfcIndex = 0; sfcIndex < orderedSfcs.length; sfcIndex++) {
final byte tierId = orderedSfcIndexToTierId.get(sfcIndex);
subStrategies[sfcIndex] = new SubStrategy(
new SingleTierSubStrategy(
orderedSfcs[sfcIndex],
baseDefinitions,
tierId),
new byte[] {
tierId
});
}
return subStrategies;
}
@Override
public double[] getHighestPrecisionIdRangePerDimension() {
// delegate this to the highest precision tier SFC
return orderedSfcs[orderedSfcs.length - 1].getInsertionIdRangePerDimension();
}
public void setMaxEstimatedDuplicateIdsPerDimension(
final int maxEstimatedDuplicateIdsPerDimension ) {
this.maxEstimatedDuplicateIdsPerDimension = maxEstimatedDuplicateIdsPerDimension;
initDuplicateIdLookup();
}
@Override
public Set<ByteArrayId> getNaturalSplits() {
final Set<ByteArrayId> retVal = new HashSet<ByteArrayId>(
orderedSfcIndexToTierId.size());
for (final Byte tier : orderedSfcIndexToTierId.values()) {
retVal.add(new ByteArrayId(
new byte[] {
tier
}));
}
return retVal;
}
@Override
public int getByteOffsetFromDimensionalIndex() {
int rowIdOffset = 1;
for (int dimensionIdx = 0; dimensionIdx < baseDefinitions.length; dimensionIdx++) {
final int binSize = baseDefinitions[dimensionIdx].getFixedBinIdSize();
if (binSize > 0) {
rowIdOffset += binSize;
}
}
return rowIdOffset;
}
@Override
public List<IndexMetaData> createMetaData() {
return Collections.singletonList((IndexMetaData) new TierIndexMetaData(
orderedSfcIndexToTierId.inverse()));
}
public static class TierIndexMetaData implements
IndexMetaData
{
private int[] tierCounts = null;
private ImmutableBiMap<Byte, Integer> orderedTierIdToSfcIndex = null;
public TierIndexMetaData() {}
public TierIndexMetaData(
final ImmutableBiMap<Byte, Integer> orderedTierIdToSfcIndex ) {
super();
tierCounts = new int[orderedTierIdToSfcIndex.size()];
this.orderedTierIdToSfcIndex = orderedTierIdToSfcIndex;
}
@Override
public byte[] toBinary() {
final ByteBuffer buffer = ByteBuffer.allocate(4 + (tierCounts.length * 4));
buffer.putInt(tierCounts.length);
for (final int count : tierCounts) {
buffer.putInt(count);
}
// do not use orderedTierIdToSfcIndex on query
// for (final Entry<Byte,Integer > entry :
// orderedTierIdToSfcIndex.entrySet()) {
// buffer.put(entry.getKey().byteValue());
// buffer.put(entry.getValue().byteValue());
// }
return buffer.array();
}
@Override
public void fromBinary(
final byte[] bytes ) {
final ByteBuffer buffer = ByteBuffer.wrap(bytes);
tierCounts = new int[buffer.getInt()];
for (int i = 0; i < tierCounts.length; i++) {
tierCounts[i] = buffer.getInt();
}
// do not use orderedTierIdToSfcIndex on query
// final Builder<Byte,Integer> bimapBuilder =
// ImmutableBiMap.builder();
// for (int i = 0; i < tierCounts.length; i++) {
// bimapBuilder.put(
// buffer.get(),
// Byte.valueOf(buffer.get()).intValue()
// );
// }
// orderedTierIdToSfcIndex = bimapBuilder.build();
}
@Override
public void merge(
final Mergeable merge ) {
if (merge instanceof TierIndexMetaData) {
final TierIndexMetaData other = (TierIndexMetaData) merge;
int pos = 0;
for (final int count : other.tierCounts) {
tierCounts[pos++] += count;
}
}
}
@Override
public void insertionIdsAdded(
final List<ByteArrayId> ids ) {
for (final ByteArrayId id : ids) {
final byte first = id.getBytes()[0];
if (orderedTierIdToSfcIndex.containsKey(first)) {
tierCounts[orderedTierIdToSfcIndex.get(
first).intValue()]++;
}
}
}
@Override
public void insertionIdsRemoved(
final List<ByteArrayId> ids ) {
for (final ByteArrayId id : ids) {
final byte first = id.getBytes()[0];
if (orderedTierIdToSfcIndex.containsKey(first)) {
tierCounts[orderedTierIdToSfcIndex.get(
first).intValue()]--;
}
}
}
/**
* Convert Tiered Index Metadata statistics to a JSON object
*/
@Override
public JSONObject toJSONObject()
throws JSONException {
JSONObject jo = new JSONObject();
jo.put(
"type",
"TieredSFCIndexStrategy");
jo.put(
"TierCountsSize",
tierCounts.length);
if (null == orderedTierIdToSfcIndex) {
jo.put(
"orderedTierIdToSfcIndex",
"null");
}
else {
jo.put(
"orderedTierIdToSfcIndexSize",
orderedTierIdToSfcIndex.size());
}
return jo;
}
}
}