package mil.nga.giat.geowave.analytic.partitioner;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import mil.nga.giat.geowave.analytic.PropertyManagement;
import mil.nga.giat.geowave.analytic.ScopedJobConfiguration;
import mil.nga.giat.geowave.analytic.model.IndexModelBuilder;
import mil.nga.giat.geowave.analytic.model.SpatialIndexModelBuilder;
import mil.nga.giat.geowave.analytic.param.ClusteringParameters;
import mil.nga.giat.geowave.analytic.param.CommonParameters;
import mil.nga.giat.geowave.analytic.param.ParameterEnum;
import mil.nga.giat.geowave.analytic.param.PartitionParameters;
import mil.nga.giat.geowave.analytic.param.PartitionParameters.Partition;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.index.PersistenceUtils;
import mil.nga.giat.geowave.core.index.sfc.SFCFactory.SFCType;
import mil.nga.giat.geowave.core.index.sfc.data.MultiDimensionalNumericData;
import mil.nga.giat.geowave.core.index.sfc.tiered.TieredSFCIndexFactory;
import mil.nga.giat.geowave.core.index.sfc.tiered.TieredSFCIndexStrategy;
import mil.nga.giat.geowave.core.store.dimension.NumericDimensionField;
import mil.nga.giat.geowave.core.store.index.CommonIndexModel;
import mil.nga.giat.geowave.core.store.index.PrimaryIndex;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
/**
 * Basic support class for {@link Partitioner} implementations.
 * <p>
 * The partitioner builds a single-tier Hilbert space filling curve index whose
 * per-dimension precision is derived from a distance threshold per dimension
 * (see {@link #initIndex(CommonIndexModel, double[])}). Entries are assigned to
 * partitions using the insertion IDs that the index strategy produces for the
 * numeric data supplied by {@link #getNumericData(Object)}.
 * <p>
 * The index is held in a transient field; it is written and restored by the
 * custom {@code writeObject}/{@code readObject} methods through
 * {@link PersistenceUtils}.
 *
 * @param <T>
 *            the type of entry being partitioned
 */
public abstract class AbstractPartitioner<T> implements
        Partitioner<T>
{
    /** Serialization version identifier. */
    private static final long serialVersionUID = 1L;

    /** Index used for partitioning; transient because it is serialized manually. */
    private transient PrimaryIndex index = null;

    /** Distance threshold for each dimension; null until configured. */
    private double[] distancePerDimension = null;

    /** Scaling factor (0 to 1.0) applied to the computed precision of each dimension. */
    private double precisionFactor = 1.0;

    /**
     * Creates an unconfigured partitioner. No index exists until one of the
     * {@code initialize} methods or
     * {@link #initIndex(CommonIndexModel, double[])} is invoked.
     */
    public AbstractPartitioner() {}

    /**
     * Creates a partitioner and immediately builds its index.
     *
     * @param indexModel
     *            the model supplying the dimensions of the index
     * @param distancePerDimension
     *            the distance threshold for each dimension; the array is stored
     *            as-is (not copied)
     */
    public AbstractPartitioner(
            final CommonIndexModel indexModel,
            final double[] distancePerDimension ) {
        super();
        this.distancePerDimension = distancePerDimension;
        this.initIndex(
                indexModel,
                distancePerDimension);
    }

    /**
     * Creates a partitioner with distance thresholds but without an index.
     *
     * @param distancePerDimension
     *            the distance threshold for each dimension; the array is stored
     *            as-is (not copied)
     */
    public AbstractPartitioner(
            final double[] distancePerDimension ) {
        super();
        this.distancePerDimension = distancePerDimension;
    }

    /**
     * @return the distance thresholds currently in use (the internal array,
     *         not a copy); may be null if not yet configured
     */
    protected double[] getDistancePerDimension() {
        return distancePerDimension;
    }

    /**
     * @return the index used for partitioning; null if it has not been built
     */
    protected PrimaryIndex getIndex() {
        return index;
    }

    /**
     * Collects the distinct partitions for an entry. Partitions derived from
     * the primary numeric data are flagged as primary; those derived from the
     * expansion data are not.
     *
     * @param entry
     *            the entry to partition
     * @return the distinct partitions, or an empty list if no numeric data is
     *         available for the entry
     */
    @Override
    public List<PartitionData> getCubeIdentifiers(
            final T entry ) {
        final NumericDataHolder numericData = getNumericData(entry);
        if (numericData == null) {
            return Collections.emptyList();
        }
        // a set removes duplicate partitions before the list is built
        final Set<PartitionData> partitionIdSet = new HashSet<PartitionData>();
        addPartitions(
                partitionIdSet,
                getIndex().getIndexStrategy().getInsertionIds(
                        numericData.primary),
                true);
        for (final MultiDimensionalNumericData expansionData : numericData.expansion) {
            addPartitions(
                    partitionIdSet,
                    getIndex().getIndexStrategy().getInsertionIds(
                            expansionData),
                    false);
        }
        return new ArrayList<PartitionData>(
                partitionIdSet);
    }

    /**
     * Streams the partitions for an entry to a callback. Unlike
     * {@link #getCubeIdentifiers(Object)}, partitions are not de-duplicated.
     *
     * @param entry
     *            the entry to partition
     * @param callback
     *            receives one call per insertion ID, primary data first
     * @throws Exception
     *             propagated from the callback
     */
    @Override
    public void partition(
            final T entry,
            final PartitionDataCallback callback )
            throws Exception {
        final NumericDataHolder numericData = getNumericData(entry);
        if (numericData == null) {
            return;
        }
        for (final ByteArrayId addId : getIndex().getIndexStrategy().getInsertionIds(
                numericData.primary)) {
            callback.partitionWith(new PartitionData(
                    addId,
                    true));
        }
        for (final MultiDimensionalNumericData expansionData : numericData.expansion) {
            for (final ByteArrayId addId : getIndex().getIndexStrategy().getInsertionIds(
                    expansionData)) {
                callback.partitionWith(new PartitionData(
                        addId,
                        false));
            }
        }
    }

    /**
     * Holder for the numeric data of an entry: the primary data plus the
     * expansion data used to produce non-primary partitions.
     */
    protected static class NumericDataHolder
    {
        MultiDimensionalNumericData primary;
        MultiDimensionalNumericData[] expansion;
    }

    /**
     * Extracts the numeric data used to partition an entry.
     *
     * @param entry
     *            the entry to partition
     * @return the numeric data, or null if the entry cannot be partitioned
     *         (callers treat null as "no partitions")
     */
    protected abstract NumericDataHolder getNumericData(
            final T entry );

    /**
     * Looks up the numeric ranges covered by a partition.
     *
     * @param partitionData
     *            the partition of interest
     * @return the range the index strategy reports for the partition's ID
     */
    public MultiDimensionalNumericData getRangesForPartition(
            final PartitionData partitionData ) {
        // go through the accessor, consistent with the other methods
        return getIndex().getIndexStrategy().getRangeForId(
                partitionData.getId());
    }

    /**
     * Wraps each ID in a {@link PartitionData} and adds it to the given set.
     *
     * @param masterList
     *            the set receiving the partitions
     * @param addList
     *            the IDs to add
     * @param isPrimary
     *            the primary flag given to every added partition
     */
    protected void addPartitions(
            final Set<PartitionData> masterList,
            final List<ByteArrayId> addList,
            final boolean isPrimary ) {
        for (final ByteArrayId addId : addList) {
            masterList.add(new PartitionData(
                    addId,
                    isPrimary));
        }
    }

    /**
     * Parses the comma-separated distance thresholds from the configuration.
     * When the property is absent the default "0.000001" is used.
     *
     * @param config
     *            the scoped job configuration
     * @return one distance per comma-separated value
     * @throws NumberFormatException
     *             if a value is not a parsable double
     */
    private static double[] getDistances(
            final ScopedJobConfiguration config ) {
        final String distances = config.getString(
                PartitionParameters.Partition.DISTANCE_THRESHOLDS,
                "0.000001");
        final String[] distancesArray = distances.split(",");
        final double[] parsedDistances = new double[distancesArray.length];
        for (int i = 0; i < distancesArray.length; i++) {
            // parseDouble yields the primitive directly (no boxing)
            parsedDistances[i] = Double.parseDouble(distancesArray[i]);
        }
        return parsedDistances;
    }

    /**
     * Initializes the partitioner from a job context by delegating to
     * {@link #initialize(ScopedJobConfiguration)}.
     *
     * @param context
     *            the job context providing the configuration
     * @param scope
     *            the scope used to resolve configuration values
     * @throws IOException
     *             if the index model builder cannot be instantiated
     */
    @Override
    public void initialize(
            final JobContext context,
            final Class<?> scope )
            throws IOException {
        initialize(new ScopedJobConfiguration(
                context.getConfiguration(),
                scope));
    }

    /**
     * Initializes the partitioner from a scoped configuration: reads the
     * distance thresholds and the precision factor, builds the index model and
     * then the index.
     * <p>
     * If fewer distances than model dimensions are configured, the last
     * configured distance is reused for the remaining dimensions.
     *
     * @param config
     *            the scoped job configuration
     * @throws IOException
     *             if the index model builder cannot be instantiated
     * @throws IllegalArgumentException
     *             if the precision factor is outside of 0 to 1
     */
    public void initialize(
            final ScopedJobConfiguration config )
            throws IOException {
        distancePerDimension = getDistances(config);

        this.precisionFactor = config.getDouble(
                Partition.PARTITION_PRECISION,
                1.0);

        if ((precisionFactor < 0) || (precisionFactor > 1.0)) {
            throw new IllegalArgumentException(
                    String.format(
                            "Precision value must be between 0 and 1: %.6f",
                            precisionFactor));
        }

        try {
            final IndexModelBuilder builder = config.getInstance(
                    CommonParameters.Common.INDEX_MODEL_BUILDER_CLASS,
                    IndexModelBuilder.class,
                    SpatialIndexModelBuilder.class);

            final CommonIndexModel model = builder.buildModel();
            final int dimensionCount = model.getDimensions().length;
            final int providedCount = distancePerDimension.length;
            if (dimensionCount > providedCount) {
                // pad the missing dimensions with the last configured distance
                final double[] padded = Arrays.copyOf(
                        distancePerDimension,
                        dimensionCount);
                Arrays.fill(
                        padded,
                        providedCount,
                        dimensionCount,
                        distancePerDimension[providedCount - 1]);
                distancePerDimension = padded;
            }
            this.initIndex(
                    model,
                    distancePerDimension);
        }
        catch (InstantiationException | IllegalAccessException e) {
            throw new IOException(
                    e);
        }
    }

    /**
     * Hands the parameters this partitioner reads (index model builder class,
     * distance thresholds and partition precision) to
     * {@code PropertyManagement.setConfig} together with the configuration and
     * scope.
     *
     * @param runTimeProperties
     *            the source of the run-time property values
     * @param scope
     *            the scope passed along to {@code setConfig}
     * @param configuration
     *            the configuration passed along to {@code setConfig}
     */
    @Override
    public void setup(
            final PropertyManagement runTimeProperties,
            final Class<?> scope,
            final Configuration configuration ) {
        final ParameterEnum<?>[] params = new ParameterEnum<?>[] {
            CommonParameters.Common.INDEX_MODEL_BUILDER_CLASS,
            PartitionParameters.Partition.DISTANCE_THRESHOLDS,
            Partition.PARTITION_PRECISION
        };
        runTimeProperties.setConfig(
                params,
                configuration,
                scope);
    }

    /**
     * Builds the single-tier Hilbert index used for partitioning.
     * <p>
     * For each dimension the number of bits of precision is
     * {@code |log2(range / (2 * distance))|}, truncated to an integer and
     * scaled by the precision factor. If the total exceeds 63 bits, every
     * dimension is scaled down proportionally.
     *
     * @param indexModel
     *            the model supplying the dimensions
     * @param distancePerDimensionForIndex
     *            the distance threshold per dimension; must have at least one
     *            value for each dimension of the model
     */
    protected void initIndex(
            final CommonIndexModel indexModel,
            final double[] distancePerDimensionForIndex ) {

        // truncating to lower precision
        final NumericDimensionField<?>[] dimensions = indexModel.getDimensions();

        // Accumulated as a long: a zero distance saturates a dimension's
        // precision to Integer.MAX_VALUE, and summing two or more of those in
        // an int would wrap negative and silently skip the rescaling below.
        long totalRequestedPrecision = 0;
        final int[] dimensionPrecision = new int[dimensions.length];
        for (int i = 0; i < dimensionPrecision.length; i++) {
            // total width (the distance is treated as a radius)
            final double distance = distancePerDimensionForIndex[i] * 2.0;
            final double log2Cells = Math.log(dimensions[i].getRange() / distance) / Math.log(2);
            // The absolute value is taken before the int cast so that a
            // negative-infinite ratio saturates to Integer.MAX_VALUE instead
            // of hitting Math.abs(Integer.MIN_VALUE), which stays negative.
            // For every in-range value the result equals
            // Math.abs((int) log2Cells).
            final int unscaledPrecision = (int) Math.abs(log2Cells);
            // adjust by precision factor (0 to 1.0)
            dimensionPrecision[i] = (int) (precisionFactor * unscaledPrecision);
            totalRequestedPrecision += dimensionPrecision[i];
        }
        // NOTE(review): 63 is presumably the number of bits available for the
        // combined precision (signed 64-bit value) - confirm
        if (totalRequestedPrecision > 63) {
            final double rescale = 63.0 / totalRequestedPrecision;
            for (int i = 0; i < dimensionPrecision.length; i++) {
                dimensionPrecision[i] = (int) (rescale * dimensionPrecision[i]);
            }
        }

        final TieredSFCIndexStrategy indexStrategy = TieredSFCIndexFactory.createSingleTierStrategy(
                dimensions,
                dimensionPrecision,
                SFCType.HILBERT);

        // Not relevant since this is a single tier strategy.
        // For now, just setting to a non-zero reasonable value
        indexStrategy.setMaxEstimatedDuplicateIdsPerDimension(2);

        index = new PrimaryIndex(
                indexStrategy,
                indexModel);
    }

    /**
     * @return the parameters this partitioner reads from the configuration
     */
    @Override
    public Collection<ParameterEnum<?>> getParameters() {
        return Arrays.asList(new ParameterEnum<?>[] {
            CommonParameters.Common.INDEX_MODEL_BUILDER_CLASS,
            PartitionParameters.Partition.DISTANCE_THRESHOLDS,
            Partition.PARTITION_PRECISION
        });
    }

    /**
     * Custom serialization: writes the binary form of the index, the
     * precision factor and the distance thresholds. A null threshold array
     * (e.g. an instance created with the no-argument constructor) is written
     * as length -1.
     *
     * @param stream
     *            the stream to write to
     * @throws IOException
     *             if writing fails
     */
    private void writeObject(
            final ObjectOutputStream stream )
            throws IOException {
        // NOTE(review): handling of a null index is left to PersistenceUtils - confirm
        final byte[] indexData = PersistenceUtils.toBinary(this.index);
        stream.writeInt(indexData.length);
        stream.write(indexData);
        stream.writeDouble(precisionFactor);
        if (distancePerDimension == null) {
            stream.writeInt(-1);
        }
        else {
            stream.writeInt(distancePerDimension.length);
            for (final double v : distancePerDimension) {
                stream.writeDouble(v);
            }
        }
    }

    /**
     * Custom deserialization mirroring {@code writeObject}. A negative
     * threshold count restores a null threshold array.
     *
     * @param stream
     *            the stream to read from
     * @throws IOException
     *             if reading fails
     * @throws ClassNotFoundException
     *             declared for the serialization contract
     */
    private void readObject(
            final java.io.ObjectInputStream stream )
            throws IOException,
            ClassNotFoundException {
        final byte[] indexData = new byte[stream.readInt()];
        stream.readFully(indexData);
        index = PersistenceUtils.fromBinary(
                indexData,
                PrimaryIndex.class);
        precisionFactor = stream.readDouble();
        final int distanceCount = stream.readInt();
        if (distanceCount < 0) {
            distancePerDimension = null;
        }
        else {
            distancePerDimension = new double[distanceCount];
            for (int i = 0; i < distanceCount; i++) {
                distancePerDimension[i] = stream.readDouble();
            }
        }
    }

    /**
     * Hash code based on the distance thresholds, the index and the precision
     * factor, consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = (prime * result) + Arrays.hashCode(distancePerDimension);
        result = (prime * result) + ((index == null) ? 0 : index.hashCode());
        final long precisionBits = Double.doubleToLongBits(precisionFactor);
        result = (prime * result) + (int) (precisionBits ^ (precisionBits >>> 32));
        return result;
    }

    /**
     * Two partitioners are equal when they are of the same class and have
     * equal distance thresholds, equal indices and the same precision factor.
     */
    @Override
    public boolean equals(
            final Object obj ) {
        if (this == obj) {
            return true;
        }
        if ((obj == null) || (getClass() != obj.getClass())) {
            return false;
        }
        final AbstractPartitioner<?> other = (AbstractPartitioner<?>) obj;
        if (!Arrays.equals(
                distancePerDimension,
                other.distancePerDimension)) {
            return false;
        }
        if (index == null) {
            if (other.index != null) {
                return false;
            }
        }
        else if (!index.equals(other.index)) {
            return false;
        }
        return Double.doubleToLongBits(precisionFactor) == Double.doubleToLongBits(other.precisionFactor);
    }
}