package mil.nga.giat.geowave.analytic.mapreduce.clustering; import java.io.IOException; import java.util.Iterator; import java.util.UUID; import mil.nga.giat.geowave.adapter.vector.FeatureDataAdapter; import mil.nga.giat.geowave.analytic.AnalyticFeature; import mil.nga.giat.geowave.analytic.ScopedJobConfiguration; import mil.nga.giat.geowave.analytic.clustering.ClusteringUtils; import mil.nga.giat.geowave.analytic.extract.DimensionExtractor; import mil.nga.giat.geowave.analytic.extract.EmptyDimensionExtractor; import mil.nga.giat.geowave.analytic.param.ExtractParameters; import mil.nga.giat.geowave.analytic.param.GlobalParameters; import mil.nga.giat.geowave.core.index.StringUtils; import mil.nga.giat.geowave.mapreduce.GeoWaveConfiguratorBase; import mil.nga.giat.geowave.mapreduce.GeoWaveReducer; import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey; import org.apache.hadoop.io.ObjectWritable; import org.apache.hadoop.mapreduce.ReduceContext; import org.apache.hadoop.mapreduce.Reducer; import org.geotools.feature.type.BasicFeatureTypes; import org.opengis.feature.simple.SimpleFeature; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.vividsolutions.jts.geom.Geometry; /** * Remove duplicate input objects and write out as a simple feature with * geometry projected onto CRS EPSG:4326. The output feature contains the ID of * the originating object. The intent is to create a light weight uniform object * that reuses GeoWave data formats to feed analytic processes. * * If the input object does not require adjustment after de-duplication, use * {@link mil.nga.giat.geowave.accumulo.mapreduce.dedupe.GeoWaveDedupReducer} * * OutputFeature Attributes, see * {@link mil.nga.giat.geowave.analytic.AnalyticFeature.ClusterFeatureAttribute} * * Context configuration parameters include: * * @formatter:off * * * "SimpleFeatureOutputReducer.Extract.DimensionExtractClass" -> * {@link DimensionExtractor} to extract non-geometric dimensions * * "SimpleFeatureOutputReducer.Extract.OutputDataTypeId" -> the * name of the output SimpleFeature data type * * "SimpleFeatureOutputReducer.Global.BatchId" ->the id of the * batch; defaults to current time in millis (for range * comparisons) * * * @formatter:on */ public class SimpleFeatureOutputReducer extends GeoWaveReducer { protected DimensionExtractor<Object> dimExtractor; protected String outputDataTypeID; protected String batchID; protected String groupID; protected FeatureDataAdapter outputAdapter; protected static final Logger LOGGER = LoggerFactory.getLogger(SimpleFeatureOutputReducer.class); @Override protected void reduceNativeValues( final GeoWaveInputKey key, final Iterable<Object> values, final ReduceContext<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, Object> context ) throws IOException, InterruptedException { final Iterator<Object> valIt = values.iterator(); if (valIt.hasNext()) { key.setAdapterId(outputAdapter.getAdapterId()); final SimpleFeature feature = getSimpleFeature( key, valIt.next()); context.write( key, feature); } } private SimpleFeature getSimpleFeature( final GeoWaveInputKey key, final Object entry ) { final Geometry geometry = dimExtractor.getGeometry(entry); final double[] extraDims = dimExtractor.getDimensions(entry); final String inputID = StringUtils.stringFromBinary(key.getDataId().getBytes()); final SimpleFeature pointFeature = AnalyticFeature.createGeometryFeature( outputAdapter.getFeatureType(), batchID, inputID, inputID, groupID, 0.0, geometry, dimExtractor.getDimensionNames(), extraDims, 1, 1, 0); return pointFeature; } @SuppressWarnings("unchecked") @Override protected void setup( final Reducer<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, ObjectWritable>.Context context ) throws IOException, InterruptedException { super.setup(context); final ScopedJobConfiguration config = new ScopedJobConfiguration( context.getConfiguration(), SimpleFeatureOutputReducer.class); outputDataTypeID = config.getString( ExtractParameters.Extract.OUTPUT_DATA_TYPE_ID, "reduced_features"); batchID = config.getString( GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString()); groupID = config.getString( ExtractParameters.Extract.GROUP_ID, UUID.randomUUID().toString()); try { dimExtractor = config.getInstance( ExtractParameters.Extract.DIMENSION_EXTRACT_CLASS, DimensionExtractor.class, EmptyDimensionExtractor.class); } catch (final Exception e1) { LOGGER.warn( "Failed to instantiate " + GeoWaveConfiguratorBase.enumToConfKey( SimpleFeatureOutputReducer.class, ExtractParameters.Extract.DIMENSION_EXTRACT_CLASS), e1); throw new IOException( "Invalid configuration for " + GeoWaveConfiguratorBase.enumToConfKey( SimpleFeatureOutputReducer.class, ExtractParameters.Extract.DIMENSION_EXTRACT_CLASS)); } outputAdapter = AnalyticFeature.createGeometryFeatureAdapter( outputDataTypeID, dimExtractor.getDimensionNames(), config.getString( ExtractParameters.Extract.DATA_NAMESPACE_URI, BasicFeatureTypes.DEFAULT_NAMESPACE), ClusteringUtils.CLUSTERING_CRS); } }