package mil.nga.giat.geowave.adapter.vector.export;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.avro.mapred.AvroKey;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import mil.nga.giat.geowave.adapter.vector.AvroFeatureUtils;
import mil.nga.giat.geowave.adapter.vector.avro.AttributeValues;
import mil.nga.giat.geowave.adapter.vector.avro.AvroSimpleFeatureCollection;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey;

/**
 * Mapper for the vector export job: batches incoming SimpleFeatures into
 * AvroSimpleFeatureCollections, one writer per adapter ID, and emits each
 * collection once it reaches the configured batch size.
 */
public class VectorExportMapper extends
		Mapper<GeoWaveInputKey, SimpleFeature, AvroKey<AvroSimpleFeatureCollection>, NullWritable>
{
	private static final Logger LOGGER = LoggerFactory.getLogger(VectorExportMapper.class);
	private int batchSize;
	private final Map<ByteArrayId, AvroSFCWriter> adapterIdToAvroWriterMap = new HashMap<ByteArrayId, AvroSFCWriter>();
	private final NullWritable outVal = NullWritable.get();
	private final AvroKey<AvroSimpleFeatureCollection> outKey = new AvroKey<AvroSimpleFeatureCollection>();

	@Override
	protected void map(
			final GeoWaveInputKey key,
			final SimpleFeature value,
			final Mapper<GeoWaveInputKey, SimpleFeature, AvroKey<AvroSimpleFeatureCollection>, NullWritable>.Context context )
			throws IOException,
			InterruptedException {
		// lazily create one Avro writer per adapter ID so each feature type is
		// batched into its own collection
		AvroSFCWriter avroWriter = adapterIdToAvroWriterMap.get(key.getAdapterId());
		if (avroWriter == null) {
			avroWriter = new AvroSFCWriter(
					value.getFeatureType(),
					batchSize);
			adapterIdToAvroWriterMap.put(
					key.getAdapterId(),
					avroWriter);
		}
		// a non-null return value means a batch is full and ready to emit
		final AvroSimpleFeatureCollection retVal = avroWriter.write(value);
		if (retVal != null) {
			outKey.datum(retVal);
			context.write(
					outKey,
					outVal);
		}
	}

	@Override
	protected void setup(
			final Mapper<GeoWaveInputKey, SimpleFeature, AvroKey<AvroSimpleFeatureCollection>, NullWritable>.Context context )
			throws IOException,
			InterruptedException {
		super.setup(context);
		batchSize = context.getConfiguration().getInt(
				VectorMRExportJobRunner.BATCH_SIZE_KEY,
				VectorExportOptions.DEFAULT_BATCH_SIZE);
	}

	@Override
	protected void cleanup(
			final Mapper<GeoWaveInputKey, SimpleFeature, AvroKey<AvroSimpleFeatureCollection>, NullWritable>.Context context )
			throws IOException,
			InterruptedException {
		super.cleanup(context);
		writeRemainingAvroBatches(context);
	}

	private void writeRemainingAvroBatches(
			final Mapper<GeoWaveInputKey, SimpleFeature, AvroKey<AvroSimpleFeatureCollection>, NullWritable>.Context context )
			throws IOException,
			InterruptedException {
		// flush any partially filled batches that never reached the batch size
		for (final AvroSFCWriter writer : adapterIdToAvroWriterMap.values()) {
			if (writer.avList.size() > 0) {
				writer.simpleFeatureCollection.setSimpleFeatureCollection(writer.avList);
				outKey.datum(writer.simpleFeatureCollection);
				context.write(
						outKey,
						outVal);
			}
		}
	}

	private static class AvroSFCWriter
	{
		private final int batchSize;
		private final SimpleFeatureType sft;
		private AvroSimpleFeatureCollection simpleFeatureCollection = null;
		private List<AttributeValues> avList = null;

		private AvroSFCWriter(
				final SimpleFeatureType sft,
				final int batchSize ) {
			this.sft = sft;
			this.batchSize = batchSize;
		}

		private AvroSimpleFeatureCollection write(
				final SimpleFeature feature ) {
			AvroSimpleFeatureCollection retVal = null;
			if (simpleFeatureCollection == null) {
				newFeatureCollection();
			}
			else if (avList.size() >= batchSize) {
				// the current batch is full; return it for emission and start
				// a new collection
				simpleFeatureCollection.setSimpleFeatureCollection(avList);
				retVal = simpleFeatureCollection;
				newFeatureCollection();
			}
			final AttributeValues av = AvroFeatureUtils.buildAttributeValue(
					feature,
					sft);
			avList.add(av);
			return retVal;
		}

		// this isn't intended to be thread safe
		private void newFeatureCollection() {
			simpleFeatureCollection = new AvroSimpleFeatureCollection();
			try {
				simpleFeatureCollection.setFeatureType(AvroFeatureUtils.buildFeatureDefinition(
						null,
						sft,
						null,
						""));
			}
			catch (final IOException e) {
				// this should never actually happen, the default
				// classification is passed in
				LOGGER.warn(
						"Unable to find classification",
						e);
			}
			avList = new ArrayList<AttributeValues>(
					batchSize);
		}
	}
}