/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.etl.mock.batch.aggregator;

import co.cask.cdap.api.annotation.Name;
import co.cask.cdap.api.annotation.Plugin;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.plugin.PluginClass;
import co.cask.cdap.api.plugin.PluginConfig;
import co.cask.cdap.api.plugin.PluginPropertyField;
import co.cask.cdap.etl.api.Emitter;
import co.cask.cdap.etl.api.PipelineConfigurer;
import co.cask.cdap.etl.api.StageConfigurer;
import co.cask.cdap.etl.api.batch.BatchAggregator;
import co.cask.cdap.etl.api.batch.BatchAggregatorContext;
import co.cask.cdap.etl.api.batch.BatchRuntimeContext;
import co.cask.cdap.etl.proto.v2.ETLPlugin;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Mock aggregator that groups records on one configured field and attaches a count to each group.
 * It exists to check that values reach the right groups, that one value can be emitted under
 * several group keys, that the group key class can be chosen at runtime, and that a supported
 * non-writable class can be used as the group key.
 */
@Plugin(type = BatchAggregator.PLUGIN_TYPE)
@Name("FieldCount")
public class FieldCountAggregator extends BatchAggregator<Object, StructuredRecord, StructuredRecord> {
  public static final PluginClass PLUGIN_CLASS = getPluginClass();
  private final Config config;
  private Schema schema;

  public FieldCountAggregator(Config config) {
    this.config = config;
  }

  /**
   * Registers the output schema derived from the plugin config on the stage configurer.
   *
   * @throws IllegalArgumentException if the configured field type is neither "string" nor "long"
   */
  @Override
  public void configurePipeline(PipelineConfigurer pipelineConfigurer) throws IllegalArgumentException {
    pipelineConfigurer.getStageConfigurer().setOutputSchema(config.getSchema());
  }

  /**
   * Picks the group key class for the run: {@link Long} when the configured field type is "long",
   * {@link String} for anything else.
   */
  @Override
  public void prepareRun(BatchAggregatorContext context) throws Exception {
    Class<?> keyClass = config.keyedOnLong() ? Long.class : String.class;
    context.setGroupKeyClass(keyClass);
  }

  /**
   * Caches the output schema so that {@link #aggregate} can build records against it.
   */
  @Override
  public void initialize(BatchRuntimeContext context) throws Exception {
    this.schema = config.getSchema();
  }

  /**
   * Emits two group keys per input record: the value of the configured field, followed by a
   * catch-all key ({@code 0L} for long fields, {@code "all"} otherwise). Non-long values are
   * emitted in their string form.
   */
  @Override
  public void groupBy(StructuredRecord input, Emitter<Object> emitter) throws Exception {
    boolean longKeys = config.keyedOnLong();
    Object fieldValue = input.get(config.fieldName);

    if (longKeys) {
      emitter.emit(fieldValue);
      emitter.emit(0L);
      return;
    }

    emitter.emit(fieldValue.toString());
    emitter.emit("all");
  }

  /**
   * Counts the values in the group and emits a single record holding the group key under the
   * configured field name and the count under "ct".
   */
  @Override
  public void aggregate(Object groupKey, Iterator<StructuredRecord> groupValues,
                        Emitter<StructuredRecord> emitter) throws Exception {
    long total = 0;
    // Only the number of values matters; each element is consumed and discarded.
    for (; groupValues.hasNext(); groupValues.next()) {
      total++;
    }

    StructuredRecord counted = StructuredRecord.builder(schema)
      .set(config.fieldName, groupKey)
      .set("ct", total)
      .build();
    emitter.emit(counted);
  }

  /**
   * Plugin configuration: the name of the field to group on and its type ("string" or "long").
   */
  public static class Config extends PluginConfig {
    private final String fieldName;
    private final String fieldType;

    public Config() {
      this.fieldName = "field";
      this.fieldType = "string";
    }

    /**
     * Tells whether the configured field type is "long", ignoring case.
     */
    private boolean keyedOnLong() {
      return "long".equalsIgnoreCase(fieldType);
    }

    /**
     * Builds the output record schema: the grouping field with its configured type plus a long
     * "ct" field, in a record named after the grouping field with a ".count" suffix.
     *
     * @throws IllegalArgumentException if the field type is neither "string" nor "long"
     */
    private Schema getSchema() {
      Schema.Type keyType;
      if (keyedOnLong()) {
        keyType = Schema.Type.LONG;
      } else if ("string".equalsIgnoreCase(fieldType)) {
        keyType = Schema.Type.STRING;
      } else {
        throw new IllegalArgumentException("Unsupported field type " + fieldType);
      }

      Schema.Field keyField = Schema.Field.of(fieldName, Schema.of(keyType));
      Schema.Field countField = Schema.Field.of("ct", Schema.of(Schema.Type.LONG));
      return Schema.recordOf(fieldName + ".count", keyField, countField);
    }
  }

  /**
   * Creates the {@link ETLPlugin} description of this aggregator for the given field name and type.
   */
  public static ETLPlugin getPlugin(String fieldName, String fieldType) {
    Map<String, String> pluginProps = new HashMap<>();
    pluginProps.put("fieldType", fieldType);
    pluginProps.put("fieldName", fieldName);
    return new ETLPlugin("FieldCount", BatchAggregator.PLUGIN_TYPE, pluginProps, null);
  }

  /**
   * Builds the {@link PluginClass} descriptor for this aggregator, declaring "fieldName" and
   * "fieldType" as string properties with the required flag set.
   */
  private static PluginClass getPluginClass() {
    Map<String, PluginPropertyField> propertyFields = new HashMap<>();
    for (String propertyName : new String[] {"fieldName", "fieldType"}) {
      propertyFields.put(propertyName, new PluginPropertyField(propertyName, "", "string", true));
    }
    return new PluginClass(BatchAggregator.PLUGIN_TYPE, "FieldCount", "",
                           FieldCountAggregator.class.getName(), "config", propertyFields);
  }
}