/**
 * Licensed under the terms of the Apache License 2.0. Please see LICENSE file in the project root for terms.
 */
package apex.benchmark;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.net.URI;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.apex.malhar.lib.dimensions.DimensionsEvent.Aggregate;
import org.apache.apex.malhar.lib.dimensions.DimensionsEvent.InputEvent;
import org.apache.commons.lang.mutable.MutableLong;
import org.apache.hadoop.conf.Configuration;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;

import com.datatorrent.api.Context;
import com.datatorrent.api.Context.DAGContext;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.Context.PortContext;
import com.datatorrent.api.DAG;
import com.datatorrent.api.DAG.Locality;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.StreamCodec;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.api.annotation.ApplicationAnnotation;
import com.datatorrent.common.partitioner.StatelessPartitioner;
import com.datatorrent.contrib.dimensions.AppDataSingleSchemaDimensionStoreHDHT;
import com.datatorrent.contrib.dimensions.DimensionStoreHDHTNonEmptyQueryResultUnifier;
import com.datatorrent.contrib.hdht.tfile.TFileImpl;
import com.datatorrent.lib.appdata.schemas.SchemaUtils;
import com.datatorrent.lib.counters.BasicCounters;
import com.datatorrent.lib.dimensions.DimensionsComputationFlexibleSingleSchemaPOJO;
import com.datatorrent.lib.io.PubSubWebSocketAppDataQuery;
import com.datatorrent.lib.io.PubSubWebSocketAppDataResult;
import com.datatorrent.lib.statistics.DimensionsComputationUnifierImpl;
import com.datatorrent.lib.stream.DevNull;
import com.datatorrent.netlet.util.Slice;

/**
 * This application includes just the generator and the dimensions computation.
 *
 * @author bright
 */
@ApplicationAnnotation(name = ApplicationDimensionComputation.APP_NAME)
public class ApplicationDimensionComputation implements StreamingApplication
{
  public static final String APP_NAME = "DimensionComputation";
  public static final String DIMENSION_SCHEMA = "eventSchema.json";

  private static final transient Logger logger = LoggerFactory.getLogger(ApplicationDimensionComputation.class);

  protected static final int PARTITION_NUM = 20;
  protected String eventSchemaLocation = DIMENSION_SCHEMA;
  protected String PROP_STORE_PATH;
  protected int storePartitionCount = 4;
  protected boolean includeQuery = true;

  protected static final int STREAMING_WINDOW_SIZE_MILLIS = 200;

  public ApplicationDimensionComputation()
  {
    this(APP_NAME);
  }

  public ApplicationDimensionComputation(String appName)
  {
    PROP_STORE_PATH = "dt.application." + appName + ".operator.Store.fileStore.basePathPrefix";
  }
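  // For example, with the default application name the constructor above resolves
  // the store base path property key to:
  //   dt.application.DimensionComputation.operator.Store.fileStore.basePathPrefix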
  @Override
  public void populateDAG(DAG dag, Configuration configuration)
  {
    DimensionTupleGenerateOperator generateOperator = new DimensionTupleGenerateOperator();
    dag.addOperator("Generator", generateOperator);
    dag.setAttribute(generateOperator, Context.OperatorContext.PARTITIONER,
        new StatelessPartitioner<EventGenerator>(PARTITION_NUM));

    populateDimensionsDAG(dag, configuration, generateOperator.outputPort);
  }

  public void populateDimensionsDAG(DAG dag, Configuration conf, DefaultOutputPort<DimensionTuple> upstreamPort)
  {
    final String eventSchema = SchemaUtils.jarResourceFileToString(eventSchemaLocation);

    // dimensions computation
    DimensionsComputationFlexibleSingleSchemaPOJO dimensions =
        dag.addOperator("DimensionsComputation", DimensionsComputationFlexibleSingleSchemaPOJO.class);

    // Set operator properties
    // key expressions
    {
      Map<String, String> keyToExpression = Maps.newHashMap();
      keyToExpression.put("campaignId", DimensionTuple.CAMPAIGNID);
      keyToExpression.put("time", DimensionTuple.EVENTTIME);
      dimensions.setKeyToExpression(keyToExpression);
    }

    // aggregate expressions
    {
      Map<String, String> valueToExpression = Maps.newHashMap();
      valueToExpression.put("clicks", DimensionTuple.CLICKS);
      valueToExpression.put("latency", DimensionTuple.LATENCY);
      dimensions.setAggregateToExpression(valueToExpression);
    }

    // event schema
    dimensions.setConfigurationSchemaJSON(eventSchema);

    dimensions.setUnifier(new DimensionsComputationUnifierImpl<InputEvent, Aggregate>());
    dag.setUnifierAttribute(dimensions.output, OperatorContext.MEMORY_MB, 10240);
    dag.setInputPortAttribute(dimensions.input, Context.PortContext.PARTITION_PARALLEL, true);

    // store
    AppDataSingleSchemaDimensionStoreHDHT store = createStore(dag, conf, eventSchema);
    // cache 10000 * 5 ms worth of data, i.e. 250 streaming windows of 200 ms each
    store.setCacheWindowDuration(10000 * 5 / STREAMING_WINDOW_SIZE_MILLIS);

    dag.addStream("GenerateStream", upstreamPort, dimensions.input).setLocality(Locality.CONTAINER_LOCAL);

    StoreStreamCodec codec = new StoreStreamCodec();
    dag.setInputPortAttribute(store.input, PortContext.STREAM_CODEC, codec);
    dag.addStream("DimensionalStream", dimensions.output, store.input);

    if (includeQuery) {
      createQuery(dag, conf, store);

      // wsOut
      PubSubWebSocketAppDataResult wsOut = createQueryResult(dag, conf, store);
      dag.addStream("QueryResult", store.queryResult, wsOut.input);
    } else {
      DevNull devNull = new DevNull();
      dag.addOperator("devNull", devNull);
      dag.addStream("QueryResult", store.queryResult, devNull.data);
    }

    dag.setAttribute(DAGContext.STREAMING_WINDOW_SIZE_MILLIS, STREAMING_WINDOW_SIZE_MILLIS);
  }

  protected static class StoreStreamCodec implements StreamCodec<Aggregate>, Serializable
  {
    private static final long serialVersionUID = -482870472621208905L;

    protected transient Kryo kryo;

    public StoreStreamCodec()
    {
      this.kryo = new Kryo();
      this.kryo.setClassLoader(Thread.currentThread().getContextClassLoader());
    }

    // Recreate the transient Kryo instance after Java deserialization.
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException
    {
      in.defaultReadObject();
      this.kryo = new Kryo();
      this.kryo.setClassLoader(Thread.currentThread().getContextClassLoader());
    }

    // Partition on the first string key field, so all aggregates with the same
    // key value are routed to the same store partition.
    @Override
    public int getPartition(Aggregate aggregate)
    {
      return aggregate.getEventKey().getKey().getFieldsString()[0].hashCode();
    }

    @Override
    public Object fromByteArray(Slice fragment)
    {
      final Input input = new Input(fragment.buffer, fragment.offset, fragment.length);
      try {
        return kryo.readClassAndObject(input);
      } finally {
        input.close();
      }
    }

    @Override
    public Slice toByteArray(Aggregate o)
    {
      final Output output = new Output(32, -1);
      try {
        kryo.writeClassAndObject(output, o);
      } finally {
        output.close();
      }
      return new Slice(output.getBuffer(), 0, output.position());
    }
  }
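  // A minimal round-trip sketch of the codec above ('aggregate' is a hypothetical
  // Aggregate instance, shown for illustration only):
  //
  //   StoreStreamCodec codec = new StoreStreamCodec();
  //   Slice slice = codec.toByteArray(aggregate);
  //   Aggregate copy = (Aggregate)codec.fromByteArray(slice);
  //   // serialization round-trips preserve the partition assignment
  //   assert codec.getPartition(copy) == codec.getPartition(aggregate);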
  protected AppDataSingleSchemaDimensionStoreHDHT createStore(DAG dag, Configuration conf, String eventSchema)
  {
    AppDataSingleSchemaDimensionStoreHDHT store = dag.addOperator("Store", ProcessTimeAwareStore.class);
    store.setUpdateEnumValues(true);

    String basePath = Preconditions.checkNotNull(conf.get(PROP_STORE_PATH),
        "base path should be specified in the properties.xml");
    TFileImpl hdsFile = new TFileImpl.DTFileImpl();
    // suffix the base path with the current time so each run writes to a fresh location
    basePath += System.currentTimeMillis();
    hdsFile.setBasePath(basePath);

    store.setFileStore(hdsFile);
    dag.setAttribute(store, Context.OperatorContext.COUNTERS_AGGREGATOR,
        new BasicCounters.LongAggregator<MutableLong>());
    store.setConfigurationSchemaJSON(eventSchema);

    store.setPartitionCount(storePartitionCount);
    if (storePartitionCount > 1) {
      store.setQueryResultUnifier(new DimensionStoreHDHTNonEmptyQueryResultUnifier());
    }
    return store;
  }

  protected String getQueryUriString(DAG dag, Configuration conf)
  {
    return ConfigUtil.getAppDataQueryPubSubUriString(dag, conf);
  }

  protected URI getQueryUri(DAG dag, Configuration conf)
  {
    return URI.create(getQueryUriString(dag, conf));
  }

  protected PubSubWebSocketAppDataQuery createQuery(DAG dag, Configuration conf,
      AppDataSingleSchemaDimensionStoreHDHT store)
  {
    PubSubWebSocketAppDataQuery query = new PubSubWebSocketAppDataQuery();
    URI queryUri = getQueryUri(dag, conf);
    logger.info("QueryUri: {}", queryUri);
    query.setUri(queryUri);
    store.setEmbeddableQueryInfoProvider(query);
    return query;
  }

  protected PubSubWebSocketAppDataResult createQueryResult(DAG dag, Configuration conf,
      AppDataSingleSchemaDimensionStoreHDHT store)
  {
    PubSubWebSocketAppDataResult wsOut = new PubSubWebSocketAppDataResult();
    URI queryUri = getQueryUri(dag, conf);
    wsOut.setUri(queryUri);
    dag.addOperator("QueryResult", wsOut);
    return wsOut;
  }
}
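// A minimal sketch of the matching configuration: the store base path has no
// default and must be supplied in properties.xml. The value below is a
// hypothetical HDFS location, shown for illustration only.
//
//   <property>
//     <name>dt.application.DimensionComputation.operator.Store.fileStore.basePathPrefix</name>
//     <value>/user/benchmark/dimensions/store</value>
//   </property>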