/*
 * #!
 * %
 * Copyright (C) 2014 - 2016 Humboldt-Universität zu Berlin
 * %
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #_
 */
package de.hub.cs.dbis.aeolus.batching.api;

import java.util.HashMap;
import java.util.Map;

import backtype.storm.topology.BoltDeclarer;
import backtype.storm.topology.IBasicBolt;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.IRichStateSpout;
import backtype.storm.topology.SpoutDeclarer;
import backtype.storm.topology.TopologyBuilder;



/**
 * {@link AeolusBuilder} allows to specify an output batch size for each Spout/Bolt that is added to the topology. It
 * automatically inserts wrappers that batch the output and debatch the input if required.
 * <p>
 * Every bolt is wrapped in an {@link InputDebatcher} and a {@link BoltOutputBatcher}; every spout is wrapped in a
 * {@link SpoutOutputBatcher}. If no batch size is given, the wrappers are created with an empty batch-size map.
 * <p>
 * <strong>Overload pitfall:</strong> the three-argument variants taking an {@code int batchSize} are preferred by the
 * compiler over the variants taking a {@code Number parallelismHint} whenever the third argument is a primitive
 * {@code int} (no boxing is required). To pass a parallelism hint without a batch size, hand over a boxed value (e.g.,
 * {@code Integer.valueOf(4)}) or use a four-argument variant.
 * 
 * @author mjsax
 */
public class AeolusBuilder extends TopologyBuilder {
	
	/**
	 * Define a new bolt in this topology with parallelism of just one thread. No batch size is specified, hence the
	 * output of the bolt is not batched.
	 * 
	 * @param id
	 *            The id of this component. This id is referenced by other components that want to consume this bolt's
	 *            outputs.
	 * @param bolt
	 *            The bolt to be added to the topology.
	 * 
	 * @return use the returned object to declare the inputs to this component
	 */
	@Override
	public BoltDeclarer setBolt(String id, IRichBolt bolt) {
		return this.setBolt(id, bolt, null, 0);
	}
	
	/**
	 * Define a new bolt in this topology with parallelism of just one thread.
	 * 
	 * @param id
	 *            The id of this component. This id is referenced by other components that want to consume this bolt's
	 *            outputs.
	 * @param bolt
	 *            The bolt to be added to the topology.
	 * @param batchSize
	 *            The batch size to be used for all output streams of the given bolt. A value that is not positive
	 *            results in an empty batch-size map, i.e., no output stream is batched.
	 * 
	 * @return use the returned object to declare the inputs to this component
	 */
	public BoltDeclarer setBolt(String id, IRichBolt bolt, int batchSize) {
		return this.setBolt(id, bolt, null, batchSize);
	}
	
	/**
	 * Define a new bolt in this topology with parallelism of just one thread.
	 * 
	 * @param id
	 *            The id of this component. This id is referenced by other components that want to consume this bolt's
	 *            outputs.
	 * @param bolt
	 *            The bolt to be added to the topology.
	 * @param batchSizes
	 *            Specifies different batch sizes for different output streams. If the given bolt declares an output
	 *            stream, that is not specified in this map, the output tuples will not be batched. The specified batch
	 *            sizes must not be negative. {@code null} is treated like an empty map.
	 * 
	 * @return use the returned object to declare the inputs to this component
	 */
	public BoltDeclarer setBolt(String id, IRichBolt bolt, HashMap<String, Integer> batchSizes) {
		return this.setBolt(id, bolt, null, batchSizes);
	}
	
	/**
	 * Define a new bolt in this topology with the specified amount of parallelism. No batch size is specified, hence
	 * the output of the bolt is not batched.
	 * 
	 * @param id
	 *            The id of this component. This id is referenced by other components that want to consume this bolt's
	 *            outputs.
	 * @param bolt
	 *            The bolt to be added to the topology.
	 * @param parallelismHint
	 *            The number of tasks that should be assigned to execute this bolt. Each task will run on a thread in a
	 *            process somewhere around the cluster.
	 * 
	 * @return use the returned object to declare the inputs to this component
	 */
	@Override
	public BoltDeclarer setBolt(String id, IRichBolt bolt, Number parallelismHint) {
		return this.setBolt(id, bolt, parallelismHint, 0);
	}
	
	/**
	 * Define a new bolt in this topology with the specified amount of parallelism.
	 * 
	 * @param id
	 *            The id of this component. This id is referenced by other components that want to consume this bolt's
	 *            outputs.
	 * @param bolt
	 *            The bolt to be added to the topology.
	 * @param parallelismHint
	 *            The number of tasks that should be assigned to execute this bolt. Each task will run on a thread in a
	 *            process somewhere around the cluster.
	 * @param batchSize
	 *            The batch size to be used for all output streams of the given bolt. A value that is not positive
	 *            results in an empty batch-size map, i.e., no output stream is batched.
	 * 
	 * @return use the returned object to declare the inputs to this component
	 */
	public BoltDeclarer setBolt(String id, IRichBolt bolt, Number parallelismHint, int batchSize) {
		// the input is always debatched, independent of the output batch size
		bolt = new InputDebatcher(bolt);
		
		if(batchSize > 0) {
			bolt = new BoltOutputBatcher(bolt, batchSize);
		} else {
			// no (valid) batch size given: use an empty per-stream configuration
			bolt = new BoltOutputBatcher(bolt, new HashMap<String, Integer>());
		}
		
		return new BatchedDeclarer(super.setBolt(id, bolt, parallelismHint));
	}
	
	/**
	 * Define a new bolt in this topology with the specified amount of parallelism.
	 * 
	 * @param id
	 *            The id of this component. This id is referenced by other components that want to consume this bolt's
	 *            outputs.
	 * @param bolt
	 *            The bolt to be added to the topology.
	 * @param parallelismHint
	 *            The number of tasks that should be assigned to execute this bolt. Each task will run on a thread in a
	 *            process somewhere around the cluster.
	 * @param batchSizes
	 *            Specifies different batch sizes for different output streams. If the given bolt declares an output
	 *            stream, that is not specified in this map, the output tuples will not be batched. The specified batch
	 *            sizes must not be negative. {@code null} is treated like an empty map.
	 * 
	 * @return use the returned object to declare the inputs to this component
	 */
	public BoltDeclarer setBolt(String id, IRichBolt bolt, Number parallelismHint, HashMap<String, Integer> batchSizes) {
		if(batchSizes == null) {
			batchSizes = new HashMap<String, Integer>();
		}
		
		return new BatchedDeclarer(super.setBolt(id, new BoltOutputBatcher(new InputDebatcher(bolt), batchSizes),
			parallelismHint));
	}
	
	/**
	 * Not supported yet. Throws an {@link UnsupportedOperationException}.
	 * 
	 * @throws UnsupportedOperationException
	 *             always
	 */
	@Override
	public BoltDeclarer setBolt(String id, IBasicBolt bolt) {
		throw new UnsupportedOperationException("IBasicBolt is not supported yet.");
	}
	
	/**
	 * Not supported yet. Throws an {@link UnsupportedOperationException}.
	 * 
	 * @throws UnsupportedOperationException
	 *             always
	 */
	@Override
	public BoltDeclarer setBolt(String id, IBasicBolt bolt, Number parallelismHint) {
		throw new UnsupportedOperationException("IBasicBolt is not supported yet.");
	}
	
	/**
	 * Define a new spout in this topology. No batch size is specified, hence the output of the spout is not batched.
	 * 
	 * @param id
	 *            The ID of this component. This ID is referenced by other components that want to consume this spout's
	 *            outputs.
	 * @param spout
	 *            The spout to be added to the topology.
	 * 
	 * @return the declarer of the added spout
	 */
	@Override
	public SpoutDeclarer setSpout(String id, IRichSpout spout) {
		return this.setSpout(id, spout, null, null);
	}
	
	/**
	 * Define a new spout in this topology with the specified amount of parallelism. No batch size is specified, hence
	 * the output of the spout is not batched. The spout is wrapped the same way as by
	 * {@link #setSpout(String, IRichSpout)}.
	 * 
	 * @param id
	 *            The ID of this component. This ID is referenced by other components that want to consume this spout's
	 *            outputs.
	 * @param spout
	 *            The spout to be added to the topology.
	 * @param parallelismHint
	 *            The number of tasks that should be assigned to execute this spout. Each task will run on a thread in a
	 *            process somewhere around the cluster.
	 * 
	 * @return the declarer of the added spout
	 */
	@Override
	public SpoutDeclarer setSpout(String id, IRichSpout spout, Number parallelismHint) {
		// the null literal cannot match the int overload, thus the Map variant is selected
		return this.setSpout(id, spout, parallelismHint, null);
	}
	
	/**
	 * Define a new spout in this topology.
	 * 
	 * @param id
	 *            The ID of this component. This ID is referenced by other components that want to consume this spout's
	 *            outputs.
	 * @param spout
	 *            The spout to be added to the topology.
	 * @param batchSize
	 *            The batch size to be used for all output streams of the given spout. A value that is not positive
	 *            results in an empty batch-size map, i.e., no output stream is batched.
	 * 
	 * @return the declarer of the added spout
	 */
	public SpoutDeclarer setSpout(String id, IRichSpout spout, int batchSize) {
		return this.setSpout(id, spout, null, batchSize);
	}
	
	/**
	 * Define a new spout in this topology.
	 * 
	 * @param id
	 *            The ID of this component. This ID is referenced by other components that want to consume this spout's
	 *            outputs.
	 * @param spout
	 *            The spout to be added to the topology.
	 * @param batchSizes
	 *            Specifies different batch sizes for different output streams. If the given spout declares an output
	 *            stream, that is not specified in this map, the output tuples will not be batched. The specified batch
	 *            sizes must not be negative. {@code null} is treated like an empty map.
	 * 
	 * @return the declarer of the added spout
	 */
	public SpoutDeclarer setSpout(String id, IRichSpout spout, Map<String, Integer> batchSizes) {
		return this.setSpout(id, spout, null, batchSizes);
	}
	
	/**
	 * Define a new spout in this topology.
	 * 
	 * @param id
	 *            The ID of this component. This ID is referenced by other components that want to consume this spout's
	 *            outputs.
	 * @param spout
	 *            The spout to be added to the topology.
	 * @param parallelismHint
	 *            The number of tasks that should be assigned to execute this spout. Each task will run on a thread in a
	 *            process somewhere around the cluster.
	 * @param batchSize
	 *            The batch size to be used for all output streams of the given spout. A value that is not positive
	 *            results in an empty batch-size map, i.e., no output stream is batched.
	 * 
	 * @return the declarer of the added spout
	 */
	public SpoutDeclarer setSpout(String id, IRichSpout spout, Number parallelismHint, int batchSize) {
		if(batchSize <= 0) {
			// no (valid) batch size given: use an empty per-stream configuration
			return super.setSpout(id, new SpoutOutputBatcher(spout, new HashMap<String, Integer>()), parallelismHint);
		}
		
		return super.setSpout(id, new SpoutOutputBatcher(spout, batchSize), parallelismHint);
	}
	
	/**
	 * Define a new spout in this topology.
	 * 
	 * @param id
	 *            The ID of this component. This ID is referenced by other components that want to consume this spout's
	 *            outputs.
	 * @param spout
	 *            The spout to be added to the topology.
	 * @param parallelismHint
	 *            The number of tasks that should be assigned to execute this spout. Each task will run on a thread in a
	 *            process somewhere around the cluster.
	 * @param batchSizes
	 *            Specifies different batch sizes for different output streams. If the given spout declares an output
	 *            stream, that is not specified in this map, the output tuples will not be batched. The specified batch
	 *            sizes must not be negative. {@code null} is treated like an empty map.
	 * 
	 * @return the declarer of the added spout
	 */
	public SpoutDeclarer setSpout(String id, IRichSpout spout, Number parallelismHint, Map<String, Integer> batchSizes) {
		if(batchSizes == null) {
			batchSizes = new HashMap<String, Integer>();
		}
		
		return super.setSpout(id, new SpoutOutputBatcher(spout, batchSizes), parallelismHint);
	}
	
	/**
	 * Not supported. Throws an {@link UnsupportedOperationException}.
	 * 
	 * @throws UnsupportedOperationException
	 *             always
	 */
	@Override
	public void setStateSpout(String id, IRichStateSpout stateSpout) {
		throw new UnsupportedOperationException("Not supported by Storm (0.9.3) yet.");
	}
	
	/**
	 * Not supported. Throws an {@link UnsupportedOperationException}.
	 * 
	 * @throws UnsupportedOperationException
	 *             always
	 */
	@Override
	public void setStateSpout(String id, IRichStateSpout stateSpout, Number parallelismHint) {
		throw new UnsupportedOperationException("Not supported by Storm (0.9.3) yet.");
	}
	
}