AbstractOrderedInputSpout.java example

Explorer

aeolus-master
- batching
  - src
    - main
      - java
        de
        hub
        cs
        dbis
        aeolus
        batching
        AbstractBatchCollector.java
        Batch.java
        BatchColumn.java
        BatchOutputCollector.java
        BatchSpoutOutputCollector.java
        BatchingOutputFieldsDeclarer.java
        BoltBatchCollectorImpl.java
        SingleBatchSizeHashMap.java
        SpoutBatchCollectorImpl.java
        api
        AeolusBuilder.java
        BatchedDeclarer.java
        BoltOutputBatcher.java
        InputDebatcher.java
        SpoutOutputBatcher.java
    - test
      - java
        de
        hub
        cs
        dbis
        aeolus
        batching
        AbstractBatchCollectorTest.java
        BatchColumnTest.java
        BatchOutputCollectorTest.java
        BatchSpoutOutputCollectorTest.java
        BatchTest.java
        BatchingOutputFieldsDeclarerTest.java
        BoltBatchCollectorImplTest.java
        SingleBatchSizeHashMapTest.java
        SpoutBatchCollectorImplTest.java
        TestBatchCollector.java
        api
        AeolusBuilderTest.java
        BatchingFieldsGroupingITCase.java
        BatchingFieldsGroupingMultipleITCase.java
        BatchingShuffleITCase.java
        BoltOutputBatcherTest.java
        InputDebatcherTest.java
        SpoutOutputBatcherTest.java
        TestBolt.java
        TestSpout.java
        VerifyBolt.java
- monitoring
  - src
    - main
      - java
        de
        hub
        cs
        dbis
        aeolus
        monitoring
        MonitoringTopoloyBuilder.java
        latency
        BoltEndTimestampCollector.java
        BoltTimestampAppender.java
        LatencyBolt.java
        LatencyCollectorBolt.java
        LatencySpout.java
        SpoutTimestampAppender.java
        TimestampAttributeAppender.java
        TransparentFieldsDeclarer.java
        microbenchmarks
        MeasureOutputDataRate.java
        SchemaSpout.java
        throughput
        AbstractReportingThread.java
        AbstractThroughputCounter.java
        BoltInputReportingThread.java
        BoltOutputReportingThread.java
        BoltThroughputCounter.java
        Counter.java
        SpoutReportingThread.java
        SpoutThroughputCounter.java
        ThroughputBolt.java
        ThroughputOutputCollector.java
        ThroughputSpout.java
        ThroughputSpoutOutputCollector.java
        utils
        AeolusConfig.java
        ConfigReader.java
    - test
      - java
        de
        hub
        cs
        dbis
        aeolus
        monitoring
        utils
        AeolusConfigTest.java
        ConfigReaderTest.java
- queries
  - lrb
    - src
      - main
        java
        de
        hub
        cs
        dbis
        lrb
        operators
        AccidentDetectionBolt.java
        AccidentNotificationBolt.java
        AccidentSink.java
        AverageSpeedBolt.java
        AverageVehicleSpeedBolt.java
        CountVehiclesBolt.java
        DispatcherBolt.java
        FileReaderSpout.java
        LatestAverageVelocityBolt.java
        StoppedCarsBolt.java
        TollNotificationBolt.java
        TollSink.java
        queries
        AbstractQuery.java
        AccidentDetectionSubquery.java
        AccidentQuery.java
        AverageSpeedSubquery.java
        AverageVehicleSpeedSubquery.java
        CountVehicleSubquery.java
        LatestAverageVelocitySubquery.java
        LinearRoad.java
        LinearRoadWithAllIntermediateResults.java
        OperatorParallelism.java
        StoppedCarsSubquery.java
        TollQuery.java
        utils
        AccInputStreamsTsExtractor.java
        TollInputStreamsTsExtractor.java
        TopologyControl.java
        toll
        FileTollDataStore.java
        MemoryTollDataStore.java
        TollDataStore.java
        types
        AbstractInputTuple.java
        AbstractLRBTuple.java
        AbstractOutputTuple.java
        AccidentNotification.java
        AccountBalanceRequest.java
        DailyExpenditureRequest.java
        PositionReport.java
        TollNotification.java
        TravelTimeRequest.java
        internal
        AccidentTuple.java
        AvgSpeedTuple.java
        AvgVehicleSpeedTuple.java
        CountTuple.java
        LavTuple.java
        StoppedCarTuple.java
        util
        IPositionIdentifier.java
        ISegmentIdentifier.java
        PositionIdentifier.java
        SegmentIdentifier.java
        util
        AvgValue.java
        CarCount.java
        Constants.java
        Time.java
        storm
        lrb
        LRBTopology.java
        LRBTopologyMain.java
        LRBtopologyNormal.java
        TopologyControlOld.java
        bolt
        AccountBalanceBolt.java
        DailyExpenditureBolt.java
        FileTollDataStore.java
        SegmentStatsBolt.java
        TollDataStore.java
        model
        Accident.java
        AccidentImmutable.java
        AccountBalance.java
        MinuteStatistics.java
        NovLav.java
        SegmentStatistics.java
        TollEntry.java
        VehicleAccount.java
        VehicleInfo.java
        spout
        SocketClientSpoutPure.java
        tools
        CSVReader.java
        CommandLineParser.java
        Helper.java
        StopWatch.java
        TupleHelpers.java
      - test
        java
        de
        hub
        cs
        dbis
        lrb
        operators
        AccidentDetectionBoltTest.java
        AccidentNotificationBoltTest.java
        AverageSpeedBoltTest.java
        AverageVehicleSpeedBoltTest.java
        CountVehiclesBoltTest.java
        DispatcherBoltTest.java
        FileReaderSpoutITCase.java
        FileReaderSpoutTest.java
        LastAverageVelocityBoltTest.java
        SpoutDataFileOutputBolt.java
        StoppedCarsBoltTest.java
        TollNotificationBoltTest.java
        storm
        lrb
        LRBTopologyMainTest.java
        bolt
        AccountBalanceBoltTest.java
        DailyExpenditureBoltTest.java
        tools
        EntityHelper.java
- testUtils
  - src
    - main
      - java
        de
        hub
        cs
        dbis
        aeolus
        testUtils
        ForwardBolt.java
        IncSpout.java
        RandomSpout.java
        ResultBolt.java
        TestDeclarer.java
        TestOutputCollector.java
        TestSpoutOutputCollector.java
        TimestampComperator.java
        TimestampOrderChecker.java
    - test
      - java
        de
        hub
        cs
        dbis
        aeolus
        testUtils
        ForwardBoltTest.java
        IncSpoutTest.java
        RandomSpoutTest.java
        ResultBoltTest.java
        TestDeclarerTest.java
        TestOutputCollectorTest.java
        TestSpoutOutputCollectorTest.java
        TimestampOrderCheckerTest.java
- utils
  - src
    - main
      - java
        de
        hub
        cs
        dbis
        aeolus
        bolts
        ForwardBolt.java
        sinks
        AbstractFileOutputBolt.java
        FileFlushSinkBolt.java
        FileSinkBolt.java
        spouts
        AbstractOrderedFileInputSpout.java
        AbstractOrderedInputSpout.java
        DataDrivenStreamRateDriverCollector.java
        DataDrivenStreamRateDriverSpout.java
        FixedStreamRateDriverSpout.java
        utils
        StreamMerger.java
        TimeStampExtractor.java
        TimestampMerger.java
    - test
      - java
        de
        hub
        cs
        dbis
        aeolus
        bolts
        ForwardBoltTest.java
        sinks
        FileOutputBoltTest.java
        FileSinkBoltTest.java
        TestFileOutputBolt.java
        spouts
        OrderedFileInputSpoutTest.java
        OrderedInputSpoutTest.java
        SpoutDataDrivenStreamRateDriverCollectorTest.java
        SpoutDataDrivenStreamRateDriverTest.java
        SpoutFixedStreamRateDriverTest.java
        TestOrderedFileInputSpout.java
        TestOrderedInputSpout.java
        utils
        StreamMergerTest.java
        TimestampMergerTest.java

/*
 * #!
 * %
 * Copyright (C) 2014 - 2016 Humboldt-Universität zu Berlin
 * %
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #_
 */
package de.hub.cs.dbis.aeolus.spouts;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import de.hub.cs.dbis.aeolus.utils.StreamMerger;





/**
 * {@link AbstractOrderedInputSpout} reads input tuples (of type {@code T}) from multiple sources (called
 * <em>partitions</em>) and pushes raw data into the topology. The default number of used partitions is one but can be
 * configured using {@link #NUMBER_OF_PARTITIONS}. The IDs of the partitions are {@code 0,...,NUMBER_OF_PARTITIONS-1}.<br/>
 * <br/>
 * Input data must be sorted in ascending timestamp order in each partition. For each successfully processed input
 * tuple, a single output tuple is emitted.<br/>
 * <br/>
 * <strong>Output schema:</strong> {@code <ts:}{@link Long}{@code ,rawTuple:T>}<br/>
 * Attribute {@code ts} contains the extracted timestamp value of the processed input tuple and {@code rawTuple}
 * contains the <em>complete</em> input tuple.<br/>
 * <br/>
 * {@link AbstractOrderedInputSpout} is parallelizable. If multiple input partitions are assigned to a single task,
 * {@link AbstractOrderedInputSpout} ensures that tuples are emitted in ascending timestamp order. In case of timestamp
 * duplicates, no ordering guarantee for all tuples having the same timestamp is given.
 * 
 * @author Leonardo Aniello (Sapienza Università di Roma, Roma, Italy)
 * @author Roberto Baldoni (Sapienza Università di Roma, Roma, Italy)
 * @author Leonardo Querzoni (Sapienza Università di Roma, Roma, Italy)
 * @author mjsax
 */
public abstract class AbstractOrderedInputSpout<T> implements IRichSpout {
	private static final long serialVersionUID = 6224448887936832190L;
	private static final Logger logger = LoggerFactory.getLogger(AbstractOrderedInputSpout.class);
	
	/**
	 * Can be used to specify the number of input partitions that are available (default value is one). The
	 * configuration value is expected to be an integral {@link Number} (e.g., {@link Integer} or {@link Long}).
	 */
	public static final String NUMBER_OF_PARTITIONS = "OrderedInputSpout.partitions";
	
	/** The merger to be used. Created in {@link #open(Map, TopologyContext, SpoutOutputCollector)}. */
	private StreamMerger<Values> merger;
	
	/** The output collector to be used. Set in {@link #open(Map, TopologyContext, SpoutOutputCollector)}. */
	private SpoutOutputCollector collector;
	
	/**
	 * The ID of the stream the timestamp and raw tuple fields are declared on and tuples are emitted to. Can be
	 * {@code null}, in which case the fields are declared (and tuples are emitted) without an explicit stream ID,
	 * i.e., the default stream is used.
	 */
	private final String streamID;
	
	
	
	/**
	 * Creates a {@code AbstractOrderedInputSpout} which declares fields without explicit stream ID.
	 */
	public AbstractOrderedInputSpout() {
		this(null);
	}
	
	/**
	 * Creates a {@code AbstractOrderedInputSpout} which declares fields on stream with ID {@code streamID}.
	 * 
	 * @param streamID
	 *            the ID of the stream the fields ought to be declared on (may be {@code null} to use the default
	 *            stream)
	 */
	public AbstractOrderedInputSpout(String streamID) {
		this.streamID = streamID;
	}
	
	/**
	 * {@inheritDoc}
	 * 
	 * Sets up internal data structures according to the number of used partitions {@link #NUMBER_OF_PARTITIONS}. If
	 * the configuration does not contain {@link #NUMBER_OF_PARTITIONS}, a single partition (with ID 0) is used.
	 * 
	 * @throws ClassCastException
	 *             if the value configured for {@link #NUMBER_OF_PARTITIONS} is not a {@link Number}
	 */
	@Override
	public void open(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, SpoutOutputCollector collector) {
		int numberOfPartitions = 1;
		// Read the value as Number instead of Integer: integral configuration values may arrive as Long (e.g., after
		// the configuration was serialized), and a direct cast to Integer would fail with a ClassCastException.
		Object configuredPartitions = conf.get(NUMBER_OF_PARTITIONS);
		if(configuredPartitions != null) {
			numberOfPartitions = ((Number)configuredPartitions).intValue();
		}
		logger.debug("Number of configured partitions: {}", Integer.valueOf(numberOfPartitions));
		
		// partition IDs are 0,...,numberOfPartitions-1
		Integer[] partitionIds = new Integer[numberOfPartitions];
		for(int i = 0; i < numberOfPartitions; ++i) {
			partitionIds[i] = Integer.valueOf(i);
		}
		// second argument is the index of the timestamp attribute within the emitted Values (<ts,rawTuple>)
		this.merger = new StreamMerger<Values>(Arrays.asList(partitionIds), 0);
		this.collector = collector;
	}
	
	/**
	 * Makes a new output tuple available.
	 * 
	 * Must be used by {@link #nextTuple()} instead of an {@link SpoutOutputCollector} to emit tuples. In each call, all
	 * tuples that are still buffered over all partitions are considered to be emitted. Tuples are emitted to the
	 * stream the output fields are declared on, i.e., to the stream given at construction time or to the default
	 * stream if no stream ID was given.<br/>
	 * <br/>
	 * If any of the arguments is {@code null}, no new tuple is buffered, but already buffered tuples are still
	 * considered to be emitted.
	 * 
	 * @param index
	 *            The partition id the tuple belongs to.
	 * @param timestamp
	 *            The timestamp of the tuple.
	 * @param tuple
	 *            The tuple to be emitted.
	 * 
	 * @return A map of all tuple that got emitted during this call including the task IDs each emitted tuple was sent
	 *         to. Note that the emitted tuples are used as map keys; thus, tuples that are equal to each other share
	 *         a single entry.
	 */
	// TODO: add support for multiple output streams (what about directEmit(...)?)
	protected final Map<Values, List<Integer>> emitNextTuple(Integer index, Long timestamp, T tuple) {
		logger.trace("Received new output tuple (partitionId, ts, tuple): {}, {}, {}", index, timestamp, tuple);
		if(index != null && timestamp != null && tuple != null) {
			this.merger.addTuple(index, new Values(timestamp, tuple));
		}
		
		Values t;
		Map<Values, List<Integer>> emitted = new HashMap<Values, List<Integer>>();
		while((t = this.merger.getNextTuple()) != null) {
			logger.trace("Emitting tuple: {}", t);
			// emit to the same stream declareOutputFields(...) declared the schema on
			if(this.streamID == null) {
				emitted.put(t, this.collector.emit(t));
			} else {
				emitted.put(t, this.collector.emit(this.streamID, t));
			}
		}
		
		return emitted;
	}
	
	/**
	 * Closes an input partition. Closing a partition is only successful, if no tuples belonging to the partition are
	 * buffered internally any more. No more data can be emitted by this partition if closing was successful.
	 * 
	 * @param partitionId
	 *            The ID of the partition to be closed.
	 * 
	 * @return {@code true} if the partition was closed successfully -- {@code false} otherwise
	 */
	protected boolean closePartition(Integer partitionId) {
		return this.merger.closePartition(partitionId);
	}
	
	/**
	 * Declares the two fields ({@code ts} and {@code rawTuple}) necessary for transmitting tuples with a timestamp.
	 * The fields are declared on the stream given at construction time, or without explicit stream ID if none was
	 * given. Calling {@code super.declareOutputFields} in overriding methods is strongly recommended.
	 * 
	 * @param declarer
	 *            the declarer the output schema is registered at
	 */
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		Fields fields = new Fields("ts", "rawTuple");
		if(this.streamID == null) {
			declarer.declare(fields);
		} else {
			declarer.declareStream(this.streamID, fields);
		}
	}
	
}