/*
 * #!
 * %
 * Copyright (C) 2014 - 2016 Humboldt-Universität zu Berlin
 * %
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #_
 */
package de.hub.cs.dbis.aeolus.spouts;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Values;





/**
 * {@link AbstractOrderedFileInputSpout} is an {@link AbstractOrderedInputSpout} that reads input tuples line by line
 * from multiple files (ie, each line must contain exactly one tuple). Each file is an <em>input partition</em> in
 * {@link AbstractOrderedInputSpout} terminology. The default file is {@code input}. Use {@link #INPUT_FILE_NAME} and
 * {@link #INPUT_FILE_SUFFIXES} to specify different file name(s) in the topology configuration (see
 * {@link backtype.storm.Config}).<br/>
 * <br/>
 * <strong>Output schema:</strong> {@code <ts:}{@link Long}{@code ,rawTuple:}{@link String}{@code >}<br/>
 * 
 * @author Leonardo Aniello (Sapienza Università di Roma, Roma, Italy)
 * @author Roberto Baldoni (Sapienza Università di Roma, Roma, Italy)
 * @author Leonardo Querzoni (Sapienza Università di Roma, Roma, Italy)
 * @author mjsax
 */
public abstract class AbstractOrderedFileInputSpout extends AbstractOrderedInputSpout<String> {
	private static final long serialVersionUID = -4690963122364704481L;
	
	private static final Logger logger = LoggerFactory.getLogger(AbstractOrderedFileInputSpout.class);
	
	/**
	 * Can be used to specify an input file name (or prefix together with {@link #INPUT_FILE_SUFFIXES}). The
	 * configuration value is expected to be of type {@link String}.
	 */
	public final static String INPUT_FILE_NAME = "OrderedFileInputSpout.input";
	
	/**
	 * Can be used to specify a list of file name suffixes (one suffix for each input file) if multiple input files are
	 * used. {@link #INPUT_FILE_NAME} is used as file prefix for each file. The configuration value is expected to be of
	 * type {@link List}.
	 */
	public final static String INPUT_FILE_SUFFIXES = "OrderedFileInputSpout.inputFileSuffixes";
	
	/** The prefix of all input file names. */
	private String prefix = "input";
	
	/** All input files to read from. A closed (exhausted) partition is represented by a {@code null} entry. */
	private final ArrayList<BufferedReader> inputFiles = new ArrayList<BufferedReader>();
	
	/** Emit-Round-Robin-Index. */
	private int emitIndex = -1;
	
	/**
	 * Map containing all tuples emitted by the last call of {@link #emitNextTuple(Integer, Long, Object)}. The map
	 * includes the task IDs each tuple was sent to.
	 */
	protected Map<Values, List<Integer>> emitted = new HashMap<Values, List<Integer>>();
	
	
	
	/**
	 * Creates a {@code AbstractOrderedFileInputSpout} which declares fields without explicit stream ID.
	 */
	public AbstractOrderedFileInputSpout() {}
	
	/**
	 * Creates a {@code AbstractOrderedFileInputSpout} which declares fields on stream with ID {@code streamID}.
	 * 
	 * @param streamID
	 *            the ID of the stream the fields ought to be declared on
	 */
	public AbstractOrderedFileInputSpout(String streamID) {
		super(streamID);
	}
	
	/**
	 * Opens all configured input files and distributes them over the component's tasks in a round-robin fashion.
	 * Files that cannot be found are logged and skipped; the number of successfully opened files is forwarded to the
	 * parent as {@code NUMBER_OF_PARTITIONS}.
	 */
	@SuppressWarnings("unchecked")
	@Override
	public void open(@SuppressWarnings("rawtypes") Map conf, TopologyContext context, SpoutOutputCollector collector) {
		String fileName = (String)conf.get(INPUT_FILE_NAME);
		if(fileName != null) {
			this.prefix = fileName;
		}
		
		List<Object> suffixes = (List<Object>)conf.get(INPUT_FILE_SUFFIXES);
		if(suffixes != null) {
			// distribute input files over all tasks using round robin
			int componentTaskCount = context.getComponentTasks(context.getThisComponentId()).size();
			for(int index = context.getThisTaskIndex(); index < suffixes.size(); index += componentTaskCount) {
				try {
					logger.debug("Adding partition input file {}", this.prefix + suffixes.get(index));
					// NOTE(review): FileReader uses the platform default charset — confirm input files match it
					this.inputFiles.add(new BufferedReader(new FileReader(this.prefix + suffixes.get(index))));
				} catch(FileNotFoundException e) {
					logger.error("Input file <{}> not found.", this.prefix + suffixes.get(index));
				}
			}
		} else {
			try {
				logger.debug("Adding single input file {}", this.prefix);
				this.inputFiles.add(new BufferedReader(new FileReader(this.prefix)));
			} catch(FileNotFoundException e) {
				logger.error("Input file <{}> not found.", this.prefix);
			}
		}
		
		@SuppressWarnings("rawtypes")
		Map newConfig = new HashMap(conf); // need to copy into new HashMap because given one is read-only
		newConfig.put(NUMBER_OF_PARTITIONS, Integer.valueOf(this.inputFiles.size()));
		super.open(newConfig, context, collector);
	}
	
	/**
	 * {@inheritDoc}
	 * 
	 * Reads a line from at least one input file, and emits all eligible tuples (including previously buffered ones).
	 * The emitted tuples can be retrieved via {@link #emitted}.
	 */
	@Override
	public void nextTuple() {
		final int numberOfFiles = this.inputFiles.size();
		// try each partition at most once per call, round-robin, starting after the last used one
		for(int i = 0; i < numberOfFiles; ++i) {
			this.emitIndex = (this.emitIndex + 1) % numberOfFiles;
			
			if(this.inputFiles.get(this.emitIndex) == null) { // check for closed partition
				continue;
			}
			
			String line = null;
			Integer emitIndexAsObject = Integer.valueOf(this.emitIndex);
			try {
				logger.trace("Read from partition {}", emitIndexAsObject);
				line = this.inputFiles.get(this.emitIndex).readLine();
			} catch(IOException e) {
				// preserve the stack trace instead of only the message
				logger.error("Reading from partition {} failed.", emitIndexAsObject, e);
			}
			
			if(line != null) {
				try {
					this.emitted = super.emitNextTuple(emitIndexAsObject, Long.valueOf(this.extractTimestamp(line)),
						line);
					logger.trace("Emitted the following tuples {}", this.emitted);
					if(this.emitted.size() != 0) {
						return;
					}
				} catch(ParseException e) {
					// skip lines whose timestamp cannot be extracted, but keep the cause in the log
					logger.error("Could not extract timestamp.", e);
				}
			} else {
				logger.debug("Try to close empty partition {}", emitIndexAsObject);
				if(super.closePartition(emitIndexAsObject)) {
					try {
						this.inputFiles.get(this.emitIndex).close();
					} catch(IOException e) {
						logger.error("Closing input file reader failed.", e);
					}
					// set to null but do not remove from list -> would change partition IDs in super.emitNextTuple
					this.inputFiles.set(this.emitIndex, null);
				} else {
					// we cannot put any more data into this partition -> try to flush buffered tuples
					this.emitted = super.emitNextTuple(null, null, null);
					logger.trace("Emitted the following tuples {}", this.emitted);
				}
			}
		}
	}
	
	/**
	 * Extracts the timestamp from the given tuple.
	 * 
	 * @param tuple
	 *            The tuple to be processed.
	 * 
	 * @return The tuple's timestamp.
	 * 
	 * @throws ParseException
	 *             if the timestamp could not be extracted
	 */
	protected abstract long extractTimestamp(String tuple) throws ParseException;
	
	/**
	 * Closes all still-open input file readers. Already closed partitions ({@code null} entries) are skipped.
	 */
	@Override
	public void close() {
		for(BufferedReader reader : this.inputFiles) {
			try {
				if(reader != null) {
					reader.close();
				}
			} catch(IOException e) {
				logger.error("Closing input file reader failed.", e);
			}
		}
	}
	
}