// Copyright 2016 Twitter. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.twitter.heron.integration_test.common.spout;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Map;
import java.util.logging.Logger;

import com.twitter.heron.api.spout.BaseRichSpout;
import com.twitter.heron.api.spout.SpoutOutputCollector;
import com.twitter.heron.api.topology.OutputFieldsDeclarer;
import com.twitter.heron.api.topology.TopologyContext;
import com.twitter.heron.api.tuple.Fields;
import com.twitter.heron.api.tuple.Values;

/**
 * Given a list of local file paths, this spout waits for the files to be created and then emits
 * every line of each file as a String. The open() method blocks until the file assigned to this
 * task exists. When used in an integration test, write the data to a separate file first and then
 * rename it to the path this spout polls, so the spout never reads a partially written file.
 * When the BufferedReader reaches the end of the file, it is not closed; the spout keeps polling
 * for newly appended lines. A usage sketch is provided at the bottom of this file.
 * <p>
 * Note: The parallelism of this spout should be equal to the number of files/paths to read.
 */
public class PausedLocalFileSpout extends BaseRichSpout {
  private static final long serialVersionUID = 7233454257997083024L;
  private static final Logger LOG = Logger.getLogger(PausedLocalFileSpout.class.getName());

  private BufferedReader br = null;
  private SpoutOutputCollector collector;
  private String[] paths;

  public PausedLocalFileSpout(String path) {
    this(new String[]{path});
  }

  public PausedLocalFileSpout(String[] paths) {
    this.paths = paths;
  }

  @Override
  public void declareOutputFields(OutputFieldsDeclarer declarer) {
    declarer.declare(new Fields("string"));
  }

  // The spout blocks here until the file at its assigned path exists
  @Override
  public void open(Map<String, Object> stormConf, TopologyContext context,
                   SpoutOutputCollector newCollector) {
    int numTasks = context.getComponentTasks(context.getThisComponentId()).size();
    // Pre-condition: the number of tasks is equal to the number of files to read
    if (paths.length != numTasks) {
      throw new RuntimeException(
          String.format("Number of specified files %d not equal to number of tasks %d",
              paths.length, numTasks));
    }

    this.collector = newCollector;

    int index = context.getThisTaskIndex();
    String path = paths[index];
    File file = new File(path);
    while (!file.exists()) {
      // Busy-wait until the file is created; do not throw any exceptions while waiting.
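      // The test harness is expected to create this file, ideally by renaming a fully written
      // temporary file into place (see the class Javadoc), so the spout never starts reading
      // partial data; until the file appears, this task simply spins here instead of failing.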
    }

    try {
      LOG.info("Creating reader for input data from file " + file.getAbsolutePath());
      // Read from the local file
      br = new BufferedReader(new FileReader(file), 1024 * 1024);
    } catch (IOException e) {
      // Clean up resources if an exception occurs
      try {
        // Closing the outermost reader is enough
        if (br != null) {
          br.close();
        }
      } catch (IOException e1) {
        throw new RuntimeException("Unable to close file reader", e1);
      }
      // We should not close the FileSystem explicitly here. Different threads in a process may
      // share the same FileSystem object; closing it here would close that shared object and
      // cause other threads reading through it to throw exceptions. The FileSystem is closed
      // automatically when the process dies (i.e. when the topology is killed).
      throw new RuntimeException("Failed to create BufferedReader from file path", e);
    }
  }

  // Each call to nextTuple() either reads the current line as null and emits nothing,
  // or emits one line from the text file.
  // The buffered reader is not closed explicitly, so the spout can read any new data
  // appended to the file; it is closed automatically when the spout is closed.
  @Override
  public void nextTuple() {
    if (br == null) {
      return;
    }
    try {
      String currentLine;
      // At EOF, do not close the buffered reader. Instead, keep polling the file until there is
      // more content, and emit nothing while readLine() returns null.
      if ((currentLine = br.readLine()) != null) {
        LOG.info("Emitting tuple from input data file: " + currentLine);
        collector.emit(new Values(currentLine), "MESSAGE_ID");
      }
    } catch (IOException e) {
      // Clean up resources if an exception occurs
      try {
        // Closing the outermost reader is enough
        if (br != null) {
          br.close();
        }
      } catch (IOException e1) {
        throw new RuntimeException("Unable to close stream reader", e1);
      }
      throw new RuntimeException("Unable to emit tuples normally", e);
    }
  }
}
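
/**
 * A minimal usage sketch, not part of the spout itself: it shows how the spout might be wired
 * into a topology with parallelism equal to the number of input files, and how a test could
 * write input atomically (write to a temporary file, then rename) as described in the class
 * Javadoc above. The component id, file names, and helper methods here are illustrative
 * assumptions, not existing Heron test fixtures.
 */
final class PausedLocalFileSpoutUsageSketch {
  private PausedLocalFileSpoutUsageSketch() {
  }

  static void wireIntoTopology(String[] inputPaths) {
    com.twitter.heron.api.topology.TopologyBuilder builder =
        new com.twitter.heron.api.topology.TopologyBuilder();
    // Parallelism must equal the number of files; open() enforces this pre-condition.
    builder.setSpout("paused-local-file-spout",
        new PausedLocalFileSpout(inputPaths), inputPaths.length);
  }

  static void writeInputAtomically(File target, Iterable<String> lines) throws IOException {
    // Write everything to a temporary file first ...
    File tmp = new File(target.getParentFile(), target.getName() + ".tmp");
    java.io.PrintWriter writer = new java.io.PrintWriter(new java.io.FileWriter(tmp));
    try {
      for (String line : lines) {
        writer.println(line);
      }
    } finally {
      writer.close();
    }
    // ... then rename it to the path the spout polls, so the spout never sees partial content.
    if (!tmp.renameTo(target)) {
      throw new IOException("Failed to rename " + tmp + " to " + target);
    }
  }
}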