/*
 * Copyright 2014 University of Southern California
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package edu.usc.pgroup.floe.examples.pellets;

import edu.usc.pgroup.floe.app.AppContext;
import edu.usc.pgroup.floe.app.Emitter;
import edu.usc.pgroup.floe.app.Tuple;
import edu.usc.pgroup.floe.app.pellets.PelletContext;
import edu.usc.pgroup.floe.app.pellets.StatelessPellet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * A source pellet that tokenizes a text file and emits its words as tuples.
 *
 * <p>The file is tokenized once; every word read is emitted and remembered.
 * Afterwards the remembered words are replayed in an endless loop, each one
 * followed by two emissions of the literal word {@code "the"}. A small
 * cycling integer counter is emitted (as a string) before and after every
 * pass. All values are stored under the tuple field {@code "word"}.
 *
 * @author kumbhare
 */
public class FileSourcePellet extends StatelessPellet {

    /**
     * the global logger instance.
     */
    private static final Logger LOGGER =
            LoggerFactory.getLogger(FileSourcePellet.class);

    /**
     * Name of the tuple field under which every value is emitted.
     */
    private static final String WORD_FIELD = "word";

    /**
     * First character code treated as part of a word.
     */
    private static final int WORD_CHARS_LOW = 0x23;

    /**
     * Last character code treated as part of a word.
     */
    private static final int WORD_CHARS_HIGH = 0xFF;

    /**
     * First character code treated as whitespace.
     */
    private static final int WHITESPACE_LOW = 0x00;

    /**
     * Last character code treated as whitespace.
     */
    private static final int WHITESPACE_HIGH = 0x20;

    /**
     * The counter is reset to zero once it reaches this value at the end
     * of a pass.
     */
    private static final int ID_LIMIT = 6;

    /**
     * Path to the file to be used as source.
     */
    private final String path;

    /**
     * Interval (passed to {@link Thread#sleep(long)}) between emissions.
     * No sleep is performed when the value is not positive.
     */
    private final long interval;

    /**
     * Constructor.
     *
     * @param keyFieldName fieldname used to emit tuples. Currently ignored:
     *                     tuples are always emitted under the field
     *                     {@code "word"}.
     * @param filePath     file path.
     * @param sleepTime    interval between emissions.
     */
    public FileSourcePellet(final String keyFieldName,
                            final String filePath,
                            final long sleepTime) {
        //super(keyFieldName);
        this.path = filePath;
        this.interval = sleepTime;
    }

    /**
     * Opens the source file and emits its words until the executing thread
     * is interrupted. The input tuple is not used.
     *
     * <p>If the file cannot be opened the failure is logged and the method
     * returns without emitting anything. The file is closed when the method
     * returns.
     *
     * @param t       input tuple received from the preceding pellet (unused).
     * @param emitter An output emitter used to emit the word tuples.
     */
    @Override
    public final void execute(final Tuple t, final Emitter emitter) {
        LOGGER.info("Executing file source pellet.");
        try (BufferedReader reader =
                     new BufferedReader(new FileReader(path))) {
            emitForever(newTokenizer(reader), emitter);
        } catch (FileNotFoundException e) {
            // Previously this was swallowed and a null reader was handed to
            // StreamTokenizer, which fails with a NullPointerException.
            LOGGER.error("Source file not found: {}", path, e);
        } catch (IOException e) {
            // Read errors are handled inside the loop, so this can only
            // come from closing the reader.
            LOGGER.warn("Failed to close source file: {}", path, e);
        }
    }

    /**
     * Builds a tokenizer that yields space-separated words and
     * double-quoted strings.
     *
     * @param reader reader positioned at the start of the source text.
     * @return the configured tokenizer.
     */
    private static StreamTokenizer newTokenizer(final BufferedReader reader) {
        final StreamTokenizer tokenizer = new StreamTokenizer(reader);
        tokenizer.resetSyntax();
        tokenizer.wordChars(WORD_CHARS_LOW, WORD_CHARS_HIGH);
        tokenizer.whitespaceChars(WHITESPACE_LOW, WHITESPACE_HIGH);
        tokenizer.quoteChar('"');
        return tokenizer;
    }

    /**
     * Main emission loop. Runs until the thread is interrupted.
     *
     * @param tokenizer tokenizer over the source file.
     * @param emitter   emitter receiving the tuples.
     */
    private void emitForever(final StreamTokenizer tokenizer,
                             final Emitter emitter) {
        final List<String> words = new ArrayList<>();
        // A single tuple instance is reused for every emission.
        final Tuple ot = new Tuple();
        int id = 0;
        while (true) {
            ot.put(WORD_FIELD, Integer.toString(id));
            emitter.emit(ot);
            id++;
            try {
                if (words.isEmpty()) {
                    emitFileWords(tokenizer, emitter, ot, words);
                } else {
                    replayWords(emitter, ot, words);
                }
                ot.put(WORD_FIELD, Integer.toString(id));
                emitter.emit(ot);
                id++;
                if (id >= ID_LIMIT) {
                    id = 0;
                }
                pause();
            } catch (InterruptedException e) {
                // Restore the flag so callers further up the stack can
                // still observe the interruption.
                Thread.currentThread().interrupt();
                LOGGER.error("Interrupted; stopping file source pellet.", e);
                return;
            } catch (IOException e) {
                // Log and keep going: the next pass either replays the
                // words collected so far or retries reading the file.
                LOGGER.error("Error reading source file: {}", path, e);
            }
        }
    }

    /**
     * Reads the remaining tokens from the file, emitting and recording
     * each word. Tokens without a string value (single ordinary
     * characters) are skipped.
     *
     * @param tokenizer tokenizer over the source file.
     * @param emitter   emitter receiving the tuples.
     * @param ot        tuple instance reused for every emission.
     * @param words     list to which every emitted word is appended.
     * @throws IOException          if reading the file fails.
     * @throws InterruptedException if interrupted while pausing.
     */
    private void emitFileWords(final StreamTokenizer tokenizer,
                               final Emitter emitter,
                               final Tuple ot,
                               final List<String> words)
            throws IOException, InterruptedException {
        while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
            final String w = tokenizer.sval;
            if (w == null) {
                continue;
            }
            ot.put(WORD_FIELD, w);
            LOGGER.info("Emmitting: {}", ot);
            emitter.emit(ot);
            pause();
            words.add(w);
        }
        LOGGER.error("DONE FILE.");
    }

    /**
     * Emits every recorded word once, each followed by two emissions of
     * the word "the".
     *
     * @param emitter emitter receiving the tuples.
     * @param ot      tuple instance reused for every emission.
     * @param words   words recorded while reading the file.
     * @throws InterruptedException if interrupted while pausing.
     */
    private void replayWords(final Emitter emitter,
                             final Tuple ot,
                             final List<String> words)
            throws InterruptedException {
        final Iterator<String> iterator = words.iterator();
        while (iterator.hasNext()) {
            ot.put(WORD_FIELD, iterator.next());
            LOGGER.info("Emmitting: {}", ot);
            emitter.emit(ot);
            ot.put(WORD_FIELD, "the");
            LOGGER.info("Emmitting: {}", ot);
            emitter.emit(ot);
            emitter.emit(ot);
            pause();
        }
    }

    /**
     * Sleeps for the configured interval, if it is positive.
     *
     * @throws InterruptedException if interrupted while sleeping.
     */
    private void pause() throws InterruptedException {
        if (interval > 0) {
            Thread.sleep(interval);
        }
    }

    /**
     * The setup function is called once to let the pellet initialize.
     * This pellet performs no initialization.
     *
     * @param appContext    Application's context. Some data related to
     *                      application's deployment.
     * @param pelletContext Pellet instance context.
     */
    @Override
    public final void onStart(final AppContext appContext,
                              final PelletContext pelletContext) {

    }

    /**
     * The teardown function, called when the topology is killed.
     * Or when the Pellet instance is scaled down.
     * This pellet performs no cleanup here.
     */
    @Override
    public void teardown() {

    }

    /**
     * @return The names of the streams to be used later during emitting
     * messages. This pellet declares none and returns {@code null}.
     */
    @Override
    public final List<String> getOutputStreamNames() {
        return null;
    }
}