/*
* Copyright (c) 2011-2015 EPFL DATA Laboratory
* Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
*
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.epfl.data.squall.components.signal_components;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInput;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import com.esotericsoftware.minlog.Log;
import backtype.storm.Config;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
import ch.epfl.data.squall.components.ComponentProperties;
import ch.epfl.data.squall.components.signal_components.storm.SignalClient;
import ch.epfl.data.squall.operators.ChainOperator;
import ch.epfl.data.squall.storm_components.StormComponent;
import ch.epfl.data.squall.storm_components.SynchronizedStormDataSourceInterface;
import ch.epfl.data.squall.storm_components.synchronization.TopologyKiller;
import ch.epfl.data.squall.types.Type;
import ch.epfl.data.squall.utilities.MyUtilities;
import ch.epfl.data.squall.utilities.SystemParameters;
/**
 * Synthetic data-source spout that can be coordinated at runtime through
 * ZooKeeper signals. It generates random tuples according to {@code _schema},
 * routes keys flagged as frequent by the Harmonizer over a dedicated shuffle
 * stream ({@link #SHUFFLE_GROUPING_STREAMID}), and periodically reports its
 * key-frequency histogram back to the Harmonizer.
 */
public class SynchronizedStormDataSource extends
        StormSynchronizedSpoutComponent implements SynchronizedStormDataSourceInterface {
    private static final long serialVersionUID = 1L;
    private static Logger LOG = Logger
            .getLogger(SynchronizedStormDataSource.class);

    private boolean _hasReachedEOF = false;
    private boolean _hasSentEOF = false; // have sent EOF to TopologyKiller
    // (AckEachTuple mode)
    private boolean _hasSentLastAck = false; // AckLastTuple mode

    // tuples emitted but not yet acked (only meaningful in AckEveryTuple mode)
    private long _pendingTuples = 0;
    private int _numSentTuples = 0;

    private final ChainOperator _operatorChain;
    private final int _keyIndex; // position of the key attribute within a tuple
    private int _keyValue = 0; // current key; changed by a DISTRIBUTION_SIGNAL
    private String _name;
    private ArrayList<Type> _schema;
    private final long _aggBatchOutputMillis;

    // Harmonizer parameters
    private String _zookeeperhost, _harmonizerSyncedSpoutName;
    private int _harmonizerUpdateThreshold; // #tuples between histogram reports
    private int _currentHarmonizerUpdateFreq = 0;
    // Only created in open() when the harmonizer constructor was used;
    // stays null for plain (non-harmonized) spouts.
    private transient SignalClient _scHarmonizer;
    private HashMap<Integer, Integer> _keyFrequencies;
    private boolean _isHarmonized;
    private HashSet<Integer> _frequentSet; // keys flagged frequent by the Harmonizer
    private int _numberOfTuplesThreshold; // stop generating after this many tuples

    public static String SHUFFLE_GROUPING_STREAMID = "sync_shuffle";

    /**
     * Creates a non-harmonized synchronized data source and registers it with
     * the topology builder and (depending on the ack mode) the TopologyKiller.
     */
    public SynchronizedStormDataSource(ComponentProperties cp,
            List<String> allCompNames, ArrayList<Type> tupleTypes,
            int hierarchyPosition, int parallelism, int keyIndex,
            boolean isPartitioner, TopologyBuilder builder,
            TopologyKiller killer, Config conf, int numberOfTuples) {
        super(cp, allCompNames, hierarchyPosition, isPartitioner, conf);
        _numberOfTuplesThreshold = numberOfTuples;
        _keyIndex = keyIndex;
        _name = cp.getName();
        _aggBatchOutputMillis = cp.getBatchOutputMillis();
        _operatorChain = cp.getChainOperator();
        _schema = tupleTypes;
        _frequentSet = new HashSet<Integer>();
        if (getHierarchyPosition() == FINAL_COMPONENT
                && (!MyUtilities.isAckEveryTuple(conf)))
            killer.registerComponent(this, parallelism);
        builder.setSpout(getID(), this, parallelism);
        if (MyUtilities.isAckEveryTuple(conf))
            killer.registerComponent(this, parallelism);
    }

    /**
     * Creates a harmonized synchronized data source: in addition to the base
     * behavior it periodically ships its key-frequency histogram to the named
     * harmonizer spout via ZooKeeper.
     */
    public SynchronizedStormDataSource(ComponentProperties cp,
            List<String> allCompNames, ArrayList<Type> tupleTypes,
            int hierarchyPosition, int parallelism, int keyIndex,
            boolean isPartitioner, TopologyBuilder builder,
            TopologyKiller killer, Config conf, int numberOfTuples,
            String zookeeperhost, String harmonizerName, int harmonizerUpdateThreshold) {
        this(cp, allCompNames, tupleTypes, hierarchyPosition, parallelism,
                keyIndex, isPartitioner, builder, killer, conf, numberOfTuples);
        _harmonizerSyncedSpoutName = harmonizerName;
        _zookeeperhost = zookeeperhost;
        _harmonizerUpdateThreshold = harmonizerUpdateThreshold;
        _keyFrequencies = new HashMap<Integer, Integer>();
        _isHarmonized = true;
    }

    // ack method on spout is called only if in AckEveryTuple mode (ACKERS > 0)
    @Override
    public void ack(Object msgId) {
        _pendingTuples--;
    }

    @Override
    public void aggBatchSend() {
        throw new RuntimeException("Batching is disabled in this operator!");
    }

    /**
     * Runs the tuple through the operator chain and, unless it was filtered
     * out, emits it — over the dedicated shuffle stream when its key is in the
     * frequent set, over the default stream otherwise.
     */
    protected void applyOperatorsAndSend(List<String> tuple) {
        long timestamp = 0;
        if ((MyUtilities.isCustomTimestampMode(getConf()) && getHierarchyPosition() == StormComponent.NEXT_TO_LAST_COMPONENT)
                || MyUtilities.isWindowTimestampMode(getConf()))
            timestamp = System.currentTimeMillis();
        tuple = _operatorChain.process(tuple, timestamp);
        if (tuple == null)
            return;
        _numSentTuples++;
        _pendingTuples++;
        printTuple(tuple);
        if (MyUtilities.isSending(getHierarchyPosition(), _aggBatchOutputMillis)) {
            final int tupleKey = Integer.parseInt(tuple.get(_keyIndex));
            if (_frequentSet.contains(tupleKey)) {
                // frequent keys go over the shuffle stream to spread the load
                tupleSend(SHUFFLE_GROUPING_STREAMID, tuple, null, timestamp);
            } else {
                tupleSend(tuple, null, timestamp);
            }
        }
        if (MyUtilities.isPrintLatency(getHierarchyPosition(), getConf())) {
            printTupleLatency(_numSentTuples - 1, timestamp);
        }
    }

    @Override
    public void close() {
        super.close();
        // _scHarmonizer is only created in open() for harmonized spouts;
        // guard against NPE for the plain (non-harmonized) case.
        if (_scHarmonizer != null)
            _scHarmonizer.close();
    }

    /*
     * whatever is inside this method is done only once
     */
    private void eofFinalization() {
        printContent();
        if (!MyUtilities.isAckEveryTuple(getConf()))
            if (getHierarchyPosition() == FINAL_COMPONENT) {
                if (!_hasSentEOF) {
                    _hasSentEOF = true; // to ensure we will not send multiple
                    // EOF per single spout
                    getCollector().emit(SystemParameters.EOF_STREAM,
                            new Values(SystemParameters.EOF));
                }
            } else if (!_hasSentLastAck) {
                LOG.info(getID() + ":Has sent last_ack, tuples sent:"
                        + _numSentTuples);
                _hasSentLastAck = true;
                final List<String> lastTuple = new ArrayList<String>(
                        Arrays.asList(SystemParameters.LAST_ACK));
                tupleSend(lastTuple, null, 0);
            }
        if (_operatorChain != null) {
            _operatorChain.finalizeProcessing();
        }
    }

    @Override
    public void fail(Object msgId) {
        throw new RuntimeException("Failing tuple in " + getID());
    }

    @Override
    public ChainOperator getChainOperator() {
        return _operatorChain;
    }

    // StormComponent
    @Override
    public String getInfoID() {
        final StringBuilder sb = new StringBuilder();
        sb.append("Table ").append(getID()).append(" has ID: ").append(getID());
        return sb.toString();
    }

    @Override
    public long getNumSentTuples() {
        return _numSentTuples;
    }

    public long getPendingTuples() {
        return _pendingTuples;
    }

    // from IRichSpout interface
    @Override
    public void nextTuple() {
        final String line = generateLine();
        // Send frequency statistics to the harmonizer & housekeeping
        if (_isHarmonized && _currentHarmonizerUpdateFreq > _harmonizerUpdateThreshold) {
            try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
                    ObjectOutput out = new ObjectOutputStream(bos)) {
                out.writeObject(_keyFrequencies);
                // flush before extracting the bytes: ObjectOutputStream
                // buffers block data, so toByteArray() could otherwise be
                // missing the tail of the serialized histogram
                out.flush();
                _scHarmonizer.send(bos.toByteArray());
            } catch (Exception e) {
                LOG.error("Failed to send key-frequency histogram to the harmonizer", e);
            }
            _keyFrequencies.clear();
            _currentHarmonizerUpdateFreq = 0;
        }
        if (_numSentTuples >= _numberOfTuplesThreshold) {
            if (!_hasReachedEOF) {
                _hasReachedEOF = true;
                eofFinalization();
            }
            sendEOF();
            Utils.sleep(SystemParameters.EOF_TIMEOUT_MILLIS);
            return;
        }
        final List<String> tuple = MyUtilities.fileLineToTuple(line, getConf());
        applyOperatorsAndSend(tuple);
        // Update frequency statistics
        if (_isHarmonized) {
            updateHistogram(Integer.parseInt(tuple.get(_keyIndex)));
            _currentHarmonizerUpdateFreq++;
        }
    }

    /** Increments the observed frequency of the given key. */
    private void updateHistogram(int key) {
        _keyFrequencies.merge(key, 1, Integer::sum);
    }

    // BaseRichSpout
    @Override
    public void open(Map map, TopologyContext tc, SpoutOutputCollector collector) {
        super.open(map, tc, collector);
        if (_harmonizerSyncedSpoutName != null && _zookeeperhost != null) {
            _scHarmonizer = new SignalClient(_zookeeperhost, _harmonizerSyncedSpoutName);
            _scHarmonizer.start();
        }
    }

    // HELPER methods
    /**
     * Builds one '|'-delimited line: the key attribute takes the current
     * {@code _keyValue}, every other attribute is a random instance of its
     * schema type.
     */
    protected String generateLine() {
        final StringBuilder text = new StringBuilder();
        for (int i = 0; i < _schema.size(); i++) {
            if (i == _keyIndex) {
                text.append(_keyValue);
            } else {
                final Type attribute = _schema.get(i);
                text.append(attribute.toString(attribute.generateRandomInstance()));
            }
            text.append('|');
        }
        return text.toString();
    }

    /*
     * sending EOF in AckEveryTuple mode when we send at least one tuple to the
     * next component
     */
    private void sendEOF() {
        if (MyUtilities.isAckEveryTuple(getConf()))
            if (_pendingTuples == 0)
                if (!_hasSentEOF) {
                    _hasSentEOF = true;
                    getCollector().emit(SystemParameters.EOF_STREAM,
                            new Values(SystemParameters.EOF));
                }
    }

    /**
     * Signal from the Distribution Spout Signaler or it can be from the
     * Harmonizer Signal. The first 4 bytes of {@code data} encode the signal
     * type; the remainder is the payload.
     *
     * @param data raw signal bytes delivered through ZooKeeper
     */
    @Override
    public void onSignal(byte[] data) {
        final byte[] signal = Arrays.copyOfRange(data, 0, 4);
        final byte[] payload = Arrays.copyOfRange(data, 4, data.length);
        final int signalType = SignalUtilities.byteArrayToInt(signal);
        if (signalType == SignalUtilities.HARMONIZER_SIGNAL) {
            // NOTE(review): payload is Java-deserialized — it must only ever
            // originate from the trusted Harmonizer, never from external input
            try (ByteArrayInputStream bis = new ByteArrayInputStream(payload);
                    ObjectInput in = new ObjectInputStream(bis)) {
                LOG.info(".......Received harmonizer signal.......");
                @SuppressWarnings("unchecked")
                final HashSet<Integer> freqSet = (HashSet<Integer>) in.readObject();
                _frequentSet = freqSet;
            } catch (Exception ex) {
                LOG.error("Failed to deserialize harmonizer signal", ex);
            }
        } else if (signalType == SignalUtilities.DISTRIBUTION_SIGNAL) {
            LOG.info(".......Received Distribution signal.......");
            final int key = SignalUtilities.byteArrayToInt(payload);
            LOG.info("Changed the KeyValue from " + _keyValue + " to " + key);
            _keyValue = key;
        }
    }
}