/*
* Copyright (c) 2011-2015 EPFL DATA Laboratory
* Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
*
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.epfl.data.squall.ewh.operators;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.log4j.Logger;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.tuple.Values;
import ch.epfl.data.squall.operators.Operator;
import ch.epfl.data.squall.operators.OneToOneOperator;
import ch.epfl.data.squall.utilities.MyUtilities;
import ch.epfl.data.squall.utilities.SystemParameters;
import ch.epfl.data.squall.visitors.OperatorVisitor;
/**
 * Pass-through operator that forwards every tuple unchanged and, in addition,
 * emits a random sample of the tuples on a separate stream.
 *
 * <p>The sample rate is derived in the constructor as
 * {@code numOfBuckets * SystemParameters.TUPLES_PER_BUCKET / relationSize},
 * capped at 1. A collector must be supplied through one of the
 * {@code setCollector} overloads before a tuple is sampled.
 */
public class SampleAsideAndForwardOperator extends OneToOneOperator implements Operator {
    private static final Logger LOG = Logger
            .getLogger(SampleAsideAndForwardOperator.class);
    private static final long serialVersionUID = 1L;

    private double _sampleRate = 0;
    private int _numTuplesProcessed = 0;
    private Random _rnd = new Random();
    private String _componentIndex;
    private List<Integer> _hashIndexes = new ArrayList<Integer>(
            Arrays.asList(0)); // we receive one-column tuples
    private Map _conf;

    // Keeping the collectors inside the operator is not a clean design, but
    // it allows the operator to be placed anywhere in the operator chain.
    // At most one of the two is expected to be set; the spout collector takes
    // precedence when both are present (see processOne).
    private SpoutOutputCollector _spoutCollector;
    private OutputCollector _boltCollector;
    private String _streamId;

    /**
     * Creates the operator and computes its sample rate.
     *
     * @param relationSize number of tuples the rate is computed against
     *            (used as the divisor)
     * @param numOfBuckets number of buckets; multiplied by
     *            {@code SystemParameters.TUPLES_PER_BUCKET} to obtain the
     *            desired sample size
     * @param streamId id of the stream on which sampled tuples are emitted
     * @param conf configuration map handed to
     *            {@code MyUtilities.createTupleValues}
     */
    public SampleAsideAndForwardOperator(int relationSize, int numOfBuckets,
            String streamId, Map conf) {
        _conf = conf;
        _streamId = streamId;

        // Promote to double BEFORE multiplying: the product of two ints can
        // exceed Integer.MAX_VALUE and would otherwise silently wrap around,
        // yielding a bogus (possibly negative) sample rate.
        final double desiredSampleSize = (double) numOfBuckets
                * SystemParameters.TUPLES_PER_BUCKET;
        _sampleRate = desiredSampleSize / relationSize;
        if (_sampleRate >= 1) {
            // cannot sample more than everything
            _sampleRate = 1;
        }
        LOG.info("Sample rate of SampleAsideAndForwardOperator is "
                + _sampleRate);
    }

    /**
     * Sets the bolt-side collector used to emit sampled tuples.
     * Invoked from open methods of StormBoltComponent (not known beforehand).
     */
    public void setCollector(OutputCollector collector) {
        _boltCollector = collector;
    }

    /** Sets the spout-side collector used to emit sampled tuples. */
    public void setCollector(SpoutOutputCollector collector) {
        _spoutCollector = collector;
    }

    /**
     * Sets the component index that is passed to
     * {@code MyUtilities.createTupleValues} for every sampled tuple.
     */
    public void setComponentIndex(String hostComponentIndex) {
        _componentIndex = hostComponentIndex;
    }

    /** Returns true iff a spout collector has been set. */
    private boolean isAttachedToSpout() {
        return _spoutCollector != null;
    }

    @Override
    public void accept(OperatorVisitor ov) {
        ov.visit(this);
    }

    /**
     * Not supported by this operator.
     *
     * @throws RuntimeException always
     */
    @Override
    public List<String> getContent() {
        throw new RuntimeException(
                "getContent for SampleAsideAndForwardOperator should never be invoked!");
    }

    /** Returns how many tuples have been passed to {@link #processOne}. */
    @Override
    public int getNumTuplesProcessed() {
        return _numTuplesProcessed;
    }

    /** Returns the sample rate computed in the constructor (at most 1). */
    public double getSampleRate() {
        return _sampleRate;
    }

    @Override
    public boolean isBlocking() {
        return false;
    }

    /**
     * Not supported by this operator.
     *
     * @throws RuntimeException always
     */
    @Override
    public String printContent() {
        throw new RuntimeException(
                "printContent for SampleAsideAndForwardOperator should never be invoked!");
    }

    /**
     * Counts the tuple, emits it on the extra stream when a random draw falls
     * below the sample rate, and returns the tuple unchanged.
     *
     * @param tuple the incoming tuple
     * @param lineageTimestamp not used by this operator
     * @return the same {@code tuple} instance that was passed in
     * @throws IllegalStateException if the tuple is selected for sampling but
     *             no collector has been set
     */
    @Override
    public List<String> processOne(List<String> tuple, long lineageTimestamp) {
        _numTuplesProcessed++;

        // sending to this extra streamId
        if (_rnd.nextDouble() < _sampleRate) {
            if (_spoutCollector == null && _boltCollector == null) {
                // Fail with a descriptive message instead of a bare
                // NullPointerException on the collector dereference below.
                throw new IllegalStateException(
                        "No collector set on SampleAsideAndForwardOperator; "
                                + "call setCollector before processing tuples.");
            }
            final Values stormTupleSnd = MyUtilities.createTupleValues(tuple, 0,
                    _componentIndex, _hashIndexes, null, _conf);
            if (isAttachedToSpout()) {
                _spoutCollector.emit(_streamId, stormTupleSnd);
            } else {
                _boltCollector.emit(_streamId, stormTupleSnd);
            }
        }

        // normal forwarding
        return tuple;
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("SampleAsideAndForwardOperator with Sample Rate: ");
        sb.append(_sampleRate);
        return sb.toString();
    }
}