/* * Copyright (c) 2011-2015 EPFL DATA Laboratory * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE) * * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ch.epfl.data.squall.operators; import java.util.List; import java.util.Random; import org.apache.log4j.Logger; import ch.epfl.data.squall.utilities.SystemParameters; import ch.epfl.data.squall.visitors.OperatorVisitor; public class SampleOperator extends OneToOneOperator implements Operator { private static Logger LOG = Logger.getLogger(SampleOperator.class); private static final long serialVersionUID = 1L; private double _sampleRate = 0; private int _numTuplesProcessed = 0; private Random _rnd = new Random(); public SampleOperator(double sampleRate) { _sampleRate = sampleRate; LOG.info("Sample rate is " + _sampleRate); } public SampleOperator(int relationSize, int numOfBuckets) { _sampleRate = ((double) (numOfBuckets * SystemParameters.TUPLES_PER_BUCKET)) / relationSize; if (_sampleRate >= 1) { _sampleRate = 1; } LOG.info("Sample rate is " + _sampleRate); } @Override public void accept(OperatorVisitor ov) { ov.visit(this); } @Override public List<String> getContent() { throw new RuntimeException( "getContent for SampleOperator should never be invoked!"); } @Override public int getNumTuplesProcessed() { return _numTuplesProcessed; } public double getSampleRate() { return _sampleRate; } @Override public boolean isBlocking() { return false; } @Override public String printContent() { throw new RuntimeException( "printContent for SampleOperator should never be invoked!"); } @Override public List<String> processOne(List<String> tuple, long lineageTimestamp) { _numTuplesProcessed++; if (_rnd.nextDouble() < _sampleRate) { return tuple; } else { return null; } } @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("SampleOperator with Sample Rate: "); sb.append(_sampleRate); return sb.toString(); } }