/*
* Copyright (c) 2011-2015 EPFL DATA Laboratory
* Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
*
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.epfl.data.squall.operators;
import java.util.List;
import java.util.HashSet;
import java.util.Random;
import org.apache.log4j.Logger;
import ch.epfl.data.squall.utilities.SystemParameters;
import ch.epfl.data.squall.visitors.OperatorVisitor;
/**
 * Filtering operator for the reachability-graph workload: a tuple is passed
 * through only when one of its fields matches a hard-coded set of node ids,
 * otherwise it is dropped.
 *
 * <p>Which field is inspected depends on the {@code in} flag given to the
 * constructor:
 * <ul>
 * <li>{@code in == true}: field 1 of the tuple is looked up in the in-degree set;</li>
 * <li>{@code in == false}: field 0 of the tuple is looked up in the out-degree set.</li>
 * </ul>
 *
 * <p>NOTE(review): the tuple layout is presumably (source, destination) for a
 * graph edge -- confirm against the callers.
 *
 * <p>The {@code rate} constructor argument is stored and reported by
 * {@link #toString()}, but rate-based sampling is currently disabled (see
 * {@link #processOne(List, long)}).
 */
public class CustomSampleOperatorReachGraph extends OneToOneOperator implements Operator {
    private static final Logger LOG = Logger.getLogger(CustomSampleOperatorReachGraph.class);
    private static final long serialVersionUID = 1L;

    /** Number of tuples handed to {@link #processOne(List, long)} so far, kept or dropped. */
    private int _numTuplesProcessed = 0;

    /** Selects the in-degree check (field 1) when true, the out-degree check (field 0) otherwise. */
    private final boolean _in;

    /** Sampling rate as given to the constructor; only used by {@link #toString()} at the moment. */
    private final int _rate;

    /** Node ids accepted when filtering on field 1 of a tuple. */
    private final HashSet<String> inDegree = new HashSet<String>();

    /** Node ids accepted when filtering on field 0 of a tuple. */
    private final HashSet<String> outDegree = new HashSet<String>();

    /**
     * Creates the operator and fills both id sets.
     *
     * @param rate sampling rate; recorded but not applied while rate-based
     *             sampling is disabled
     * @param in   {@code true} to filter on field 1 against the in-degree set,
     *             {@code false} to filter on field 0 against the out-degree set
     */
    public CustomSampleOperatorReachGraph(int rate, boolean in) {
        _rate = rate;
        _in = in;
        createInDegrees();
        createOutDegrees();
        LOG.info("Custom Sample operator for Reachability Graph ");
    }

    /**
     * Populates {@link #inDegree}. Only one id is active; the remaining
     * candidates are kept as disabled entries for reference.
     */
    private void createInDegrees() {
        // NOTE(review): trailing numbers are presumably in-degree counts -- confirm.
        // inDegree.add("41718621"); // wordpress.org
        // inDegree.add("42467638"); // youtube.com
        // inDegree.add("41410181"); // wikipedia.org 1,243,291
        // inDegree.add("15777213"); // gmpg.org 1,156,727
        inDegree.add("5325333"); // blogspot.com 1,034,450
        // inDegree.add("15964788"); // google.com 782,660
        // inDegree.add("41718572"); // wordpress.com 710,590
        // inDegree.add("39224483"); // twitter.com 646,239
        // inDegree.add("42206842"); // yahoo.com 554,251
        // inDegree.add("14050903"); // flickr.com 339,231
        // inDegree.add("13237914"); // facebook.com 314,051
        // inDegree.add("2719708"); // apple.com 312,396
        // inDegree.add("25196427"); // miibeian.gov.cn 289,605
        // inDegree.add("40294265"); // vimeo.com 269,003
        // inDegree.add("39095913"); // tumblr.com 226,596
        // inDegree.add("20328765"); // joomla.org 201,863
        // inDegree.add("2150098"); // amazon.com 196,690
        // inDegree.add("40673739"); // w3.org 196,507
        // inDegree.add("27729888"); // nytimes.com 193,907
        // inDegree.add("35243431"); // sourceforge.net 189,663
    }

    /**
     * Populates {@link #outDegree}. Only one id is active; the remaining
     * candidates are kept as disabled entries for reference.
     */
    private void createOutDegrees() {
        outDegree.add("5325333"); // blogspot.com
        // outDegree.add("41718572"); // wordpress.com
        // outDegree.add("42467638"); // youtube.com
        // outDegree.add("41410181"); // wikipedia.org
        // outDegree.add("33878319"); // serebella.com
        // outDegree.add("31674470"); // refertus.info
        // outDegree.add("38484153"); // top20directory.com
        // outDegree.add("39262267"); // typepad.com
        // outDegree.add("5686278"); // botw.org
        // outDegree.add("39095913"); // tumblr.com
        // outDegree.add("10906824"); // dmoz.org
        // outDegree.add("40304803"); // vindhetviahier.nl
        // outDegree.add("19887682"); // jcsearch.com
        // outDegree.add("35748264"); // startpagina.nl
        // outDegree.add("42206842"); // yahoo.com
        // outDegree.add("36992745"); // tatu.us
        // outDegree.add("14602869"); // freeseek.org
        // outDegree.add("22195621"); // lap.hu
        // outDegree.add("5263966"); // blau-webkatalog.com
        // outDegree.add("1903103"); // allepaginas.nl
    }

    /** Dispatches this operator to the given visitor. */
    @Override
    public void accept(OperatorVisitor ov) {
        ov.visit(this);
    }

    /**
     * Not supported by this operator.
     *
     * @throws RuntimeException always
     */
    @Override
    public List<String> getContent() {
        throw new RuntimeException(
                "getContent for CustomSampleOperatorReachGraph should never be invoked!");
    }

    /**
     * @return how many tuples have been passed to
     *         {@link #processOne(List, long)}, including dropped ones
     */
    @Override
    public int getNumTuplesProcessed() {
        return _numTuplesProcessed;
    }

    /** @return always {@code false} */
    @Override
    public boolean isBlocking() {
        return false;
    }

    /**
     * Not supported by this operator.
     *
     * @throws RuntimeException always
     */
    @Override
    public String printContent() {
        throw new RuntimeException(
                "printContent for CustomSampleOperatorReachGraph should never be invoked!");
    }

    /**
     * Counts the tuple and decides whether to keep it.
     *
     * @param tuple            the tuple to examine; field 1 is read in
     *                         in-degree mode, field 0 in out-degree mode
     * @param lineageTimestamp not used by this operator
     * @return the same {@code tuple} instance when the inspected field is in
     *         the relevant id set, {@code null} when the tuple is dropped
     */
    @Override
    public List<String> processOne(List<String> tuple, long lineageTimestamp) {
        _numTuplesProcessed++;
        if (_in) {
            if (inDegree.contains(tuple.get(1))) {
                return tuple;
            }
        } else if (outDegree.contains(tuple.get(0))) {
            return tuple;
        }
        // Rate-based sampling (keep every _rate-th tuple) is intentionally
        // disabled, so _rate has no influence on the result here.
        return null;
    }

    /** @return a short description including the configured rate */
    @Override
    public String toString() {
        return "CustomSampleOperatorReachGraph with rate : " + _rate;
    }
}