/*
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ch.epfl.data.squall.components.signal_components;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;

import backtype.storm.Config;
import backtype.storm.topology.TopologyBuilder;
import ch.epfl.data.squall.components.Component;
import ch.epfl.data.squall.components.DataSourceComponent;
import ch.epfl.data.squall.expressions.ValueExpression;
import ch.epfl.data.squall.operators.ChainOperator;
import ch.epfl.data.squall.operators.Operator;
import ch.epfl.data.squall.predicates.Predicate;
import ch.epfl.data.squall.storm_components.InterchangingComponent;
import ch.epfl.data.squall.storm_components.StormComponent;
import ch.epfl.data.squall.storm_components.synchronization.TopologyKiller;
import ch.epfl.data.squall.types.Type;
import ch.epfl.data.squall.utilities.MyUtilities;
import ch.epfl.data.squall.utilities.SystemParameters;

/**
 * Data source component whose {@link #makeBolts} registers a
 * {@link DistributionSignalSpout}, optionally a {@link HarmonizerSignalSpout},
 * and a {@link SynchronizedStormDataSource} for this component.
 *
 * <p>
 * The harmonizer spout is only registered when the instance was created
 * through the nine-argument constructor.
 */
public class SignaledDataSourceComponent implements Component {
    private static final long serialVersionUID = 1L;
    private static final Logger LOG = Logger
	    .getLogger(SignaledDataSourceComponent.class);

    private final String _componentName;

    private long _batchOutputMillis;

    private List<Integer> _hashIndexes;
    private List<ValueExpression> _hashExpressions;

    // Assigned only by makeBolts(); null until then.
    private SynchronizedStormDataSource _dataSource;

    private final ChainOperator _chain = new ChainOperator();

    private boolean _printOut;
    private boolean _printOutSet; // whether printOut condition is already set

    private Component _child;

    // equi-weight histogram
    private boolean _isPartitioner;

    private final String _zookeeperHost;
    private final ArrayList<Type> _schema;
    private final int _keyIndex;
    private final int _distributionRefreshSeconds;
    private final int _numberOfTuplesThreshold;

    // Harmonizer settings; only meaningful when _isHarmonized is true.
    private int _harmonizerWindowCountThreshold;
    private int _harmonizerFrequentThreshold;
    private int _harmonizerUpdaterRate;
    private boolean _isHarmonized = false;

    /**
     * Creates a non-harmonized signaled data source.
     *
     * @param componentName
     *            name of this component; also used for equality and hashing
     * @param zookeeperHost
     *            host handed to the signal spout(s) created in
     *            {@link #makeBolts}
     * @param schema
     *            schema handed to the underlying storm data source
     * @param keyIndex
     *            key index handed to the underlying storm data source
     * @param distributionRefreshSeconds
     *            value handed to the {@link DistributionSignalSpout}
     * @param numberOfTuplesThreshold
     *            value handed to the underlying storm data source
     */
    public SignaledDataSourceComponent(String componentName,
	    String zookeeperHost, ArrayList<Type> schema, int keyIndex,
	    int distributionRefreshSeconds, int numberOfTuplesThreshold) {
	_componentName = componentName;
	_zookeeperHost = zookeeperHost;
	_schema = schema;
	_keyIndex = keyIndex;
	_distributionRefreshSeconds = distributionRefreshSeconds;
	_numberOfTuplesThreshold = numberOfTuplesThreshold;
    }

    /**
     * Creates a harmonized signaled data source.
     *
     * <p>
     * Note that this constructor takes {@code numberOfTuplesThreshold}
     * <em>before</em> {@code distributionRefreshSeconds}, the opposite of the
     * six-argument constructor.
     *
     * @param componentName
     *            name of this component
     * @param zookeeperHost
     *            host handed to the signal spouts created in
     *            {@link #makeBolts}
     * @param schema
     *            schema handed to the underlying storm data source
     * @param keyIndex
     *            key index handed to the underlying storm data source
     * @param numberOfTuplesThreshold
     *            value handed to the underlying storm data source
     * @param distributionRefreshSeconds
     *            value handed to the {@link DistributionSignalSpout}
     * @param harmonizerWindowCountThreshold
     *            value handed to the {@link HarmonizerSignalSpout}
     * @param harmonizerFrequentThreshold
     *            value handed to the {@link HarmonizerSignalSpout}
     * @param harmonizerUpdaterRate
     *            value handed to the underlying storm data source
     */
    public SignaledDataSourceComponent(String componentName,
	    String zookeeperHost, ArrayList<Type> schema, int keyIndex,
	    int numberOfTuplesThreshold, int distributionRefreshSeconds,
	    int harmonizerWindowCountThreshold,
	    int harmonizerFrequentThreshold, int harmonizerUpdaterRate) {
	// The delegate expects (distributionRefreshSeconds,
	// numberOfTuplesThreshold) in that order, which differs from this
	// constructor's own parameter order; pass them by meaning, not by
	// position.
	this(componentName, zookeeperHost, schema, keyIndex,
		distributionRefreshSeconds, numberOfTuplesThreshold);
	_harmonizerWindowCountThreshold = harmonizerWindowCountThreshold;
	_harmonizerFrequentThreshold = harmonizerFrequentThreshold;
	_harmonizerUpdaterRate = harmonizerUpdaterRate;
	_isHarmonized = true;
    }

    /**
     * Appends an operator to this component's operator chain.
     *
     * @param operator
     *            operator to append
     * @return this component, for chaining
     */
    @Override
    public SignaledDataSourceComponent add(Operator operator) {
	_chain.addOperator(operator);
	return this;
    }

    /**
     * Two components are considered equal when their names are equal.
     *
     * @param obj
     *            object to compare against
     * @return {@code true} if {@code obj} is a {@link Component} with the same
     *         name
     */
    @Override
    public boolean equals(Object obj) {
	if (obj instanceof Component) {
	    return _componentName.equals(((Component) obj).getName());
	}
	return false;
    }

    /**
     * Not supported by this component.
     *
     * @throws RuntimeException
     *             always
     */
    @Override
    public List<DataSourceComponent> getAncestorDataSources() {
	throw new RuntimeException("Not implemented yet");
    }

    /** @return the stored batch output interval in milliseconds */
    @Override
    public long getBatchOutputMillis() {
	return _batchOutputMillis;
    }

    /** @return the operator chain of this component */
    @Override
    public ChainOperator getChainOperator() {
	return _chain;
    }

    /** @return the child set through {@link #setChild}, or {@code null} */
    @Override
    public Component getChild() {
	return _child;
    }

    // from StormEmitter interface
    /**
     * Delegates to the underlying storm data source, which exists only after
     * {@link #makeBolts} has run.
     *
     * @return the emitter IDs reported by the storm data source
     */
    @Override
    public String[] getEmitterIDs() {
	return _dataSource.getEmitterIDs();
    }

    /**
     * Not supported by this component.
     *
     * @throws RuntimeException
     *             always
     */
    @Override
    public List<String> getFullHashList() {
	throw new RuntimeException(
		"This method should not be invoked for DataSourceComponent!");
    }

    /** @return the hash expressions set through {@link #setHashExpressions} */
    @Override
    public List<ValueExpression> getHashExpressions() {
	return _hashExpressions;
    }

    /** @return the hash indexes set through {@code setOutputPartKey} */
    @Override
    public List<Integer> getHashIndexes() {
	return _hashIndexes;
    }

    /**
     * Delegates to the underlying storm data source, which exists only after
     * {@link #makeBolts} has run.
     *
     * @return the storm data source's info ID followed by a newline
     */
    @Override
    public String getInfoID() {
	return _dataSource.getInfoID() + "\n";
    }

    /** @return the component name given at construction */
    @Override
    public String getName() {
	return _componentName;
    }

    /** @return always {@code null} */
    @Override
    public Component[] getParents() {
	return null;
    }

    /** @return the current print-out flag */
    @Override
    public boolean getPrintOut() {
	return _printOut;
    }

    /** Hash code derived solely from the component name. */
    @Override
    public int hashCode() {
	int hash = 3;
	hash = 59 * hash
		+ (_componentName != null ? _componentName.hashCode() : 0);
	return hash;
    }

    /**
     * Registers this component's spouts on the given topology builder and
     * creates the underlying {@link SynchronizedStormDataSource}.
     *
     * @param builder
     *            topology builder the spouts are registered on
     * @param killer
     *            topology killer handed to the storm data source
     * @param allCompNames
     *            component names handed to the storm data source
     * @param conf
     *            configuration; must contain the {@code <name>_PAR}
     *            parallelism entry for this component
     * @param hierarchyPosition
     *            position of this component in the plan hierarchy
     * @throws RuntimeException
     *             if parallelism is greater than one and the chain contains a
     *             distinct operator
     */
    @Override
    public void makeBolts(TopologyBuilder builder, TopologyKiller killer,
	    List<String> allCompNames, Config conf, int hierarchyPosition) {

	// by default print out for the last component
	// for other conditions, can be set via setPrintOut
	if (hierarchyPosition == StormComponent.FINAL_COMPONENT
		&& !_printOutSet) {
	    setPrintOut(true);
	}

	final int parallelism = SystemParameters.getInt(conf, _componentName
		+ "_PAR");
	if (parallelism > 1 && _chain.getDistinct() != null) {
	    throw new RuntimeException(
		    _componentName
			    + ": Distinct operator cannot be specified for multiple spouts for one input file!");
	}

	MyUtilities.checkBatchOutput(_batchOutputMillis,
		_chain.getAggregation(), conf);

	final DistributionSignalSpout distributionSpout = new DistributionSignalSpout(
		_zookeeperHost, this.getName(), _distributionRefreshSeconds);
	builder.setSpout(this.getName() + "-distr", distributionSpout, 1);

	// TODO
	if (_isHarmonized) {
	    // The same harmonizer name is given to both the signal spout and
	    // the data source.
	    final String harmonizerName = this.getName() + "-harmonizer";
	    final HarmonizerSignalSpout harmonizerSpout = new HarmonizerSignalSpout(
		    _zookeeperHost, this.getName(), harmonizerName,
		    _harmonizerWindowCountThreshold,
		    _harmonizerFrequentThreshold);
	    builder.setSpout(this.getName() + "-harm", harmonizerSpout, 1);
	    _dataSource = new SynchronizedStormDataSource(this, allCompNames,
		    _schema, hierarchyPosition, parallelism, _keyIndex,
		    _isPartitioner, builder, killer, conf,
		    _numberOfTuplesThreshold, _zookeeperHost, harmonizerName,
		    _harmonizerUpdaterRate);
	} else {
	    _dataSource = new SynchronizedStormDataSource(this, allCompNames,
		    _schema, hierarchyPosition, parallelism, _keyIndex,
		    _isPartitioner, builder, killer, conf,
		    _numberOfTuplesThreshold);
	}
    }

    /**
     * Not supported by this component.
     *
     * @throws RuntimeException
     *             always
     */
    @Override
    public SignaledDataSourceComponent setBatchOutputMillis(long millis) {
	throw new RuntimeException(
		"Setting batch mode is not allowed for DataSourceComponents!");
    }

    /**
     * Stores the child component.
     *
     * @param child
     *            the child component
     */
    @Override
    public void setChild(Component child) {
	_child = child;
    }

    /**
     * No-op for this component; the wrapper is ignored.
     *
     * @param wrapper
     *            ignored
     * @return this component
     */
    @Override
    public SignaledDataSourceComponent setContentSensitiveThetaJoinWrapper(
	    Type wrapper) {
	return this;
    }

    /**
     * Not supported by this component.
     *
     * @throws RuntimeException
     *             always
     */
    @Override
    public SignaledDataSourceComponent setFullHashList(List<String> fullHashList) {
	throw new RuntimeException(
		"This method should not be invoked for DataSourceComponent!");
    }

    /**
     * Stores the hash expressions.
     *
     * @param hashExpressions
     *            expressions to store
     * @return this component, for chaining
     */
    @Override
    public SignaledDataSourceComponent setHashExpressions(
	    List<ValueExpression> hashExpressions) {
	_hashExpressions = hashExpressions;
	return this;
    }

    /**
     * Not supported by this component.
     *
     * @throws RuntimeException
     *             always
     */
    @Override
    public SignaledDataSourceComponent setInterComp(InterchangingComponent inter) {
	throw new RuntimeException(
		"Datasource component does not support setInterComp");
    }

    /**
     * Not supported by this component.
     *
     * @throws RuntimeException
     *             always
     */
    @Override
    public SignaledDataSourceComponent setJoinPredicate(Predicate joinPredicate) {
	throw new RuntimeException(
		"Datasource component does not support Join Predicates");
    }

    /**
     * Varargs convenience overload; boxes the indexes and delegates to
     * {@link #setOutputPartKey(List)}.
     *
     * @param hashIndexes
     *            indexes to store
     * @return this component, for chaining
     */
    @Override
    public SignaledDataSourceComponent setOutputPartKey(int... hashIndexes) {
	return setOutputPartKey(Arrays.asList(ArrayUtils.toObject(hashIndexes)));
    }

    /**
     * Stores the hash indexes.
     *
     * @param hashIndexes
     *            indexes to store
     * @return this component, for chaining
     */
    @Override
    public SignaledDataSourceComponent setOutputPartKey(
	    List<Integer> hashIndexes) {
	_hashIndexes = hashIndexes;
	return this;
    }

    /**
     * Sets the partitioner flag that is handed to the underlying storm data
     * source in {@link #makeBolts}.
     *
     * @param isPartitioner
     *            new flag value
     * @return this component, for chaining
     */
    public SignaledDataSourceComponent setPartitioner(boolean isPartitioner) {
	_isPartitioner = isPartitioner;
	return this;
    }

    /**
     * Sets the print-out flag and records that it was set explicitly, so
     * {@link #makeBolts} will not override it.
     *
     * @param printOut
     *            new flag value
     * @return this component, for chaining
     */
    @Override
    public SignaledDataSourceComponent setPrintOut(boolean printOut) {
	_printOutSet = true;
	_printOut = printOut;
	return this;
    }
}