RangeFilteredMulticastStreamGrouping.java example

Explorer
squall-master
/*
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ch.epfl.data.squall.ewh.storm_components.stream_grouping;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;

import backtype.storm.generated.GlobalStreamId;
import backtype.storm.task.WorkerTopologyContext;
import ch.epfl.data.squall.predicates.ComparisonPredicate;
import ch.epfl.data.squall.types.NumericType;
import ch.epfl.data.squall.utilities.MyUtilities;
import ch.epfl.data.squall.utilities.SystemParameters;
import ch.epfl.data.squall.utilities.SystemParameters.HistogramType;

// This class is used only for connecting D2Combiner to S1Reservoir,
//   when D2Combiner computes only d2_equi and has some duplicates of it over the boundaries (that's why filtering) due to a equi-depth histogram on R2
//   and S1Reservoir computes d2 using another equi-depth histogram on R1
public class RangeFilteredMulticastStreamGrouping extends
	RangeMulticastStreamGrouping {
    private static Logger LOG = Logger
	    .getLogger(RangeFilteredMulticastStreamGrouping.class);
    private List _srcRangeBoundaries; // (numTargetTasks - 1) of them

    private String _parentCompName;
    private Map<Integer, Integer> _parentTaskIdtoIndex = new HashMap<Integer, Integer>(); // Id
											  // is
											  // Storm-Dependent,
											  // Index
											  // =
											  // [0,
											  // parallelism)

    // with multicast
    public RangeFilteredMulticastStreamGrouping(Map map,
	    ComparisonPredicate comparison, NumericType wrapper,
	    HistogramType dstHistType, HistogramType srcHistType,
	    String parentCompName) {
	super(map, comparison, wrapper, dstHistType);
	_parentCompName = parentCompName;
	// LOG.info("I should be  EWH_SAMPLE_D2_COMBINER = " + _parentCompName);
	int numLastJoiners = SystemParameters.getInt(_map, "PAR_LAST_JOINERS");
	_srcRangeBoundaries = createBoundariesFromHistogram(
		srcHistType.filePrefix(), numLastJoiners);
    }

    @Override
    public void prepare(WorkerTopologyContext wtc, GlobalStreamId gsi,
	    List<Integer> targetTasks) {
	List<Integer> parentTaskIds = wtc.getComponentTasks(_parentCompName);
	for (int i = 0; i < parentTaskIds.size(); i++) {
	    // creating inverted index
	    // this should be the same order as in R2.targetTasks when sending
	    // to D2Combiner; this method is used in theta-joins (particularly
	    // dynamic)
	    int parentTaskId = parentTaskIds.get(i);
	    _parentTaskIdtoIndex.put(parentTaskId, i);
	}

	super.prepare(wtc, gsi, targetTasks);
    }

    @Override
    public List<Integer> chooseTasks(int taskId, List<Object> stormTuple) {
	final List<String> tuple = (List<String>) stormTuple.get(1);
	final String tupleHash = (String) stormTuple.get(2);
	if (!MyUtilities.isFinalAck(tuple, _map)) {
	    // taskId is Storm-dependent sender Id
	    int taskIndex = _parentTaskIdtoIndex.get(taskId); // translation to
							      // [0,
							      // parallelism)
	    if (!isWithinBoundaries(taskIndex, tupleHash, _srcRangeBoundaries)) {
		// LOG.info("FILTERED TUPLE key = " + tupleHash +
		// " in taskIndex " + taskIndex + "!");
		// send it nowhere
		return new ArrayList<Integer>();
	    }
	}
	return super.chooseTasks(taskId, stormTuple);
    }

    private boolean isWithinBoundaries(int taskIndex, String strKey,
	    List boundaries) {
	Object key = _wrapper.fromString(strKey);
	int keyTaskIndex = chooseTaskIndex(key, boundaries);
	// LOG.info("For each nonFinalAckTuple: key = " + key +
	// ", keyTaskIndex = " + keyTaskIndex);
	return keyTaskIndex == taskIndex;
    }
}