/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.alibaba.jstorm.task.group;

import backtype.storm.generated.GlobalStreamId;
import backtype.storm.generated.Grouping;
import backtype.storm.generated.JavaObject;
import backtype.storm.grouping.CustomStreamGrouping;
import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Fields;
import backtype.storm.utils.Utils;
import com.alibaba.jstorm.client.ConfigExtension;
import com.alibaba.jstorm.daemon.worker.WorkerData;
import com.alibaba.jstorm.task.execute.MsgInfo;
import com.alibaba.jstorm.utils.JStormUtils;
import com.alibaba.jstorm.utils.RandomRange;
import com.alibaba.jstorm.utils.Thrift;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

/**
 * Grouper: decides which task(s) of the target component a tuple should be sent to.
 *
 * <p>The grouping strategy is derived once, in the constructor, from the Thrift
 * {@link Grouping} definition; {@link #grouper(List)} and {@link #grouperBatch(List)}
 * then dispatch on the resulting {@link GrouperType}.
 *
 * @author yannian
 */
public class MkGrouper {
    private static final Logger LOG = LoggerFactory.getLogger(MkGrouper.class);

    private final TopologyContext topologyContext;
    // output fields of this (the emitting) component
    private final Fields outFields;
    private final Grouping thriftGrouping;
    // which member of the Thrift Grouping union is set
    private final Grouping._Fields fields;
    private final GrouperType groupType;
    // task ids of the target component, sorted ascending
    private final List<Integer> outTasks;
    private final List<Integer> localTasks;
    private final String streamId;
    private final String targetComponent;

    // grouping method; only the helper matching groupType is initialised
    private RandomRange randomrange;
    private Random random;
    private MkShuffer shuffer;
    private MkCustomGrouper customGrouper;
    private MkFieldsGrouper fieldsGrouper;

    /**
     * Builds the grouper for one (stream, target component) pair.
     *
     * @param _topology_context context of the emitting task
     * @param _out_fields       output fields of the emitting component
     * @param _thrift_grouping  Thrift grouping definition to interpret
     * @param targetComponent   id of the component receiving the tuples
     * @param streamId          id of the stream being grouped
     * @param workerData        worker data handed to the shuffle grouper
     */
    public MkGrouper(TopologyContext _topology_context, Fields _out_fields, Grouping _thrift_grouping,
                     String targetComponent, String streamId, WorkerData workerData) {
        this.topologyContext = _topology_context;
        this.outFields = _out_fields;
        this.thriftGrouping = _thrift_grouping;
        this.streamId = streamId;
        this.targetComponent = targetComponent;

        // Work on a private, sorted copy so the context's own list is never mutated.
        List<Integer> componentTasks = topologyContext.getComponentTasks(targetComponent);
        this.outTasks = new ArrayList<>(componentTasks);
        Collections.sort(this.outTasks);

        this.localTasks = _topology_context.getThisWorkerTasks();
        this.fields = Thrift.groupingType(thriftGrouping);
        // Must run after the fields above are assigned: parseGroupType reads them.
        this.groupType = parseGroupType(workerData);

        String id = _topology_context.getThisTaskId() + ":" + streamId;
        LOG.info("{} groupType is {}, outTasks is {}, localTasks{}", id, groupType, this.outTasks, localTasks);
    }

    /**
     * @return the grouping strategy resolved in the constructor; {@code null} when the
     *         Thrift grouping matched none of the handled kinds
     */
    public GrouperType gettype() {
        return groupType;
    }

    /**
     * Maps the Thrift grouping onto a {@link GrouperType} and initialises the helper
     * object (fields/shuffle/custom grouper or random source) that strategy needs.
     *
     * @param workerData worker data handed to the shuffle grouper
     * @return the resolved type, or {@code null} if the grouping kind is not handled
     */
    private GrouperType parseGroupType(WorkerData workerData) {
        if (Grouping._Fields.FIELDS.equals(fields)) {
            if (Thrift.isGlobalGrouping(thriftGrouping)) {
                // global grouping, just send tuple to first task
                return GrouperType.global;
            }
            List<String> groupFieldNames = Thrift.fieldGrouping(thriftGrouping);
            Fields groupFields = new Fields(groupFieldNames);

            // Raw Map is kept on purpose: it is passed straight through to project APIs.
            Map conf = topologyContext.getStormConf();
            if (ConfigExtension.isEnableKeyRangeFieldGroup(conf)) {
                fieldsGrouper = new MkKeyRangeFieldsGrouper(conf, outFields, groupFields, outTasks);
            } else {
                fieldsGrouper = new MkFieldsGrouper(outFields, groupFields, outTasks);
            }
            // hashcode by fields
            return GrouperType.fields;
        }
        if (Grouping._Fields.ALL.equals(fields)) {
            // send to every task
            return GrouperType.all;
        }
        if (Grouping._Fields.SHUFFLE.equals(fields)
                || Grouping._Fields.LOCAL_OR_SHUFFLE.equals(fields)
                || Grouping._Fields.LOCAL_FIRST.equals(fields)) {
            // all three variants are served by the same shuffle grouper
            shuffer = buildShuffer(workerData);
            return GrouperType.shuffle;
        }
        if (Grouping._Fields.NONE.equals(fields)) {
            // random send one task
            this.random = new Random();
            return GrouperType.none;
        }
        if (Grouping._Fields.CUSTOM_OBJECT.equals(fields)) {
            // user custom grouping by JavaObject
            JavaObject jobj = thriftGrouping.get_custom_object();
            CustomStreamGrouping g = Thrift.instantiateJavaObject(jobj);
            customGrouper = buildCustomGrouper(g);
            return GrouperType.custom_obj;
        }
        if (Grouping._Fields.CUSTOM_SERIALIZED.equals(fields)) {
            // user custom group by serialized Object
            byte[] obj = thriftGrouping.get_custom_serialized();
            CustomStreamGrouping g = (CustomStreamGrouping) Utils.javaDeserialize(obj);
            customGrouper = buildCustomGrouper(g);
            return GrouperType.custom_serialized;
        }
        if (Grouping._Fields.DIRECT.equals(fields)) {
            // directly send to a special task
            return GrouperType.direct;
        }
        return null;
    }

    /** Creates the shuffle grouper between this component and the target component. */
    private MkShuffer buildShuffer(WorkerData workerData) {
        return new MkShuffer(topologyContext.getThisComponentId(), targetComponent, workerData);
    }

    /** Wraps a user-supplied grouping, bound to this task's component and stream. */
    private MkCustomGrouper buildCustomGrouper(CustomStreamGrouping grouping) {
        int myTaskId = topologyContext.getThisTaskId();
        String componentId = topologyContext.getComponentId(myTaskId);
        GlobalStreamId stream = new GlobalStreamId(componentId, streamId);
        return new MkCustomGrouper(topologyContext, grouping, stream, outTasks, myTaskId);
    }

    /** Returns a single-element list holding the first (lowest-id) target task. */
    private List<Integer> firstTask() {
        return JStormUtils.mk_list(outTasks.get(0));
    }

    /**
     * Returns a single-element list holding one randomly chosen target task.
     * Random.nextInt(bound) yields an index in [0, size) directly, with no sign handling.
     */
    private List<Integer> randomTask() {
        int index = random.nextInt(outTasks.size());
        return JStormUtils.mk_list(outTasks.get(index));
    }

    /**
     * Get which task(s) a tuple should be sent to.
     *
     * @param values values of the tuple being routed
     * @return target task ids; an empty list when the group type has no branch here
     *         (this includes {@code direct} and an unresolved {@code null} type)
     */
    public List<Integer> grouper(List<Object> values) {
        if (GrouperType.global.equals(groupType)) {
            // send to the first task of the sorted target task list
            return firstTask();
        }
        if (GrouperType.fields.equals(groupType)) {
            // field grouping
            return fieldsGrouper.grouper(values);
        }
        if (GrouperType.all.equals(groupType)) {
            // send to every task; note this hands out the internal list itself
            return outTasks;
        }
        if (GrouperType.shuffle.equals(groupType)) {
            // random, but the random is different from none
            return shuffer.grouper(values);
        }
        if (GrouperType.none.equals(groupType)) {
            return randomTask();
        }
        if (GrouperType.custom_obj.equals(groupType) || GrouperType.custom_serialized.equals(groupType)) {
            return customGrouper.grouper(values);
        }

        LOG.warn("Unsupported group type");
        return new ArrayList<>();
    }

    /**
     * Routes a whole batch of messages.
     *
     * <p>For shuffle, global, all and none groupings a single routing decision is made
     * and the entire batch is mapped to it; fields and custom groupings are resolved
     * per message.
     *
     * @param batch messages to route
     * @return map from target task list to the messages destined for it; empty when the
     *         group type has no branch here
     */
    public Map<Object, List<MsgInfo>> grouperBatch(List<MsgInfo> batch) {
        Map<Object, List<MsgInfo>> ret = new HashMap<>();
        //optimize fieldGrouping & customGrouping
        if (GrouperType.shuffle.equals(groupType)) {
            // random, but the random is different from none
            ret.put(shuffer.grouper(null), batch);
        } else if (GrouperType.global.equals(groupType)) {
            // send to the first task of the sorted target task list
            ret.put(firstTask(), batch);
        } else if (GrouperType.fields.equals(groupType)) {
            fieldsGrouper.batchGrouper(batch, ret);
        } else if (GrouperType.all.equals(groupType)) {
            // send to every task
            ret.put(outTasks, batch);
        } else if (GrouperType.none.equals(groupType)) {
            ret.put(randomTask(), batch);
        } else if (GrouperType.custom_obj.equals(groupType) || GrouperType.custom_serialized.equals(groupType)) {
            // bucket messages by the task list the custom grouping picks for each one
            for (MsgInfo msg : batch) {
                List<Integer> out = customGrouper.grouper(msg.values);
                List<MsgInfo> customBatch = ret.get(out);
                if (customBatch == null) {
                    customBatch = JStormUtils.mk_list();
                    ret.put(out, customBatch);
                }
                customBatch.add(msg);
            }
        } else {
            LOG.warn("Unsupported group type");
        }
        return ret;
    }
}