/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alibaba.jstorm.task.group;
import backtype.storm.generated.GlobalStreamId;
import backtype.storm.generated.Grouping;
import backtype.storm.generated.JavaObject;
import backtype.storm.grouping.CustomStreamGrouping;
import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Fields;
import backtype.storm.utils.Utils;
import com.alibaba.jstorm.daemon.worker.WorkerData;
import com.alibaba.jstorm.task.execute.MsgInfo;
import com.alibaba.jstorm.utils.JStormUtils;
import com.alibaba.jstorm.utils.RandomRange;
import com.alibaba.jstorm.utils.Thrift;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
/**
* Grouper, get which task should be send to for one tuple
*
* @author yannian
*
*/
public class MkGrouper {
private static final Logger LOG = LoggerFactory.getLogger(MkGrouper.class);
private TopologyContext topology_context;
// this component output fields
private Fields out_fields;
private Grouping thrift_grouping;
private Grouping._Fields fields;
private GrouperType grouptype;
private List<Integer> out_tasks;
private List<Integer> local_tasks;
private String streamId;
private String targetComponent;
// grouping method
private RandomRange randomrange;
private Random random;
private MkShuffer shuffer;
private MkCustomGrouper custom_grouper;
private MkFieldsGrouper fields_grouper;
public MkGrouper(TopologyContext _topology_context, Fields _out_fields, Grouping _thrift_grouping, String targetComponent, String streamId,
WorkerData workerData) {
this.topology_context = _topology_context;
this.out_fields = _out_fields;
this.thrift_grouping = _thrift_grouping;
this.streamId = streamId;
this.targetComponent = targetComponent;
List<Integer> outTasks = topology_context.getComponentTasks(targetComponent);
this.out_tasks = new ArrayList<Integer>();
this.out_tasks.addAll(outTasks);
Collections.sort(this.out_tasks);
this.local_tasks = _topology_context.getThisWorkerTasks();
this.fields = Thrift.groupingType(thrift_grouping);
this.grouptype = this.parseGroupType(workerData);
String id = _topology_context.getThisTaskId() + ":" + streamId;
LOG.info(id + " grouptype is " + grouptype + ", out_tasks is " + out_tasks + ", local_tasks" + local_tasks);
}
public GrouperType gettype() {
return grouptype;
}
private GrouperType parseGroupType(WorkerData workerData) {
GrouperType grouperType = null;
if (Grouping._Fields.FIELDS.equals(fields)) {
if (Thrift.isGlobalGrouping(thrift_grouping)) {
// global grouping, just send tuple to first task
grouperType = GrouperType.global;
} else {
List<String> fields_group = Thrift.fieldGrouping(thrift_grouping);
Fields fields = new Fields(fields_group);
fields_grouper = new MkFieldsGrouper(out_fields, fields, out_tasks);
// hashcode by fields
grouperType = GrouperType.fields;
}
} else if (Grouping._Fields.ALL.equals(fields)) {
// send to every task
grouperType = GrouperType.all;
} else if (Grouping._Fields.SHUFFLE.equals(fields)) {
grouperType = GrouperType.shuffle;
shuffer = new MkShuffer(topology_context.getThisComponentId(), targetComponent, workerData);
} else if (Grouping._Fields.NONE.equals(fields)) {
// random send one task
this.random = new Random();
grouperType = GrouperType.none;
} else if (Grouping._Fields.CUSTOM_OBJECT.equals(fields)) {
// user custom grouping by JavaObject
JavaObject jobj = thrift_grouping.get_custom_object();
CustomStreamGrouping g = Thrift.instantiateJavaObject(jobj);
int myTaskId = topology_context.getThisTaskId();
String componentId = topology_context.getComponentId(myTaskId);
GlobalStreamId stream = new GlobalStreamId(componentId, streamId);
custom_grouper = new MkCustomGrouper(topology_context, g, stream, out_tasks, myTaskId);
grouperType = GrouperType.custom_obj;
} else if (Grouping._Fields.CUSTOM_SERIALIZED.equals(fields)) {
// user custom group by serialized Object
byte[] obj = thrift_grouping.get_custom_serialized();
CustomStreamGrouping g = (CustomStreamGrouping) Utils.javaDeserialize(obj);
int myTaskId = topology_context.getThisTaskId();
String componentId = topology_context.getComponentId(myTaskId);
GlobalStreamId stream = new GlobalStreamId(componentId, streamId);
custom_grouper = new MkCustomGrouper(topology_context, g, stream, out_tasks, myTaskId);
grouperType = GrouperType.custom_serialized;
} else if (Grouping._Fields.DIRECT.equals(fields)) {
// directly send to a special task
grouperType = GrouperType.direct;
} else if (Grouping._Fields.LOCAL_OR_SHUFFLE.equals(fields)) {
grouperType = GrouperType.shuffle;
shuffer = new MkShuffer(topology_context.getThisComponentId(), targetComponent, workerData);
} else if (Grouping._Fields.LOCAL_FIRST.equals(fields)) {
grouperType = GrouperType.shuffle;
shuffer = new MkShuffer(topology_context.getThisComponentId(), targetComponent, workerData);
}
return grouperType;
}
/**
* get which task should tuple be sent to
*
* @param values
* @return
*/
public List<Integer> grouper(List<Object> values) {
if (GrouperType.global.equals(grouptype)) {
// send to task which taskId is 0
return JStormUtils.mk_list(out_tasks.get(0));
} else if (GrouperType.fields.equals(grouptype)) {
// field grouping
return fields_grouper.grouper(values);
} else if (GrouperType.all.equals(grouptype)) {
// send to every task
return out_tasks;
} else if (GrouperType.shuffle.equals(grouptype)) {
// random, but the random is different from none
return shuffer.grouper(values);
} else if (GrouperType.none.equals(grouptype)) {
int rnd = Math.abs(random.nextInt() % out_tasks.size());
return JStormUtils.mk_list(out_tasks.get(rnd));
} else if (GrouperType.custom_obj.equals(grouptype)) {
return custom_grouper.grouper(values);
} else if (GrouperType.custom_serialized.equals(grouptype)) {
return custom_grouper.grouper(values);
} else {
LOG.warn("Unsupportted group type");
}
return new ArrayList<Integer>();
}
public Map<Object, List<MsgInfo>> grouperBatch(List<MsgInfo> batch) {
Map<Object, List<MsgInfo>> ret = new HashMap<Object, List<MsgInfo>>();
//optimize fieldGrouping & customGrouping
if (GrouperType.shuffle.equals(grouptype)) {
// random, but the random is different from none
ret.put(shuffer.grouper(null), batch);
} else if (GrouperType.global.equals(grouptype)) {
// send to task which taskId is 0
ret.put(JStormUtils.mk_list(out_tasks.get(0)), batch);
} else if (GrouperType.fields.equals(grouptype)) {
fields_grouper.batchGrouper(batch, ret);
} else if (GrouperType.all.equals(grouptype)) {
// send to every task
ret.put(out_tasks, batch);
} else if (GrouperType.none.equals(grouptype)) {
int rnd = Math.abs(random.nextInt() % out_tasks.size());
ret.put(JStormUtils.mk_list(out_tasks.get(rnd)), batch);
} else if (GrouperType.custom_obj.equals(grouptype) || GrouperType.custom_serialized.equals(grouptype)) {
for (int i = 0; i < batch.size(); i++ ) {
MsgInfo msg = batch.get(i);
List<Integer> out = custom_grouper.grouper(msg.values);
List<MsgInfo> customBatch = ret.get(out);
if (customBatch == null) {
customBatch = JStormUtils.mk_list();
ret.put(out, customBatch);
}
customBatch.add(msg);
}
} else {
LOG.warn("Unsupportted group type");
}
return ret;
}
}