/* * #! * % * Copyright (C) 2014 - 2016 Humboldt-Universität zu Berlin * % * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #_ */ package de.hub.cs.dbis.aeolus.batching; import static org.mockito.Matchers.any; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Random; import java.util.Set; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.powermock.api.mockito.PowerMockito; import org.powermock.core.classloader.annotations.PrepareForTest; import org.powermock.modules.junit4.PowerMockRunner; import backtype.storm.Config; import backtype.storm.generated.Grouping; import backtype.storm.task.TopologyContext; import backtype.storm.task.WorkerTopologyContext; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Values; import backtype.storm.utils.Utils; /** * @author mjsax */ @RunWith(PowerMockRunner.class) @PrepareForTest(StormConnector.class) public class AbstractBatchCollectorTest { private Random r; final String sourceId = "sourceId"; private int[] generateConsumerTasks(int maxNumberOfReceivers, int maxNumberOfTasks) { assert (maxNumberOfReceivers > 0); assert (maxNumberOfTasks > 0); int minNumberOfReceiverd = 1; int minNumberOfTasks = 1; if(maxNumberOfReceivers > 1) { ++minNumberOfReceiverd; --maxNumberOfReceivers; } else if(maxNumberOfTasks > 1) { ++minNumberOfTasks; --maxNumberOfTasks; } final int numberOfConsumers = minNumberOfReceiverd + this.r.nextInt(maxNumberOfReceivers); final int[] numberOfConsumerTasks = new int[numberOfConsumers]; for(int i = 0; i < numberOfConsumers; ++i) { numberOfConsumerTasks[i] = minNumberOfTasks + this.r.nextInt(maxNumberOfTasks); } return numberOfConsumerTasks; } private String[] generateStreamIds(final int numberOfStreams, int mode) { assert (numberOfStreams > 0); assert (0 <= mode && mode <= 2); String[] outputStreams = new String[numberOfStreams]; for(int i = 0; i < outputStreams.length; ++i) { switch(mode) { case 0: outputStreams[i] = Utils.DEFAULT_STREAM_ID; break; case 1: outputStreams[i] = "streamId-" + i; break; case 2: outputStreams[i] = "streamId-" + this.r.nextInt(outputStreams.length); break; } } return outputStreams; } private HashMap<String, Integer> generateBatchSizes(String[] streamIds) { HashMap<String, Integer> batchSizes = new HashMap<String, Integer>(); boolean foundZero; do { for(String id : streamIds) { batchSizes.put(id, new Integer(this.r.nextInt(6))); } foundZero = false; for(Integer bS : batchSizes.values()) { if(bS.intValue() == 0) { foundZero = true; } } } while(foundZero); for(Integer bS : batchSizes.values()) { assert (bS.intValue() > 0); } return batchSizes; } @Before public void prepareTest() { final long seed = System.currentTimeMillis(); this.r = new Random(seed); System.out.println("Test seed: " + seed); PowerMockito.mockStatic(StormConnector.class); } @Test public void testEmitShuffleSimple() { this.prepateAndRunTestEmitShuffleDefaultOutputStream(this.generateConsumerTasks(1, 1)); } @Test public void testEmitShuffleMultipleConsumerTasks() { this.prepateAndRunTestEmitShuffleDefaultOutputStream(this.generateConsumerTasks(1, 10)); } @Test public void testEmitShuffleMultipleConsumersNoDop() { this.prepateAndRunTestEmitShuffleDefaultOutputStream(this.generateConsumerTasks(10, 1)); } @Test public void testEmitShuffleFull() { this.prepateAndRunTestEmitShuffleDefaultOutputStream(this.generateConsumerTasks(5, 5)); } private void prepateAndRunTestEmitShuffleDefaultOutputStream(int[] numberOfConsumerTasks) { this.runTestEmitShuffle(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 0), null); } @Test public void testEmitShuffleDistinctOutputStreams() { int[] numberOfConsumerTasks = this.generateConsumerTasks(5, 5); this.runTestEmitShuffle(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 1), null); } @Test public void testEmitShuffleRandomOutputStreams() { int[] numberOfConsumerTasks = this.generateConsumerTasks(5, 5); this.runTestEmitShuffle(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 2), null); } @Test public void testEmitShuffleRandomOutputStreamsDifferentBatchSizes() { int[] numberOfConsumerTasks = this.generateConsumerTasks(5, 5); String[] streamIds = this.generateStreamIds(numberOfConsumerTasks.length, 2); this.runTestEmitShuffle(numberOfConsumerTasks, streamIds, this.generateBatchSizes(streamIds)); } private void runTestEmitShuffle(int[] numberOfConsumerTasks, String[] outputStreams, HashMap<String, Integer> batchSizes) { assert (numberOfConsumerTasks.length == outputStreams.length); final int batchSize = 1 + this.r.nextInt(5); final String consumerPrefix = "consumer-"; final int[] numberOfAttributes = new int[numberOfConsumerTasks.length]; @SuppressWarnings("unchecked") List<Object>[] expectedResult = new List[numberOfConsumerTasks.length]; Batch[] currentBatch = new Batch[numberOfConsumerTasks.length]; Grouping grouping = mock(Grouping.class); TopologyContext context = mock(TopologyContext.class); when(context.getThisComponentId()).thenReturn(this.sourceId); Map<String, Map<String, Grouping>> targets = new HashMap<String, Map<String, Grouping>>(); assert (numberOfConsumerTasks.length < 100); for(int i = 0; i < numberOfConsumerTasks.length; ++i) { assert (numberOfConsumerTasks[i] < 100); final String consumerId = consumerPrefix + i; Map<String, Grouping> consumer = new HashMap<String, Grouping>(); consumer.put(consumerId, grouping); final List<Integer> consumerTasks = new ArrayList<Integer>(); for(int j = 0; j < numberOfConsumerTasks[i]; ++j) { consumerTasks.add(new Integer(i * 100 + j)); } when(context.getComponentTasks(consumerId)).thenReturn(consumerTasks); Map<String, Grouping> streamMapping = targets.get(outputStreams[i]); if(streamMapping == null) { targets.put(outputStreams[i], consumer); } else { streamMapping.putAll(consumer); } Fields schema = context.getComponentOutputFields(this.sourceId, outputStreams[i]); if(schema == null) { numberOfAttributes[i] = 1 + this.r.nextInt(5); String[] attributes = new String[numberOfAttributes[i]]; for(int j = 0; j < numberOfAttributes[i]; ++j) { attributes[j] = "a" + i + "_" + j; } when(context.getComponentOutputFields(this.sourceId, outputStreams[i])).thenReturn( new Fields(attributes)); } else { numberOfAttributes[i] = schema.size(); } expectedResult[i] = new LinkedList<Object>(); if(batchSizes == null) { currentBatch[i] = new Batch(batchSize, numberOfAttributes[i]); } else { int bS = batchSizes.get(outputStreams[i]).intValue(); if(bS > 0) { currentBatch[i] = new Batch(bS, numberOfAttributes[i]); } } } when(context.getThisTargets()).thenReturn(targets); TestBatchCollector collector; int maxBatchSize = 0; if(batchSizes == null) { collector = new TestBatchCollector(context, batchSize); maxBatchSize = batchSize; } else { collector = new TestBatchCollector(context, batchSizes); for(Integer b : batchSizes.values()) { int bS = b.intValue(); if(bS > maxBatchSize) { maxBatchSize = bS; } } } final int numberOfTuples = maxBatchSize * 20 + this.r.nextInt(maxBatchSize * 10); for(int i = 0; i < numberOfTuples; ++i) { final int index = this.r.nextInt(numberOfConsumerTasks.length); Values tuple = new Values(); for(int j = 0; j < numberOfAttributes[index]; ++j) { switch(this.r.nextInt(3)) { case 0: tuple.add(new Integer(this.r.nextInt())); break; case 1: tuple.add(new Double(this.r.nextInt() + this.r.nextDouble())); break; default: tuple.add(new String("" + (char)(32 + this.r.nextInt(95)))); break; } } collector.tupleEmit(outputStreams[index], null, tuple, null); for(int j = 0; j < numberOfConsumerTasks.length; ++j) { if(outputStreams[index].equals(outputStreams[j])) { if(currentBatch[j] != null) { currentBatch[j].addTuple(tuple); if(currentBatch[j].isFull()) { expectedResult[j].add(currentBatch[j]); if(batchSizes == null) { currentBatch[j] = new Batch(batchSize, numberOfAttributes[j]); } else { currentBatch[j] = new Batch(batchSizes.get(outputStreams[index]).intValue(), numberOfAttributes[j]); } } } else { expectedResult[j].add(tuple); } } } } for(int i = 0; i < numberOfConsumerTasks.length; ++i) { Assert.assertEquals(expectedResult[i], collector.resultBuffer.get(outputStreams[i])); } } @Test public void testEmitFieldGroupingSimple() { this.prepareAndRunTestEmitFieldsGroupingDefaultOutputStream(this.generateConsumerTasks(1, 1)); } @Test public void testEmitFieldGroupingMultipleConsumerTasks() { this.prepareAndRunTestEmitFieldsGroupingDefaultOutputStream(this.generateConsumerTasks(1, 10)); } @Test public void testEmitFieldGroupingMultipleConsumersNoDop() { this.prepareAndRunTestEmitFieldsGroupingDefaultOutputStream(this.generateConsumerTasks(10, 1)); } @Test public void testEmitFieldGroupingFull() { this.prepareAndRunTestEmitFieldsGroupingDefaultOutputStream(this.generateConsumerTasks(3, 3)); } private void prepareAndRunTestEmitFieldsGroupingDefaultOutputStream(int[] numberOfConsumerTasks) { this.runTestEmitFieldsGrouping(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 0), null); } @Test public void testEmitFieldGroupingDistinctOutputStreams() { int[] numberOfConsumerTasks = this.generateConsumerTasks(3, 3); this.runTestEmitFieldsGrouping(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 1), null); } @Test public void testEmitFieldGroupingRandomOutputStreams() { int[] numberOfConsumerTasks = this.generateConsumerTasks(3, 3); this.runTestEmitFieldsGrouping(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 2), null); } @Test public void testEmitFieldGroupingRandomOutputStreamsDifferentBatchSizes() { int[] numberOfConsumerTasks = this.generateConsumerTasks(3, 3); String[] streamIds = this.generateStreamIds(numberOfConsumerTasks.length, 2); this.runTestEmitFieldsGrouping(numberOfConsumerTasks, streamIds, this.generateBatchSizes(streamIds)); } private void runTestEmitFieldsGrouping(int[] numberOfConsumerTasks, String[] outputStreams, HashMap<String, Integer> batchSizes) { assert (numberOfConsumerTasks.length == outputStreams.length); assert (numberOfConsumerTasks.length < 100); for(int i = 0; i < numberOfConsumerTasks.length; ++i) { assert (numberOfConsumerTasks[i] < 100); } final int batchSize = 1 + this.r.nextInt(5); final String consumerPrefix = "consumer-"; Map<String, Map<Set<Integer>, Batch>> currentBatch = new HashMap<String, Map<Set<Integer>, Batch>>(); Map<String, List<Integer>> taskIds = new HashMap<String, List<Integer>>(); Map<String, List<String>> streams = new HashMap<String, List<String>>(); Grouping grouping = mock(Grouping.class); TopologyContext context = mock(TopologyContext.class); when(context.getThisComponentId()).thenReturn(this.sourceId); int maxNumberOfBatches = 1; for(int i = 0; i < numberOfConsumerTasks.length; ++i) { maxNumberOfBatches *= numberOfConsumerTasks[i]; } final int numberOfDistinctValues = 1 + maxNumberOfBatches / 2 + this.r.nextInt(2 * maxNumberOfBatches); Values[] partitionsTuple = new Values[numberOfDistinctValues]; for(int i = 0; i < numberOfDistinctValues; ++i) { partitionsTuple[i] = new Values(new Integer(i)); } Map<String, Map<String, Grouping>> targets = new HashMap<String, Map<String, Grouping>>(); for(int i = 0; i < numberOfConsumerTasks.length; ++i) { final String consumerId = consumerPrefix + i; Map<String, Grouping> consumer = new HashMap<String, Grouping>(); consumer.put(consumerId, grouping); Map<Set<Integer>, Batch> old = currentBatch.put(outputStreams[i], new HashMap<Set<Integer>, Batch>()); if(old == null) { old = new HashMap<Set<Integer>, Batch>(); } final List<Integer> consumerTasks = new ArrayList<Integer>(); for(int j = 0; j < numberOfConsumerTasks[i]; ++j) { Integer tid = new Integer(i * 100 + j); consumerTasks.add(tid); if(old.size() == 0) { Set<Integer> s = new HashSet<Integer>(); s.add(tid); if(batchSizes == null) { currentBatch.get(outputStreams[i]).put(s, new Batch(batchSize, 1)); } else { int bS = batchSizes.get(outputStreams[i]).intValue(); if(bS > 0) { currentBatch.get(outputStreams[i]).put(s, new Batch(bS, 1)); } } } else { for(Set<Integer> s : old.keySet()) { Set<Integer> s2 = new HashSet<Integer>(); s2.addAll(s); s2.add(tid); if(batchSizes == null) { currentBatch.get(outputStreams[i]).put(s2, new Batch(batchSize, 1)); } else { int bS = batchSizes.get(outputStreams[i]).intValue(); if(bS > 0) { currentBatch.get(outputStreams[i]).put(s2, new Batch(bS, 1)); } } } } } when(context.getComponentTasks(consumerId)).thenReturn(consumerTasks); taskIds.put(consumerId, consumerTasks); for(int j = 0; j < numberOfDistinctValues; ++j) { when( StormConnector.getFieldsGroupingReceiverTaskId(any(WorkerTopologyContext.class), eq(this.sourceId), eq(outputStreams[i]), eq(consumerId), eq(partitionsTuple[j]))).thenReturn( consumerTasks.get(j % numberOfConsumerTasks[i])); } Map<String, Grouping> streamMapping = targets.get(outputStreams[i]); if(streamMapping == null) { targets.put(outputStreams[i], consumer); } else { streamMapping.putAll(consumer); } Fields schema = context.getComponentOutputFields(this.sourceId, outputStreams[i]); if(schema == null) { when(context.getComponentOutputFields(this.sourceId, outputStreams[i])).thenReturn( new Fields("attribute")); } List<String> receivers = streams.get(outputStreams[i]); if(receivers == null) { receivers = new LinkedList<String>(); streams.put(outputStreams[i], receivers); } receivers.add(consumerId); } when(context.getThisTargets()).thenReturn(targets); when(new Boolean(grouping.is_set_fields())).thenReturn(new Boolean(true)); Map<String, List<Object>> expectedResult = new HashMap<String, List<Object>>(); for(String s : streams.keySet()) { expectedResult.put(s, new LinkedList<Object>()); } TestBatchCollector collector; int maxBatchSize = 0; if(batchSizes == null) { collector = new TestBatchCollector(context, batchSize); maxBatchSize = batchSize; } else { collector = new TestBatchCollector(context, batchSizes); for(Integer b : batchSizes.values()) { int bS = b.intValue(); if(bS > maxBatchSize) { maxBatchSize = bS; } } } final int numberOfTuples = numberOfDistinctValues * maxBatchSize * 20 + this.r.nextInt(numberOfDistinctValues * maxBatchSize * 10); for(int i = 0; i < numberOfTuples; ++i) { final int index = this.r.nextInt(numberOfConsumerTasks.length); Values tuple = new Values(new Integer(this.r.nextInt(numberOfDistinctValues))); String outputStream = outputStreams[index]; collector.tupleEmit(outputStream, null, tuple, null); for(Entry<String, List<String>> s : streams.entrySet()) { if(s.getKey().equals(outputStream)) { Set<Integer> batchKey = new HashSet<Integer>(); for(int k = 0; k < numberOfConsumerTasks.length; ++k) { if(outputStreams[k].equals(outputStream)) { batchKey.add(taskIds.get(consumerPrefix + k).get( ((Integer)tuple.get(0)).intValue() % numberOfConsumerTasks[k])); } } Batch batch = currentBatch.get(outputStream).get(batchKey); if(batch != null) { batch.addTuple(tuple); if(batch.isFull()) { expectedResult.get(outputStream).add(batch); if(batchSizes == null) { currentBatch.get(outputStream).put(batchKey, new Batch(batchSize, 1)); } else { currentBatch.get(outputStream).put(batchKey, new Batch(batchSizes.get(outputStream).intValue(), 1)); } } } else { expectedResult.get(outputStream).add(tuple); } } } } for(int i = 0; i < numberOfConsumerTasks.length; ++i) { Assert.assertEquals(expectedResult.get(outputStreams[i]), collector.resultBuffer.get(outputStreams[i])); } } @Test public void testEmitFieldGroupingViaDirectEmitSimple() { this.prepareAndRunTestEmitFieldsGroupingViaDirectEmitDefaultOutputStream(this.generateConsumerTasks(1, 1)); } @Test public void testEmitFieldGroupingViaDirectEmitMultipleConsumerTasks() { this.prepareAndRunTestEmitFieldsGroupingViaDirectEmitDefaultOutputStream(this.generateConsumerTasks(1, 10)); } @Test public void testEmitFieldGroupingViaDirectEmitMultipleConsumersNoDop() { this.prepareAndRunTestEmitFieldsGroupingViaDirectEmitDefaultOutputStream(this.generateConsumerTasks(10, 1)); } @Test public void testEmitFieldGroupingViaDirectEmitFull() { this.prepareAndRunTestEmitFieldsGroupingViaDirectEmitDefaultOutputStream(this.generateConsumerTasks(3, 3)); } private void prepareAndRunTestEmitFieldsGroupingViaDirectEmitDefaultOutputStream(int[] numberOfConsumerTasks) { this.runTestEmitFieldsGroupingViaDirectEmit(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 0), null); } @Test public void testEmitFieldGroupingViaDirectEmitDistinctOutputStreams() { int[] numberOfConsumerTasks = this.generateConsumerTasks(3, 3); this.runTestEmitFieldsGroupingViaDirectEmit(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 1), null); } @Test public void testEmitFieldGroupingViaDirectEmitRandomOutputStreams() { int[] numberOfConsumerTasks = this.generateConsumerTasks(3, 3); this.runTestEmitFieldsGroupingViaDirectEmit(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 2), null); } @Test public void testEmitFieldGroupingViaDirectEmitRandomOutputStreamsDifferentBatchSizes() { int[] numberOfConsumerTasks = this.generateConsumerTasks(3, 3); String[] streamIds = this.generateStreamIds(numberOfConsumerTasks.length, 2); this.runTestEmitFieldsGroupingViaDirectEmit(numberOfConsumerTasks, streamIds, this.generateBatchSizes(streamIds)); } private void runTestEmitFieldsGroupingViaDirectEmit(int[] numberOfConsumerTasks, String[] outputStreams, HashMap<String, Integer> batchSizes) { assert (numberOfConsumerTasks.length == outputStreams.length); assert (numberOfConsumerTasks.length < 100); for(int i = 0; i < numberOfConsumerTasks.length; ++i) { assert (numberOfConsumerTasks[i] < 100); } final int batchSize = 1 + this.r.nextInt(5); final String consumerPrefix = "consumer-"; Map<Integer, Batch> currentBatch = new HashMap<Integer, Batch>(); Map<String, List<Integer>> taskIds = new HashMap<String, List<Integer>>(); Map<String, List<String>> streams = new HashMap<String, List<String>>(); Grouping grouping = mock(Grouping.class); when(new Boolean(grouping.is_set_fields())).thenReturn(new Boolean(true)); Grouping directGrouping = mock(Grouping.class); when(new Boolean(directGrouping.is_set_direct())).thenReturn(new Boolean(true)); TopologyContext context = mock(TopologyContext.class); when(context.getThisComponentId()).thenReturn(this.sourceId); int maxNumberOfBatches = 1; for(int i = 0; i < numberOfConsumerTasks.length; ++i) { maxNumberOfBatches *= numberOfConsumerTasks[i]; } final int numberOfDistinctValues = 1 + maxNumberOfBatches / 2 + this.r.nextInt(2 * maxNumberOfBatches); Values[] partitionsTuple = new Values[numberOfDistinctValues]; for(int i = 0; i < numberOfDistinctValues; ++i) { partitionsTuple[i] = new Values(new Integer(i)); } Map<String, Map<String, Grouping>> targets = new HashMap<String, Map<String, Grouping>>(); for(int i = 0; i < numberOfConsumerTasks.length; ++i) { final String consumerId = consumerPrefix + i; Map<String, Grouping> consumer = new HashMap<String, Grouping>(); consumer.put(consumerId, grouping); Map<String, Grouping> directConsumer = new HashMap<String, Grouping>(); directConsumer.put(consumerId, directGrouping); final List<Integer> consumerTasks = new ArrayList<Integer>(); for(int j = 0; j < numberOfConsumerTasks[i]; ++j) { Integer tid = new Integer(i * 100 + j); consumerTasks.add(tid); if(batchSizes == null) { currentBatch.put(tid, new Batch(batchSize, 1)); } else { int bS = batchSizes.get(outputStreams[i]).intValue(); if(bS > 0) { currentBatch.put(tid, new Batch(batchSizes.get(outputStreams[i]).intValue(), 1)); } } } when(context.getComponentTasks(consumerId)).thenReturn(consumerTasks); taskIds.put(consumerId, consumerTasks); for(int j = 0; j < numberOfDistinctValues; ++j) { when( StormConnector.getFieldsGroupingReceiverTaskId(any(WorkerTopologyContext.class), eq(this.sourceId), eq(outputStreams[i]), eq(consumerId), eq(partitionsTuple[j]))).thenReturn( consumerTasks.get(j % numberOfConsumerTasks[i])); } Map<String, Grouping> streamMapping = targets.get(outputStreams[i]); if(streamMapping == null) { targets.put(outputStreams[i], consumer); targets.put(BatchingOutputFieldsDeclarer.STREAM_PREFIX + outputStreams[i], directConsumer); } else { streamMapping.putAll(consumer); targets.get(BatchingOutputFieldsDeclarer.STREAM_PREFIX + outputStreams[i]).putAll(directConsumer); } Fields schema = context.getComponentOutputFields(this.sourceId, outputStreams[i]); if(schema == null) { when(context.getComponentOutputFields(this.sourceId, outputStreams[i])).thenReturn( new Fields("attribute")); when( context.getComponentOutputFields(this.sourceId, BatchingOutputFieldsDeclarer.STREAM_PREFIX + outputStreams[i])).thenReturn(new Fields("attribute")); } List<String> receivers = streams.get(outputStreams[i]); if(receivers == null) { receivers = new LinkedList<String>(); streams.put(outputStreams[i], receivers); } receivers.add(consumerId); } when(context.getThisTargets()).thenReturn(targets); Map<Integer, List<Object>> expectedResultPerTask = new HashMap<Integer, List<Object>>(); for(List<Integer> ids : taskIds.values()) { for(Integer i : ids) { expectedResultPerTask.put(i, new LinkedList<Object>()); } } TestBatchCollector collector; int maxBatchSize = 0; if(batchSizes == null) { collector = new TestBatchCollector(context, batchSize); maxBatchSize = batchSize; } else { collector = new TestBatchCollector(context, batchSizes); for(Integer b : batchSizes.values()) { int bS = b.intValue(); if(bS > maxBatchSize) { maxBatchSize = bS; } } } final int numberOfTuples = numberOfDistinctValues * maxBatchSize * 20 + this.r.nextInt(numberOfDistinctValues * maxBatchSize * 10); for(int i = 0; i < numberOfTuples; ++i) { final int index = this.r.nextInt(numberOfConsumerTasks.length); Values tuple = new Values(new Integer(this.r.nextInt(numberOfDistinctValues))); String outputStream = outputStreams[index]; collector.tupleEmit(outputStream, null, tuple, null); for(Entry<String, List<String>> s : streams.entrySet()) { if(s.getKey().equals(outputStream)) { for(int k = 0; k < numberOfConsumerTasks.length; ++k) { if(outputStreams[k].equals(outputStream)) { Integer tid = taskIds.get(consumerPrefix + k).get( ((Integer)tuple.get(0)).intValue() % numberOfConsumerTasks[k]); Batch batch = currentBatch.get(tid); if(batch != null) { batch.addTuple(tuple); if(batch.isFull()) { expectedResultPerTask.get(tid).add(batch); if(batchSizes == null) { currentBatch.put(tid, new Batch(batchSize, 1)); } else { currentBatch.put(tid, new Batch(batchSizes.get(outputStream).intValue(), 1)); } } } else { expectedResultPerTask.get(tid).add(tuple); } } } } } } for(int i = 0; i < numberOfConsumerTasks.length; ++i) { Assert.assertNull(collector.resultBuffer.get(outputStreams[i])); for(String consumer : streams.get(outputStreams[i])) { for(Integer tid : taskIds.get(consumer)) { List<Object> resultPerTask = new LinkedList<Object>(); Iterator<Integer> t = collector.taskBuffer.get( BatchingOutputFieldsDeclarer.STREAM_PREFIX + outputStreams[i]).iterator(); for(Object tupleOrBatch : collector.resultBuffer.get(BatchingOutputFieldsDeclarer.STREAM_PREFIX + outputStreams[i])) { if(t.next().intValue() == tid.intValue()) { resultPerTask.add(tupleOrBatch); } } Assert.assertEquals(expectedResultPerTask.get(tid), resultPerTask); } } } } // @Test // public void testCustomGrouping() { // Assert.fail(); // } @Test public void testEmitDirectSimple() { this.prepareAndRunTestEmitDirectDefaultOutputStream(this.generateConsumerTasks(1, 1)); } @Test public void testEmitDirectMultipleConsumerTasks() { this.prepareAndRunTestEmitDirectDefaultOutputStream(this.generateConsumerTasks(1, 10)); } @Test public void testEmitDirectMultipleConsumersNoDop() { this.prepareAndRunTestEmitDirectDefaultOutputStream(this.generateConsumerTasks(10, 1)); } @Test public void testEmitDirectFull() { this.prepareAndRunTestEmitDirectDefaultOutputStream(this.generateConsumerTasks(5, 5)); } private void prepareAndRunTestEmitDirectDefaultOutputStream(int[] numberOfConsumerTasks) { this.runTestEmitDirect(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 0), null); } @Test public void testEmitDirectDistinctOutputStreams() { int[] numberOfConsumerTasks = this.generateConsumerTasks(5, 5); this.runTestEmitDirect(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 1), null); } @Test public void testEmitDirectRandomOutputStreams() { int[] numberOfConsumerTasks = this.generateConsumerTasks(5, 5); this.runTestEmitDirect(numberOfConsumerTasks, this.generateStreamIds(numberOfConsumerTasks.length, 2), null); } @Test public void testEmitDirectRandomOutputStreamsDifferentBatchSizes() { int[] numberOfConsumerTasks = this.generateConsumerTasks(5, 5); String[] streamIds = this.generateStreamIds(numberOfConsumerTasks.length, 2); this.runTestEmitDirect(numberOfConsumerTasks, streamIds, this.generateBatchSizes(streamIds)); } private void runTestEmitDirect(int[] numberOfConsumerTasks, String[] outputStreams, HashMap<String, Integer> batchSizes) { assert (numberOfConsumerTasks.length == outputStreams.length); assert (numberOfConsumerTasks.length < 100); for(int i = 0; i < numberOfConsumerTasks.length; ++i) { assert (numberOfConsumerTasks[i] < 100); } final String consumerPrefix = "consumer-"; final int[] numberOfAttributes = new int[numberOfConsumerTasks.length]; Grouping grouping = mock(Grouping.class); when(new Boolean(grouping.is_set_direct())).thenReturn(new Boolean(true)); TopologyContext context = mock(TopologyContext.class); when(context.getThisComponentId()).thenReturn(this.sourceId); final int[][] taskIds = new int[numberOfConsumerTasks.length][]; final int batchSize = 1 + this.r.nextInt(5); Map<String, List<Object>> expectedResult = new HashMap<String, List<Object>>(); Map<Integer, Batch> currentBatch = new HashMap<Integer, Batch>(); Map<String, Map<String, Grouping>> targets = new HashMap<String, Map<String, Grouping>>(); for(int i = 0; i < numberOfConsumerTasks.length; ++i) { taskIds[i] = new int[numberOfConsumerTasks[i]]; for(int j = 0; j < taskIds[i].length; ++j) { taskIds[i][j] = i * 100 + j; } final String consumerId = consumerPrefix + i; Map<String, Grouping> consumer = new HashMap<String, Grouping>(); consumer.put(consumerId, grouping); final List<Integer> consumerTasks = new ArrayList<Integer>(); for(int j = 0; j < numberOfConsumerTasks[i]; ++j) { consumerTasks.add(new Integer(i * 100 + j)); } when(context.getComponentTasks(consumerId)).thenReturn(consumerTasks); Map<String, Grouping> streamMapping = targets.get(outputStreams[i]); if(streamMapping == null) { targets.put(outputStreams[i], consumer); } else { streamMapping.putAll(consumer); } Fields schema = context.getComponentOutputFields(this.sourceId, outputStreams[i]); if(schema == null) { numberOfAttributes[i] = 1 + this.r.nextInt(5); String[] attributes = new String[numberOfAttributes[i]]; for(int j = 0; j < numberOfAttributes[i]; ++j) { attributes[j] = "a" + i + "_" + j; } when(context.getComponentOutputFields(this.sourceId, outputStreams[i])).thenReturn( new Fields(attributes)); } else { numberOfAttributes[i] = schema.size(); } expectedResult.put(outputStreams[i], new LinkedList<Object>()); for(int j = 0; j < taskIds[i].length; ++j) { if(batchSizes == null) { currentBatch.put(new Integer(taskIds[i][j]), new Batch(batchSize, numberOfAttributes[i])); } else { int bS = batchSizes.get(outputStreams[i]).intValue(); if(bS > 0) { currentBatch.put(new Integer(taskIds[i][j]), new Batch(bS, numberOfAttributes[i])); } } } } when(context.getThisTargets()).thenReturn(targets); TestBatchCollector collector; int maxBatchSize = 0; if(batchSizes == null) { collector = new TestBatchCollector(context, batchSize); maxBatchSize = batchSize; } else { collector = new TestBatchCollector(context, batchSizes); for(Integer b : batchSizes.values()) { int bS = b.intValue(); if(bS > maxBatchSize) { maxBatchSize = bS; } } } final int numberOfTuples = maxBatchSize * 20 + this.r.nextInt(maxBatchSize * 10); for(int i = 0; i < numberOfTuples; ++i) { final int index = this.r.nextInt(numberOfConsumerTasks.length); Values tuple = new Values(); for(int j = 0; j < numberOfAttributes[index]; ++j) { switch(this.r.nextInt(3)) { case 0: tuple.add(new Integer(this.r.nextInt())); break; case 1: tuple.add(new Double(this.r.nextInt() + this.r.nextDouble())); break; default: tuple.add(new String("" + (char)(32 + this.r.nextInt(95)))); break; } } String outputStream = outputStreams[index]; for(int j = 0; j < numberOfConsumerTasks.length; ++j) { if(outputStream.equals(outputStreams[j])) { int tid = taskIds[j][this.r.nextInt(taskIds[j].length)]; collector.tupleEmitDirect(tid, outputStream, null, tuple, null); Integer tidO = new Integer(tid); Batch b = currentBatch.get(tidO); if(b != null) { b.addTuple(tuple); if(b.isFull()) { expectedResult.get(outputStream).add(b); if(batchSizes == null) { currentBatch.put(tidO, new Batch(batchSize, numberOfAttributes[j])); } else { currentBatch.put(tidO, new Batch(batchSizes.get(outputStream).intValue(), numberOfAttributes[j])); } } } else { expectedResult.get(outputStream).add(tuple); } } } } for(int i = 0; i < numberOfConsumerTasks.length; ++i) { List<Object> result = expectedResult.get(outputStreams[i]); if(result.size() == 0) { Assert.assertNull(collector.resultBuffer.get(outputStreams[i])); } else { Assert.assertEquals(result, collector.resultBuffer.get(outputStreams[i])); } } } @Test public void testFlushSimple() { final String secondStream = "stream-2"; final String thirdStream = "stream-3"; final String directStream = "direct"; HashMap<String, Grouping> consumer = new HashMap<String, Grouping>(); consumer.put("receiver", mock(Grouping.class)); HashMap<String, Grouping> directConsumer = new HashMap<String, Grouping>(); Grouping direct = mock(Grouping.class); when(new Boolean(direct.is_set_direct())).thenReturn(new Boolean(true)); directConsumer.put("directReceiver", direct); Map<String, Map<String, Grouping>> targets = new HashMap<String, Map<String, Grouping>>(); targets.put(Utils.DEFAULT_STREAM_ID, consumer); targets.put(secondStream, consumer); targets.put(thirdStream, consumer); targets.put(directStream, directConsumer); TopologyContext context = mock(TopologyContext.class); when(context.getThisTargets()).thenReturn(targets); when(context.getComponentOutputFields(null, Utils.DEFAULT_STREAM_ID)).thenReturn(new Fields("dummy")); when(context.getComponentOutputFields(null, secondStream)).thenReturn(new Fields("dummy")); when(context.getComponentOutputFields(null, thirdStream)).thenReturn(new Fields("dummy")); when(context.getComponentOutputFields(null, directStream)).thenReturn(new Fields("dummy")); when(context.getComponentTasks("directReceiver")).thenReturn( Arrays.asList(new Integer[] {new Integer(0), new Integer(1), new Integer(2)})); final int batchSize = 5; TestBatchCollector collector = new TestBatchCollector(context, batchSize); final int numberOfTuples = 42; for(int i = 0; i < numberOfTuples - 2; ++i) { collector.tupleEmit(Utils.DEFAULT_STREAM_ID, null, new Values(new Integer(i)), null); collector.tupleEmit(secondStream, null, new Values(new Integer(i)), null); collector.tupleEmit(thirdStream, null, new Values(new Integer(i)), null); collector.tupleEmitDirect(i % 3, directStream, null, new Values(new Integer(i)), null); } collector.tupleEmit(Utils.DEFAULT_STREAM_ID, null, new Values(new Integer(40)), null); collector.tupleEmit(Utils.DEFAULT_STREAM_ID, null, new Values(new Integer(41)), null); collector.tupleEmit(Utils.DEFAULT_STREAM_ID, null, new Values(new Integer(42)), null); collector.tupleEmit(secondStream, null, new Values(new Integer(40)), null); collector.tupleEmit(secondStream, null, new Values(new Integer(41)), null); Assert.assertEquals(8, collector.resultBuffer.get(Utils.DEFAULT_STREAM_ID).size()); Assert.assertEquals(8, collector.resultBuffer.get(secondStream).size()); Assert.assertEquals(8, collector.resultBuffer.get(thirdStream).size()); Assert.assertEquals(6, collector.resultBuffer.get(directStream).size()); collector.flush(); Assert.assertEquals(9, collector.resultBuffer.get(Utils.DEFAULT_STREAM_ID).size()); Assert.assertEquals(9, collector.resultBuffer.get(secondStream).size()); Assert.assertEquals(8, collector.resultBuffer.get(thirdStream).size()); Assert.assertEquals(9, collector.resultBuffer.get(directStream).size()); } @Test public void testFlushAdvanced() { final String secondStream = "stream-2"; final String thirdStream = "stream-3"; final String directStream = "direct-1"; final String directStream2 = "direct-2"; HashMap<String, Grouping> consumer = new HashMap<String, Grouping>(); consumer.put("receiver", mock(Grouping.class)); HashMap<String, Grouping> directConsumer = new HashMap<String, Grouping>(); Grouping direct = mock(Grouping.class); when(new Boolean(direct.is_set_direct())).thenReturn(new Boolean(true)); directConsumer.put("directReceiver", direct); Map<String, Map<String, Grouping>> targets = new HashMap<String, Map<String, Grouping>>(); targets.put(Utils.DEFAULT_STREAM_ID, consumer); targets.put(secondStream, consumer); targets.put(thirdStream, consumer); targets.put(directStream, directConsumer); targets.put(directStream2, directConsumer); TopologyContext context = mock(TopologyContext.class); when(context.getThisTargets()).thenReturn(targets); when(context.getComponentOutputFields(null, Utils.DEFAULT_STREAM_ID)).thenReturn(new Fields("dummy")); when(context.getComponentOutputFields(null, secondStream)).thenReturn(new Fields("dummy")); when(context.getComponentOutputFields(null, thirdStream)).thenReturn(new Fields("dummy")); when(context.getComponentOutputFields(null, directStream)).thenReturn(new Fields("dummy")); when(context.getComponentOutputFields(null, directStream2)).thenReturn(new Fields("dummy")); when(context.getComponentTasks("directReceiver")).thenReturn( Arrays.asList(new Integer[] {new Integer(0), new Integer(1), new Integer(2)})); HashMap<String, Integer> batchSizes = new HashMap<String, Integer>(); batchSizes.put(Utils.DEFAULT_STREAM_ID, new Integer(5)); batchSizes.put(secondStream, new Integer(0)); batchSizes.put(thirdStream, new Integer(3)); batchSizes.put(directStream, new Integer(5)); TestBatchCollector collector = new TestBatchCollector(context, batchSizes); final int numberOfTuples = 42; for(int i = 0; i < numberOfTuples - 2; ++i) { collector.tupleEmit(Utils.DEFAULT_STREAM_ID, null, new Values(new Integer(i)), null); collector.tupleEmit(secondStream, null, new Values(new Integer(i)), null); collector.tupleEmit(thirdStream, null, new Values(new Integer(i)), null); collector.tupleEmitDirect(i % 3, directStream, null, new Values(new Integer(i)), null); collector.tupleEmitDirect(i % 3, directStream2, null, new Values(new Integer(i)), null); } collector.tupleEmit(Utils.DEFAULT_STREAM_ID, null, new Values(new Integer(40)), null); collector.tupleEmit(Utils.DEFAULT_STREAM_ID, null, new Values(new Integer(41)), null); collector.tupleEmit(Utils.DEFAULT_STREAM_ID, null, new Values(new Integer(42)), null); collector.tupleEmit(secondStream, null, new Values(new Integer(40)), null); collector.tupleEmit(secondStream, null, new Values(new Integer(41)), null); Assert.assertEquals(8, collector.resultBuffer.get(Utils.DEFAULT_STREAM_ID).size()); Assert.assertEquals(42, collector.resultBuffer.get(secondStream).size()); Assert.assertEquals(13, collector.resultBuffer.get(thirdStream).size()); Assert.assertEquals(6, collector.resultBuffer.get(directStream).size()); Assert.assertEquals(40, collector.resultBuffer.get(directStream2).size()); collector.flush(); Assert.assertEquals(9, collector.resultBuffer.get(Utils.DEFAULT_STREAM_ID).size()); Assert.assertEquals(42, collector.resultBuffer.get(secondStream).size()); Assert.assertEquals(14, collector.resultBuffer.get(thirdStream).size()); Assert.assertEquals(9, collector.resultBuffer.get(directStream).size()); Assert.assertEquals(40, collector.resultBuffer.get(directStream2).size()); } @Test public void testKryoRegistrations() { Config stormConfig = mock(Config.class); AbstractBatchCollector.registerKryoClasses(stormConfig); verify(stormConfig).registerSerialization(Batch.class); verify(stormConfig).registerSerialization(BatchColumn.class); } }