/* * Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.hazelcast.mapreduce.aggregation.impl; import com.hazelcast.mapreduce.Collator; import com.hazelcast.mapreduce.Combiner; import com.hazelcast.mapreduce.CombinerFactory; import com.hazelcast.mapreduce.Context; import com.hazelcast.mapreduce.Mapper; import com.hazelcast.mapreduce.Reducer; import com.hazelcast.mapreduce.ReducerFactory; import com.hazelcast.mapreduce.aggregation.Supplier; import com.hazelcast.mapreduce.impl.task.DefaultContext; import com.hazelcast.nio.ObjectDataInput; import com.hazelcast.nio.ObjectDataOutput; import com.hazelcast.nio.serialization.IdentifiedDataSerializable; import com.hazelcast.nio.serialization.BinaryInterface; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.IOException; import java.util.HashSet; import java.util.Map; import java.util.Random; import java.util.Set; /** * The predefined distinct value aggregation. * * @param <Key> the input key type * @param <Value> the input value type * @param <DistinctType> the common super type for all distinct values */ public class DistinctValuesAggregation<Key, Value, DistinctType> implements AggType<Key, Value, Integer, DistinctType, Set<DistinctType>, Set<DistinctType>, Set<DistinctType>> { private static final int DEFAULT_DISTRIBUTION_FACTOR = 20; @Override public Collator<Map.Entry<Integer, Set<DistinctType>>, Set<DistinctType>> getCollator() { return new Collator<Map.Entry<Integer, Set<DistinctType>>, Set<DistinctType>>() { @Override public Set<DistinctType> collate(Iterable<Map.Entry<Integer, Set<DistinctType>>> values) { Set<DistinctType> distinctValues = new HashSet<DistinctType>(); for (Map.Entry<Integer, Set<DistinctType>> value : values) { distinctValues.addAll(value.getValue()); } return distinctValues; } }; } @Override public Mapper<Key, Value, Integer, DistinctType> getMapper(Supplier<Key, Value, DistinctType> supplier) { return new DistinctValueMapper<Key, Value, DistinctType>(supplier); } @Override public CombinerFactory<Integer, DistinctType, Set<DistinctType>> getCombinerFactory() { return new DistinctValuesCombinerFactory<DistinctType>(); } @Override public ReducerFactory<Integer, Set<DistinctType>, Set<DistinctType>> getReducerFactory() { return new DistinctValuesReducerFactory<DistinctType>(); } /** * Distinct values CombinerFactory * * @param <DistinctType> the distinct values type */ @BinaryInterface static class DistinctValuesCombinerFactory<DistinctType> extends AbstractAggregationCombinerFactory<Integer, DistinctType, Set<DistinctType>> { @Override public Combiner<DistinctType, Set<DistinctType>> newCombiner(Integer key) { return new DistinctValuesCombiner<DistinctType>(); } @Override public int getId() { return AggregationsDataSerializerHook.DISTINCT_VALUES_COMBINER_FACTORY; } } /** * Distinct values Combiner * * @param <DistinctType> the distinct values type */ @BinaryInterface private static class DistinctValuesCombiner<DistinctType> extends Combiner<DistinctType, Set<DistinctType>> { private final Set<DistinctType> distinctValues = new HashSet<DistinctType>(); @Override public void combine(DistinctType value) { distinctValues.add(value); } @Override public Set<DistinctType> finalizeChunk() { Set<DistinctType> distinctValues = new SetAdapter<DistinctType>(); distinctValues.addAll(this.distinctValues); this.distinctValues.clear(); return distinctValues; } } /** * Distinct values ReducerFactory * * @param <DistinctType> the distinct values type */ @BinaryInterface static class DistinctValuesReducerFactory<DistinctType> extends AbstractAggregationReducerFactory<Integer, Set<DistinctType>, Set<DistinctType>> { @Override public Reducer<Set<DistinctType>, Set<DistinctType>> newReducer(Integer key) { return new DistinctValuesReducer<DistinctType>(); } @Override public int getId() { return AggregationsDataSerializerHook.DISTINCT_VALUES_REDUCER_FACTORY; } } /** * Distinct values Reducer * * @param <DistinctType> the distinct values type */ private static class DistinctValuesReducer<DistinctType> extends Reducer<Set<DistinctType>, Set<DistinctType>> { private final Set<DistinctType> distinctValues = new SetAdapter<DistinctType>(); @Override public void reduce(Set<DistinctType> value) { distinctValues.addAll(value); } @Override public Set<DistinctType> finalizeReduce() { return distinctValues; } } /** * A special mapper for distributing reducing of distinct values * * @param <Key> the input key type * @param <Value> the input value type * @param <DistinctType> the type of distinct values */ @SuppressFBWarnings("SE_NO_SERIALVERSIONID") @BinaryInterface static class DistinctValueMapper<Key, Value, DistinctType> implements Mapper<Key, Value, Integer, DistinctType>, IdentifiedDataSerializable { // These keys are used to distribute reducer steps around the cluster private static final int[] DISTRIBUTION_KEYS; static { Random random = new Random(); DISTRIBUTION_KEYS = new int[DEFAULT_DISTRIBUTION_FACTOR]; for (int i = 0; i < DISTRIBUTION_KEYS.length; i++) { DISTRIBUTION_KEYS[i] = random.nextInt(); } } private transient SimpleEntry<Key, Value> entry = new SimpleEntry<Key, Value>(); private transient int keyPosition; private Supplier<Key, Value, DistinctType> supplier; DistinctValueMapper() { } DistinctValueMapper(Supplier<Key, Value, DistinctType> supplier) { this.supplier = supplier; } @Override public void map(Key key, Value value, Context<Integer, DistinctType> context) { int mappingKey = key(); entry.setKey(key); entry.setValue(value); entry.setSerializationService(((DefaultContext) context).getSerializationService()); DistinctType valueOut = supplier.apply(entry); if (valueOut != null) { context.emit(mappingKey, valueOut); } } @Override public int getFactoryId() { return AggregationsDataSerializerHook.F_ID; } @Override public int getId() { return AggregationsDataSerializerHook.DISTINCT_VALUES_MAPPER; } @Override public void writeData(ObjectDataOutput out) throws IOException { out.writeObject(supplier); } @Override public void readData(ObjectDataInput in) throws IOException { supplier = in.readObject(); } private int key() { if (keyPosition >= DISTRIBUTION_KEYS.length) { keyPosition = 0; } return keyPosition++; } } }