/*
 * Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.aggregation;

import com.hazelcast.config.Config;
import com.hazelcast.config.InMemoryFormat;
import com.hazelcast.config.MapConfig;
import com.hazelcast.config.NetworkConfig;
import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.core.IMap;
import com.hazelcast.test.HazelcastTestSupport;
import com.hazelcast.util.UuidUtil;
import org.junit.Ignore;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

import static java.util.Collections.singletonList;

/**
 * Manual performance test: measures how long a word-count {@link Aggregator}
 * takes over an IMap filled with text data on a local three-member cluster.
 */
@Ignore("Run manually only")
public class MapWordCountAggregationPerformanceTest extends HazelcastTestSupport {

    private static final String[] DATA_RESOURCES_TO_LOAD = {"dracula.txt"};
    private static final String MAP_NAME = "articles";

    public static void main(String[] args) throws Exception {
        HazelcastInstance hazelcastInstance = buildCluster(3);
        try {
            // load the data set 160 times to get a reasonably large map
            System.out.println("Filling map...");
            for (int i = 0; i < 20 * 8; i++) {
                //fillMapWithDataEachLineNewEntry(hazelcastInstance);
                fillMapWithData(hazelcastInstance);
            }

            IMap<String, String> map = hazelcastInstance.getMap(MAP_NAME);

            System.out.println("Garbage collecting...");
            for (int i = 0; i < 10; i++) {
                System.gc();
            }

            for (int i = 0; i < 10; i++) {
                System.out.println("Executing job...");
                long start = System.currentTimeMillis();
                Map<String, MutableInt> result = map.aggregate(new WordCountAggregator());
                System.err.println(result.size());
                System.err.println("TimeTaken=" + (System.currentTimeMillis() - start));
                System.err.println("---------------------------------------------");
                System.gc();
            }
        } finally {
            Hazelcast.shutdownAll();
        }
    }

    /**
     * Starts {@code memberCount} members joined over TCP/IP on localhost,
     * with OBJECT in-memory format, no backups and parallel evaluation enabled.
     */
    private static HazelcastInstance buildCluster(int memberCount) {
        Config config = new Config();
        NetworkConfig networkConfig = config.getNetworkConfig();
        networkConfig.getJoin().getMulticastConfig().setEnabled(false);
        networkConfig.getJoin().getTcpIpConfig().setEnabled(true);
        networkConfig.getJoin().getTcpIpConfig().setMembers(singletonList("127.0.0.1"));

        MapConfig mapConfig = new MapConfig();
        mapConfig.setInMemoryFormat(InMemoryFormat.OBJECT);
        mapConfig.setName(MAP_NAME);
        mapConfig.setBackupCount(0);
        config.addMapConfig(mapConfig);

        config.setProperty("hazelcast.query.predicate.parallel.evaluation", "true");
        config.setProperty("hazelcast.aggregation.accumulation.parallel.evaluation", "true");

        HazelcastInstance[] hazelcastInstances = new HazelcastInstance[memberCount];
        for (int i = 0; i < memberCount; i++) {
            hazelcastInstances[i] = Hazelcast.newHazelcastInstance(config);
        }
        return hazelcastInstances[0];
    }
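
    /**
     * Reads each resource under /wordcount as a whole and stores it as a
     * single map entry keyed by a random UUID.
     */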
    private static void fillMapWithData(HazelcastInstance hazelcastInstance) throws Exception {
        IMap<String, String> map = hazelcastInstance.getMap(MAP_NAME);
        for (String file : DATA_RESOURCES_TO_LOAD) {
            InputStream is = MapWordCountAggregationPerformanceTest.class.getResourceAsStream("/wordcount/" + file);
            LineNumberReader reader = new LineNumberReader(new InputStreamReader(is));

            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append("\n");
            }
            map.put(UuidUtil.newSecureUuidString(), sb.toString());

            // closing the reader also closes the underlying stream
            reader.close();
            is.close();
        }
    }

    /**
     * Alternative loader (currently unused in main): stores each line of the
     * resource as its own map entry, inserted via putAll() in batches of 10000.
     */
    private static void fillMapWithDataEachLineNewEntry(HazelcastInstance hazelcastInstance) throws Exception {
        IMap<String, String> map = hazelcastInstance.getMap(MAP_NAME);
        for (String file : DATA_RESOURCES_TO_LOAD) {
            InputStream is = MapWordCountAggregationPerformanceTest.class.getResourceAsStream("/wordcount/" + file);
            LineNumberReader reader = new LineNumberReader(new InputStreamReader(is));

            int batchSize = 10000;
            int batchSizeCount = 0;
            Map<String, String> batch = new HashMap<String, String>(batchSize);
            String line;
            while ((line = reader.readLine()) != null) {
                batch.put(UuidUtil.newSecureUuidString(), line);
                batchSizeCount++;
                if (batchSizeCount == batchSize) {
                    map.putAll(batch);
                    batchSizeCount = 0;
                    batch.clear();
                }
            }
            // flush the last partial batch
            if (batchSizeCount > 0) {
                map.putAll(batch);
                batch.clear();
            }

            reader.close();
            is.close();
        }
    }

    private static class MutableInt implements Serializable {

        private int value = 0;

        @Override
        public String toString() {
            return String.valueOf(value);
        }
    }

    // strips everything except ASCII letters and digits from a token
    private static String cleanWord(String word) {
        return word.replaceAll("[^A-Za-z0-9]", "");
    }

    private static class WordCountAggregator extends Aggregator<Map.Entry<String, String>, Map<String, MutableInt>> {

        Map<String, MutableInt> result = new HashMap<String, MutableInt>(1000);

        // tokenizes the value, normalizes each word and adds 'times' to its count
        void accumulate(String value, int times) {
            StringTokenizer tokenizer = new StringTokenizer(value);
            while (tokenizer.hasMoreTokens()) {
                String word = cleanWord(tokenizer.nextToken()).toLowerCase();

                MutableInt count = result.get(word);
                if (count == null) {
                    count = new MutableInt();
                    result.put(word, count);
                }
                count.value += times;
            }
        }

        @Override
        public void accumulate(Map.Entry<String, String> entry) {
            accumulate(entry.getValue(), 1);
        }

        @Override
        public void combine(Aggregator aggregator) {
            WordCountAggregator aggr = (WordCountAggregator) aggregator;
            for (Map.Entry<String, MutableInt> toCombine : aggr.result.entrySet()) {
                doCombine(toCombine);
            }
        }

        private void doCombine(Map.Entry<String, MutableInt> toCombine) {
            String word = toCombine.getKey();
            MutableInt count = result.get(word);
            if (count == null) {
                count = new MutableInt();
                result.put(word, count);
            }
            count.value += toCombine.getValue().value;
        }

        @Override
        public Map<String, MutableInt> aggregate() {
            return result;
        }
    }
}