/*
* Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.aggregation;
import com.hazelcast.config.Config;
import com.hazelcast.config.InMemoryFormat;
import com.hazelcast.config.MapConfig;
import com.hazelcast.config.NetworkConfig;
import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.core.IMap;
import com.hazelcast.test.HazelcastTestSupport;
import com.hazelcast.util.UuidUtil;
import org.junit.Ignore;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

import static java.util.Collections.singletonList;
@Ignore("Run manually only")
public class MapWordCountAggregationPerformanceTest extends HazelcastTestSupport {

    private static final String[] DATA_RESOURCES_TO_LOAD = {"dracula.txt"};
    private static final String MAP_NAME = "articles";

    // Compiled once: String.replaceAll() would recompile this regex for every
    // single token of the word-count loop, which skews a performance test.
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^A-Za-z0-9]");

    /**
     * Entry point for the manual benchmark: builds a 3-member local cluster,
     * loads the sample text 160 times into the "articles" map, then runs the
     * word-count aggregation 10 times, printing the result size and elapsed
     * time of each run to stderr.
     */
    public static void main(String[] args) throws Exception {
        HazelcastInstance hazelcastInstance = buildCluster(3);
        try {
            System.out.println("Filling map...");
            for (int i = 0; i < 20 * 8; i++) {
                //fillMapWithDataEachLineNewEntry(hazelcastInstance);
                fillMapWithData(hazelcastInstance);
            }

            IMap<String, String> map = hazelcastInstance.getMap(MAP_NAME);

            // Try to start each measured run from a clean heap.
            System.out.println("Garbage collecting...");
            for (int i = 0; i < 10; i++) {
                System.gc();
            }

            for (int i = 0; i < 10; i++) {
                System.out.println("Executing job...");
                long start = System.currentTimeMillis();
                Map<String, MutableInt> result = map.aggregate(new WordCountAggregator());
                System.err.println(result.size());
                System.err.println("TimeTaken=" + (System.currentTimeMillis() - start));
                System.err.println("---------------------------------------------");
                System.gc();
            }
        } finally {
            Hazelcast.shutdownAll();
        }
    }

    /**
     * Starts {@code memberCount} local members joined over TCP/IP on loopback,
     * with the benchmark map configured as OBJECT format and no backups.
     *
     * @return the first member of the cluster
     */
    private static HazelcastInstance buildCluster(int memberCount) {
        Config config = new Config();
        NetworkConfig networkConfig = config.getNetworkConfig();
        networkConfig.getJoin().getMulticastConfig().setEnabled(false);
        networkConfig.getJoin().getTcpIpConfig().setEnabled(true);
        networkConfig.getJoin().getTcpIpConfig().setMembers(singletonList("127.0.0.1"));

        MapConfig mapConfig = new MapConfig();
        mapConfig.setInMemoryFormat(InMemoryFormat.OBJECT);
        mapConfig.setName(MAP_NAME);
        // No backups: this is a single-JVM benchmark, replication would only add noise.
        mapConfig.setBackupCount(0);
        config.addMapConfig(mapConfig);

        config.setProperty("hazelcast.query.predicate.parallel.evaluation", "true");
        config.setProperty("hazelcast.aggregation.accumulation.parallel.evaluation", "true");

        HazelcastInstance[] hazelcastInstances = new HazelcastInstance[memberCount];
        for (int i = 0; i < memberCount; i++) {
            hazelcastInstances[i] = Hazelcast.newHazelcastInstance(config);
        }
        return hazelcastInstances[0];
    }

    /**
     * Loads each classpath resource as a single map entry (whole document as
     * one value) under a fresh random UUID key.
     *
     * @throws IllegalStateException if a resource is missing from the classpath
     */
    private static void fillMapWithData(HazelcastInstance hazelcastInstance) throws Exception {
        IMap<String, String> map = hazelcastInstance.getMap(MAP_NAME);
        for (String file : DATA_RESOURCES_TO_LOAD) {
            String resource = "/wordcount/" + file;
            InputStream is = MapWordCountAggregationPerformanceTest.class.getResourceAsStream(resource);
            if (is == null) {
                // Fail with a clear message instead of an NPE inside the reader chain.
                throw new IllegalStateException("Resource not found on classpath: " + resource);
            }
            // try-with-resources: the original closed the streams outside any
            // finally block, leaking them if readLine() threw. Closing the
            // reader also closes the wrapped InputStream. UTF-8 is pinned so
            // decoding does not depend on the platform default charset.
            try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
                StringBuilder sb = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    sb.append(line).append('\n');
                }
                map.put(UuidUtil.newSecureUuidString(), sb.toString());
            }
        }
    }

    /**
     * Alternative loader (currently unused, see commented call in {@code main}):
     * stores every line of each resource as its own map entry, inserted in
     * batches of 10000 via {@code putAll} to reduce round trips.
     *
     * @throws IllegalStateException if a resource is missing from the classpath
     */
    private static void fillMapWithDataEachLineNewEntry(HazelcastInstance hazelcastInstance) throws Exception {
        IMap<String, String> map = hazelcastInstance.getMap(MAP_NAME);
        for (String file : DATA_RESOURCES_TO_LOAD) {
            String resource = "/wordcount/" + file;
            InputStream is = MapWordCountAggregationPerformanceTest.class.getResourceAsStream(resource);
            if (is == null) {
                throw new IllegalStateException("Resource not found on classpath: " + resource);
            }
            // try-with-resources: see fillMapWithData — the original leaked on exception.
            try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
                int batchSize = 10000;
                int batchSizeCount = 0;
                Map<String, String> batch = new HashMap<String, String>(batchSize);
                String line;
                while ((line = reader.readLine()) != null) {
                    batch.put(UuidUtil.newSecureUuidString(), line);
                    batchSizeCount++;
                    if (batchSizeCount == batchSize) {
                        map.putAll(batch);
                        batchSizeCount = 0;
                        batch.clear();
                    }
                }
                // Flush the trailing partial batch.
                if (batchSizeCount > 0) {
                    map.putAll(batch);
                    batch.clear();
                }
            }
        }
    }

    /**
     * Mutable boxed int used as the per-word counter; Serializable because it
     * travels inside the aggregation result.
     */
    private static class MutableInt implements Serializable {

        private static final long serialVersionUID = 1L;

        private int value = 0;

        @Override
        public String toString() {
            return String.valueOf(value);
        }
    }

    /** Strips every non-ASCII-alphanumeric character from {@code word}. */
    private static String cleanWord(String word) {
        return NON_ALPHANUMERIC.matcher(word).replaceAll("");
    }

    /**
     * Aggregator that tokenizes every map value on whitespace, normalizes each
     * token (strip punctuation, lower-case) and counts occurrences per word.
     */
    private static class WordCountAggregator extends Aggregator<Map.Entry<String, String>, Map<String, MutableInt>> {

        Map<String, MutableInt> result = new HashMap<String, MutableInt>(1000);

        /** Adds {@code times} to the count of every word found in {@code value}. */
        void accumulate(String value, int times) {
            StringTokenizer tokenizer = new StringTokenizer(value);
            while (tokenizer.hasMoreTokens()) {
                // Locale.ROOT keeps lower-casing independent of the default
                // locale (e.g. Turkish dotless-i would otherwise change results).
                String word = cleanWord(tokenizer.nextToken()).toLowerCase(Locale.ROOT);
                MutableInt count = result.get(word);
                if (count == null) {
                    count = new MutableInt();
                    result.put(word, count);
                }
                count.value += times;
            }
        }

        @Override
        public void accumulate(Map.Entry<String, String> entry) {
            accumulate(entry.getValue(), 1);
        }

        @Override
        public void combine(Aggregator aggregator) {
            // Raw Aggregator parameter is dictated by the superclass signature.
            WordCountAggregator aggr = (WordCountAggregator) aggregator;
            for (Map.Entry<String, MutableInt> toCombine : aggr.result.entrySet()) {
                doCombine(toCombine);
            }
        }

        /** Merges one word/count pair from another partition's partial result. */
        private void doCombine(Map.Entry<String, MutableInt> toCombine) {
            String word = toCombine.getKey();
            MutableInt count = result.get(word);
            if (count == null) {
                count = new MutableInt();
                result.put(word, count);
            }
            count.value += toCombine.getValue().value;
        }

        @Override
        public Map<String, MutableInt> aggregate() {
            return result;
        }
    }
}