/* * Copyright 2015 Goldman Sachs. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.gs.collections.impl.jmh; import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import com.gs.collections.api.RichIterable; import com.gs.collections.api.multimap.MutableMultimap; import com.gs.collections.api.multimap.set.MutableSetMultimap; import com.gs.collections.api.multimap.set.UnsortedSetMultimap; import com.gs.collections.api.tuple.Pair; import com.gs.collections.impl.block.factory.Comparators; import com.gs.collections.impl.block.factory.Procedures; import com.gs.collections.impl.forkjoin.FJIterate; import com.gs.collections.impl.jmh.runner.AbstractJMHTestRunner; import com.gs.collections.impl.list.mutable.CompositeFastList; import com.gs.collections.impl.list.mutable.FastList; import com.gs.collections.impl.parallel.ParallelIterate; import com.gs.collections.impl.set.mutable.UnifiedSet; import com.gs.collections.impl.tuple.Tuples; import org.apache.commons.lang.RandomStringUtils; import org.junit.Assert; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.TearDown; @State(Scope.Thread) @BenchmarkMode(Mode.Throughput) @OutputTimeUnit(TimeUnit.SECONDS) public class AnagramSetTest extends AbstractJMHTestRunner { private static final int SIZE = 1_000_000; private static final int BATCH_SIZE = 10_000; private static final int SIZE_THRESHOLD = 10; private final UnifiedSet<String> gscWords = UnifiedSet.newSet(FastList.newWithNValues(SIZE, () -> RandomStringUtils.randomAlphabetic(5).toUpperCase())); private final Set<String> jdkWords = new HashSet<>(this.gscWords); private ExecutorService executorService; @Setup public void setUp() { this.executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); } @TearDown public void tearDown() throws InterruptedException { this.executorService.shutdownNow(); this.executorService.awaitTermination(1L, TimeUnit.SECONDS); } @Benchmark public void serial_eager_scala() { AnagramSetScalaTest.serial_eager_scala(); } @Benchmark public void serial_lazy_scala() { AnagramSetScalaTest.serial_lazy_scala(); } @Benchmark public void parallel_lazy_scala() { AnagramSetScalaTest.parallel_lazy_scala(); } @Benchmark public void serial_eager_gsc() { MutableSetMultimap<Alphagram, String> groupBy = this.gscWords.groupBy(Alphagram::new); groupBy.multiValuesView() .select(iterable -> iterable.size() >= SIZE_THRESHOLD) .toSortedList(Comparators.<RichIterable<String>>byIntFunction(RichIterable::size)) .asReversed() .collect(iterable -> iterable.size() + ": " + iterable) .forEach(Procedures.cast(e -> Assert.assertFalse(e.isEmpty()))); } @Benchmark public void parallel_eager_gsc() { MutableMultimap<Alphagram, String> groupBy = ParallelIterate.groupBy(this.gscWords, Alphagram::new); CompositeFastList<RichIterable<String>> select = ParallelIterate.select(groupBy.multiValuesView(), iterable -> iterable.size() >= SIZE_THRESHOLD, new CompositeFastList<>(), false); Collection<String> collect = ParallelIterate.collect(select .toSortedList(Comparators.<RichIterable<String>>byIntFunction(RichIterable::size)) .asReversed(), iterable -> iterable.size() + ": " + iterable); ParallelIterate.forEach(collect, Procedures.cast(e -> Assert.assertFalse(e.isEmpty()))); } @Benchmark public void parallel_lazy_gsc() { UnsortedSetMultimap<Alphagram, String> multimap = this.gscWords.asParallel(this.executorService, BATCH_SIZE) .groupBy(Alphagram::new); FastList<Pair<Integer, String>> pairs = (FastList<Pair<Integer, String>>) FastList.newList(multimap.multiValuesView()).asParallel(this.executorService, BATCH_SIZE) .select(iterable -> iterable.size() >= SIZE_THRESHOLD) .collect(iterable -> Tuples.pair(iterable.size(), iterable.size() + ": " + iterable)) .toSortedList((pair1, pair2) -> Integer.compare(pair2.getOne(), pair1.getOne())); pairs.asParallel(this.executorService, BATCH_SIZE) .collect(Pair::getTwo) .forEach(Procedures.cast(e -> Assert.assertFalse(e.isEmpty()))); } @Benchmark public void parallel_eager_forkjoin_gsc() { MutableMultimap<Alphagram, String> groupBy = FJIterate.groupBy(this.gscWords, Alphagram::new); CompositeFastList<RichIterable<String>> select = FJIterate.select(groupBy.multiValuesView(), iterable -> iterable.size() >= SIZE_THRESHOLD, new CompositeFastList<>(), false); Collection<String> collect = FJIterate.collect(select .toSortedList(Comparators.<RichIterable<String>>byIntFunction(RichIterable::size)) .asReversed(), iterable -> iterable.size() + ": " + iterable); FJIterate.forEach(collect, Procedures.cast(e -> Assert.assertFalse(e.isEmpty()))); } @Benchmark public void serial_lazy_jdk() { Map<Alphagram, Set<String>> groupBy = this.jdkWords.stream().collect(Collectors.groupingBy(Alphagram::new, Collectors.<String>toSet())); groupBy.entrySet() .stream() .map(Map.Entry::getValue) .filter(list -> list.size() >= SIZE_THRESHOLD) .sorted(Comparator.<Set<String>>comparingInt(Set::size).reversed()) .map(list -> list.size() + ": " + list) .forEach(e -> Assert.assertFalse(e.isEmpty())); } @Benchmark public void serial_lazy_streams_gsc() { Map<Alphagram, Set<String>> groupBy = this.gscWords.stream().collect(Collectors.groupingBy(Alphagram::new, Collectors.<String>toSet())); groupBy.entrySet() .stream() .map(Map.Entry::getValue) .filter(list -> list.size() >= SIZE_THRESHOLD) .sorted(Comparator.<Set<String>>comparingInt(Set::size).reversed()) .map(list -> list.size() + ": " + list) .forEach(e -> Assert.assertFalse(e.isEmpty())); } @Benchmark public void parallel_lazy_jdk() { Map<Alphagram, Set<String>> groupBy = this.jdkWords.parallelStream().collect(Collectors.groupingBy(Alphagram::new, Collectors.<String>toSet())); groupBy.entrySet() .parallelStream() .map(Map.Entry::getValue) .filter(list -> list.size() >= SIZE_THRESHOLD) .sorted(Comparator.<Set<String>>comparingInt(Set::size).reversed()) .parallel() .map(list -> list.size() + ": " + list) .forEach(e -> Assert.assertFalse(e.isEmpty())); } @Benchmark public void parallel_lazy_streams_gsc() { Map<Alphagram, Set<String>> groupBy = this.gscWords.parallelStream().collect(Collectors.groupingBy(Alphagram::new, Collectors.<String>toSet())); groupBy.entrySet() .parallelStream() .map(Map.Entry::getValue) .filter(list -> list.size() >= SIZE_THRESHOLD) .sorted(Comparator.<Set<String>>comparingInt(Set::size).reversed()) .parallel() .map(list -> list.size() + ": " + list) .forEach(e -> Assert.assertFalse(e.isEmpty())); } private static final class Alphagram { private final char[] key; private Alphagram(String string) { this.key = string.toCharArray(); Arrays.sort(this.key); } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || this.getClass() != o.getClass()) { return false; } Alphagram alphagram = (Alphagram) o; return Arrays.equals(this.key, alphagram.key); } @Override public int hashCode() { return Arrays.hashCode(this.key); } @Override public String toString() { return new String(this.key); } } }