/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.dstream.local.ri;
import static io.dstream.utils.Tuples.Tuple2.tuple2;
import static io.dstream.utils.Tuples.Tuple4.tuple4;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.Collections;
import java.util.List;
import java.util.Map.Entry;
import java.util.concurrent.Future;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.junit.Test;
import io.dstream.DStream;
import io.dstream.support.PartitionIdHelper;
import io.dstream.utils.ExecutionResultUtils;
import io.dstream.utils.KVUtils;
import io.dstream.utils.StringUtils;
import io.dstream.utils.Tuples.Tuple2;
import io.dstream.utils.Tuples.Tuple3;
import io.dstream.utils.Tuples.Tuple4;
public class DStreamExecutionTests {
// Execution name handed to executeAs(...) by the tests in this class;
// it is the simple name of this test class.
private static String EXECUTION_NAME = DStreamExecutionTests.class.getSimpleName();
/**
 * Executes the "wc" String source without any intermediate operation,
 * expects two result partitions and verifies the first two records of
 * the first partition.
 */
@Test
public void noOperations() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check: only the leading records of partition 0 are compared.
    List<String> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals("The ship drew on and had safely passed the strait, which some volcanic",
            firstPartition.get(0));
    assertEquals("shock has made between the Calasareigne and Jaros islands; had doubled",
            firstPartition.get(1));
}
/**
 * Executes the "wc" String source with a single peek(...) stage that prints
 * each element, expects two result partitions and verifies the first two
 * records of the first partition.
 */
@Test
public void noOperationsWithPeek() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .peek(line -> System.out.println(" Peek-a-boo : " + line))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check: only the leading records of partition 0 are compared.
    List<String> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals("The ship drew on and had safely passed the strait, which some volcanic",
            firstPartition.get(0));
    assertEquals("shock has made between the Calasareigne and Jaros islands; had doubled",
            firstPartition.get(1));
}
/**
 * Applies classify(...) with a constant classifier value of 1 directly to
 * the "wc" source, expects a single result partition and verifies the
 * second record of that partition.
 */
@Test
public void classifySource() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .classify(line -> 1)
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    // Spot check of the record at index 1.
    List<String> onlyPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals("shock has made between the Calasareigne and Jaros islands; had doubled",
            onlyPartition.get(1));
}
/**
 * Applies classify(...) with a constant classifier value of 1 to the "wc"
 * source, with a printing peek(...) stage before and after it. Expects a
 * single result partition and verifies its second record.
 */
@Test
public void classifySourceBeforeAndAfterPeek() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .peek(line -> System.out.println("Before classify : " + line))
            .classify(line -> 1)
            .peek(line -> System.out.println("After classify : " + line))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    // Spot check of the record at index 1.
    List<String> onlyPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals("shock has made between the Calasareigne and Jaros islands; had doubled",
            onlyPartition.get(1));
}
/**
 * Lower-cases every line of the "wc" source, then classifies by the first
 * character of the line. Expects two result partitions and verifies the
 * records at indexes 0 and 2 of the first partition.
 */
@Test
public void classifyWithPriorTransformation() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .map(line -> line.toLowerCase())
            .classify(line -> line.substring(0, 1))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<String> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals("happened on board.", firstPartition.get(0));
    assertEquals("the ship drew on and had safely passed the strait, which some volcanic",
            firstPartition.get(2));
}
/**
 * Lower-cases every line of the "wc" source, then classifies by the first
 * character of the line, printing elements via peek(...) before the map,
 * after the map and after the classify. Expects two result partitions and
 * verifies the records at indexes 0 and 2 of the first partition.
 */
@Test
public void classifyWithPriorTransformationAndPeekAfter() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .peek(line -> System.out.println("Before map : " + line))
            .map(line -> line.toLowerCase())
            .peek(line -> System.out.println("After map : " + line))
            .classify(line -> line.substring(0, 1))
            .peek(line -> System.out.println("After classify : " + line))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<String> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals("happened on board.", firstPartition.get(0));
    assertEquals("the ship drew on and had safely passed the strait, which some volcanic",
            firstPartition.get(2));
}
/**
 * Maps every line of the "wc" source to a (line, 1) key/value pair, then
 * classifies by the first six characters of the key. Expects two result
 * partitions and verifies the first entry of each.
 */
@Test
public void classifyWithPriorKVProducingTransformation() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> execution = DStream.ofType(String.class, "wc")
            .map(line -> KVUtils.kv(line, 1))
            .classify(pair -> pair.getKey().substring(0, 6))
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check: head of partition 0, then head of partition 1.
    List<Entry<String, Integer>> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals(
            KVUtils.kv("The ship drew on and had safely passed the strait, which some volcanic", 1),
            firstPartition.get(0));

    List<Entry<String, Integer>> secondPartition = partitions.get(1).collect(Collectors.toList());
    assertEquals(
            KVUtils.kv("Pomegue, and approached the harbor under topsails, jib, and spanker, but", 1),
            secondPartition.get(0));
}
/**
 * Maps every line of the "wc" source to a (line, 1) key/value pair, then
 * classifies by the first six characters of the key, printing elements via
 * peek(...) before the map, after the map and after the classify. Expects
 * two result partitions and verifies the first entry of each.
 */
@Test
public void classifyWithPriorKVProducingTransformationWithPeek() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> execution = DStream.ofType(String.class, "wc")
            .peek(line -> System.out.println("Before map : " + line))
            .map(line -> KVUtils.kv(line, 1))
            .peek(pair -> System.out.println("After map : " + pair))
            .classify(pair -> pair.getKey().substring(0, 6))
            .peek(pair -> System.out.println("After classify : " + pair))
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check: head of partition 0, then head of partition 1.
    List<Entry<String, Integer>> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals(
            KVUtils.kv("The ship drew on and had safely passed the strait, which some volcanic", 1),
            firstPartition.get(0));

    List<Entry<String, Integer>> secondPartition = partitions.get(1).collect(Collectors.toList());
    assertEquals(
            KVUtils.kv("Pomegue, and approached the harbor under topsails, jib, and spanker, but", 1),
            secondPartition.get(0));
}
/**
 * Maps lines of the "wc" source to (line, 1) pairs, classifies by the first
 * five characters of the key and then reverses each key. Expects two result
 * partitions and verifies the entry at index 1 of the second partition.
 */
@Test
public void classifyWithTransformationAfterClassification() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> execution = DStream.ofType(String.class, "wc")
            .map(line -> KVUtils.kv(line, 1))
            .classify(pair -> pair.getKey().substring(0, 5))
            .map(pair -> KVUtils.kv(new StringBuilder(pair.getKey()).reverse().toString(), pair.getValue()))
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 1.
    List<Entry<String, Integer>> secondPartition = partitions.get(1).collect(Collectors.toList());
    assertEquals(
            KVUtils.kv("evah dluoc enutrofsim tahw rehtona eno deksa ,live fo rennurerof eht", 1),
            secondPartition.get(1));
}
/**
 * Maps lines of the "wc" source to (line, 1) pairs, classifies by the first
 * five characters of the key and then reverses each key, with a printing
 * peek(...) stage around every step. Expects two result partitions and
 * verifies the entry at index 1 of the second partition.
 */
@Test
public void classifyWithTransformationAfterClassificationWithPeek() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> execution = DStream.ofType(String.class, "wc")
            .peek(line -> System.out.println("Before map : " + line))
            .map(line -> KVUtils.kv(line, 1))
            .peek(pair -> System.out.println("After map : " + pair))
            .classify(pair -> pair.getKey().substring(0, 5))
            .peek(pair -> System.out.println("After classify : " + pair))
            .map(pair -> KVUtils.kv(new StringBuilder(pair.getKey()).reverse().toString(), pair.getValue()))
            .peek(pair -> System.out.println("After map-2 : " + pair))
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 1.
    List<Entry<String, Integer>> secondPartition = partitions.get(1).collect(Collectors.toList());
    assertEquals(
            KVUtils.kv("evah dluoc enutrofsim tahw rehtona eno deksa ,live fo rennurerof eht", 1),
            secondPartition.get(1));
}
/**
 * Splits the "wc" lines on whitespace, sums a count of 1 per word with
 * reduceValues(...) and then classifies with a constant value of 1.
 * Expects a single result partition whose entry at index 7 is ("harbor", 1).
 */
@Test
public void classifyAfterShuffleOperation() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> execution = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .reduceValues(word -> word, word -> 1, Integer::sum)
            .classify(pair -> 1)
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<Entry<String, Integer>> onlyPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals(KVUtils.kv("harbor", 1), onlyPartition.get(7));
}
/**
 * Splits the "wc" lines on whitespace, sums a count of 1 per word with
 * reduceValues(...) and then classifies with a constant value of 1, with a
 * printing peek(...) stage around every step. Expects a single result
 * partition whose entry at index 7 is ("harbor", 1).
 */
@Test
public void classifyAfterShuffleOperationWithPeek() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> execution = DStream.ofType(String.class, "wc")
            .peek(text -> System.out.println("Before flatmap : " + text))
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .peek(word -> System.out.println("After flatmap : " + word))
            .reduceValues(word -> word, word -> 1, Integer::sum)
            .peek(pair -> System.out.println("After reduceValues : " + pair))
            .classify(pair -> 1)
            .peek(pair -> System.out.println("After classify : " + pair))
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<Entry<String, Integer>> onlyPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals(KVUtils.kv("harbor", 1), onlyPartition.get(7));
}
/**
 * Counts words of the "wc" source with reduceValues(...), classifies with a
 * constant value of 1 and then groups the (word, count) entries by the
 * first character of the word via aggregateValues(...). Expects two result
 * partitions and compares the string form of selected entries.
 */
@Test
public void classifyAfterShuffleAndShuffleAgain() throws Exception {
    Future<Stream<Stream<Entry<String, List<Entry<String, Integer>>>>>> execution =
            DStream.ofType(String.class, "wc")
                    .flatMap(text -> Stream.of(text.split("\\s+")))
                    .reduceValues(word -> word, word -> 1, Integer::sum)
                    .classify(pair -> 1)
                    .aggregateValues(pair -> pair.getKey().substring(0, 1), pair -> pair)
                    .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, List<Entry<String, Integer>>>>> partitions =
            execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<Entry<String, List<Entry<String, Integer>>>> firstPartition =
            partitions.get(0).collect(Collectors.toList());
    assertEquals("b=[between=2, board.=1, but=1]", firstPartition.get(3).toString());
    assertEquals("j=[jib,=1]", firstPartition.get(7).toString());

    // Spot check of partition 1.
    List<Entry<String, List<Entry<String, Integer>>>> secondPartition =
            partitions.get(1).collect(Collectors.toList());
    assertEquals(
            "s=[safely=1, ship=1, shock=1, slowly=1, so=1, some=1, spanker,=1, sedately=1, strait,=1]",
            secondPartition.get(7).toString());
}
/**
 * Counts words of the "wc" source with reduceValues(...), classifies with a
 * constant value of 1 and then groups the (word, count) entries by the
 * first character of the word via aggregateValues(...), with a printing
 * peek(...) stage around every step. Expects two result partitions and
 * compares the string form of selected entries.
 */
@Test
public void classifyAfterShuffleAndShuffleAgainWithPeek() throws Exception {
    Future<Stream<Stream<Entry<String, List<Entry<String, Integer>>>>>> execution =
            DStream.ofType(String.class, "wc")
                    .peek(text -> System.out.println("Before flatMap : " + text))
                    .flatMap(text -> Stream.of(text.split("\\s+")))
                    .peek(word -> System.out.println("After flatmap : " + word))
                    .reduceValues(word -> word, word -> 1, Integer::sum)
                    .peek(pair -> System.out.println("After reduceValues : " + pair))
                    .classify(pair -> 1)
                    .peek(pair -> System.out.println("After classify : " + pair))
                    .aggregateValues(pair -> pair.getKey().substring(0, 1), pair -> pair)
                    .peek(group -> System.out.println("After aggregateValues : " + group))
                    .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, List<Entry<String, Integer>>>>> partitions =
            execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<Entry<String, List<Entry<String, Integer>>>> firstPartition =
            partitions.get(0).collect(Collectors.toList());
    assertEquals("b=[between=2, board.=1, but=1]", firstPartition.get(3).toString());
    assertEquals("j=[jib,=1]", firstPartition.get(7).toString());

    // Spot check of partition 1.
    List<Entry<String, List<Entry<String, Integer>>>> secondPartition =
            partitions.get(1).collect(Collectors.toList());
    assertEquals(
            "s=[safely=1, ship=1, shock=1, slowly=1, so=1, some=1, spanker,=1, sedately=1, strait,=1]",
            secondPartition.get(7).toString());
}
/**
 * Counts words of the "wc" source with reduceValues(...) and then applies
 * two consecutive classify(...) stages: first on the first character of the
 * key, then on its first two characters. Expects two result partitions and
 * verifies the entries at indexes 0, 2 and 13 of the first partition.
 */
@Test
public void subsequentClassify() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> execution = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .reduceValues(word -> word, word -> 1, Integer::sum)
            .classify(pair -> pair.getKey().substring(0, 1))
            .classify(pair -> pair.getKey().substring(0, 2))
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    List<Entry<String, Integer>> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals(KVUtils.kv("Calasareigne", 1), firstPartition.get(0));
    assertEquals(KVUtils.kv("asked", 1), firstPartition.get(2));
    assertEquals(KVUtils.kv("that", 2), firstPartition.get(13));
}
/**
 * Counts words of the "wc" source with reduceValues(...) and then applies
 * two consecutive classify(...) stages (first character of the key, then
 * its first two characters), with a printing peek(...) stage around every
 * step. Expects two result partitions and verifies the entries at indexes
 * 0, 2 and 13 of the first partition.
 */
@Test
public void subsequentClassifyWithPeek() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> execution = DStream.ofType(String.class, "wc")
            .peek(text -> System.out.println("Before flatMap : " + text))
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .peek(word -> System.out.println("After flatMap : " + word))
            .reduceValues(word -> word, word -> 1, Integer::sum)
            .peek(pair -> System.out.println("After reduceValues : " + pair))
            .classify(pair -> pair.getKey().substring(0, 1))
            .peek(pair -> System.out.println("After classify : " + pair))
            .classify(pair -> pair.getKey().substring(0, 2))
            .peek(pair -> System.out.println("After classify - 2 : " + pair))
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    List<Entry<String, Integer>> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals(KVUtils.kv("Calasareigne", 1), firstPartition.get(0));
    assertEquals(KVUtils.kv("asked", 1), firstPartition.get(2));
    assertEquals(KVUtils.kv("that", 2), firstPartition.get(13));
}
/**
 * Upper-cases every line of the "wc" source via map(...), expects two
 * result partitions and verifies the records at indexes 0 and 2 of the
 * first partition.
 */
@Test
public void transformationOnly() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .map(line -> line.toUpperCase())
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<String> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals("THE SHIP DREW ON AND HAD SAFELY PASSED THE STRAIT, WHICH SOME VOLCANIC",
            firstPartition.get(0));
    assertEquals("THE FORERUNNER OF EVIL, ASKED ONE ANOTHER WHAT MISFORTUNE COULD HAVE",
            firstPartition.get(2));
}
/**
 * Upper-cases every line of the "wc" source inside a single compute(...)
 * stage, expects two result partitions and verifies the first two records
 * of the second partition.
 */
@Test
public void computeOnly() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .<String>compute(lines -> lines.map(line -> line.toUpperCase()))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 1.
    List<String> secondPartition = partitions.get(1).collect(Collectors.toList());
    assertEquals("POMEGUE, AND APPROACHED THE HARBOR UNDER TOPSAILS, JIB, AND SPANKER, BUT",
            secondPartition.get(0));
    assertEquals("SO SLOWLY AND SEDATELY BETWEEN THAT THE IDLERS, WITH THAT INSTINCT WHICH IS",
            secondPartition.get(1));
}
/**
 * Chains two compute(...) stages on the "wc" source: the first upper-cases
 * each line, the second reverses it. Expects two result partitions and
 * verifies the first record of the first partition.
 */
@Test
public void computeCompute() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .<String>compute(lines -> lines.map(line -> line.toUpperCase()))
            .<String>compute(lines -> lines.map(line -> new StringBuilder(line).reverse().toString()))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<String> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals("CINACLOV EMOS HCIHW ,TIARTS EHT DESSAP YLEFAS DAH DNA NO WERD PIHS EHT",
            firstPartition.get(0));
}
/**
 * Chains two compute(...) stages on the "wc" source (upper-case, then
 * reverse), with a printing peek(...) stage before, between and after them.
 * Expects two result partitions and verifies the first record of the first
 * partition.
 */
@Test
public void computeComputeWithPeek() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .peek(line -> System.out.println("Before compute : " + line))
            .<String>compute(lines -> lines.map(line -> line.toUpperCase()))
            .peek(line -> System.out.println("After compute - uppercase : " + line))
            .<String>compute(lines -> lines.map(line -> new StringBuilder(line).reverse().toString()))
            .peek(line -> System.out.println("After compute - reverse : " + line))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<String> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals("CINACLOV EMOS HCIHW ,TIARTS EHT DESSAP YLEFAS DAH DNA NO WERD PIHS EHT",
            firstPartition.get(0));
}
/**
 * Upper-cases each "wc" line in a compute(...) stage, classifies by the
 * length of the first space-delimited token, then reverses each line in a
 * second compute(...) stage. Expects two result partitions and verifies the
 * record at index 2 of the first partition.
 */
@Test
public void computeClassifyCompute() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .<String>compute(lines -> lines.map(line -> line.toUpperCase()))
            .classify(line -> line.split(" ")[0].length())
            .<String>compute(lines -> lines.map(line -> new StringBuilder(line).reverse().toString()))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<String> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals(".DRAOB NO DENEPPAH", firstPartition.get(2));
}
/**
 * Upper-cases each "wc" line in a compute(...) stage, classifies by the
 * length of the first space-delimited token, then reverses each line in a
 * second compute(...) stage, with a printing peek(...) stage around every
 * step. Expects two result partitions and verifies the record at index 2
 * of the first partition.
 */
@Test
public void computeClassifyComputeWithPeek() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .peek(line -> System.out.println("Before compute : " + line))
            .<String>compute(lines -> lines.map(line -> line.toUpperCase()))
            .peek(line -> System.out.println("After compute : " + line))
            .classify(line -> line.split(" ")[0].length())
            .peek(line -> System.out.println("After classify : " + line))
            .<String>compute(lines -> lines.map(line -> new StringBuilder(line).reverse().toString()))
            .peek(line -> System.out.println("After compute : " + line))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<String> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals(".DRAOB NO DENEPPAH", firstPartition.get(2));
}
/**
 * Upper-cases each "wc" line in a compute(...) stage, runs
 * reduceValues(...) keyed by the whole line, then emits the reversed key of
 * each entry from a second compute(...) stage. Expects two result
 * partitions and verifies the record at index 3 of the first partition.
 */
@Test
public void computeShuffleCompute() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .<String>compute(lines -> lines.map(line -> line.toUpperCase()))
            .reduceValues(line -> line, line -> 1, Integer::sum)
            .<String>compute(pairs -> pairs.map(pair -> new StringBuilder(pair.getKey()).reverse().toString()))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<String> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals("CINACLOV EMOS HCIHW ,TIARTS EHT DESSAP YLEFAS DAH DNA NO WERD PIHS EHT",
            firstPartition.get(3));
}
/**
 * Applies only reduceValues(...) to the "wc" source, keyed by the whole
 * line with a value of 1 summed per key. Expects two result partitions and
 * verifies the first entry of the first partition.
 */
@Test
public void shuffleOnly() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> execution = DStream.ofType(String.class, "wc")
            .reduceValues(line -> line, line -> 1, Integer::sum)
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<Entry<String, Integer>> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals(
            KVUtils.kv("The ship drew on and had safely passed the strait, which some volcanic", 1),
            firstPartition.get(0));
}
/**
 * Splits "wc" lines into words, keeps words longer than five characters,
 * upper-cases them and counts occurrences with reduceValues(...). Expects
 * two result partitions and verifies selected entries of both.
 */
@Test
public void transformationAndShuffle() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> execution = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .filter(token -> token.length() > 5)
            .map(token -> token.toUpperCase())
            .reduceValues(token -> token, token -> 1, Integer::sum)
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<Entry<String, Integer>> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals(KVUtils.kv("BETWEEN", 2), firstPartition.get(0));
    assertEquals(KVUtils.kv("INSTINCT", 1), firstPartition.get(4));

    // Spot check of partition 1.
    List<Entry<String, Integer>> secondPartition = partitions.get(1).collect(Collectors.toList());
    assertEquals(KVUtils.kv("ANOTHER", 1), secondPartition.get(0));
    assertEquals(KVUtils.kv("SEDATELY", 1), secondPartition.get(7));
}
/**
 * Splits "wc" lines into words, keeps words longer than five characters,
 * upper-cases them and counts occurrences with reduceValues(...), followed
 * by a printing peek(...) stage. Expects two result partitions and verifies
 * selected entries of both.
 */
@Test
public void transformationAndShuffleWithPeek() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> execution = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .filter(token -> token.length() > 5)
            .map(token -> token.toUpperCase())
            .reduceValues(token -> token, token -> 1, Integer::sum)
            .peek(pair -> System.out.println("After transformation shuffle : " + pair))
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of partition 0.
    List<Entry<String, Integer>> firstPartition = partitions.get(0).collect(Collectors.toList());
    assertEquals(KVUtils.kv("BETWEEN", 2), firstPartition.get(0));
    assertEquals(KVUtils.kv("INSTINCT", 1), firstPartition.get(4));

    // Spot check of partition 1.
    List<Entry<String, Integer>> secondPartition = partitions.get(1).collect(Collectors.toList());
    assertEquals(KVUtils.kv("ANOTHER", 1), secondPartition.get(0));
    assertEquals(KVUtils.kv("SEDATELY", 1), secondPartition.get(7));
}
/**
 * Reduces the "wc" source with String::concat. Expects one result partition
 * holding exactly one value, and checks the prefix of that value.
 */
@Test
public void reduceSource() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .reduce(String::concat)
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<String> reduced = partitions.get(0).collect(Collectors.toList());
    assertEquals(1, reduced.size());

    boolean prefixMatches = reduced.get(0).startsWith(
            "The ship drew on and had safely passed the strait, which some volcanicshock has made between");
    assertTrue(prefixMatches);
}
/**
 * Splits "wc" lines into whitespace-delimited words and reduces them with
 * String::concat. Expects one result partition holding exactly one value,
 * and checks the prefix of that value.
 */
@Test
public void reduceAfterTransformation() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .reduce(String::concat)
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<String> reduced = partitions.get(0).collect(Collectors.toList());
    assertEquals(1, reduced.size());

    boolean prefixMatches = reduced.get(0).startsWith(
            "Theshipdrewonandhadsafelypassedthestrait,whichsomevolcanicshockhas");
    assertTrue(prefixMatches);
}
/**
 * Splits "wc" lines into whitespace-delimited words and reduces them with
 * String::concat, followed by a printing peek(...) stage. Expects one
 * result partition holding exactly one value, and checks the prefix of
 * that value.
 */
@Test
public void reduceAfterTransformationWithPeek() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .reduce(String::concat)
            .peek(value -> System.out.println("After transformation reduce : " + value))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<String> reduced = partitions.get(0).collect(Collectors.toList());
    assertEquals(1, reduced.size());

    boolean prefixMatches = reduced.get(0).startsWith(
            "Theshipdrewonandhadsafelypassedthestrait,whichsomevolcanicshockhas");
    assertTrue(prefixMatches);
}
/**
 * Counts words of the "wc" source with reduceValues(...), converts each
 * entry to its string form and reduces the strings with String::concat.
 * Expects one result partition holding exactly one value, and checks the
 * prefix of that value.
 */
@Test
public void reduceAfterShuffle() throws Exception {
    Future<Stream<Stream<String>>> execution = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .reduceValues(word -> word, word -> 1, Integer::sum)
            .map(pair -> pair.toString())
            .reduce(String::concat)
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<String> reduced = partitions.get(0).collect(Collectors.toList());
    assertEquals(1, reduced.size());

    boolean prefixMatches = reduced.get(0).startsWith(
            "Pomegue,=1asked=1between=2board.=1drew=1evil,=1forerunner=1harbor");
    assertTrue(prefixMatches);
}
/**
 * Applies count() directly to the "wc" source. Expects one result partition
 * holding exactly one value, which must equal 6.
 */
@Test
public void countSource() throws Exception {
    Future<Stream<Stream<Long>>> execution = DStream.ofType(String.class, "wc")
            .count()
            .executeAs(EXECUTION_NAME);

    List<Stream<Long>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<Long> counts = partitions.get(0).collect(Collectors.toList());
    assertEquals(1, counts.size());
    assertEquals(6L, counts.get(0).longValue());
}
/**
 * Splits "wc" lines into whitespace-delimited words and applies count().
 * Expects one result partition holding exactly one value, which must
 * equal 62.
 */
@Test
public void countAfterTransformation() throws Exception {
    Future<Stream<Stream<Long>>> execution = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .count()
            .executeAs(EXECUTION_NAME);

    List<Stream<Long>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<Long> counts = partitions.get(0).collect(Collectors.toList());
    assertEquals(1, counts.size());
    assertEquals(62L, counts.get(0).longValue());
}
/**
 * Counts words of the "wc" source with reduceValues(...) and then applies
 * count() to the resulting entries. Expects one result partition holding
 * exactly one value, which must equal 49.
 */
@Test
public void countAfterShuffle() throws Exception {
    Future<Stream<Stream<Long>>> execution = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .reduceValues(word -> word, word -> 1, Integer::sum)
            .count()
            .executeAs(EXECUTION_NAME);

    List<Stream<Long>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<Long> counts = partitions.get(0).collect(Collectors.toList());
    assertEquals(1, counts.size());
    assertEquals(49L, counts.get(0).longValue());
}
/**
 * Counts words of the "wc" source with reduceValues(...), applies count()
 * to the resulting entries and prints the outcome via peek(...). Expects
 * one result partition holding exactly one value, which must equal 49.
 */
@Test
public void countAfterShuffleWithPeek() throws Exception {
    Future<Stream<Stream<Long>>> execution = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .reduceValues(word -> word, word -> 1, Integer::sum)
            .count()
            .peek(total -> System.out.println("After count shuffle : " + total))
            .executeAs(EXECUTION_NAME);

    List<Stream<Long>> partitions = execution.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<Long> counts = partitions.get(0).collect(Collectors.toList());
    assertEquals(1, counts.size());
    assertEquals(49L, counts.get(0).longValue());
}
/**
 * Splits "wc" lines into whitespace-delimited words and applies distinct().
 * Expects two result partitions and verifies that the first partition
 * contains no repeated value.
 *
 * The duplicate check is an assertion, so a violation is reported by JUnit
 * as a test failure rather than as an unexpected IllegalStateException.
 */
@Test
public void distinctSingleStage() throws Exception {
    Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc")
            .flatMap(s -> Stream.of(s.split("\\s+")))
            .distinct()
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> resultPartitionsList = resultFuture.get().collect(Collectors.toList());
    assertEquals(2, resultPartitionsList.size());

    // Spot check: partition 0 has no duplicates when its size equals the
    // number of distinct (equals-based) values it holds.
    List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList());
    long distinctCount = p1Result.stream().distinct().count();
    assertEquals("value occurs more than once in partition 0", p1Result.size(), distinctCount);
}
/**
 * Splits every element of the "wc" source on whitespace, classifies the words
 * by their length and applies {@code distinct()}. Expects two result
 * partitions and, as a spot check, verifies that the first partition contains
 * no repeated value.
 */
@Test
public void distinctAfterShuffle() throws Exception {
    Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc")
            .flatMap(s -> Stream.of(s.split("\\s+")))
            .classify(word -> word.length())
            .distinct()
            .executeAs(EXECUTION_NAME);
    Stream<Stream<String>> resultPartitionsStream = resultFuture.get();
    List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList());
    assertEquals(2, resultPartitionsList.size());
    // spot check: only the first partition is inspected
    List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList());
    // A repeated value would make the de-duplicated count smaller than the list size.
    // Using an assertion reports this as a test failure rather than an unexpected error.
    assertEquals("value occurs more than once", p1Result.size(), p1Result.stream().distinct().count());
}
/**
 * Splits every element of the "wc" source on whitespace, classifies the words
 * by their length, applies {@code distinct()} and prints each element through
 * {@code peek}. Expects two result partitions and, as a spot check, verifies
 * that the first partition contains no repeated value.
 */
@Test
public void distinctAfterShuffleWithPeek() throws Exception {
    Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc")
            .flatMap(s -> Stream.of(s.split("\\s+")))
            .classify(word -> word.length())
            .distinct()
            .peek(e -> System.out.println("Distinct after shuffle : " + e))
            .executeAs(EXECUTION_NAME);
    Stream<Stream<String>> resultPartitionsStream = resultFuture.get();
    List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList());
    assertEquals(2, resultPartitionsList.size());
    // spot check: only the first partition is inspected
    List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList());
    // A repeated value would make the de-duplicated count smaller than the list size.
    // Using an assertion reports this as a test failure rather than an unexpected error.
    assertEquals("value occurs more than once", p1Result.size(), p1Result.stream().distinct().count());
}
/**
 * Splits every element of the "wc" source on whitespace and takes the maximum
 * under {@code StringUtils::compareLength}. Expects one result partition
 * holding the single value "Calasareigne".
 */
@Test
public void minMaxSingleStage() throws Exception {
    Future<Stream<Stream<String>>> pending = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .max(StringUtils::compareLength)
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<String> maxima = partitions.get(0).collect(Collectors.toList());
    assertEquals(1, maxima.size());
    assertEquals("Calasareigne", maxima.get(0));
}
/**
 * Splits every element of the "wc" source on whitespace, classifies the words
 * by their own value and takes the maximum under
 * {@code StringUtils::compareLength}. Expects two result partitions, each with
 * a single value: "forerunner" in the first and "Calasareigne" in the second.
 */
@Test
public void minMaxAfterShuffle() throws Exception {
    Future<Stream<Stream<String>>> pending = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .classify(word -> word)
            .max(StringUtils::compareLength)
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    List<String> firstMaxima = partitions.get(0).collect(Collectors.toList());
    assertEquals(1, firstMaxima.size());
    assertEquals("forerunner", firstMaxima.get(0));

    List<String> secondMaxima = partitions.get(1).collect(Collectors.toList());
    assertEquals(1, secondMaxima.size());
    assertEquals("Calasareigne", secondMaxima.get(0));
}
/**
 * Splits every element of the "wc" source on whitespace, classifies the words
 * by their own value, takes the maximum under
 * {@code StringUtils::compareLength} and prints it through {@code peek}.
 * Expects two result partitions, each with a single value: "forerunner" in the
 * first and "Calasareigne" in the second.
 */
@Test
public void minMaxAfterShuffleWithPeek() throws Exception {
    Future<Stream<Stream<String>>> pending = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .classify(word -> word)
            .max(StringUtils::compareLength)
            .peek(winner -> System.out.println("After min max shuffle : " + winner))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    List<String> firstMaxima = partitions.get(0).collect(Collectors.toList());
    assertEquals(1, firstMaxima.size());
    assertEquals("forerunner", firstMaxima.get(0));

    List<String> secondMaxima = partitions.get(1).collect(Collectors.toList());
    assertEquals(1, secondMaxima.size());
    assertEquals("Calasareigne", secondMaxima.get(0));
}
/**
 * Splits every element of the "wc" source on whitespace and sorts the words
 * with {@code StringUtils::compareLength}. Expects two result partitions and
 * spot-checks two positions in each of them.
 */
@Test
public void sortedSingleStage() throws Exception {
    Future<Stream<Stream<String>>> pending = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .sorted(StringUtils::compareLength)
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of the first partition.
    List<String> firstSorted = partitions.get(0).collect(Collectors.toList());
    assertEquals("so", firstSorted.get(0));
    assertEquals("asked", firstSorted.get(13));

    // Spot check of the second partition.
    List<String> secondSorted = partitions.get(1).collect(Collectors.toList());
    assertEquals("on", secondSorted.get(0));
    assertEquals("jib,", secondSorted.get(18));
}
/**
 * Splits every element of the "wc" source on whitespace, sorts the words with
 * {@code StringUtils::compareLength} and prints each one through {@code peek}.
 * Expects two result partitions and spot-checks two positions in each of them.
 */
@Test
public void sortedSingleStageWithPeek() throws Exception {
    Future<Stream<Stream<String>>> pending = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .sorted(StringUtils::compareLength)
            .peek(word -> System.out.println("After sorted : " + word))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of the first partition.
    List<String> firstSorted = partitions.get(0).collect(Collectors.toList());
    assertEquals("so", firstSorted.get(0));
    assertEquals("asked", firstSorted.get(13));

    // Spot check of the second partition.
    List<String> secondSorted = partitions.get(1).collect(Collectors.toList());
    assertEquals("on", secondSorted.get(0));
    assertEquals("jib,", secondSorted.get(18));
}
/**
 * Splits every element of the "wc" source on whitespace, reduces the words
 * (key = word, value = 1, merged with {@code Integer::sum}) and sorts the
 * entries by applying {@code StringUtils.compareLength} to their keys.
 * Expects two result partitions and spot-checks two entries in each of them.
 */
@Test
public void sortedAfterShuffle() throws Exception {
    Future<Stream<Stream<Entry<String, Integer>>>> pending = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .reduceValues(word -> word, word -> 1, Integer::sum)
            .sorted((lhs, rhs) -> StringUtils.compareLength(lhs.getKey(), rhs.getKey()))
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<String, Integer>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of the first partition.
    List<Entry<String, Integer>> firstSorted = partitions.get(0).collect(Collectors.toList());
    assertEquals("is=1", firstSorted.get(0).toString());
    assertEquals("under=1", firstSorted.get(13).toString());

    // Spot check of the second partition.
    List<Entry<String, Integer>> secondSorted = partitions.get(1).collect(Collectors.toList());
    assertEquals("of=1", secondSorted.get(0).toString());
    assertEquals("islands;=1", secondSorted.get(18).toString());
}
/**
 * Splits every element of the "wc" source on whitespace, reduces the words
 * (key = word, value = 1, merged with {@code Integer::sum}) and maps each
 * entry to its key. Inside the mapping function the partition id reported by
 * {@code PartitionIdHelper} is asserted to be 1 for "happened" and 0 for
 * "spanker". The results are printed rather than asserted.
 */
@Test
public void mapPartitions() throws Exception {
    Future<Stream<Stream<String>>> pending = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .reduceValues(word -> word, word -> 1, Integer::sum)
            .map(entry -> {
                String key = entry.getKey();
                switch (key) {
                    case "happened":
                        assertEquals(1, PartitionIdHelper.getPartitionId());
                        break;
                    case "spanker":
                        assertEquals(0, PartitionIdHelper.getPartitionId());
                        break;
                    default:
                        break;
                }
                return key;
            })
            .executeAs(EXECUTION_NAME);

    ExecutionResultUtils.printResults(pending.get(), true);
}
/**
 * Splits every element of the "wc" source on whitespace, reduces the words
 * (key = word, value = 1, merged with {@code Integer::sum}), maps each entry
 * to its key and prints every key through {@code peek}. Inside the mapping
 * function the partition id reported by {@code PartitionIdHelper} is asserted
 * to be 1 for "happened" and 0 for "spanker". The results are printed rather
 * than asserted.
 */
@Test
public void mapPartitionsWithPeek() throws Exception {
    Future<Stream<Stream<String>>> pending = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .reduceValues(word -> word, word -> 1, Integer::sum)
            .map(entry -> {
                String key = entry.getKey();
                switch (key) {
                    case "happened":
                        assertEquals(1, PartitionIdHelper.getPartitionId());
                        break;
                    case "spanker":
                        assertEquals(0, PartitionIdHelper.getPartitionId());
                        break;
                    default:
                        break;
                }
                return key;
            })
            .peek(mapped -> System.out.println("map partitions : " + mapped))
            .executeAs(EXECUTION_NAME);

    ExecutionResultUtils.printResults(pending.get(), true);
}
/**
 * Runs a pipeline over the "wc" source that filters and upper-cases words,
 * reduces them (key = word, value = 1, merged with {@code Integer::sum}),
 * filters the entries by key length, aggregates keys under their value and
 * sorts each aggregated list. Expects two result partitions and spot-checks
 * the first entry of each.
 */
@Test
public void transformationShuffleTransformationShuffle() throws Exception {
    Future<Stream<Stream<Entry<Integer, List<String>>>>> pending = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .filter(token -> token.length() > 5)
            .map(token -> token.toUpperCase())
            .reduceValues(token -> token, token -> 1, Integer::sum)
            .filter(counted -> counted.getKey().length() > 6)
            .aggregateValues(counted -> counted.getValue(), counted -> counted.getKey())
            .map(grouped -> {
                Collections.sort(grouped.getValue());
                return grouped;
            })
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<Integer, List<String>>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of the first partition.
    List<Entry<Integer, List<String>>> firstGroups = partitions.get(0).collect(Collectors.toList());
    assertEquals("2=[BETWEEN]", firstGroups.get(0).toString());

    // Spot check of the second partition.
    List<Entry<Integer, List<String>>> secondGroups = partitions.get(1).collect(Collectors.toList());
    assertEquals(
            "1=[ANOTHER, APPROACHED, CALASAREIGNE, DOUBLED, FORERUNNER, HAPPENED, IDLERS,, INSTINCT, ISLANDS;, MISFORTUNE, POMEGUE,, SEDATELY, SPANKER,, STRAIT,, TOPSAILS,, VOLCANIC]",
            secondGroups.get(0).toString());
}
/**
 * Runs a pipeline over the "wc" source that filters and upper-cases words,
 * reduces them (key = word, value = 1, merged with {@code Integer::sum}),
 * filters the entries by key length, aggregates keys under their value, sorts
 * each aggregated list and prints every entry through {@code peek}. Expects
 * two result partitions and spot-checks the first entry of each.
 */
@Test
public void transformationShuffleTransformationShuffleWithPeek() throws Exception {
    Future<Stream<Stream<Entry<Integer, List<String>>>>> pending = DStream.ofType(String.class, "wc")
            .flatMap(text -> Stream.of(text.split("\\s+")))
            .filter(token -> token.length() > 5)
            .map(token -> token.toUpperCase())
            .reduceValues(token -> token, token -> 1, Integer::sum)
            .filter(counted -> counted.getKey().length() > 6)
            .aggregateValues(counted -> counted.getValue(), counted -> counted.getKey())
            .map(grouped -> {
                Collections.sort(grouped.getValue());
                return grouped;
            })
            .peek(shown -> System.out.println("transformation shuffle " + "transformation " + "shuffle : " + shown))
            .executeAs(EXECUTION_NAME);

    List<Stream<Entry<Integer, List<String>>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of the first partition.
    List<Entry<Integer, List<String>>> firstGroups = partitions.get(0).collect(Collectors.toList());
    assertEquals("2=[BETWEEN]", firstGroups.get(0).toString());

    // Spot check of the second partition.
    List<Entry<Integer, List<String>>> secondGroups = partitions.get(1).collect(Collectors.toList());
    assertEquals(
            "1=[ANOTHER, APPROACHED, CALASAREIGNE, DOUBLED, FORERUNNER, HAPPENED, IDLERS,, INSTINCT, ISLANDS;, MISFORTUNE, POMEGUE,, SEDATELY, SPANKER,, STRAIT,, TOPSAILS,, VOLCANIC]",
            secondGroups.get(0).toString());
}
/**
 * Joins the "one" and "two" sources without a predicate and executes under
 * {@code EXECUTION_NAME}. The test passes only if an
 * {@link IllegalStateException} is raised.
 */
@Test(expected = IllegalStateException.class)
public void failCrossJoinOnMultiplePartitions() throws Exception {
    DStream<String> left = DStream.ofType(String.class, "one");
    DStream<String> right = DStream.ofType(String.class, "two");

    Future<Stream<Stream<Tuple2<String, String>>>> pending = left.join(right).executeAs(EXECUTION_NAME);
    pending.get();
}
/**
 * Lower-cases the "one" source, upper-cases the "two" source and joins them
 * where the first whitespace-separated token of the left element equals the
 * third token of the right element. Executes under
 * {@code EXECUTION_NAME + "-1"}, expects one result partition and spot-checks
 * its third element.
 */
@Test
public void joinSinglePartitionWithTransformationsAndPredicate() throws Exception {
    DStream<String> left = DStream.ofType(String.class, "one");
    DStream<String> right = DStream.ofType(String.class, "two");

    Future<Stream<Stream<Tuple2<String, String>>>> pending = left
            .map(row -> row.toLowerCase())
            .join(right.map(row -> row.toUpperCase()))
            .on(pair -> {
                String leftToken = pair._1().split("\\s+")[0];
                String rightToken = pair._2().split("\\s+")[2];
                return leftToken.equals(rightToken);
            })
            .executeAs(EXECUTION_NAME + "-1");

    List<Stream<Tuple2<String, String>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    // Spot check of the only partition.
    List<Tuple2<String, String>> joinedRows = partitions.get(0).collect(Collectors.toList());
    assertEquals("[2 amazon, JEFF BEZOS 2]", joinedRows.get(2).toString());
}
/**
 * Lower-cases the "one" source, upper-cases the "two" source and joins them
 * where the first whitespace-separated token of the left element equals the
 * third token of the right element. Executes under {@code EXECUTION_NAME}; the
 * test passes only if an {@link IllegalStateException} is raised.
 */
@Test(expected = IllegalStateException.class)
public void failJoinMultiPartitionWithTransformationsAndPredicate() throws Exception {
    DStream<String> left = DStream.ofType(String.class, "one");
    DStream<String> right = DStream.ofType(String.class, "two");

    Future<Stream<Stream<Tuple2<String, String>>>> pending = left
            .map(row -> row.toLowerCase())
            .join(right.map(row -> row.toUpperCase()))
            .on(pair -> {
                String leftToken = pair._1().split("\\s+")[0];
                String rightToken = pair._2().split("\\s+")[2];
                return leftToken.equals(rightToken);
            })
            .executeAs(EXECUTION_NAME);
    pending.get();
}
/**
 * Joins the "one" and "two" sources without a predicate and executes under
 * {@code EXECUTION_NAME + "-1"}. Expects one result partition and spot-checks
 * its fifth element.
 */
@Test
public void successCrossJoinOnSinglePartitions() throws Exception {
    DStream<String> left = DStream.ofType(String.class, "one");
    DStream<String> right = DStream.ofType(String.class, "two");

    Future<Stream<Stream<Tuple2<String, String>>>> pending = left
            .join(right)
            .executeAs(EXECUTION_NAME + "-1");

    List<Stream<Tuple2<String, String>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    List<Tuple2<String, String>> joinedRows = partitions.get(0).collect(Collectors.toList());
    assertEquals("[1 Oracle, Tom McCuch 3]", joinedRows.get(4).toString());
}
/**
 * Classifies "one" by its first whitespace-separated token and "two" by its
 * third token, then joins them where the first character of the left element
 * equals the last character of the right element. Expects two result
 * partitions and spot-checks one element in each.
 */
@Test
public void twoWayJoinWithPredicateAndClassifier() throws Exception {
    DStream<String> left = DStream.ofType(String.class, "one").classify(row -> row.split("\\s+")[0]);
    DStream<String> right = DStream.ofType(String.class, "two").classify(row -> row.split("\\s+")[2]);

    Future<Stream<Stream<Tuple2<String, String>>>> pending = left
            .join(right)
            .on(pair -> {
                String leadingChar = pair._1().substring(0, 1);
                String trailingChar = pair._2().substring(pair._2().length() - 1);
                return leadingChar.equals(trailingChar);
            })
            .executeAs(EXECUTION_NAME);

    List<Stream<Tuple2<String, String>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of the first partition.
    List<Tuple2<String, String>> firstRows = partitions.get(0).collect(Collectors.toList());
    assertEquals("[2 Amazon, Jeff Bezos 2]", firstRows.get(0).toString());

    // Spot check of the second partition.
    List<Tuple2<String, String>> secondRows = partitions.get(1).collect(Collectors.toList());
    assertEquals("[3 Hortonworks, Arun Murthy 3]", secondRows.get(6).toString());
}
/**
 * Classifies "one" by its first whitespace-separated token and "two" by its
 * third token, joins them where the first character of the left element
 * equals the last character of the right element and prints every joined
 * tuple through {@code peek}. Expects two result partitions and spot-checks
 * one element in each.
 */
@Test
public void twoWayJoinWithPredicateAndClassifierWithPeek() throws Exception {
    DStream<String> left = DStream.ofType(String.class, "one").classify(row -> row.split("\\s+")[0]);
    DStream<String> right = DStream.ofType(String.class, "two").classify(row -> row.split("\\s+")[2]);

    Future<Stream<Stream<Tuple2<String, String>>>> pending = left
            .join(right)
            .on(pair -> {
                String leadingChar = pair._1().substring(0, 1);
                String trailingChar = pair._2().substring(pair._2().length() - 1);
                return leadingChar.equals(trailingChar);
            })
            .peek(joined -> System.out.println("twoWay Join With PredicateAndClassifier : " + joined))
            .executeAs(EXECUTION_NAME);

    List<Stream<Tuple2<String, String>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of the first partition.
    List<Tuple2<String, String>> firstRows = partitions.get(0).collect(Collectors.toList());
    assertEquals("[2 Amazon, Jeff Bezos 2]", firstRows.get(0).toString());

    // Spot check of the second partition.
    List<Tuple2<String, String>> secondRows = partitions.get(1).collect(Collectors.toList());
    assertEquals("[3 Hortonworks, Arun Murthy 3]", secondRows.get(6).toString());
}
/**
 * Joins the unclassified "one" and "two" sources where the first character of
 * the left element equals the last character of the right element. Executes
 * under {@code EXECUTION_NAME + "-1"}, expects one result partition and
 * spot-checks its eighth element.
 */
@Test
public void twoWayJoinWithPredicateAndNoClassifier() throws Exception {
    DStream<String> left = DStream.ofType(String.class, "one");
    DStream<String> right = DStream.ofType(String.class, "two");

    Future<Stream<Stream<Tuple2<String, String>>>> pending = left
            .join(right)
            .on(pair -> {
                String leadingChar = pair._1().substring(0, 1);
                String trailingChar = pair._2().substring(pair._2().length() - 1);
                return leadingChar.equals(trailingChar);
            })
            .executeAs(EXECUTION_NAME + "-1");

    List<Stream<Tuple2<String, String>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    // Spot check of the only partition.
    List<Tuple2<String, String>> joinedRows = partitions.get(0).collect(Collectors.toList());
    assertEquals("[3 Hortonworks, Oleg Zhurakousky 3]", joinedRows.get(7).toString());
}
/**
 * Joins "one", "two" and "three" where the first character of the first
 * element equals the last character of the second element and the third
 * element starts with "The". Executes under {@code EXECUTION_NAME + "-1"},
 * expects one result partition and spot-checks its second element.
 */
@Test
public void threeWayJoinWithPredicateSinglePartition() throws Exception {
    DStream<String> first = DStream.ofType(String.class, "one");
    DStream<String> second = DStream.ofType(String.class, "two");
    DStream<String> third = DStream.ofType(String.class, "three");

    Future<Stream<Stream<Tuple3<String, String, String>>>> pending = first
            .join(second)
            .join(third)
            .on(triple -> {
                String leadingChar = triple._1().substring(0, 1);
                String trailingChar = triple._2().substring(triple._2().length() - 1);
                return leadingChar.equals(trailingChar) && triple._3().startsWith("The");
            })
            .executeAs(EXECUTION_NAME + "-1");

    List<Stream<Tuple3<String, String, String>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    // Spot check of the only partition.
    List<Tuple3<String, String, String>> joinedRows = partitions.get(0).collect(Collectors.toList());
    assertEquals(
            "[1 Oracle, Thomas Kurian 1, The ship drew on and had safely passed the strait, which some volcanic]",
            joinedRows.get(1).toString());
}
/**
 * Joins "one", "two" and "three" where the first character of the first
 * element equals the last character of the second element and the third
 * element starts with "The", printing every joined tuple through
 * {@code peek}. Executes under {@code EXECUTION_NAME + "-1"}, expects one
 * result partition and spot-checks its second element.
 */
@Test
public void threeWayJoinWithPredicateSinglePartitionWithPeek() throws Exception {
    DStream<String> first = DStream.ofType(String.class, "one");
    DStream<String> second = DStream.ofType(String.class, "two");
    DStream<String> third = DStream.ofType(String.class, "three");

    Future<Stream<Stream<Tuple3<String, String, String>>>> pending = first
            .join(second)
            .join(third)
            .on(triple -> {
                String leadingChar = triple._1().substring(0, 1);
                String trailingChar = triple._2().substring(triple._2().length() - 1);
                return leadingChar.equals(trailingChar) && triple._3().startsWith("The");
            })
            .peek(joined -> System.out.println("three way join with " + "predicate single partition : " + joined))
            .executeAs(EXECUTION_NAME + "-1");

    List<Stream<Tuple3<String, String, String>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(1, partitions.size());

    // Spot check of the only partition.
    List<Tuple3<String, String, String>> joinedRows = partitions.get(0).collect(Collectors.toList());
    assertEquals(
            "[1 Oracle, Thomas Kurian 1, The ship drew on and had safely passed the strait, which some volcanic]",
            joinedRows.get(1).toString());
}
/**
 * Joins four classified sources with a filter and an upper-casing map applied
 * after the first join. The final predicate keeps tuples whose leading token
 * of "one", "three" and "four" and third token of "two" are all equal; the
 * result is flattened into a {@code Tuple4}. Expects two result partitions:
 * the second is spot-checked and the first must be empty.
 */
@Test
public void fourWayJoinWithIntermediateTransformations() throws Exception {
    DStream<String> first = DStream.ofType(String.class, "one").classify(row -> row.split("\\s+")[0]);
    DStream<String> second = DStream.ofType(String.class, "two").classify(row -> row.split("\\s+")[2]);
    DStream<String> third = DStream.ofType(String.class, "three").classify(row -> row.split("\\s+")[0]);
    DStream<String> fourth = DStream.ofType(String.class, "four").classify(row -> row.split("\\s+")[0]);

    Future<Stream<Stream<Tuple4<String, String, String, String>>>> pending = first
            .join(second)
            .filter(pair -> pair._1().contains("Hortonworks"))
            .map(pair -> tuple2(pair._1().toUpperCase(), pair._2().toUpperCase()))
            .join(third)
            .join(fourth)
            .on(joined -> {
                // All four tokens are extracted before any comparison is made.
                String keyOne = joined._1()._1().split("\\s+")[0];
                String keyTwo = joined._1()._2().split("\\s+")[2];
                String keyThree = joined._2().split("\\s+")[0];
                String keyFour = joined._3().split("\\s+")[0];
                return keyOne.equals(keyTwo) && keyOne.equals(keyThree) && keyOne.equals(keyFour);
            })
            .map(joined -> tuple4(joined._1()._1(), joined._1()._2(), joined._2(), joined._3()))
            .executeAs(EXECUTION_NAME);

    List<Stream<Tuple4<String, String, String, String>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of the second partition, read before the first one.
    List<Tuple4<String, String, String, String>> secondRows = partitions.get(1).collect(Collectors.toList());
    assertEquals(
            "[3 HORTONWORKS, ROB BEARDEN 3, 3 $1B, 3 5470 Great America Parkway Santa Clara, CA 95054]",
            secondRows.get(0).toString());

    List<Tuple4<String, String, String, String>> firstRows = partitions.get(0).collect(Collectors.toList());
    assertTrue(firstRows.isEmpty());
}
/**
 * Joins four classified sources with a filter and an upper-casing map applied
 * after the first join. The final predicate keeps tuples whose leading token
 * of "one", "three" and "four" and third token of "two" are all equal; the
 * result is flattened into a {@code Tuple4} and printed through {@code peek}.
 * Expects two result partitions: the second is spot-checked and the first
 * must be empty.
 */
@Test
public void fourWayJoinWithIntermediateTransformationsWithPeek() throws Exception {
    DStream<String> first = DStream.ofType(String.class, "one").classify(row -> row.split("\\s+")[0]);
    DStream<String> second = DStream.ofType(String.class, "two").classify(row -> row.split("\\s+")[2]);
    DStream<String> third = DStream.ofType(String.class, "three").classify(row -> row.split("\\s+")[0]);
    DStream<String> fourth = DStream.ofType(String.class, "four").classify(row -> row.split("\\s+")[0]);

    Future<Stream<Stream<Tuple4<String, String, String, String>>>> pending = first
            .join(second)
            .filter(pair -> pair._1().contains("Hortonworks"))
            .map(pair -> tuple2(pair._1().toUpperCase(), pair._2().toUpperCase()))
            .join(third)
            .join(fourth)
            .on(joined -> {
                // All four tokens are extracted before any comparison is made.
                String keyOne = joined._1()._1().split("\\s+")[0];
                String keyTwo = joined._1()._2().split("\\s+")[2];
                String keyThree = joined._2().split("\\s+")[0];
                String keyFour = joined._3().split("\\s+")[0];
                return keyOne.equals(keyTwo) && keyOne.equals(keyThree) && keyOne.equals(keyFour);
            })
            .map(joined -> tuple4(joined._1()._1(), joined._1()._2(), joined._2(), joined._3()))
            .peek(flattened -> System.out.println("After map : " + flattened))
            .executeAs(EXECUTION_NAME);

    List<Stream<Tuple4<String, String, String, String>>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of the second partition, read before the first one.
    List<Tuple4<String, String, String, String>> secondRows = partitions.get(1).collect(Collectors.toList());
    assertEquals(
            "[3 HORTONWORKS, ROB BEARDEN 3, 3 $1B, 3 5470 Great America Parkway Santa Clara, CA 95054]",
            secondRows.get(0).toString());

    List<Tuple4<String, String, String, String>> firstRows = partitions.get(0).collect(Collectors.toList());
    assertTrue(firstRows.isEmpty());
}
/**
 * Combines the "unone" and "untwo" sources with {@code union} and executes
 * under {@code EXECUTION_NAME + "-1"}. Expects one result partition with 4
 * elements and checks the first of them.
 */
@Test
public void unionDistinctSinglePartition() throws Exception {
    DStream<String> one = DStream.ofType(String.class, "unone");
    DStream<String> two = DStream.ofType(String.class, "untwo");
    Future<Stream<Stream<String>>> resultFuture = one
            .union(two)
            .executeAs(EXECUTION_NAME + "-1");
    Stream<Stream<String>> resultPartitionsStream = resultFuture.get();
    List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList());
    assertEquals(1, resultPartitionsList.size());
    // spot check
    List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList());
    assertEquals(4, p1Result.size());
    // The element is already a String, so it is compared directly; a null element
    // then fails the assertion instead of raising a NullPointerException.
    assertEquals("You have to learn the rules of the game. ", p1Result.get(0));
}
/**
 * Combines the "unone" and "untwo" sources with {@code union}, prints every
 * element through {@code peek} and executes under
 * {@code EXECUTION_NAME + "-1"}. Expects one result partition with 4 elements
 * and checks the first of them.
 */
@Test
public void unionDistinctSinglePartitionWithPeek() throws Exception {
    DStream<String> one = DStream.ofType(String.class, "unone");
    DStream<String> two = DStream.ofType(String.class, "untwo");
    Future<Stream<Stream<String>>> resultFuture = one
            .union(two)
            .peek(e -> System.out.println("After union : " + e))
            .executeAs(EXECUTION_NAME + "-1");
    Stream<Stream<String>> resultPartitionsStream = resultFuture.get();
    List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList());
    assertEquals(1, resultPartitionsList.size());
    // spot check
    List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList());
    assertEquals(4, p1Result.size());
    // The element is already a String, so it is compared directly; a null element
    // then fails the assertion instead of raising a NullPointerException.
    assertEquals("You have to learn the rules of the game. ", p1Result.get(0));
}
/**
 * Combines the "unone" and "untwo" sources with {@code unionAll} and executes
 * under {@code EXECUTION_NAME + "-1"}. Expects one result partition with 6
 * elements and checks the first of them.
 */
@Test
public void unionAllSinglePartition() throws Exception {
    DStream<String> one = DStream.ofType(String.class, "unone");
    DStream<String> two = DStream.ofType(String.class, "untwo");
    Future<Stream<Stream<String>>> resultFuture = one
            .unionAll(two)
            .executeAs(EXECUTION_NAME + "-1");
    Stream<Stream<String>> resultPartitionsStream = resultFuture.get();
    List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList());
    assertEquals(1, resultPartitionsList.size());
    // spot check
    List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList());
    assertEquals(6, p1Result.size());
    // The element is already a String, so it is compared directly; a null element
    // then fails the assertion instead of raising a NullPointerException.
    assertEquals("You have to learn the rules of the game. ", p1Result.get(0));
}
/**
 * Combines the "unone" and "untwo" sources with {@code unionAll}, prints every
 * element through {@code peek} and executes under
 * {@code EXECUTION_NAME + "-1"}. Expects one result partition with 6 elements
 * and checks the first of them.
 */
@Test
public void unionAllSinglePartitionWithPeek() throws Exception {
    DStream<String> one = DStream.ofType(String.class, "unone");
    DStream<String> two = DStream.ofType(String.class, "untwo");
    Future<Stream<Stream<String>>> resultFuture = one
            .unionAll(two)
            .peek(e -> System.out.println("After Union : " + e))
            .executeAs(EXECUTION_NAME + "-1");
    Stream<Stream<String>> resultPartitionsStream = resultFuture.get();
    List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList());
    assertEquals(1, resultPartitionsList.size());
    // spot check
    List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList());
    assertEquals(6, p1Result.size());
    // The element is already a String, so it is compared directly; a null element
    // then fails the assertion instead of raising a NullPointerException.
    assertEquals("You have to learn the rules of the game. ", p1Result.get(0));
}
/**
 * Classifies "one" by its first whitespace-separated token and "two" by its
 * third token, then combines them with {@code union}. Expects two result
 * partitions and checks the first three elements of the first partition.
 */
@Test
public void simpleTwoWayUnionWithClassifier() throws Exception {
    DStream<String> left = DStream.ofType(String.class, "one").classify(row -> row.split("\\s+")[0]);
    DStream<String> right = DStream.ofType(String.class, "two").classify(row -> row.split("\\s+")[2]);

    Future<Stream<Stream<String>>> pending = left.union(right).executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of the first partition.
    List<String> firstRows = partitions.get(0).collect(Collectors.toList());
    assertEquals("2 Amazon", firstRows.get(0));
    assertEquals("Jeff Bezos 2", firstRows.get(1));
    assertEquals("Jeffrey Blackburn 2", firstRows.get(2));
}
/**
 * Classifies "one" by its first whitespace-separated token and "two" by its
 * third token, combines them with {@code union} and prints every element
 * through {@code peek}. Expects two result partitions and checks the first
 * three elements of the first partition.
 */
@Test
public void simpleTwoWayUnionWithClassifierWithPeek() throws Exception {
    DStream<String> left = DStream.ofType(String.class, "one").classify(row -> row.split("\\s+")[0]);
    DStream<String> right = DStream.ofType(String.class, "two").classify(row -> row.split("\\s+")[2]);

    Future<Stream<Stream<String>>> pending = left
            .union(right)
            .peek(merged -> System.out.println("After union two" + " : " + merged))
            .executeAs(EXECUTION_NAME);

    List<Stream<String>> partitions = pending.get().collect(Collectors.toList());
    assertEquals(2, partitions.size());

    // Spot check of the first partition.
    List<String> firstRows = partitions.get(0).collect(Collectors.toList());
    assertEquals("2 Amazon", firstRows.get(0));
    assertEquals("Jeff Bezos 2", firstRows.get(1));
    assertEquals("Jeffrey Blackburn 2", firstRows.get(2));
}
/**
 * Combines the "one", "two" and "three" sources with two successive
 * {@code union} calls and executes under {@code EXECUTION_NAME + "-1"}.
 * Expects one result partition and checks its first and tenth elements.
 */
@Test
public void simpleThreeWayUnionSinglePartition() throws Exception {
    DStream<String> one = DStream.ofType(String.class, "one");
    DStream<String> two = DStream.ofType(String.class, "two");
    DStream<String> three = DStream.ofType(String.class, "three");
    Future<Stream<Stream<String>>> resultFuture = one
            .union(two)
            .union(three)
            .executeAs(EXECUTION_NAME + "-1");
    Stream<Stream<String>> resultPartitionsStream = resultFuture.get();
    List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList());
    assertEquals(1, resultPartitionsList.size());
    // spot check
    List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList());
    assertEquals("1 Oracle", p1Result.get(0));
    // Compared directly, like the element above: the value is already a String and
    // a null element then fails the assertion instead of raising a NullPointerException.
    assertEquals("Thomas Kurian 1", p1Result.get(9));
}
/**
 * Combines the "one", "two" and "three" sources with two successive
 * {@code union} calls, printing elements through {@code peek} before the
 * first union and after each union. Executes under
 * {@code EXECUTION_NAME + "-1"}, expects one result partition and checks its
 * first and tenth elements.
 */
@Test
public void simpleThreeWayUnionSinglePartitionWithPeek() throws Exception {
    DStream<String> one = DStream.ofType(String.class, "one");
    DStream<String> two = DStream.ofType(String.class, "two");
    DStream<String> three = DStream.ofType(String.class, "three");
    Future<Stream<Stream<String>>> resultFuture = one
            .peek(e -> System.out.println("Before union two : " + e))
            .union(two)
            .peek(e -> System.out.println("After union two :" + e))
            .union(three)
            .peek(e -> System.out.println("After union three : " + e))
            .executeAs(EXECUTION_NAME + "-1");
    Stream<Stream<String>> resultPartitionsStream = resultFuture.get();
    List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList());
    assertEquals(1, resultPartitionsList.size());
    // spot check
    List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList());
    assertEquals("1 Oracle", p1Result.get(0));
    // Compared directly, like the element above: the value is already a String and
    // a null element then fails the assertion instead of raising a NullPointerException.
    assertEquals("Thomas Kurian 1", p1Result.get(9));
}
}