package io.dstream.tez;
import static org.junit.Assert.assertEquals;
import java.util.List;
import java.util.Map.Entry;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.junit.After;
import org.junit.Test;
import io.dstream.DStream;
import io.dstream.utils.KVUtils;
import io.dstream.utils.Tuples.Tuple2;
public class ClassificationTests extends BaseTezTests {
/** Application name shared by the tests; taken from the simple name of the runtime class. */
private final String applicationName = getClass().getSimpleName();

/**
 * Runs after every test method and passes this class's application name to
 * {@code clean(..)}.
 * NOTE(review): {@code clean} is not defined in this file (presumably inherited from
 * {@code BaseTezTests}) - confirm what exactly it removes.
 */
@After
public void after() {
    clean(this.applicationName);
}
// @Test
// public void partitionDefault() throws Exception {
// Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "partitionDefault")
// .partition()
// .executeAs(this.applicationName + "-default");
//
// Stream<Stream<String>> result = resultFuture.get(1000000, TimeUnit.MILLISECONDS);
// List<Stream<String>> resultStreams = result.collect(Collectors.toList());
// resultStreams.get(0).forEach(System.out::println);
// Assert.assertEquals(1, resultStreams.size());
// result.close();
// }
// @Test
// public void partitionDefaultWithKV() throws Exception {
// Future<Stream<Stream<Entry<String, Integer>>>> resultFuture = DStream.ofType(String.class, "partitionDefault")
// .map(s -> KVUtils.kv(s, 1))
// .partition()
// .executeAs(this.applicationName + "-default");
//
// Stream<Stream<Entry<String, Integer>>> result = resultFuture.get(1000000, TimeUnit.MILLISECONDS);
// List<Stream<Entry<String, Integer>>> resultStreams = result.collect(Collectors.toList());
// resultStreams.get(0).forEach(System.out::println);
// Assert.assertEquals(1, resultStreams.size());
// result.close();
// }
// @Test
// public void partitionSetSize() throws Exception {
// Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "partitionSetSize")
// .partition()
// .executeAs(this.applicationName + "-size");
//
// Stream<Stream<String>> result = resultFuture.get(10000, TimeUnit.MILLISECONDS);
// List<Stream<String>> resultStreams = result.collect(Collectors.toList());
// Assert.assertEquals(4, resultStreams.size());
// result.close();
// }
// @Test
// public void partitionSetSizeMultiStages() throws Exception {
// Future<Stream<Stream<Entry<String, Integer>>>> resultFuture = DStream.ofType(String.class, "partitionSetSizeMultiStages")
// .filter(line -> line.length() > 70)
// .partition()
// .flatMap(line -> Stream.of(line.split(" ")))
// .reduceGroups(s -> s, s -> 1, Integer::sum)
// .executeAs(this.applicationName + "-size");
//
// Stream<Stream<Entry<String, Integer>>> result = resultFuture.get(10000, TimeUnit.MILLISECONDS);
// List<Stream<Entry<String, Integer>>> resultStreams = result.collect(Collectors.toList());
// Assert.assertEquals(4, resultStreams.size());
// result.close();
// }
// @Test
// public void partitionSetSizeAndPartitioner() throws Exception {
// new File("TestPartitioner").delete();
// Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "partitionSetSizeAndPartitioner")
// .partition()
// .executeAs(this.applicationName + "-partitioner");
//
// Stream<Stream<String>> result = resultFuture.get(10000, TimeUnit.MILLISECONDS);
// List<Stream<String>> resultStreams = result.collect(Collectors.toList());
// Assert.assertEquals(6, resultStreams.size());
// result.close();
// assertTrue(new File("TestPartitioner").exists());
// }
// @Test
// public void partitionAfterJoinDefault() throws Exception {
// DStream<String> s1 = DStream.ofType(String.class, "hash");
// DStream<String> s2 = DStream.ofType(String.class, "probe");
//
// Future<Stream<Stream<Tuple2<String, String>>>> resultFuture = s1
// .filter(s -> true)
// .join(s2).on(a -> true)
// .partition()
// .executeAs(this.applicationName + "-default");
//
// Stream<Stream<Tuple2<String, String>>> result = resultFuture.get(10000, TimeUnit.MILLISECONDS);
// List<Stream<Tuple2<String, String>>> resultStreams = result.collect(Collectors.toList());
// Assert.assertEquals(1, resultStreams.size());
// result.close();
// }
// @Test
// public void partitionAfterJoinSize() throws Exception {
// DStream<String> s1 = DStream.ofType(String.class, "hash");
// DStream<String> s2 = DStream.ofType(String.class, "probe");
//
// Future<Stream<Stream<Tuple2<String, String>>>> resultFuture = s1
// .filter(s -> true)
// .join(s2).on(a -> true)
// .partition()
// .executeAs(this.applicationName + "-size");
//
// Stream<Stream<Tuple2<String, String>>> result = resultFuture.get(10000, TimeUnit.MILLISECONDS);
// List<Stream<Tuple2<String, String>>> resultStreams = result.collect(Collectors.toList());
// Assert.assertEquals(4, resultStreams.size());
// result.close();
// }
/**
 * Joins two keyed streams on which {@code classify(..)} was never called and expects
 * the test body to terminate with an {@link IllegalStateException}.
 * <p>
 * The "hash" side is keyed by the first space-separated token of each line and the
 * "probe" side by the third; the join predicate matches entries with equal keys.
 * The test does not pin which call raises the exception - any statement in the
 * method body satisfies the {@code expected} attribute.
 *
 * @throws Exception whatever the pipeline construction, submission or the timed
 *         wait on the result raises
 */
@Test(expected = IllegalStateException.class)
public void failJoinWithoutClassification() throws Exception {
    DStream<String> hashStream = DStream.ofType(String.class, "hash");
    DStream<String> probeStream = DStream.ofType(String.class, "probe");

    Future<Stream<Stream<Tuple2<Entry<String, String>, Entry<String, String>>>>> pendingResult = hashStream
            .map(hashLine -> KVUtils.kv(hashLine.split(" ")[0], hashLine))
            .join(probeStream.map(probeLine -> KVUtils.kv(probeLine.split(" ")[2], probeLine)))
            .on(pair -> pair._1().getKey().equals(pair._2().getKey()))
            .executeAs(this.applicationName + "-partitioner");

    // Wait at most one second (1000 ms) for the execution to finish.
    pendingResult.get(1000, TimeUnit.MILLISECONDS);
}
/**
 * Executes a filter + classify pipeline under the {@code "-default"} execution name
 * and verifies the shape and content of the result.
 * <p>
 * Lines longer than 73 characters are kept and classified by their first seven
 * characters. The test expects exactly one result partition holding exactly one
 * line.
 * NOTE(review): the partition count presumably comes from the configuration bound to
 * the {@code "-default"} execution name - confirm against the test resources.
 *
 * @throws Exception if the execution fails or does not complete within the timeout
 */
@Test
public void classifyWithDefaultClassifier() throws Exception {
    Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "partitionWithClassifier")
            .filter(line -> line.length() > 73)
            .classify(s -> s.substring(0, 7))
            .executeAs(this.applicationName + "-default");

    // try-with-resources closes the result stream even when an assertion below fails;
    // previously close() was only reached on the success path.
    try (Stream<Stream<String>> resultPartitionsStream = resultFuture.get(100000, TimeUnit.MILLISECONDS)) {
        // ExecutionResultUtils.printResults(resultPartitionsStream, true);
        List<Stream<String>> resultStreams = resultPartitionsStream.collect(Collectors.toList());
        assertEquals(1, resultStreams.size());

        // spot check
        List<String> rValues = resultStreams.get(0).collect(Collectors.toList());
        assertEquals(1, rValues.size());
        assertEquals("so slowly and sedately between that the idlers, with that instinct which is", rValues.get(0));
    }
}
/**
 * Executes a filter + classify pipeline under the {@code "-size"} execution name and
 * verifies the shape and content of the result.
 * <p>
 * Lines longer than 73 characters are kept and classified by their first five
 * characters. The test expects four result partitions, with the partition at
 * index 1 holding exactly one line.
 * NOTE(review): the partition count presumably comes from the configuration bound to
 * the {@code "-size"} execution name - confirm against the test resources.
 *
 * @throws Exception if the execution fails or does not complete within the timeout
 */
@Test
public void classifyWithClassifierSizeSet() throws Exception {
    Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "partitionWithClassifier")
            .filter(line -> line.length() > 73)
            .classify(s -> s.substring(0, 5))
            .executeAs(this.applicationName + "-size");

    // try-with-resources closes the result stream even when an assertion below fails;
    // previously close() was only reached on the success path.
    try (Stream<Stream<String>> resultPartitionsStream = resultFuture.get(10000, TimeUnit.MILLISECONDS)) {
        // ExecutionResultUtils.printResults(resultPartitionsStream, true);
        List<Stream<String>> resultStreams = resultPartitionsStream.collect(Collectors.toList());
        assertEquals(4, resultStreams.size());

        // spot check: the single surviving line is expected in the second partition
        List<String> rValues = resultStreams.get(1).collect(Collectors.toList());
        assertEquals(1, rValues.size());
        assertEquals("so slowly and sedately between that the idlers, with that instinct which is", rValues.get(0));
    }
}
// @Test
// public void partitionAfterJoinWithClassifier() throws Exception {
//// assertFalse(new File("TestPartitionerWithClassifier").exists());
// DStream<Entry<String, Integer>> s1 = DStream.ofType(String.class, "partitionAfterJoinWithClassifier")
// .filter(s -> s.length() > 60)
// .flatMap(line -> Stream.of(line.split(" ")))
// .map(s -> s.trim())
// .reduceGroups(s -> s, s -> 1, Integer::sum);
//
//// Future<Stream<Stream<Entry<String, Integer>>>> resultFuture = s1.executeAs(this.applicationName);
//// Stream<Stream<Entry<String, Integer>>> result = resultFuture.get();
//// result.forEach(s -> s.forEach(System.out::println));
////
//// System.out.println("===========");
//
// DStream<String> s2 = DStream.ofType(String.class, "bar")
// .flatMap(line -> Stream.of(line.split(" ")))
// .map(s -> s.trim());
//
// DStream<String> s3 = DStream.ofType(String.class, "bar");
//
//// Future<Stream<Stream<String>>> resultFuture2 = s2.executeAs(this.applicationName);
//// Stream<Stream<String>> result2 = resultFuture2.get();
//// result2.forEach(s -> s.forEach(System.out::println));
//
//// Future<Stream<Stream<Tuple2<Entry<String, Integer>, String>>>> resultFuture = s1
//// .join(s2).on(a -> a._1().getKey().equals(a._2()))
////// .aggregateGroups(s -> s, s -> s, Aggregators::aggregateFlatten)
////// .partition(s -> s)
//// .executeAs(this.applicationName);
//
// s1
// .join(s2).on(a -> a._1().getKey().equals(a._2()))
// .reduceGroups(s -> {
// System.out.println(s);
// return s;}, s -> 1, Integer::sum)
// .partition(s -> s)
// .join(s3)
// .executeAs(this.applicationName).get();
// //resultFuture.get(1000000, TimeUnit.MILLISECONDS);
//// Stream<Stream<Entry<Tuple2<Entry<String, Integer>, String>, Integer>>> result = resultFuture.get(1000000, TimeUnit.MILLISECONDS);
//// List<Stream<Entry<Tuple2<Entry<String, Integer>, String>, Integer>>> resultStreams = result.collect(Collectors.toList());
//// resultStreams.get(0).forEach(System.out::println);
//// System.out.println(resultStreams.size());
//// Assert.assertEquals(2, resultStreams.size());
//// result.close();
//// assertFalse(new File("TestPartitionerWithClassifier").exists());
// }
}