/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.dstream.local.ri;

import java.lang.reflect.Field;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import io.dstream.DStreamConstants;
import io.dstream.DStreamExecutionGraph;
import io.dstream.DStreamOperation;
import io.dstream.SerializableStreamAssets.SerFunction;
import io.dstream.local.ri.ShuffleHelper.RefHolder;
import io.dstream.support.AbstractPartitionedStreamProducingSourceSupplier;
import io.dstream.support.Aggregators;
import io.dstream.support.Classifier;
import io.dstream.support.HashClassifier;
import io.dstream.support.PartitionIdHelper;
import io.dstream.support.SourceSupplier;
import io.dstream.support.UriSourceSupplier;
import io.dstream.utils.KVUtils;
import io.dstream.utils.ReflectionUtils;
import io.dstream.utils.SingleValueIterator;

/**
 * Executes a {@link DStreamExecutionGraph} in the current JVM, one
 * {@link DStreamOperation} (stage) at a time. The output of every stage is
 * fully realized into in-memory lists (one list per partition) before the next
 * stage runs; between stages the realized elements are re-partitioned using the
 * configured {@link Classifier}.
 */
final class LocalDStreamExecutionEngine {

	private final Properties executionConfig;

	private final String executionName;

	private final Classifier classifier;

	/** Realized output of the most recently executed stage; {@code null} until the first stage has run. */
	private List<List<?>> realizedStageResults;

	/** The static {@code partitionIdHolder} of {@link PartitionIdHelper}, obtained reflectively. */
	private final ThreadLocal<Integer> partitionIdHolder;

	/**
	 * Creates an engine for the given execution.
	 *
	 * @param executionName name of the execution; handed on to the engines created for
	 *        combinable (dependent) execution graphs
	 * @param executionConfig configuration from which the classifier, the parallelism and
	 *        the source suppliers are resolved
	 * @throws IllegalStateException if the {@code partitionIdHolder} field of
	 *         {@link PartitionIdHelper} can not be read
	 */
	@SuppressWarnings("unchecked")
	public LocalDStreamExecutionEngine(String executionName, Properties executionConfig){
		this.executionName = executionName;
		this.executionConfig = executionConfig;
		// must follow the executionConfig assignment since the classifier is resolved from it
		this.classifier = this.determineClassifier();
		try {
			Field tl = ReflectionUtils.findField(PartitionIdHelper.class, "partitionIdHolder", ThreadLocal.class);
			tl.setAccessible(true);
			// static field, hence the null target
			this.partitionIdHolder = (ThreadLocal<Integer>) tl.get(null);
		}
		catch (Exception e) {
			throw new IllegalStateException(e);
		}
	}

	/**
	 * Executes every operation of the given pipeline without partitioning the
	 * output of its first stage.
	 *
	 * @param pipeline the execution graph to run
	 * @return one stream per realized result partition of the last stage
	 */
	public Stream<Stream<?>> execute(DStreamExecutionGraph pipeline) {
		return this.execute(pipeline, false);
	}

	/**
	 * Executes every operation of the given pipeline in order.
	 *
	 * @param pipeline the execution graph to run
	 * @param partition whether the output of the first stage should be partitioned
	 *        right after it is computed
	 * @return one stream per realized result partition of the last stage
	 */
	private Stream<Stream<?>> execute(DStreamExecutionGraph pipeline, boolean partition) {
		for (DStreamOperation streamOperation : pipeline.getOperations()) {
			this.doExecuteStage(streamOperation, partition, pipeline.getName());
		}
		return this.realizedStageResults.stream().map(list -> list.stream());
	}

	/**
	 * Executes a single stage and stores its realized output in
	 * {@link #realizedStageResults}.
	 * <p>
	 * The first stage reads from the initial source stream. Every subsequent stage
	 * merges the previous results, re-partitions them and applies the stage
	 * function to each partition. If the operation declares combinable execution
	 * graphs, each of them is executed by a fresh engine and its partitions are
	 * merged, by position, with the partitions of this pipeline before the stage
	 * function is applied.
	 *
	 * @param streamOperation the stage to execute
	 * @param partition whether to partition the output of the first stage
	 * @param pipelineName name of the pipeline, used to resolve the source supplier
	 */
	private void doExecuteStage(DStreamOperation streamOperation, boolean partition, String pipelineName){
		SerFunction<Stream<?>, Stream<?>> streamFunction = streamOperation.getStreamOperationFunction();

		if (this.realizedStageResults == null){
			List<List<?>> realizedIntermediateResult = new ArrayList<>();
			// The source may be backed by open files (see buildStreamFromURI), so make
			// sure it is closed once the stage output has been collected.
			try (Stream<?> initialStream = this.createInitialStream(pipelineName)) {
				List<?> firstStageResult = streamFunction.apply(initialStream).collect(Collectors.toList());
				realizedIntermediateResult.add(firstStageResult);
			}

			if (partition){
				Stream<Stream<?>> partitionedStreamResultNoId =
						this.unmapPartitions(this.partitionStream(this.mergeRealizedResults(realizedIntermediateResult)));
				realizedIntermediateResult = this.realize(partitionedStreamResultNoId);
			}
			this.realizedStageResults = realizedIntermediateResult;
		}
		else {
			Stream<?> mergedStream = this.mergeRealizedResults(this.realizedStageResults);
			Stream<Entry<Integer, List<Object>>> partitionedStreamResult = this.partitionStream(mergedStream);
			Stream<Stream<?>> partitionedStreamResultNoId = this.unmapPartitions(partitionedStreamResult);

			List<DStreamExecutionGraph> dependentPipelines = streamOperation.getCombinableExecutionGraphs();
			if (!dependentPipelines.isEmpty()){
				Map<Integer, Object> matchedPartitions = new LinkedHashMap<>();
				this.mergeByPosition(matchedPartitions, partitionedStreamResultNoId.collect(Collectors.toList()));

				for (DStreamExecutionGraph dependentPipeline : dependentPipelines) {
					LocalDStreamExecutionEngine e = new LocalDStreamExecutionEngine(this.executionName, this.executionConfig);
					Stream<Stream<?>> dependentStream = e.execute(dependentPipeline, true);
					this.mergeByPosition(matchedPartitions, dependentStream.collect(Collectors.toList()));
				}
				// NOTE(review): Map.merge stores the first value for a key as-is, so a position
				// that received only one contribution still holds a bare Stream and the cast
				// below would fail. Presumably all pipelines yield the same number of
				// partitions - confirm against Aggregators.aggregateToList and the callers.
				partitionedStreamResultNoId = matchedPartitions.values().stream().map(list -> ((List<?>)list).stream());
			}

			Stream<Stream<?>> transformedStreams = partitionedStreamResultNoId.map(stream -> streamFunction.apply(stream));
			this.realizedStageResults = this.realize(transformedStreams);
		}
	}

	/**
	 * Flattens realized partitions into a single stream, preserving partition and
	 * element order. Unlike reducing with {@code Stream.concat} and unwrapping the
	 * resulting {@code Optional}, this yields an empty stream when there are no
	 * partitions at all.
	 */
	@SuppressWarnings("unchecked")
	private Stream<?> mergeRealizedResults(List<List<?>> realizedResults) {
		return realizedResults.stream().flatMap(list -> (Stream<Object>) list.stream());
	}

	/**
	 * Collects every stream into a list, producing one list per input stream.
	 */
	private List<List<?>> realize(Stream<Stream<?>> streams) {
		return streams.map(stream -> stream.collect(Collectors.toList())).collect(Collectors.toList());
	}

	/**
	 * Merges each partition into {@code matchedPartitions} under its position in
	 * the list, combining with any value already present at that position via
	 * {@code Aggregators.aggregateToList}.
	 */
	private void mergeByPosition(Map<Integer, Object> matchedPartitions, List<Stream<?>> partitions) {
		for (int i = 0; i < partitions.size(); i++) {
			matchedPartitions.merge(i, partitions.get(i), Aggregators::aggregateToList);
		}
	}

	/**
	 * Strips the partition id from each partition entry, returning one stream of
	 * values per partition. As each value is pulled through the returned stream the
	 * id of its partition is published to the {@link #partitionIdHolder} thread local.
	 *
	 * @param shuffledPartitionStream entries of partition id to partition values
	 * @return one stream of values per partition entry
	 */
	private Stream<Stream<?>> unmapPartitions(Stream<Entry<Integer, List<Object>>> shuffledPartitionStream) {
		return shuffledPartitionStream.map(entry -> entry.getValue().stream().map(val -> {
			this.partitionIdHolder.set(entry.getKey());
			return val;
		}));
	}

	/**
	 * Creates the stream that feeds the first stage of the named pipeline.
	 * <p>
	 * For a {@link UriSourceSupplier} the lines of every supplied URI are
	 * concatenated in order. Each file is opened only when the stream reaches it
	 * and is closed as soon as its lines have been consumed; no URIs results in an
	 * empty stream. For an {@link AbstractPartitionedStreamProducingSourceSupplier}
	 * the supplied stream is returned as-is.
	 *
	 * @param pipelineName name of the pipeline whose source is resolved from the configuration
	 * @return the initial stream
	 * @throws IllegalStateException if the resolved source supplier type is not supported
	 */
	@SuppressWarnings("unchecked")
	private <R> Stream<R> createInitialStream(String pipelineName){
		SourceSupplier<R> sourceSupplier = SourceSupplier.<R> create(this.executionConfig, pipelineName, null);
		if (sourceSupplier instanceof UriSourceSupplier) {
			UriSourceSupplier uriSupplier = (UriSourceSupplier) sourceSupplier;
			Stream<URI> uriSources = uriSupplier.get();
			// flatMap closes every mapped stream once its contents have been consumed,
			// which releases the file handle opened by Files.lines.
			Stream<String> lines = uriSources.flatMap(this::buildStreamFromURI);
			return (Stream<R>) lines;
		}
		else if (sourceSupplier instanceof AbstractPartitionedStreamProducingSourceSupplier) {
			AbstractPartitionedStreamProducingSourceSupplier<R> spSourceSupplier =
					(AbstractPartitionedStreamProducingSourceSupplier<R>) sourceSupplier;
			return spSourceSupplier.get();
		}
		else {
			throw new IllegalStateException("Unsupported SourceSupplier " + sourceSupplier.getClass().getName());
		}
	}

	/**
	 * Opens the resource identified by the URI as a stream of lines. The returned
	 * stream holds an open file and must be closed by the caller.
	 *
	 * @param uri location of the resource to read
	 * @return stream of the lines of the resource
	 * @throws IllegalStateException if the stream can not be created
	 */
	private Stream<String> buildStreamFromURI(URI uri) {
		try {
			return Files.lines(Paths.get(uri));
		}
		catch (Exception e) {
			throw new IllegalStateException("Failed to create Stream from URI: " + uri, e);
		}
	}

	/**
	 * Assigns every element to a partition using the classifier and groups the
	 * elements of each partition.
	 *
	 * @param streamToShuffle the elements to partition
	 * @return entries of partition id to the grouped contents of that partition
	 */
	@SuppressWarnings({ "rawtypes", "unchecked" })
	private Stream<Entry<Integer, List<Object>>> partitionStream(Stream<?> streamToShuffle){
		Stream<Entry<Integer, Object>> partitionedStream = streamToShuffle
				.map(element -> KVUtils.kv(this.classifier.getClassificationId(element), element));

		/*
		 * Groups the elements of each partition using ShuffleHelper.
		 * If an element is a Key/Value Entry, ShuffleHelper groups it as Key/List[Values],
		 * so the partition entry looks like {0={key1=[v,v,v,v],key2=v}}.
		 * If an element is not a Key/Value Entry, the values are grouped into a List,
		 * so the partition entry looks like {0=[v1,v2,v1,v3,v4]}.
		 */
		Stream<Map<Integer, ?>> groupedPartitionsStream = Stream.of(partitionedStream)
				.map(stream -> stream.collect(Collectors.toMap((Entry<Integer, Object> s) -> s.getKey(), s -> (Object)new RefHolder(s.getValue()), ShuffleHelper::group)));

		Stream<Entry<Integer, List<Object>>> normalizedPartitionStream = groupedPartitionsStream.flatMap(map -> map.entrySet().stream()).map(entry -> {
			Object value = entry.getValue();
			Entry<Integer, List<Object>> normalizedEntry = null;
			if (value instanceof RefHolder){
				// Still wrapped in the RefHolder created above, i.e. the merge function was
				// never invoked for this partition: it holds exactly one element.
				Object realValue = ((RefHolder) value).ref;
				if (realValue instanceof Entry){
					value = Stream.of((Entry) realValue).collect(Collectors.toMap(e -> e.getKey(), e -> Collections.singletonList(e.getValue())));
				}
				else {
					value = Stream.of(realValue).collect(Collectors.toList());
				}
			}
			if (value instanceof Map){
				Map vMap = (Map) value;
				// expose the grouped values of every key as an Iterator
				vMap.replaceAll((k, v) -> v instanceof List ? ((List)v).iterator() : new SingleValueIterator(v));
				// order the entries of the partition by key
				TreeMap<Object, Object> sortedMap = new TreeMap<>(vMap);
				normalizedEntry = KVUtils.kv(entry.getKey(), new ArrayList<>(sortedMap.entrySet()));
			}
			else {
				normalizedEntry = KVUtils.kv(entry.getKey(), (List)value);
			}
			return normalizedEntry;
		});
		return normalizedPartitionStream;
	}

	/**
	 * Resolves the classifier used to partition stage output. The parallelism is
	 * read from {@link DStreamConstants#PARALLELISM} and defaults to 1 when the
	 * property is absent. If {@link DStreamConstants#CLASSIFIER} is set, the value
	 * is handed to {@code ReflectionUtils.newInstance} together with the
	 * parallelism as the single {@code int} constructor argument; otherwise a
	 * {@link HashClassifier} is used.
	 *
	 * @return the classifier to use for this execution
	 * @throws NumberFormatException if the parallelism property is not an integer
	 */
	private Classifier determineClassifier(){
		String parallelismProp = this.executionConfig.getProperty(DStreamConstants.PARALLELISM);
		String partitionerProp = this.executionConfig.getProperty(DStreamConstants.CLASSIFIER);

		// values loaded from a properties file keep their trailing whitespace
		int parallelism = parallelismProp == null ? 1 : Integer.parseInt(parallelismProp.trim());

		return partitionerProp != null
				? ReflectionUtils.newInstance(partitionerProp, new Class[]{int.class}, new Object[]{parallelism})
				: new HashClassifier(parallelism);
	}
}