/* * Copyright © 2014-2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data.stream; import co.cask.cdap.common.io.Locations; import co.cask.cdap.data.file.FileReader; import co.cask.cdap.data.file.PositionReporter; import co.cask.cdap.data.file.ReadFilter; import co.cask.cdap.data2.transaction.stream.StreamConfig; import com.google.common.base.Function; import com.google.common.base.Stopwatch; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.primitives.Longs; import it.unimi.dsi.fastutil.PriorityQueue; import it.unimi.dsi.fastutil.objects.ObjectHeapPriorityQueue; import java.io.Closeable; import java.io.IOException; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; import javax.annotation.concurrent.NotThreadSafe; /** * A {@link FileReader} that combines multiple event stream into single event stream. */ @NotThreadSafe public final class MultiLiveStreamFileReader implements FileReader<StreamEventOffset, Iterable<StreamFileOffset>> { private final PriorityQueue<StreamEventSource> eventSources; private final Set<StreamEventSource> emptySources; private final Set<StreamEventSource> allSources; private final Iterable<StreamFileOffset> offsetsView; public MultiLiveStreamFileReader(StreamConfig streamConfig, Iterable<? extends StreamFileOffset> offsets) { this.allSources = Sets.newTreeSet(); for (StreamFileOffset source : offsets) { StreamEventSource eventSource = new StreamEventSource(streamConfig, source); allSources.add(eventSource); } this.eventSources = new ObjectHeapPriorityQueue<>(allSources.size()); this.emptySources = Sets.newHashSet(allSources); this.offsetsView = Iterables.transform(allSources, new Function<StreamEventSource, StreamFileOffset>() { @Override public StreamFileOffset apply(StreamEventSource input) { return input.getPosition(); } }); } @Override public void initialize() throws IOException { for (StreamEventSource source : allSources) { source.initialize(); } } @Override public int read(Collection<? super StreamEventOffset> events, int maxEvents, long timeout, TimeUnit unit) throws IOException, InterruptedException { return read(events, maxEvents, timeout, unit, ReadFilter.ALWAYS_ACCEPT); } @Override public int read(Collection<? super StreamEventOffset> events, int maxEvents, long timeout, TimeUnit unit, ReadFilter readFilter) throws IOException, InterruptedException { int eventsRead = 0; Stopwatch stopwatch = new Stopwatch(); stopwatch.start(); while (eventsRead < maxEvents && !(emptySources.isEmpty() && eventSources.isEmpty())) { if (!emptySources.isEmpty()) { prepareEmptySources(readFilter); } eventsRead += read(events, readFilter); if (eventSources.isEmpty() && stopwatch.elapsedTime(unit) >= timeout) { break; } } return (eventsRead == 0 && emptySources.isEmpty() && eventSources.isEmpty()) ? -1 : eventsRead; } @Override public Iterable<StreamFileOffset> getPosition() { return offsetsView; } /** * For all sources that doesn't have any event buffered, try to read an event and put it in the priority queue * if event is available. */ private void prepareEmptySources(ReadFilter readFilter) throws IOException, InterruptedException { Iterator<StreamEventSource> iterator = emptySources.iterator(); while (iterator.hasNext()) { StreamEventSource source = iterator.next(); int len = source.prepare(readFilter); if (len != 0) { iterator.remove(); if (len > 0) { eventSources.enqueue(source); } } } } private int read(Collection<? super StreamEventOffset> events, ReadFilter readFilter) throws IOException, InterruptedException { if (eventSources.isEmpty()) { return 0; } StreamEventSource source = eventSources.first(); source.read(events); int res = source.prepare(readFilter); if (res > 0) { eventSources.changed(); } else if (res <= 0) { eventSources.dequeue(); if (res == 0) { emptySources.add(source); } } return 1; } @Override public void close() throws IOException { for (StreamEventSource source : allSources) { source.close(); } emptySources.clear(); eventSources.clear(); } private static final class StreamEventSource implements Comparable<StreamEventSource>, Closeable, PositionReporter<StreamFileOffset> { private final FileReader<PositionStreamEvent, StreamFileOffset> reader; private final List<PositionStreamEvent> events; private StreamFileOffset currentOffset; private StreamFileOffset nextOffset; private StreamEventSource(StreamConfig streamConfig, StreamFileOffset beginOffset) { this.reader = new LiveStreamFileReader(streamConfig, beginOffset); this.events = Lists.newArrayListWithCapacity(1); this.currentOffset = new StreamFileOffset(beginOffset); this.nextOffset = beginOffset; } void initialize() throws IOException { reader.initialize(); currentOffset = reader.getPosition(); } void read(Collection<? super StreamEventOffset> result) throws IOException, InterruptedException { // Pop the cached event and use the event start position as the event offset being returned. PositionStreamEvent streamEvent = events.get(0); // Use nextOffset location to construct file offset // because the actual file location can only be determined by a read to a LiveFileReader, // hence located inside nextOffset StreamFileOffset resultOffset = new StreamFileOffset(nextOffset.getEventLocation(), streamEvent.getStart(), nextOffset.getGeneration()); result.add(new StreamEventOffset(streamEvent, resultOffset)); events.clear(); // Updates current offset information to be after the current event. currentOffset = nextOffset; } /** * Tries to read one event from the stream source. * * @return {@code 1} if an event is available from the source. * {@code 0} if no event is available. * {@code -1} if reached end of source. * @throws IOException * @throws InterruptedException */ int prepare(ReadFilter readFilter) throws IOException, InterruptedException { if (events.isEmpty()) { int res = reader.read(events, 1, 0L, TimeUnit.MILLISECONDS, readFilter); nextOffset = reader.getPosition(); return res; } return 1; } @Override public int compareTo(StreamEventSource other) { if (this == other) { return 0; } // No event always come first. long ts = events.isEmpty() ? 0L : events.get(0).getTimestamp(); long otherTs = other.events.isEmpty() ? 0L : other.events.get(0).getTimestamp(); // Compare by timestamp int cmp = Longs.compare(ts, otherTs); if (cmp != 0) { return cmp; } // Tie break by file path return Locations.LOCATION_COMPARATOR.compare(getPosition().getEventLocation(), other.getPosition().getEventLocation()); } @Override public void close() throws IOException { reader.close(); } @Override public StreamFileOffset getPosition() { return currentOffset; } } }