/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.io.kinesis;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.Lists.newArrayList;
import java.io.IOException;
import java.util.List;
import java.util.NoSuchElementException;
import org.apache.beam.sdk.io.UnboundedSource;
import org.joda.time.Instant;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Reads data from multiple kinesis shards in a single thread.
 * It uses simple round robin algorithm when fetching data from shards.
 *
 * <p>The reader keeps one {@link ShardRecordsIterator} per shard checkpoint and remembers the
 * record returned by the most recent successful {@link #start()} / {@link #advance()} call.
 */
class KinesisReader extends UnboundedSource.UnboundedReader<KinesisRecord> {

  private static final Logger LOG = LoggerFactory.getLogger(KinesisReader.class);

  private final SimplifiedKinesisClient kinesis;
  private final UnboundedSource<KinesisRecord, ?> source;
  private final CheckpointGenerator initialCheckpointGenerator;
  // Created in start(); null until then.
  private RoundRobin<ShardRecordsIterator> shardIterators;
  // Record exposed by the getCurrent*() accessors; absent when there is nothing to return.
  private CustomOptional<KinesisRecord> currentRecord = CustomOptional.absent();

  /**
   * Creates a reader that has not been started yet.
   *
   * @param kinesis client used to talk to Kinesis; must not be null
   * @param initialCheckpointGenerator produces the checkpoint the reader starts from;
   *     must not be null
   * @param source the source returned by {@link #getCurrentSource()}; stored as-is
   *     (it is not null-checked here)
   */
  public KinesisReader(SimplifiedKinesisClient kinesis,
      CheckpointGenerator initialCheckpointGenerator,
      UnboundedSource<KinesisRecord, ?> source) {
    this.kinesis = checkNotNull(kinesis, "kinesis");
    this.initialCheckpointGenerator =
        checkNotNull(initialCheckpointGenerator, "initialCheckpointGenerator");
    this.source = source;
  }

  /**
   * Generates initial checkpoint and instantiates iterators for shards.
   *
   * @return the result of the first {@link #advance()} call
   * @throws IOException if generating the checkpoint or creating a shard iterator fails with a
   *     {@link TransientKinesisException}
   */
  @Override
  public boolean start() throws IOException {
    LOG.info("Starting reader using {}", initialCheckpointGenerator);

    try {
      KinesisReaderCheckpoint initialCheckpoint =
          initialCheckpointGenerator.generate(kinesis);
      List<ShardRecordsIterator> iterators = newArrayList();
      for (ShardCheckpoint checkpoint : initialCheckpoint) {
        iterators.add(checkpoint.getShardRecordsIterator(kinesis));
      }
      shardIterators = new RoundRobin<>(iterators);
    } catch (TransientKinesisException e) {
      throw new IOException(e);
    }

    return advance();
  }

  /**
   * Moves to the next record in one of the shards.
   * If current shard iterator can be moved forward (i.e. there's a record present) then we do it.
   * If not, we iterate over shards in a round-robin manner.
   *
   * <p>A {@link TransientKinesisException} is logged and treated as "no record available right
   * now": the method returns {@code false} and the previously returned record is dropped, so the
   * {@code getCurrent*()} accessors do not keep serving a stale record.
   *
   * @return {@code true} if a record was found, {@code false} if every shard iterator was tried
   *     without finding one or a transient Kinesis error occurred
   */
  @Override
  public boolean advance() throws IOException {
    try {
      // Try each shard iterator at most once per call.
      for (int i = 0; i < shardIterators.size(); ++i) {
        currentRecord = shardIterators.getCurrent().next();
        if (currentRecord.isPresent()) {
          return true;
        } else {
          shardIterators.moveForward();
        }
      }
    } catch (TransientKinesisException e) {
      LOG.warn("Transient exception occurred", e);
      // next() threw before the assignment above, so currentRecord still holds the record
      // from the previous successful call. Clear it: we are about to report "no record".
      currentRecord = CustomOptional.absent();
    }
    return false;
  }

  /**
   * Returns the unique id of the current record.
   *
   * @throws NoSuchElementException presumably raised by {@code CustomOptional#get()} when no
   *     current record is available
   */
  @Override
  public byte[] getCurrentRecordId() throws NoSuchElementException {
    return currentRecord.get().getUniqueId();
  }

  /**
   * Returns the record found by the last successful {@link #start()} or {@link #advance()}.
   *
   * @throws NoSuchElementException presumably raised by {@code CustomOptional#get()} when no
   *     current record is available
   */
  @Override
  public KinesisRecord getCurrent() throws NoSuchElementException {
    return currentRecord.get();
  }

  /**
   * When {@link KinesisReader} was advanced to the current record.
   * We cannot use approximate arrival timestamp given for each record by Kinesis as it
   * is not guaranteed to be accurate - this could lead to mark some records as "late"
   * even if they were not.
   */
  @Override
  public Instant getCurrentTimestamp() throws NoSuchElementException {
    return currentRecord.get().getReadTime();
  }

  /** Does nothing; this reader performs no work on close. */
  @Override
  public void close() throws IOException {
  }

  /**
   * Current time.
   * We cannot give better approximation of the watermark with current semantics of
   * {@link KinesisReader#getCurrentTimestamp()}, because we don't know when the next
   * {@link KinesisReader#advance()} will be called.
   */
  @Override
  public Instant getWatermark() {
    return Instant.now();
  }

  /** Returns a checkpoint mark built from the current state of the shard iterators. */
  @Override
  public UnboundedSource.CheckpointMark getCheckpointMark() {
    return KinesisReaderCheckpoint.asCurrentStateOf(shardIterators);
  }

  /** Returns the source passed to the constructor. */
  @Override
  public UnboundedSource<KinesisRecord, ?> getCurrentSource() {
    return source;
  }
}