// Copyright 2017 JanusGraph Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package org.janusgraph.hadoop.formats.cassandra;
import org.janusgraph.diskstorage.Entry;
import org.janusgraph.diskstorage.StaticBuffer;
import org.janusgraph.diskstorage.util.StaticArrayBuffer;
import org.janusgraph.diskstorage.util.StaticArrayEntry;
import org.apache.cassandra.db.Cell;
import org.apache.cassandra.hadoop.ColumnFamilyRecordReader;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
/**
* Wraps a ColumnFamilyRecordReader and converts CFRR's binary types to JanusGraph binary types.
*/
public class CassandraBinaryRecordReader extends RecordReader<StaticBuffer, Iterable<Entry>> {
private ColumnFamilyRecordReader reader;
private KV currentKV;
private KV incompleteKV;
public CassandraBinaryRecordReader(final ColumnFamilyRecordReader reader) {
this.reader = reader;
}
@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
reader.initialize(inputSplit, taskAttemptContext);
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
return null != (currentKV = completeNextKV());
}
private KV completeNextKV() throws IOException {
KV completedKV = null;
boolean hasNext;
do {
hasNext = reader.nextKeyValue();
if (!hasNext) {
completedKV = incompleteKV;
incompleteKV = null;
} else {
StaticArrayBuffer key = StaticArrayBuffer.of(reader.getCurrentKey());
SortedMap<ByteBuffer, Cell> valueSortedMap = reader.getCurrentValue();
List<Entry> entries = new ArrayList<>(valueSortedMap.size());
for (Map.Entry<ByteBuffer, Cell> ent : valueSortedMap.entrySet()) {
ByteBuffer col = ent.getKey();
ByteBuffer val = ent.getValue().value();
entries.add(StaticArrayEntry.of(StaticArrayBuffer.of(col), StaticArrayBuffer.of(val)));
}
if (null == incompleteKV) {
// Initialization; this should happen just once in an instance's lifetime
incompleteKV = new KV(key);
} else if (!incompleteKV.key.equals(key)) {
// The underlying Cassandra reader has just changed to a key we haven't seen yet
// This implies that there will be no more entries for the prior key
completedKV = incompleteKV;
incompleteKV = new KV(key);
}
incompleteKV.addEntries(entries);
}
/* Loop ends when either
* A) the cassandra reader ran out of data
* or
* B) the cassandra reader switched keys, thereby completing a KV */
} while (hasNext && null == completedKV);
return completedKV;
}
@Override
public StaticBuffer getCurrentKey() throws IOException, InterruptedException {
return currentKV.key;
}
@Override
public Iterable<Entry> getCurrentValue() throws IOException, InterruptedException {
return currentKV.entries;
}
@Override
public void close() throws IOException {
reader.close();
}
@Override
public float getProgress() {
return reader.getProgress();
}
private static class KV {
private final StaticArrayBuffer key;
private ArrayList<Entry> entries;
public KV(StaticArrayBuffer key) {
this.key = key;
}
public void addEntries(Collection<Entry> toAdd) {
if (null == entries)
entries = new ArrayList<>(toAdd.size());
entries.addAll(toAdd);
}
}
}