// Copyright 2017 JanusGraph Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package org.janusgraph.hadoop;
import org.janusgraph.CassandraStorageSetup;
import org.janusgraph.core.JanusGraphVertex;
import org.janusgraph.diskstorage.*;
import org.janusgraph.diskstorage.cassandra.thrift.CassandraThriftStoreManager;
import org.janusgraph.diskstorage.configuration.*;
import org.janusgraph.diskstorage.keycolumnvalue.KeyColumnValueStore;
import org.janusgraph.diskstorage.keycolumnvalue.KeyColumnValueStoreManager;
import org.janusgraph.diskstorage.keycolumnvalue.StoreTransaction;
import org.janusgraph.diskstorage.keycolumnvalue.scan.ScanJob;
import org.janusgraph.diskstorage.util.StandardBaseTransactionConfig;
import org.janusgraph.diskstorage.util.time.TimestampProviders;
import org.janusgraph.graphdb.JanusGraphBaseTest;
import org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration;
import org.janusgraph.hadoop.config.JanusGraphHadoopConfiguration;
import org.janusgraph.hadoop.formats.cassandra.CassandraInputFormat;
import org.janusgraph.hadoop.scan.CassandraHadoopScanRunner;
import org.janusgraph.hadoop.scan.HadoopScanMapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Arrays;
import java.util.concurrent.ExecutionException;
import static org.junit.Assert.*;
/**
 * Integration tests that run JanusGraph scan jobs against Cassandra through Hadoop MapReduce.
 *
 * <p>{@link #testSimpleScan()} loads raw key/column data through a
 * {@link CassandraThriftStoreManager} and hands a {@link CassandraHadoopScanRunner}-backed runner
 * to {@code SimpleScanJob.runBasicTests}. The two partitioned-vertex tests build a small graph
 * whose vertices all carry a partitioned label and then run a mapper-less Hadoop job reading
 * through {@link CassandraInputFormat}, with and without the filter-partitioned-vertices setting.
 */
public class CassandraScanJobIT extends JanusGraphBaseTest {

    private static final Logger log = LoggerFactory.getLogger(CassandraScanJobIT.class);

    /** Partitioner class name passed to both the scan runner and the Hadoop input configuration. */
    private static final String MURMUR3_PARTITIONER = "org.apache.cassandra.dht.Murmur3Partitioner";

    /**
     * Loads generated key/column data into the {@code edgestore} store and runs the basic scan
     * job checks of {@code SimpleScanJob} through a Hadoop-based runner.
     *
     * @throws InterruptedException declared for {@code SimpleScanJob.runBasicTests}
     * @throws ExecutionException declared for {@code SimpleScanJob.runBasicTests}
     * @throws IOException declared for {@code SimpleScanJob.runBasicTests}
     * @throws BackendException if opening, loading or closing the store fails
     */
    @Test
    public void testSimpleScan()
            throws InterruptedException, ExecutionException, IOException, BackendException {
        int keys = 1000;
        int cols = 40;

        String[][] values = KeyValueStoreUtil.generateData(keys, cols);
        // Make it only half the number of columns for every 2nd key (the even indices)
        for (int i = 0; i < values.length; i++) {
            if (i % 2 == 0) {
                values[i] = Arrays.copyOf(values[i], cols / 2);
            }
        }
        log.debug("Loading values: {}x{}", keys, cols);

        // The manager and store are only needed for loading; release them even if loading fails.
        KeyColumnValueStoreManager mgr =
                new CassandraThriftStoreManager(GraphDatabaseConfiguration.buildGraphConfiguration());
        try {
            KeyColumnValueStore store = mgr.openDatabase("edgestore");
            try {
                StoreTransaction tx =
                        mgr.beginTransaction(StandardBaseTransactionConfig.of(TimestampProviders.MICRO));
                KeyColumnValueStoreUtil.loadValues(store, tx, values);
                tx.commit(); // noop on Cassandra, but harmless
            } finally {
                store.close();
            }
        } finally {
            mgr.close();
        }

        SimpleScanJobRunner runner = (ScanJob job, Configuration jobConf, String rootNSName) -> {
            try {
                return new CassandraHadoopScanRunner(job)
                        .scanJobConf(jobConf)
                        .scanJobConfRoot(rootNSName)
                        .partitionerOverride(MURMUR3_PARTITIONER)
                        .run();
            } catch (ClassNotFoundException e) {
                // The functional interface cannot propagate this checked exception; keep the cause.
                throw new RuntimeException(e);
            }
        };

        SimpleScanJob.runBasicTests(keys, cols, runner);
    }

    /**
     * Runs the vertex job over a graph of partitioned vertices without enabling
     * filter-partitioned-vertices and expects the job to report failure.
     *
     * @throws Exception if graph setup or job submission fails
     */
    @Test
    public void testPartitionedVertexScan() throws Exception {
        loadPartitionedVertices();

        Job job = getVertexJobWithDefaultMapper(newHadoopConfiguration(false));

        // The job is expected to fail (waitForCompletion returns false) since
        // filter-partitioned-vertices wasn't enabled
        assertFalse("job should fail when filter-partitioned-vertices is not enabled",
                job.waitForCompletion(true));
    }

    /**
     * Runs the vertex job over a graph of partitioned vertices with filter-partitioned-vertices
     * set to {@code "true"} and expects the job to complete successfully.
     *
     * @throws Exception if graph setup or job submission fails
     */
    @Test
    public void testPartitionedVertexFilteredScan() throws Exception {
        loadPartitionedVertices();

        Job job = getVertexJobWithDefaultMapper(newHadoopConfiguration(true));

        // Should succeed
        assertTrue("job should succeed when filter-partitioned-vertices is enabled",
                job.waitForCompletion(true));
    }

    /**
     * Resets the graph and populates it for the partitioned-vertex tests: defines the partitioned
     * vertex label {@code part}, adds one {@code part} vertex as the supernode, then adds 128 more
     * {@code part} vertices, each with a {@code default} edge to the supernode.
     *
     * @throws Exception if tearing down, clearing or reopening the graph fails
     */
    private void loadPartitionedVertices() throws Exception {
        tearDown();
        clearGraph(getConfiguration());
        WriteConfiguration partConf = getConfiguration();
        open(partConf);
        mgmt.makeVertexLabel("part").partition().make();
        finishSchema();

        JanusGraphVertex supernode = graph.addVertex("part");
        for (int i = 0; i < 128; i++) {
            JanusGraphVertex v = graph.addVertex("part");
            v.addEdge("default", supernode);
            // Commit after every fourth vertex (skipping i == 0) so the load spans several transactions
            if (0 < i && 0 == i % 4) {
                graph.tx().commit();
            }
        }
        graph.tx().commit();
    }

    /**
     * Builds the Hadoop configuration shared by the partitioned-vertex tests: the keyspace is this
     * class's simple name, the backend is {@code cassandrathrift} and the input partitioner is
     * Murmur3.
     *
     * @param filterPartitionedVertices whether to set the filter-partitioned-vertices option to
     *        {@code "true"}; when {@code false} the option is left unset
     * @return a new Hadoop configuration holding those settings
     */
    private org.apache.hadoop.conf.Configuration newHadoopConfiguration(boolean filterPartitionedVertices) {
        org.apache.hadoop.conf.Configuration c = new org.apache.hadoop.conf.Configuration();
        String graphConfigPrefix =
                ConfigElement.getPath(JanusGraphHadoopConfiguration.GRAPH_CONFIG_KEYS, true) + ".";
        c.set(graphConfigPrefix + "storage.cassandra.keyspace", getClass().getSimpleName());
        c.set(graphConfigPrefix + "storage.backend", "cassandrathrift");
        if (filterPartitionedVertices) {
            c.set(ConfigElement.getPath(JanusGraphHadoopConfiguration.FILTER_PARTITIONED_VERTICES), "true");
        }
        c.set("cassandra.input.partitioner.class", MURMUR3_PARTITIONER);
        return c;
    }

    /**
     * Creates a map-only Hadoop job (zero reducers) that reads through
     * {@link CassandraInputFormat}, discards output via {@link NullOutputFormat} and does not set
     * a mapper class explicitly.
     *
     * <p>The job name is always {@code testPartitionedVertexScan}, regardless of which test
     * requested the job.
     *
     * @param c the Hadoop configuration the job is created from
     * @return the configured, not yet submitted job
     * @throws IOException if {@link Job#getInstance(org.apache.hadoop.conf.Configuration)} fails
     */
    private Job getVertexJobWithDefaultMapper(org.apache.hadoop.conf.Configuration c) throws IOException {
        Job job = Job.getInstance(c);

        job.setJarByClass(HadoopScanMapper.class);
        job.setJobName("testPartitionedVertexScan");
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setInputFormatClass(CassandraInputFormat.class);

        return job;
    }

    /**
     * Returns the graph configuration produced by
     * {@code CassandraStorageSetup.getEmbeddedConfiguration}, passing this class's simple name
     * (presumably used as the keyspace, matching the keyspace the Hadoop jobs are pointed at).
     *
     * @return the write configuration backing the test graph
     */
    @Override
    public WriteConfiguration getConfiguration() {
        String className = getClass().getSimpleName();
        ModifiableConfiguration mc = CassandraStorageSetup.getEmbeddedConfiguration(className);
        return mc.getConfiguration();
    }
}