package org.apache.cassandra.hadoop2.multiquery;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;

import java.io.IOException;
import java.util.List;
import java.util.Random;
import java.util.Set;

import com.datastax.driver.core.PreparedStatement;
import com.datastax.driver.core.Row;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import org.apache.hadoop.mapreduce.InputSplit;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Run a few simple sanity checks for the input format.
 *
 * <p>The single test writes {@link #NUM_VALUES} distinct random integers into a freshly created
 * table, reads the table back split-by-split through {@link MultiQueryCqlInputFormat} and
 * {@link MultiQueryRecordReader}, and checks that exactly the written values come back, each
 * one once.</p>
 *
 * <p>NOTE(review): {@code mSession} and {@code mConf} are not declared in this class; they are
 * presumably provided by {@link BaseInputFormatTest} -- confirm there.</p>
 */
public class TestInputFormat extends BaseInputFormatTest {
  private static final Logger LOG = LoggerFactory.getLogger(TestInputFormat.class);

  /** Keyspace that holds the test table. */
  private static final String KEYSPACE_BIG = "big";

  /** Name of the test table. */
  private static final String TABLE_BIG = "big";

  /** Name of the INT primary-key column. */
  private static final String COL_KEY = "foo";

  /** Name of the INT value column. */
  private static final String COL_VAL = "bar";

  /** Number of distinct rows written to (and expected back from) the table. */
  private static final long NUM_VALUES = 10L;

  /** The distinct values generated by {@link #createLotsOfData()}; the expected read result. */
  private Set<Integer> mDataInTable;

  /**
   * Creates the keyspace (if it does not exist yet) and the test table.
   *
   * <p>The table creation has no {@code IF NOT EXISTS} clause, so this statement is issued
   * unconditionally.</p>
   */
  private void createNewTable() {
    mSession.execute(String.format(
        "CREATE KEYSPACE IF NOT EXISTS %s "
            + "WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 3 };",
        KEYSPACE_BIG
    ));

    mSession.execute(String.format(
        "CREATE TABLE %s.%s (%s INT PRIMARY KEY, %s INT)",
        KEYSPACE_BIG, TABLE_BIG, COL_KEY, COL_VAL));
  }

  /**
   * Fills {@link #mDataInTable} with {@link #NUM_VALUES} distinct random integers.
   *
   * <p>The generator is unseeded, so the values differ from run to run.</p>
   */
  private void createLotsOfData() {
    Random random = new Random();
    mDataInTable = Sets.newHashSet();

    // Set.add ignores duplicates, so keep drawing until enough distinct values are collected.
    while (mDataInTable.size() < NUM_VALUES) {
      mDataInTable.add(random.nextInt());
    }

    // This validates object state rather than a method argument, hence checkState.
    Preconditions.checkState(mDataInTable.size() == NUM_VALUES);
  }

  /**
   * Inserts one row per element of {@link #mDataInTable}, using the element as both the key
   * column and the value column.
   */
  private void insertDataIntoTable() {
    PreparedStatement insertStatement = mSession.prepare(String.format(
        "INSERT INTO %s.%s (%s, %s) VALUES (?, ?)",
        KEYSPACE_BIG, TABLE_BIG, COL_KEY, COL_VAL
    ));

    for (int val : mDataInTable) {
      mSession.execute(insertStatement.bind(val, val));
    }
  }

  /**
   * Writes the generated values to the table, reads every input split back with a
   * {@link MultiQueryRecordReader}, and asserts that each record holds exactly one row, that no
   * value is seen twice, and that the set of values read equals the set written.
   */
  @Test
  public void testInputFormat() {
    // Create a new table and populate it with NUM_VALUES distinct random rows.
    createNewTable();
    createLotsOfData();
    insertDataIntoTable();

    ConfigHelper.setInputCqlQuery(
        mConf,
        CqlQuerySpec.builder()
            .withKeyspace(KEYSPACE_BIG)
            .withTable(TABLE_BIG)
            .build()
    );

    MultiQueryCqlInputFormat inputFormat = new MultiQueryCqlInputFormat();

    try {
      List<InputSplit> inputSplits = inputFormat.getSplitsFromConf(mConf);
      MultiQueryRecordReader recordReader = new MultiQueryRecordReader();
      Set<Integer> readValues = Sets.newHashSet();

      for (InputSplit inputSplit : inputSplits) {
        // The same reader instance is re-initialized for every split.
        recordReader.initializeWithConf(inputSplit, mConf);
        while (recordReader.nextKeyValue()) {
          List<Row> rows = recordReader.getCurrentValue();
          assertEquals(1, rows.size());

          int val = rows.get(0).getInt(COL_VAL);
          // A value showing up twice means the splits overlap or a row was read more than once.
          assertFalse(readValues.contains(val));
          readValues.add(val);
        }
      }

      assertEquals(mDataInTable, readValues);
    } catch (IOException ioe) {
      // Fail the test, but keep the original exception so the report shows what went wrong.
      AssertionError failure =
          new AssertionError("I/O failure while reading input splits: " + ioe);
      failure.initCause(ioe);
      throw failure;
    }
  }
}