/** * (c) Copyright 2014 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kiji.mapreduce.pivot; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.generic.GenericRecordBuilder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.mapreduce.Counters; import org.codehaus.jackson.node.JsonNodeFactory; import org.junit.Assert; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.kiji.avro.dsl.JavaAvroDSL; import org.kiji.mapreduce.KijiMRTestLayouts; import org.kiji.mapreduce.KijiMapReduceJob; import org.kiji.mapreduce.avro.generated.CellRewriteSpec; import org.kiji.mapreduce.output.MapReduceJobOutputs; import org.kiji.schema.DecodedCell; import org.kiji.schema.Kiji; import org.kiji.schema.KijiCell; import org.kiji.schema.KijiClientTest; import org.kiji.schema.KijiDataRequest; import org.kiji.schema.KijiDataRequestBuilder.ColumnsDef; import org.kiji.schema.KijiRowData; import org.kiji.schema.KijiTable; import org.kiji.schema.KijiTableReader; import org.kiji.schema.KijiURI; import org.kiji.schema.layout.ColumnReaderSpec; import org.kiji.schema.util.InstanceBuilder; /** Tests for the KijiCellRewriter pivot M/R job. */ public class TestKijiCellRewriter extends KijiClientTest { private static final Logger LOG = LoggerFactory.getLogger(TestKijiCellRewriter.class); /** Table layout to test cell-rewrites. */ private static final String LAYOUT_TEST1 = "org/kiji/mapreduce/layout/org.kiji.mapreduce.pivot.TestKijiCellRewriter.test1.json"; /** Table layout to test chained cell-rewrites. */ private static final String LAYOUT_TEST2 = "org/kiji/mapreduce/layout/org.kiji.mapreduce.pivot.TestKijiCellRewriter.test2.json"; @Test public void testConvertAvro() throws Exception { final Schema intSchema = Schema.create(Schema.Type.INT); final Schema longSchema = Schema.create(Schema.Type.LONG); final DecodedCell<Object> original = new DecodedCell<Object>(intSchema, 1); Assert.assertEquals(1L, KijiCellRewriter.convertAvro(original, longSchema).getData()); } @Test public void testSimpleRewrite() throws Exception { final String tableName = "test1"; final Kiji kiji = new InstanceBuilder(getKiji()) .withTable(KijiMRTestLayouts.getLayout(LAYOUT_TEST1)) .withRow("row1") .withFamily("family") .withQualifier("qualifier") .withValue(1L, 1) .withValue(2L, 2) .withValue(3L, 3) .withRow("row2") .withFamily("family") .withQualifier("qualifier") .withValue(4L, 4) .withValue(5L, 5) .withValue(6L, 6) .build(); final KijiURI tableURI; // Check the initial content of the table: { final KijiTable table = kiji.openTable(tableName); try { tableURI = table.getURI(); final KijiTableReader reader = table.getReaderFactory().openTableReader(); try { final KijiDataRequest dataRequest = KijiDataRequest.builder() .addColumns(ColumnsDef.create() .withMaxVersions(HConstants.ALL_VERSIONS) .add("family", "qualifier", ColumnReaderSpec.avroWriterSchemaGeneric())) .build(); final KijiRowData row1 = reader.get(table.getEntityId("row1"), dataRequest); Assert.assertEquals(new Integer(1), row1.getValue("family", "qualifier", 1L)); Assert.assertEquals(new Integer(2), row1.getValue("family", "qualifier", 2L)); Assert.assertEquals(new Integer(3), row1.getValue("family", "qualifier", 3L)); final KijiRowData row2 = reader.get(table.getEntityId("row2"), dataRequest); Assert.assertEquals(new Integer(4), row2.getValue("family", "qualifier", 4L)); Assert.assertEquals(new Integer(5), row2.getValue("family", "qualifier", 5L)); Assert.assertEquals(new Integer(6), row2.getValue("family", "qualifier", 6L)); } finally { reader.close(); } } finally { table.release(); } } // Run the cell-rewriter job: final Schema intSchema = Schema.create(Schema.Type.INT); final Schema longSchema = Schema.create(Schema.Type.LONG); final CellRewriteSpec spec = CellRewriteSpec.newBuilder() .setColumn("family:qualifier") .setRules(ImmutableMap.<String, String>builder() .put(intSchema.toString(), longSchema.toString()) .build()) .build(); final Configuration conf = getConf(); final JavaAvroDSL avroDSL = new JavaAvroDSL(); conf.set( KijiCellRewriter.ConfKeys.spec.get(), avroDSL.valueToString(spec, CellRewriteSpec.getClassSchema())); final KijiMapReduceJob job = KijiPivotJobBuilder.create() .withConf(conf) .withPivoter(KijiCellRewriter.class) .withInputTable(tableURI) .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(tableURI)) .build(); Assert.assertTrue(job.run()); // Validate job-level expectations (Map/Reduce counters): final Counters counter = job.getHadoopJob().getCounters(); Assert.assertEquals(6, counter.findCounter(KijiCellRewriter.Counters.CELLS_PROCESSED).getValue()); Assert.assertEquals(6, counter.findCounter(KijiCellRewriter.Counters.CELLS_REWRITTEN).getValue()); // Validate the new, rewritten content of the job: { final KijiTable table = kiji.openTable(tableName); try { final KijiTableReader reader = table.getReaderFactory().openTableReader(); try { final KijiDataRequest dataRequest = KijiDataRequest.builder() .addColumns(ColumnsDef.create() .withMaxVersions(HConstants.ALL_VERSIONS) .add("family", "qualifier", ColumnReaderSpec.avroWriterSchemaGeneric())) .build(); final KijiRowData row1 = reader.get(table.getEntityId("row1"), dataRequest); Assert.assertEquals(new Long(1), row1.getValue("family", "qualifier", 1L)); Assert.assertEquals(new Long(2), row1.getValue("family", "qualifier", 2L)); Assert.assertEquals(new Long(3), row1.getValue("family", "qualifier", 3L)); final KijiRowData row2 = reader.get(table.getEntityId("row2"), dataRequest); Assert.assertEquals(new Long(4), row2.getValue("family", "qualifier", 4L)); Assert.assertEquals(new Long(5), row2.getValue("family", "qualifier", 5L)); Assert.assertEquals(new Long(6), row2.getValue("family", "qualifier", 6L)); } finally { reader.close(); } } finally { table.release(); } } } /** * Tests that chaining cell rewrites works as expected: * - TestRecord v1 instances will be rewritten as TestRecord v2 instances. * - TestRecord v2 instances will be rewritten as TestRecord v3 instances. * - TestRecord v3 instances are left unmodified. * At the end of the process, there should be no instance of TestRecord v1 or v2 left. */ @Test public void testChainedRewrite() throws Exception { final Schema intSchema = Schema.create(Schema.Type.INT); final Schema longSchema = Schema.create(Schema.Type.LONG); final Schema stringSchema = Schema.create(Schema.Type.STRING); final Schema recordV1 = Schema.createRecord("TestRecord", null, null, false); recordV1.setFields(Lists.newArrayList( new Field("int_field", intSchema, null, null), new Field("long_field", longSchema, null, null))); final Schema recordV2 = Schema.createRecord("TestRecord", null, null, false); recordV2.setFields(Lists.newArrayList( new Field("long_field", longSchema, null, null))); final Schema recordV3 = Schema.createRecord("TestRecord", null, null, false); recordV3.setFields(Lists.newArrayList( new Field("int_field", stringSchema, null, JsonNodeFactory.instance.textNode("")), new Field("long_field", longSchema, null, null))); final String tableName = "test2"; final Kiji kiji = new InstanceBuilder(getKiji()) .withTable(KijiMRTestLayouts.getLayout(LAYOUT_TEST2)) .withRow("row1") .withFamily("family") .withQualifier("qualifier") .withValue(1L, new GenericRecordBuilder(recordV1) .set("int_field", 1234) .set("long_field", 12345L) .build()) .withValue(2L, new GenericRecordBuilder(recordV2) .set("long_field", 12345L) .build()) .withValue(3L, new GenericRecordBuilder(recordV3) .set("int_field", "string") .set("long_field", 12345L) .build()) .build(); final KijiURI tableURI; // Check the initial content of the table: { final KijiTable table = kiji.openTable(tableName); try { tableURI = table.getURI(); final KijiTableReader reader = table.getReaderFactory().openTableReader(); try { final KijiDataRequest dataRequest = KijiDataRequest.builder() .addColumns(ColumnsDef.create() .withMaxVersions(HConstants.ALL_VERSIONS) .add("family", "qualifier", ColumnReaderSpec.avroWriterSchemaGeneric())) .build(); final KijiRowData row1 = reader.get(table.getEntityId("row1"), dataRequest); for (KijiCell<Object> cell : row1.asIterable("family", "qualifier")) { LOG.info("Row {} - Cell: {}", row1.getEntityId(), cell); } } finally { reader.close(); } } finally { table.release(); } } // Run the cell-rewriter job: final CellRewriteSpec spec = CellRewriteSpec.newBuilder() .setColumn("family:qualifier") .setRules(ImmutableMap.<String, String>builder() .put(recordV1.toString(), recordV2.toString()) .put(recordV2.toString(), recordV3.toString()) .build()) .build(); final Configuration conf = getConf(); final JavaAvroDSL avroDSL = new JavaAvroDSL(); conf.set( KijiCellRewriter.ConfKeys.spec.get(), avroDSL.valueToString(spec, CellRewriteSpec.getClassSchema())); final KijiMapReduceJob job = KijiPivotJobBuilder.create() .withConf(conf) .withPivoter(KijiCellRewriter.class) .withInputTable(tableURI) .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(tableURI)) .build(); Assert.assertTrue(job.run()); // Validate job-level expectations (Map/Reduce counters): final Counters counter = job.getHadoopJob().getCounters(); Assert.assertEquals(3, counter.findCounter(KijiCellRewriter.Counters.CELLS_PROCESSED).getValue()); Assert.assertEquals(2, counter.findCounter(KijiCellRewriter.Counters.CELLS_REWRITTEN).getValue()); // Validate the new, rewritten content of the job: { final KijiTable table = kiji.openTable(tableName); try { final KijiTableReader reader = table.getReaderFactory().openTableReader(); try { final KijiDataRequest dataRequest = KijiDataRequest.builder() .addColumns(ColumnsDef.create() .withMaxVersions(HConstants.ALL_VERSIONS) .add("family", "qualifier", ColumnReaderSpec.avroWriterSchemaGeneric())) .build(); final KijiRowData row1 = reader.get(table.getEntityId("row1"), dataRequest); for (KijiCell<Object> cell : row1.asIterable("family", "qualifier")) { LOG.info("Row {} - Cell: {}", row1.getEntityId(), cell); } Assert.assertEquals( new GenericRecordBuilder(recordV3) .set("int_field", "") .set("long_field", 12345L) .build(), row1.getValue("family", "qualifier", 1L)); Assert.assertEquals( new GenericRecordBuilder(recordV3) .set("int_field", "") .set("long_field", 12345L) .build(), row1.getValue("family", "qualifier", 2L)); Assert.assertEquals( new GenericRecordBuilder(recordV3) .set("int_field", "string") .set("long_field", 12345L) .build(), row1.getValue("family", "qualifier", 3L)); } finally { reader.close(); } } finally { table.release(); } } } }