/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.data.crunch;

import java.io.IOException;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.util.Utf8;
import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.Target;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetReader;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.View;
import org.kitesdk.data.hbase.HBaseDatasetRepository;
import org.kitesdk.data.hbase.HBaseDatasetRepositoryTest;
import org.kitesdk.data.hbase.avro.AvroUtils;
import org.kitesdk.data.hbase.testing.HBaseTestUtils;
import org.kitesdk.data.spi.DatasetRepository;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.kitesdk.data.spi.filesystem.DatasetTestUtilities.datasetSize;

/**
 * Tests that copy generic records between two HBase-backed datasets through
 * a Crunch {@link MRPipeline}, using {@link CrunchDatasets} for the source
 * and the target.
 */
public class TestCrunchDatasetsHBase {

  /** Schema literal loaded from the {@code /TestGenericEntity.avsc} resource. */
  private static final String testGenericEntity;
  static {
    try {
      testGenericEntity = AvroUtils.inputStreamToString(TestCrunchDatasetsHBase.class
          .getResourceAsStream("/TestGenericEntity.avsc"));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  private DatasetRepository repo;

  private static final String tableName = "testtable";
  private static final String managedTableName = "managed_schemas";

  /** Name of the dataset the tests read from. */
  private static final String inputDatasetName = "in";
  /** Name of the dataset the tests write to. */
  private static final String outputDatasetName = tableName + ".TestGenericEntity";

  @BeforeClass
  public static void beforeClass() throws Exception {
    HBaseTestUtils.getMiniCluster();
    // managed table should be created by HBaseDatasetRepository
    HBaseTestUtils.util.deleteTable(Bytes.toBytes(managedTableName));
  }

  @AfterClass
  public static void afterClass() throws Exception {
    HBaseTestUtils.util.deleteTable(Bytes.toBytes(tableName));
    if (HBaseTestUtils.getMiniCluster() != null) {
      HBaseTestUtils.util.shutdownMiniHBaseCluster();
      HBaseTestUtils.util.shutdownMiniDFSCluster();
    }
  }

  @Before
  public void setUp() throws Exception {
    this.repo = new HBaseDatasetRepository.Builder()
        .configuration(HBaseTestUtils.getConf()).build();
  }

  @After
  public void after() throws Exception {
    HBaseTestUtils.util.truncateTable(Bytes.toBytes(tableName));
    HBaseTestUtils.util.truncateTable(Bytes.toBytes(managedTableName));
  }

  /**
   * Writes ten records to the input dataset, copies the whole dataset with a
   * pipeline and checks that all ten records arrive in the output dataset.
   */
  @Test
  public void testGeneric() throws IOException {
    Dataset<GenericRecord> inputDataset = createDataset(inputDatasetName);
    Dataset<GenericRecord> outputDataset = createDataset(outputDatasetName);

    writeRecords(inputDataset, 10);

    copyWithCrunch(inputDataset, outputDataset);

    checkRecords(outputDataset, 10, 0);
  }

  /**
   * Writes ten records to the input dataset, copies only a bounded view of it
   * with a pipeline and checks that exactly the six records of that view,
   * beginning with record index 2, arrive in the output dataset.
   */
  @Test
  public void testSourceView() throws IOException {
    Dataset<GenericRecord> inputDataset = createDataset(inputDatasetName);
    Dataset<GenericRecord> outputDataset = createDataset(outputDatasetName);

    writeRecords(inputDataset, 10);

    View<GenericRecord> inputView = inputDataset
        .from("part1", new Utf8("part1_2")).to("part1", new Utf8("part1_7"))
        .from("part2", new Utf8("part2_2")).to("part2", new Utf8("part2_7"));
    // Guard the precondition of the copy: the view itself must hold 6 records.
    assertEquals(6, datasetSize(inputView));

    copyWithCrunch(inputView, outputDataset);

    checkRecords(outputDataset, 6, 2);
  }

  /**
   * Creates a dataset in the {@code "default"} namespace of the test
   * repository, described by the generic test entity schema.
   *
   * @param name the dataset name passed to the repository
   * @return the newly created dataset
   */
  private Dataset<GenericRecord> createDataset(String name) {
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaLiteral(testGenericEntity)
        .build();
    return repo.create("default", name, descriptor);
  }

  /**
   * Reads {@code source} and appends its records to {@code target} through an
   * {@link MRPipeline}, failing the test if the pipeline reports that it did
   * not succeed.
   *
   * @param source the view (or whole dataset) to read from
   * @param target the dataset to append to
   */
  private void copyWithCrunch(View<GenericRecord> source,
      Dataset<GenericRecord> target) throws IOException {
    Pipeline pipeline = new MRPipeline(TestCrunchDatasetsHBase.class,
        HBaseTestUtils.getConf());
    PCollection<GenericRecord> data = pipeline.read(
        CrunchDatasets.asSource(source));
    pipeline.write(data, CrunchDatasets.asTarget(target),
        Target.WriteMode.APPEND);
    // Check the run result so a failed job is reported as such instead of
    // surfacing later as a record-count mismatch.
    assertTrue("Crunch pipeline did not succeed", pipeline.run().succeeded());
  }

  /**
   * Writes {@code count} generic entities, built from the indexes
   * {@code 0..count-1}, to the given dataset. The writer is always closed.
   *
   * @param dataset the dataset to write to
   * @param count the number of entities to write
   */
  private void writeRecords(Dataset<GenericRecord> dataset, int count) {
    DatasetWriter<GenericRecord> writer = dataset.newWriter();
    try {
      for (int i = 0; i < count; ++i) {
        GenericRecord entity = HBaseDatasetRepositoryTest.createGenericEntity(i);
        writer.write(entity);
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Reads every record of the dataset, compares the k-th record read with the
   * entity expected for index {@code start + k}, and asserts that exactly
   * {@code count} records were read. The reader is always closed.
   *
   * @param dataset the dataset to verify
   * @param count the expected number of records
   * @param start the entity index expected for the first record read
   */
  private void checkRecords(Dataset<GenericRecord> dataset, int count, int start) {
    int cnt = start;
    DatasetReader<GenericRecord> reader = dataset.newReader();
    try {
      for (GenericRecord entity : reader) {
        HBaseDatasetRepositoryTest.compareEntitiesWithUtf8(cnt, entity);
        cnt++;
      }
      assertEquals(count, cnt - start);
    } finally {
      reader.close();
    }
  }
}