/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.nifi.processors.kite; import com.google.common.collect.Lists; import java.io.IOException; import java.io.OutputStream; import java.nio.charset.Charset; import java.util.List; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; import org.apache.avro.generic.GenericData.Record; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunners; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; import org.kitesdk.data.Dataset; import org.kitesdk.data.DatasetDescriptor; import org.kitesdk.data.Datasets; import org.kitesdk.data.spi.DefaultConfiguration; import org.kitesdk.minicluster.HdfsService; import org.kitesdk.minicluster.HiveService; import org.kitesdk.minicluster.MiniCluster; import static org.apache.nifi.processors.kite.TestUtil.USER_SCHEMA; import static org.apache.nifi.processors.kite.TestUtil.bytesFor; import static org.apache.nifi.processors.kite.TestUtil.streamFor; import static org.apache.nifi.processors.kite.TestUtil.user; @Ignore("Does not work on windows") public class TestKiteProcessorsCluster { public static MiniCluster cluster = null; public static DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schema(USER_SCHEMA) .build(); @BeforeClass public static void startCluster() throws IOException, InterruptedException { long rand = Math.abs((long) (Math.random() * 1000000)); cluster = new MiniCluster.Builder() .workDir("/tmp/minicluster-" + rand) .clean(true) .addService(HdfsService.class) .addService(HiveService.class) .bindIP("127.0.0.1") .hiveMetastorePort(9083) .build(); cluster.start(); } @AfterClass public static void stopCluster() throws IOException, InterruptedException { if (cluster != null) { cluster.stop(); cluster = null; } } @Test public void testBasicStoreToHive() throws IOException { String datasetUri = "dataset:hive:ns/test"; Dataset<Record> dataset = Datasets.create(datasetUri, descriptor, Record.class); TestRunner runner = TestRunners.newTestRunner(StoreInKiteDataset.class); runner.assertNotValid(); runner.setProperty(StoreInKiteDataset.KITE_DATASET_URI, datasetUri); runner.assertValid(); List<Record> users = Lists.newArrayList( user("a", "a@example.com"), user("b", "b@example.com"), user("c", "c@example.com") ); runner.enqueue(streamFor(users)); runner.run(); runner.assertAllFlowFilesTransferred("success", 1); List<Record> stored = Lists.newArrayList( (Iterable<Record>) dataset.newReader()); Assert.assertEquals("Records should match", users, stored); Datasets.delete(datasetUri); } @Test public void testSchemaFromDistributedFileSystem() throws IOException { Schema expected = SchemaBuilder.record("Test").fields() .requiredLong("id") .requiredString("color") .optionalDouble("price") .endRecord(); Path schemaPath = new Path("hdfs:/tmp/schema.avsc"); FileSystem fs = schemaPath.getFileSystem(DefaultConfiguration.get()); OutputStream out = fs.create(schemaPath); out.write(bytesFor(expected.toString(), Charset.forName("utf8"))); out.close(); Schema schema = AbstractKiteProcessor.getSchema( schemaPath.toString(), DefaultConfiguration.get()); Assert.assertEquals("Schema from file should match", expected, schema); } }