/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.serde;

import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;
import com.google.common.io.Closer;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.SourceState;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.converter.DataConversionException;
import gobblin.converter.serde.HiveSerDeConverter;
import gobblin.source.extractor.DataRecordException;
import gobblin.source.extractor.hadoop.OldApiWritableFileExtractor;
import gobblin.source.extractor.hadoop.OldApiWritableFileSource;
import gobblin.source.workunit.WorkUnit;
import gobblin.util.HadoopUtils;
import gobblin.writer.Destination;
import gobblin.writer.Destination.DestinationType;
import gobblin.writer.HiveWritableHdfsDataWriter;
import gobblin.writer.HiveWritableHdfsDataWriterBuilder;
import gobblin.writer.WriterOutputFormat;


/**
 * Unit test for data ingestion using Hive SerDes.
 *
 * @author Ziyang Liu
 */
public class HiveSerDeTest {

  private FileSystem fs;

  @BeforeClass
  public void setUp() throws IOException {
    this.fs = FileSystem.get(new Configuration());
  }

  /**
   * This test uses the Avro SerDe to deserialize records from Avro files, and the ORC SerDe
   * to serialize them into ORC files.
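   *
   * <p>
   *   The test is driven by {@code gobblin-core/src/test/resources/serde/serde.properties}.
   *   A minimal sketch of the kind of settings that file is assumed to provide is shown below;
   *   the SerDe-type key names and all concrete values are illustrative assumptions, not the
   *   literal contents of the file:
   * </p>
   *
   * <pre>
   *   # Input Avro file(s) to pull (hypothetical path)
   *   source.filebased.files.to.pull=gobblin-core/src/test/resources/serde/input.avro
   *   # Assumed keys selecting the SerDes applied by HiveSerDeConverter
   *   serde.deserializer.type=AVRO
   *   serde.serializer.type=ORC
   *   # Output directory and file name, asserted at the end of the test
   *   writer.output.dir=serde-test-output
   *   writer.file.name=writer-output.orc
   * </pre>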
   */
  @Test(groups = { "gobblin.serde" })
  public void testAvroOrcSerDes() throws IOException, DataRecordException, DataConversionException {
    Properties properties = new Properties();
    // Load the job configuration, closing the reader to avoid leaking the file handle.
    try (FileReader propsReader = new FileReader("gobblin-core/src/test/resources/serde/serde.properties")) {
      properties.load(propsReader);
    }
    SourceState sourceState = new SourceState(new State(properties), ImmutableList.<WorkUnitState> of());

    // The source should create exactly one work unit for the configured input.
    OldApiWritableFileSource source = new OldApiWritableFileSource();
    List<WorkUnit> workUnits = source.getWorkunits(sourceState);
    Assert.assertEquals(workUnits.size(), 1);

    WorkUnitState wus = new WorkUnitState(workUnits.get(0));
    wus.addAll(sourceState);

    Closer closer = Closer.create();

    HiveWritableHdfsDataWriter writer = null;
    try {
      OldApiWritableFileExtractor extractor =
          closer.register((OldApiWritableFileExtractor) source.getExtractor(wus));
      HiveSerDeConverter converter = closer.register(new HiveSerDeConverter());
      writer = closer.register((HiveWritableHdfsDataWriter) new HiveWritableHdfsDataWriterBuilder<>()
          .withBranches(1).withWriterId("0").writeTo(Destination.of(DestinationType.HDFS, sourceState))
          .writeInFormat(WriterOutputFormat.ORC).build());

      converter.init(wus);
      Writable record;

      // Deserialize each input record, convert it via the Hive SerDes, and write it out as ORC.
      // Each input record is expected to convert to exactly one output record.
      while ((record = extractor.readRecord(null)) != null) {
        Iterable<Writable> convertedRecordIterable = converter.convertRecordImpl(null, record, wus);
        Assert.assertEquals(Iterators.size(convertedRecordIterable.iterator()), 1);
        writer.write(convertedRecordIterable.iterator().next());
      }
    } catch (Throwable t) {
      throw closer.rethrow(t);
    } finally {
      closer.close();
      if (writer != null) {
        writer.commit();
      }
      // Verify that the ORC output file was written, then clean up the output directory.
      Assert.assertTrue(this.fs.exists(new Path(sourceState.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR),
          sourceState.getProp(ConfigurationKeys.WRITER_FILE_NAME))));
      HadoopUtils.deletePath(this.fs, new Path(sourceState.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR)), true);
    }
  }
}