/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.data.management.copy.converter; import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.util.List; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import joptsimple.internal.Strings; import gobblin.configuration.WorkUnitState; import gobblin.converter.DataConversionException; import gobblin.data.management.copy.CopyableFileUtils; import gobblin.data.management.copy.FileAwareInputStream; public class UnGzipConverterTest { @DataProvider(name = "fileDataProvider") public static Object[][] fileDataProvider() { // {filePath, expectedText} return new Object[][] { { "unGzipConverterTest/archived.tar.gz", "text" }, { "unGzipConverterTest/archived.tgz", "text" } }; } @Test(dataProvider = "fileDataProvider") public void testGz(final String filePath, final String expectedText) throws Exception { UnGzipConverter converter = new UnGzipConverter(); FileSystem fs = FileSystem.getLocal(new Configuration()); String fullPath = getClass().getClassLoader().getResource(filePath).getFile(); FileAwareInputStream fileAwareInputStream = new FileAwareInputStream(CopyableFileUtils.getTestCopyableFile(filePath), fs.open(new Path(fullPath))); Iterable<FileAwareInputStream> iterable = converter.convertRecord("outputSchema", fileAwareInputStream, new WorkUnitState()); String actual = readGzipStreamAsString(Iterables.getFirst(iterable, null).getInputStream()); Assert.assertEquals(actual.trim(), expectedText); } @Test public void testExtensionStripping() throws DataConversionException, IOException { List<String> helloWorldFiles = ImmutableList.of("helloworld.txt.gzip", "helloworld.txt.gz"); UnGzipConverter converter = new UnGzipConverter(); FileSystem fs = FileSystem.getLocal(new Configuration()); for (String fileName: helloWorldFiles) { String filePath = "unGzipConverterTest/" + fileName; String fullPath = getClass().getClassLoader().getResource(filePath).getFile(); FileAwareInputStream fileAwareInputStream = new FileAwareInputStream(CopyableFileUtils.getTestCopyableFile(filePath, "/tmp/" + fileName, null, null), fs.open(new Path(fullPath))); Iterable<FileAwareInputStream> iterable = converter.convertRecord("outputSchema", fileAwareInputStream, new WorkUnitState()); FileAwareInputStream out = iterable.iterator().next(); Assert.assertEquals(out.getFile().getDestination().getName(), "helloworld.txt"); String contents = IOUtils.toString(out.getInputStream(), StandardCharsets.UTF_8); Assert.assertEquals(contents, "helloworld\n"); } } private static String readGzipStreamAsString(InputStream is) throws Exception { TarArchiveInputStream tarIn = new TarArchiveInputStream(is); try { TarArchiveEntry tarEntry; while ((tarEntry = tarIn.getNextTarEntry()) != null) { if (tarEntry.isFile() && tarEntry.getName().endsWith(".txt")) { return IOUtils.toString(tarIn, "UTF-8"); } } } finally { tarIn.close(); } return Strings.EMPTY; } }