/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.data.management.copy.writer;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.WorkUnitState;
import gobblin.data.management.copy.CopySource;
import gobblin.data.management.copy.CopyableDatasetMetadata;
import gobblin.data.management.copy.CopyableFile;
import gobblin.data.management.copy.CopyableFileUtils;
import gobblin.data.management.copy.FileAwareInputStream;
import gobblin.data.management.copy.OwnerAndPermission;
import gobblin.data.management.copy.TestCopyableDataset;
import gobblin.data.management.copy.converter.UnGzipConverter;
import gobblin.util.PathUtils;
import gobblin.util.TestUtils;

import java.io.FileInputStream;
import java.io.IOException;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import com.google.common.collect.Iterables;
import com.google.common.io.Files;


public class TarArchiveInputStreamDataWriterTest {

  private FileSystem fs;
  private Path testTempPath;

  @BeforeClass
  public void setup() throws Exception {
    fs = FileSystem.getLocal(new Configuration());
    testTempPath = new Path(Files.createTempDir().getAbsolutePath(), "tarArchiveInputStreamDataWriterTest");
    fs.mkdirs(testTempPath);
  }

  @DataProvider(name = "testFileDataProvider")
  public static Object[][] fileDataProvider() {
    // {filePath, newFileName, expectedText}
    return new Object[][] {
        { "tarArchiveInputStreamDataWriterTest/archived.tar.gz", "archived.tar.gz", "text" },
        { "tarArchiveInputStreamDataWriterTest/archived.tgz", "archived_new_name", "text" } };
  }

  @Test(dataProvider = "testFileDataProvider")
  public void testWrite(final String filePath, final String newFileName, final String expectedText)
      throws Exception {
    // Name of the file inside each test archive whose contents are verified after extraction.
    String fileNameInArchive = "text.txt";

    WorkUnitState state = TestUtils.createTestWorkUnitState();
    state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
    state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
    state.setProp(ConfigurationKeys.WRITER_FILE_PATH, "writer_file_path_" + RandomStringUtils.randomAlphabetic(5));
    CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));
    CopySource.serializeCopyableDataset(state, metadata);
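
    // A sketch of the flow exercised below: the gzipped tar from the test classpath is un-gzipped
    // by UnGzipConverter into a FileAwareInputStream, and TarArchiveInputStreamDataWriter is
    // expected to untar that stream, writing each archive entry under the task output directory.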
    FileAwareInputStream fileAwareInputStream = getCompressedInputStream(filePath, newFileName);
    CopySource.serializeCopyEntity(state, fileAwareInputStream.getFile());

    TarArchiveInputStreamDataWriter dataWriter = new TarArchiveInputStreamDataWriter(state, 1, 0);
    dataWriter.write(fileAwareInputStream);
    dataWriter.commit();

    // The test archives contain the file text.txt
    Path unArchivedFilePath = new Path(fileAwareInputStream.getFile().getDestination(), fileNameInArchive);

    // Path at which the writer writes text.txt
    Path taskOutputFilePath =
        new Path(new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR),
            fileAwareInputStream.getFile().getDatasetAndPartition(metadata).identifier()),
            PathUtils.withoutLeadingSeparator(unArchivedFilePath));

    Assert.assertEquals(IOUtils.toString(new FileInputStream(taskOutputFilePath.toString())).trim(), expectedText);
  }

  /**
   * Finds the test compressed file <code>filePath</code> on the classpath and reads it as a
   * {@link FileAwareInputStream}.
   */
  private FileAwareInputStream getCompressedInputStream(final String filePath, final String newFileName)
      throws Exception {
    UnGzipConverter converter = new UnGzipConverter();

    String fullPath = getClass().getClassLoader().getResource(filePath).getFile();
    FileStatus status = fs.getFileStatus(testTempPath);

    OwnerAndPermission ownerAndPermission =
        new OwnerAndPermission(status.getOwner(), status.getGroup(),
            new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    CopyableFile cf =
        CopyableFileUtils.getTestCopyableFile(filePath, new Path(testTempPath, newFileName).toString(), newFileName,
            ownerAndPermission);

    FileAwareInputStream fileAwareInputStream = new FileAwareInputStream(cf, fs.open(new Path(fullPath)));

    Iterable<FileAwareInputStream> iterable =
        converter.convertRecord("outputSchema", fileAwareInputStream, new WorkUnitState());

    return Iterables.getFirst(iterable, null);
  }

  @AfterClass
  public void cleanup() {
    try {
      fs.delete(testTempPath, true);
    } catch (IOException e) {
      // Best-effort cleanup of the temp directory; a failure here does not affect test results.
    }
  }
}