/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gobblin.publisher;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.google.common.collect.ImmutableList;
import com.google.common.io.Files;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.metadata.MetadataMerger;
import gobblin.metadata.types.GlobalMetadata;
import gobblin.util.ForkOperatorUtils;


/**
 * Tests for BaseDataPublisher
 */
public class BaseDataPublisherTest {

  /**
   * Test DATA_PUBLISHER_METADATA_STR: a user should be able to put an arbitrary metadata string in job configuration
   * and have that written out.
   */
  @Test
  public void testMetadataStrOneBranch() throws IOException {
    State s = buildDefaultState(1);

    WorkUnitState wuState = new WorkUnitState();
    wuState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");
    addStateToWorkunit(s, wuState);

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(wuState);

    try (InputStream mdStream = new FileInputStream(openMetadataFile(s, 1, 0))) {
      String mdBytes = IOUtils.toString(mdStream, StandardCharsets.UTF_8);
      Assert.assertEquals(mdBytes, "foobar", "Expected to read back metadata from string");
    }
  }

  /**
   * Test that DATA_PUBLISHER_METADATA_STR functionality works across multiple branches.
   */
  @Test
  public void testMetadataStrMultipleWorkUnitsAndBranches() throws IOException {
    final int numBranches = 3;
    State s = buildDefaultState(numBranches);

    List<WorkUnitState> workUnits = new ArrayList<>();
    for (int i = 0; i < numBranches; i++) {
      WorkUnitState wuState = new WorkUnitState();
      wuState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");
      addStateToWorkunit(s, wuState);
      workUnits.add(wuState);
    }

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(workUnits);

    for (int branch = 0; branch < numBranches; branch++) {
      try (InputStream mdStream = new FileInputStream(openMetadataFile(s, numBranches, branch))) {
        String mdBytes = IOUtils.toString(mdStream, StandardCharsets.UTF_8);
        Assert.assertEquals(mdBytes, "foobar", "Expected to read back metadata from string");
      }
    }
  }

  /**
   * Test that an exception is properly thrown if we configure a merger that doesn't actually implement
   * MetadataMerger
   */
  @Test(expectedExceptions = IllegalArgumentException.class)
  public void testBogusMetadataMerger() throws IOException {
    State s = buildDefaultState(1);
    s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
    s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_MERGER_NAME_KEY, "java.lang.String");
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");

    WorkUnitState wuState = new WorkUnitState();
    addStateToWorkunit(s, wuState);

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(Collections.singletonList(wuState));
  }

  /**
   * This test is testing several things at once:
   * 1. That a merger is called properly for all workunits in a branch
   * 2. That different mergers can be instantiated per branch
   */
  @Test
  public void testMergedMetadata() throws IOException {
    final int numBranches = 2;
    final int numWorkUnits = 10;

    State s = buildDefaultState(numBranches);
    for (int i = 0; i < numBranches; i++) {
      String mdKeyName = ForkOperatorUtils
          .getPropertyNameForBranch(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, numBranches, i);
      String mdMergerKeyName = ForkOperatorUtils
          .getPropertyNameForBranch(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_MERGER_NAME_KEY, numBranches, i);

      s.setProp(mdKeyName, "true");
      // Even branches add, odd branches multiply.
      s.setProp(mdMergerKeyName,
          (i % 2) == 0 ? TestAdditionMerger.class.getName() : TestMultiplicationMerger.class.getName());
    }

    // For each branch, metadata is (branchId + 1) * (workUnitId + 1) - adding 1 so we don't ever multiply by 0
    List<WorkUnitState> workUnits = new ArrayList<>();
    for (int workUnitId = 0; workUnitId < numWorkUnits; workUnitId++) {
      WorkUnitState wuState = new WorkUnitState();
      addStateToWorkunit(s, wuState);

      for (int branchId = 0; branchId < numBranches; branchId++) {
        String mdForBranchName =
            ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_METADATA_KEY, numBranches, branchId);
        wuState.setProp(mdForBranchName, String.valueOf((branchId + 1) * (workUnitId + 1)));
      }

      workUnits.add(wuState);
    }

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(workUnits);

    for (int branch = 0; branch < numBranches; branch++) {
      boolean additive = (branch % 2) == 0;

      // Accumulate in a long: the product for branch 1 is 2^10 * 10! = 3,715,891,200, which does not fit in an int.
      long expected = additive ? 0L : 1L;
      for (int i = 0; i < numWorkUnits; i++) {
        long value = (long) (branch + 1) * (i + 1);
        if (additive) {
          expected += value;
        } else {
          expected *= value;
        }
      }

      try (InputStream mdStream = new FileInputStream(openMetadataFile(s, numBranches, branch))) {
        String mdBytes = IOUtils.toString(mdStream, StandardCharsets.UTF_8);
        Assert.assertEquals(mdBytes, String.valueOf(expected),
            "Expected to read back correctly merged metadata from string");
      }
    }
  }

  /**
   * Test that writer metadata set on a work unit is not written out when no merger is set in config.
   */
  @Test
  public void testNoOutputWhenDisabled() throws IOException {
    State s = buildDefaultState(1);

    WorkUnitState wuState = new WorkUnitState();
    addStateToWorkunit(s, wuState);
    wuState.setProp(ConfigurationKeys.WRITER_METADATA_KEY, "abcdefg");

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(Collections.singletonList(wuState));

    File mdFile = openMetadataFile(s, 1, 0);
    Assert.assertFalse(mdFile.exists(),
        "Internal metadata from writer should not be written out if no merger is set in config");
  }

  /**
   * Test that, with no metadata output dir configured and two work units carrying different
   * WRITER_PARTITION_PATH_KEY values, a metadata file shows up inside each partition directory
   * under the final publish dir.
   */
  @Test
  public void testWithPartitionKey() throws IOException {
    File publishPath = Files.createTempDir();
    try {
      File part1 = new File(publishPath, "1-2-3-4");
      Assert.assertTrue(part1.mkdir(), "Could not create partition directory " + part1);

      File part2 = new File(publishPath, "5-6-7-8");
      Assert.assertTrue(part2.mkdir(), "Could not create partition directory " + part2);

      State s = buildDefaultState(1);
      String md = new GlobalMetadata().toJson();

      s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
      s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
      s.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
      s.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishPath.getAbsolutePath());
      s.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
      s.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");

      WorkUnitState wuState1 = new WorkUnitState();
      wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4");
      wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
      addStateToWorkunit(s, wuState1);

      WorkUnitState wuState2 = new WorkUnitState();
      wuState2.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "5-6-7-8");
      wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
      addStateToWorkunit(s, wuState2);

      BaseDataPublisher publisher = new BaseDataPublisher(s);
      publisher.publishMetadata(ImmutableList.of(wuState1, wuState2));

      Assert.assertTrue(new File(part1, "metadata.json").exists());
      Assert.assertTrue(new File(part2, "metadata.json").exists());
    } finally {
      FileUtils.deleteDirectory(publishPath);
    }
  }

  /**
   * Merger that parses each metadata string as an integer and reports the running sum.
   * Has only the implicit no-arg constructor.
   */
  public static class TestAdditionMerger implements MetadataMerger<String> {
    private long sum = 0;

    @Override
    public void update(String metadata) {
      sum += Integer.parseInt(metadata);
    }

    @Override
    public String getMergedMetadata() {
      return String.valueOf(sum);
    }
  }

  /**
   * Merger that parses each metadata string as an integer and reports the running product.
   * Only offers a {@link Properties} constructor, in contrast to {@link TestAdditionMerger}.
   */
  public static class TestMultiplicationMerger implements MetadataMerger<String> {
    // long, not int: the product built up by testMergedMetadata exceeds Integer.MAX_VALUE
    private long product = 1;

    public TestMultiplicationMerger(Properties config) {
      // testing ctor call
    }

    @Override
    public void update(String metadata) {
      product *= Integer.parseInt(metadata);
    }

    @Override
    public String getMergedMetadata() {
      return String.valueOf(product);
    }
  }

  /**
   * Copies every property currently in {@code s} onto {@code wuState}.
   *
   * @param s state to copy properties from
   * @param wuState work unit state that receives the properties
   */
  private void addStateToWorkunit(State s, WorkUnitState wuState) {
    for (Map.Entry<Object, Object> prop : s.getProperties().entrySet()) {
      wuState.setProp((String) prop.getKey(), prop.getValue());
    }
  }

  /**
   * Builds the path of the metadata file for a branch from DATA_PUBLISHER_METADATA_OUTPUT_DIR and
   * DATA_PUBLISHER_METADATA_OUTPUT_FILE in {@code state}. Nothing is opened or created on disk.
   *
   * @param state state holding the output dir and file name
   * @param numBranches total number of branches; when greater than 1 the file name gets a ".branchId" suffix
   * @param branchId branch whose metadata file is wanted
   * @return the resolved file, which may or may not exist
   */
  private File openMetadataFile(State state, int numBranches, int branchId) {
    String dir = state.getProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
    String fileName = state.getProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE);
    if (numBranches > 1) {
      fileName = fileName + "." + branchId;
    }
    return new File(dir, fileName);
  }

  /**
   * Builds a state with FORK_BRANCHES_KEY set to {@code numBranches} and the metadata output dir/file
   * pointing at a unique, not-yet-existing path in the temp directory.
   *
   * @param numBranches number of fork branches to configure
   * @return the freshly built state
   * @throws IOException if the placeholder temp file cannot be created or removed
   */
  private State buildDefaultState(int numBranches) throws IOException {
    State state = new State();
    state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, numBranches);

    // createTempFile is only used to reserve a unique name; the file itself must not exist when a test starts,
    // otherwise testNoOutputWhenDisabled would fail for the wrong reason.
    File tmpLocation = File.createTempFile("metadata", "");
    if (!tmpLocation.delete()) {
      throw new IOException("Could not delete placeholder temp file " + tmpLocation.getAbsolutePath());
    }

    state.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR, tmpLocation.getParent());
    state.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, tmpLocation.getName());

    // Best-effort cleanup of the paths openMetadataFile resolves for this state, so whatever ends up there
    // does not pile up in the shared temp directory. Registering a path that never gets created is harmless.
    for (int branch = 0; branch < numBranches; branch++) {
      openMetadataFile(state, numBranches, branch).deleteOnExit();
    }

    return state;
  }
}