/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.falcon.update;

import org.apache.falcon.FalconException;
import org.apache.falcon.cluster.util.EmbeddedCluster;
import org.apache.falcon.entity.AbstractTestBase;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.FeedHelper;
import org.apache.falcon.entity.parser.EntityParserFactory;
import org.apache.falcon.entity.parser.FeedEntityParser;
import org.apache.falcon.entity.parser.ProcessEntityParser;
import org.apache.falcon.entity.store.ConfigurationStore;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.Frequency;
import org.apache.falcon.entity.v0.SchemaHelper;
import org.apache.falcon.entity.v0.cluster.ACL;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.cluster.ClusterLocationType;
import org.apache.falcon.entity.v0.cluster.Interface;
import org.apache.falcon.entity.v0.cluster.Interfacetype;
import org.apache.falcon.entity.v0.datasource.Credential;
import org.apache.falcon.entity.v0.datasource.Datasource;
import org.apache.falcon.entity.v0.feed.CatalogTable;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.Location;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.entity.v0.feed.Locations;
import org.apache.falcon.entity.v0.feed.Partition;
import org.apache.falcon.entity.v0.feed.Properties;
import org.apache.falcon.entity.v0.feed.Property;
import org.apache.falcon.entity.v0.process.LateProcess;
import org.apache.falcon.entity.v0.process.PolicyType;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;
import java.io.IOException;
import java.io.InputStream;

/**
 * Test for Update helper methods.
 */
public class UpdateHelperTest extends AbstractTestBase {
    private final FeedEntityParser parser =
            (FeedEntityParser) EntityParserFactory.getParser(EntityType.FEED);
    private final ProcessEntityParser processParser =
            (ProcessEntityParser) EntityParserFactory.getParser(EntityType.PROCESS);

    @BeforeClass
    public void init() throws Exception {
        this.dfsCluster = EmbeddedCluster.newCluster("testCluster");
        this.conf = dfsCluster.getConf();
        setup();
    }

    @AfterClass
    public void tearDown() {
        this.dfsCluster.shutdown();
    }

    @BeforeMethod
    public void setUp() throws Exception {
        storeEntity(EntityType.CLUSTER, "testCluster");
        storeEntity(EntityType.CLUSTER, "backupCluster");
        storeEntity(EntityType.FEED, "clicksFeed");
        storeEntity(EntityType.FEED, "impressionFeed");
        storeEntity(EntityType.FEED, "imp-click-join1");
        storeEntity(EntityType.FEED, "imp-click-join2");
        storeEntity(EntityType.DATASOURCE, "datasource1");
        storeEntity(EntityType.DATASOURCE, "datasource2");
    }

    private void prepare(Process process) throws IOException, FalconException {
        FileSystem fs = dfsCluster.getFileSystem();
        Cluster clusterEntity = ConfigurationStore.get().get(EntityType.CLUSTER, "testCluster");
        Path staging = EntityUtil.getNewStagingPath(clusterEntity, process);
        fs.mkdirs(staging);
        fs.create(new Path(staging, "workflow.xml")).close();
        fs.create(new Path(staging, "checksums")).close();
    }
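
    /**
     * Cosmetic changes (tags, groups, late-arrival cutoff, retry policy,
     * late inputs, pipelines, an added cluster) should not mark a feed or
     * process as updated; a frequency change should.
     */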
    @Test
    public void testIsEntityUpdated() throws Exception {
        Feed oldFeed = parser.parseAndValidate(this.getClass().getResourceAsStream(FEED_XML));
        String cluster = "testCluster";
        Feed newFeed = (Feed) oldFeed.copy();
        Cluster clusterEntity = ConfigurationStore.get().get(EntityType.CLUSTER, cluster);
        Path feedPath = EntityUtil.getNewStagingPath(clusterEntity, oldFeed);
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldFeed, newFeed, cluster, feedPath));

        //Add tags and ensure isEntityUpdated returns false
        newFeed.setTags("category=test");
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldFeed, newFeed, cluster, feedPath));

        newFeed.setGroups("newgroups");
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldFeed, newFeed, cluster, feedPath));
        newFeed.getLateArrival().setCutOff(Frequency.fromString("hours(8)"));
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldFeed, newFeed, cluster, feedPath));
        newFeed.setFrequency(Frequency.fromString("days(1)"));
        Assert.assertTrue(UpdateHelper.isEntityUpdated(oldFeed, newFeed, cluster, feedPath));

        Process oldProcess = processParser.parseAndValidate(this.getClass().getResourceAsStream(PROCESS_XML));
        prepare(oldProcess);
        Process newProcess = (Process) oldProcess.copy();
        Path procPath = EntityUtil.getNewStagingPath(clusterEntity, oldProcess);

        newProcess.getRetry().setPolicy(PolicyType.FINAL);
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));
        newProcess.getLateProcess().getLateInputs().remove(1);
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));
        newProcess.getLateProcess().setPolicy(PolicyType.PERIODIC);
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));
        newProcess.setFrequency(Frequency.fromString("days(1)"));
        Assert.assertTrue(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));

        //Adding a new cluster shouldn't cause an update in the old cluster
        newProcess = (Process) oldProcess.copy();
        org.apache.falcon.entity.v0.process.Cluster procCluster =
                new org.apache.falcon.entity.v0.process.Cluster();
        procCluster.setName("newcluster");
        procCluster.setValidity(newProcess.getClusters().getClusters().get(0).getValidity());
        newProcess.getClusters().getClusters().add(procCluster);
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));

        //Changing pipelines and tags shouldn't cause an update either
        oldProcess.setPipelines("test");
        newProcess.setPipelines("newTest");
        newProcess.setTags("category=test");
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));

        //In the case of an incomplete update, where the new entity is scheduled but not yet
        //updated in the config store, another update call shouldn't cause an update
        //in the workflow engine
        newProcess.setFrequency(Frequency.fromString("days(1)"));
        procPath = EntityUtil.getNewStagingPath(clusterEntity, newProcess);
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));
    }
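
    /**
     * A feed change should cascade to a dependent process only when it affects the
     * process workflow: data-path, frequency and validity-start changes should
     * trigger an update; late-arrival, partition and property changes should not.
     */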
    @Test
    public void testShouldUpdateAffectedEntities() throws Exception {
        Feed oldFeed = parser.parseAndValidate(this.getClass().getResourceAsStream(FEED_XML));
        Feed newFeed = (Feed) oldFeed.copy();
        Process process = processParser.parseAndValidate(this.getClass().getResourceAsStream(PROCESS_XML));
        prepare(process);
        String cluster = process.getClusters().getClusters().get(0).getName();

        Assert.assertFalse(UpdateHelper.shouldUpdate(oldFeed, newFeed, process, cluster));

        newFeed.getLateArrival().setCutOff(Frequency.fromString("hours(1)"));
        Assert.assertFalse(UpdateHelper.shouldUpdate(oldFeed, newFeed, process, cluster));

        newFeed.getLateArrival().setCutOff(oldFeed.getLateArrival().getCutOff());
        getLocation(newFeed, LocationType.DATA, cluster).setPath("/test");
        Assert.assertTrue(UpdateHelper.shouldUpdate(oldFeed, newFeed, process, cluster));

        getLocation(newFeed, LocationType.DATA, cluster).setPath(
                getLocation(oldFeed, LocationType.DATA, cluster).getPath());
        newFeed.setFrequency(Frequency.fromString("months(1)"));
        Assert.assertTrue(UpdateHelper.shouldUpdate(oldFeed, newFeed, process, cluster));

        newFeed.setFrequency(oldFeed.getFrequency());
        Partition partition = new Partition();
        partition.setName("1");
        newFeed.getPartitions().getPartitions().add(partition);
        Assert.assertFalse(UpdateHelper.shouldUpdate(oldFeed, newFeed, process, cluster));

        Property property = new Property();
        property.setName("1");
        property.setValue("1");
        newFeed.setProperties(new Properties());
        newFeed.getProperties().getProperties().add(property);
        Assert.assertFalse(UpdateHelper.shouldUpdate(oldFeed, newFeed, process, cluster));

        newFeed.getProperties().getProperties().remove(0);
        Assert.assertFalse(UpdateHelper.shouldUpdate(oldFeed, newFeed, process, cluster));

        //Change in start time should trigger process update as instance time changes
        FeedHelper.getCluster(newFeed, process.getClusters().getClusters().get(0).getName()).getValidity().setStart(
                SchemaHelper.parseDateUTC("2012-11-01T00:00Z"));
        Assert.assertTrue(UpdateHelper.shouldUpdate(oldFeed, newFeed, process, cluster));

        FeedHelper.getCluster(newFeed, process.getClusters().getClusters().get(0).getName()).getValidity().setStart(
                FeedHelper.getCluster(oldFeed,
                        process.getClusters().getClusters().get(0).getName()).getValidity().getStart());

        //Change storage from file-system locations to a catalog table
        newFeed.setLocations(null);
        CatalogTable table = new CatalogTable();
        table.setUri("catalog:default:clicks-blah#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}");
        newFeed.setTable(table);
        Assert.assertFalse(UpdateHelper.shouldUpdate(oldFeed, newFeed, process, cluster));
    }

    @Test
    public void testIsEntityUpdatedTable() throws Exception {
        InputStream inputStream = getClass().getResourceAsStream("/config/feed/hive-table-feed.xml");
        Feed oldTableFeed = (Feed) EntityType.FEED.getUnmarshaller().unmarshal(inputStream);
        getStore().publish(EntityType.FEED, oldTableFeed);

        String cluster = "testCluster";
        Cluster clusterEntity = ConfigurationStore.get().get(EntityType.CLUSTER, cluster);
        Path feedPath = EntityUtil.getNewStagingPath(clusterEntity, oldTableFeed);
        Feed newTableFeed = (Feed) oldTableFeed.copy();
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldTableFeed, newTableFeed, cluster, feedPath));

        newTableFeed.setGroups("newgroups");
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldTableFeed, newTableFeed, cluster, feedPath));
        newTableFeed.setFrequency(Frequency.fromString("days(1)"));
        Assert.assertTrue(UpdateHelper.isEntityUpdated(oldTableFeed, newTableFeed, cluster, feedPath));

        final CatalogTable table = new CatalogTable();
        table.setUri("catalog:default:clicks-blah#ds=${YEAR}-${MONTH}-${DAY}-${HOUR}");
        newTableFeed.setTable(table);
        Assert.assertTrue(UpdateHelper.isEntityUpdated(oldTableFeed, newTableFeed, cluster, feedPath));

        inputStream = getClass().getResourceAsStream("/config/process/process-table.xml");
        Process oldProcess = (Process) EntityType.PROCESS.getUnmarshaller().unmarshal(inputStream);
        FileSystem fs = dfsCluster.getFileSystem();
        Path staging = EntityUtil.getNewStagingPath(clusterEntity, oldProcess);
        fs.mkdirs(staging);
        fs.create(new Path(staging, "workflow.xml")).close();
        fs.create(new Path(staging, "checksums")).close();

        Process newProcess = (Process) oldProcess.copy();
        Path procPath = EntityUtil.getNewStagingPath(clusterEntity, oldProcess);
        newProcess.getRetry().setPolicy(PolicyType.FINAL);
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));
        newProcess.setFrequency(Frequency.fromString("days(1)"));
        Assert.assertTrue(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));
    }
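
    /**
     * Changing the ACL owner or group on a feed or process should mark the
     * entity as updated.
     */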
    @Test
    public void testIsEntityACLUpdated() throws Exception {
        Feed oldFeed = parser.parseAndValidate(this.getClass().getResourceAsStream(FEED_XML));
        String cluster = "testCluster";
        Feed newFeed = (Feed) oldFeed.copy();
        Cluster clusterEntity = ConfigurationStore.get().get(EntityType.CLUSTER, cluster);
        Path feedPath = EntityUtil.getNewStagingPath(clusterEntity, oldFeed);
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldFeed, newFeed, cluster, feedPath));

        newFeed.getACL().setOwner("new-user");
        newFeed.getACL().setGroup("new-group");
        Assert.assertNotEquals(oldFeed.getACL().getOwner(), newFeed.getACL().getOwner());
        Assert.assertNotEquals(oldFeed.getACL().getGroup(), newFeed.getACL().getGroup());
        Assert.assertTrue(UpdateHelper.isEntityUpdated(oldFeed, newFeed, cluster, feedPath));

        Process oldProcess = processParser.parseAndValidate(this.getClass().getResourceAsStream(PROCESS_XML));
        prepare(oldProcess);
        Process newProcess = (Process) oldProcess.copy();
        Path procPath = EntityUtil.getNewStagingPath(clusterEntity, oldProcess);
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));

        org.apache.falcon.entity.v0.process.ACL processACL =
                new org.apache.falcon.entity.v0.process.ACL();
        processACL.setOwner("owner");
        processACL.setGroup("group");
        newProcess.setACL(processACL);
        Assert.assertTrue(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));
    }

    @Test
    public void testIsEntityLateProcessUpdated() throws Exception {
        String cluster = "testCluster";
        Cluster clusterEntity = ConfigurationStore.get().get(EntityType.CLUSTER, cluster);
        Process oldProcess = processParser.parseAndValidate(this.getClass().getResourceAsStream(PROCESS_XML));
        prepare(oldProcess);
        Path procPath = EntityUtil.getNewStagingPath(clusterEntity, oldProcess);

        // The process should not be updated when the late-processing policy changes,
        // as that part of the definition does not affect the Oozie workflow.
        Process newProcess = (Process) oldProcess.copy();
        newProcess.getLateProcess().setPolicy(PolicyType.FINAL);
        Assert.assertFalse(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));

        LateProcess lateProcess = newProcess.getLateProcess();
        newProcess.setLateProcess(null);
        // The process should be updated when late processing is removed:
        // pre-processing needs to be removed from the workflow.
        Assert.assertTrue(UpdateHelper.isEntityUpdated(oldProcess, newProcess, cluster, procPath));

        Process newerProcess = (Process) newProcess.copy();
        newerProcess.setLateProcess(lateProcess);
        // The process should be updated when late processing is added:
        // pre-processing needs to be added to the workflow.
        Assert.assertTrue(UpdateHelper.isEntityUpdated(newProcess, newerProcess, cluster, procPath));
    }
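
    /**
     * Tags, ACL and description changes on a cluster should not force a rewrite of
     * dependent bundles/workflows; colo, interface, property and location changes should.
     */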
    @Test
    public void testIsClusterEntityUpdated() throws Exception {
        Unmarshaller unmarshaller = EntityType.CLUSTER.getUnmarshaller();
        String cluster = "testCluster";
        Cluster clusterEntity = ConfigurationStore.get().get(EntityType.CLUSTER, cluster);
        Cluster newClusterEntity = (Cluster) unmarshaller.unmarshal(this.getClass().getResource(CLUSTER_XML));
        newClusterEntity.setName(cluster);
        Assert.assertNotNull(newClusterEntity);

        // Tags, ACL and description updates should not update bundle/workflow for dependent entities
        ACL acl = new ACL();
        acl.setOwner("Test");
        acl.setGroup("testGroup");
        acl.setPermission("*");
        newClusterEntity.setACL(acl);
        newClusterEntity.setDescription("New Description");
        newClusterEntity.setTags("test=val,test2=val2");
        Assert.assertFalse(UpdateHelper.isClusterEntityUpdated(clusterEntity, newClusterEntity));

        // Changing colo should trigger an update
        newClusterEntity.setColo("NewColoValue");
        Assert.assertTrue(UpdateHelper.isClusterEntityUpdated(clusterEntity, newClusterEntity));

        // Updating an interface should update bundle/workflow for dependent entities
        Interface writeInterface = ClusterHelper.getInterface(newClusterEntity, Interfacetype.WRITE);
        Assert.assertNotNull(writeInterface);
        newClusterEntity.getInterfaces().getInterfaces().remove(writeInterface);
        writeInterface.setEndpoint("hdfs://test.host.name:8020");
        writeInterface.setType(Interfacetype.WRITE);
        writeInterface.setVersion("2.2.0");
        newClusterEntity.getInterfaces().getInterfaces().add(writeInterface);
        Assert.assertTrue(UpdateHelper.isClusterEntityUpdated(clusterEntity, newClusterEntity));

        // Updating a property should update bundle/workflow for dependent entities
        newClusterEntity = (Cluster) unmarshaller.unmarshal(this.getClass().getResource(CLUSTER_XML));
        newClusterEntity.setName(cluster);
        Assert.assertNotNull(newClusterEntity);
        org.apache.falcon.entity.v0.cluster.Property property =
                new org.apache.falcon.entity.v0.cluster.Property();
        property.setName("testName");
        property.setValue("testValue");
        newClusterEntity.getProperties().getProperties().add(property);
        Assert.assertTrue(UpdateHelper.isClusterEntityUpdated(clusterEntity, newClusterEntity));

        // Updating a location should update bundle/workflow for dependent entities
        newClusterEntity = (Cluster) unmarshaller.unmarshal(this.getClass().getResource(CLUSTER_XML));
        newClusterEntity.setName(cluster);
        Assert.assertNotNull(newClusterEntity);
        org.apache.falcon.entity.v0.cluster.Location stagingLocation =
                ClusterHelper.getLocation(newClusterEntity, ClusterLocationType.STAGING);
        Assert.assertNotNull(stagingLocation);
        newClusterEntity.getLocations().getLocations().remove(stagingLocation);
        stagingLocation.setPath("/test/path/here");
        newClusterEntity.getLocations().getLocations().add(stagingLocation);
        Assert.assertTrue(UpdateHelper.isClusterEntityUpdated(clusterEntity, newClusterEntity));
    }

    /**
     * Cosmetic datasource changes (tags, ACL, description, colo) should not update
     * dependent entities; endpoint and credential changes should.
     */
    @Test
    public void testIsDatasourceEntityUpdated() throws Exception {
        Unmarshaller unmarshaller = EntityType.DATASOURCE.getUnmarshaller();
        String datasource = "datasource1";
        Datasource datasourceEntity = ConfigurationStore.get().get(EntityType.DATASOURCE, datasource);
        Datasource newDatasourceEntity = getNewDatasource(unmarshaller, datasource);
        Assert.assertNotNull(newDatasourceEntity);

        // Tags, ACL, description and colo updates should not update bundle/workflow for dependent entities
        org.apache.falcon.entity.v0.datasource.ACL acl = new org.apache.falcon.entity.v0.datasource.ACL();
        acl.setOwner("Test");
        acl.setGroup("testGroup");
        acl.setPermission("*");
        newDatasourceEntity.setACL(acl);
        newDatasourceEntity.setDescription("New Description");
        newDatasourceEntity.setTags("test=val,test2=val2");
        newDatasourceEntity.setColo("newColo2");
        Assert.assertFalse(UpdateHelper.isDatasourceEntityUpdated(datasourceEntity, newDatasourceEntity));

        // Changing the read or write endpoint should trigger an update
        newDatasourceEntity.getInterfaces().getInterfaces().get(0).setEndpoint("jdbc:hsqldb:localhost2/db1");
        Assert.assertTrue(UpdateHelper.isDatasourceEntityUpdated(datasourceEntity, newDatasourceEntity));

        // Changing the credential type or value should trigger an update
        newDatasourceEntity = getNewDatasource(unmarshaller, datasource);
        Credential cred = newDatasourceEntity.getInterfaces().getInterfaces().get(0).getCredential();
        cred.setPasswordText("blah");
        Assert.assertTrue(UpdateHelper.isDatasourceEntityUpdated(datasourceEntity, newDatasourceEntity));
    }

    private Datasource getNewDatasource(Unmarshaller unmarshaller, String datasource) throws JAXBException {
        Datasource newDatasourceEntity = (Datasource) unmarshaller.unmarshal(
                this.getClass().getResource(DATASOURCE_XML));
        newDatasourceEntity.setName(datasource);
        return newDatasourceEntity;
    }

    // Returns the location of the given type, preferring the feed's cluster-level locations
    private static Location getLocation(Feed feed, LocationType type, String cluster) {
        org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster);
        if (feedCluster.getLocations() != null) {
            return getLocation(feedCluster.getLocations(), type);
        }
        return getLocation(feed.getLocations(), type);
    }

    // Returns the location of the given type, or a default "/tmp" location if none matches
    private static Location getLocation(Locations locations, LocationType type) {
        for (Location loc : locations.getLocations()) {
            if (loc.getType() == type) {
                return loc;
            }
        }
        Location loc = new Location();
        loc.setPath("/tmp");
        loc.setType(type);
        return loc;
    }
}