/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.falcon.oozie;

import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.LifeCycle;
import org.apache.falcon.Tag;
import org.apache.falcon.entity.FeedHelper;
import org.apache.falcon.entity.Storage;
import org.apache.falcon.entity.v0.SchemaHelper;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.oozie.coordinator.ACTION;
import org.apache.falcon.oozie.coordinator.COORDINATORAPP;
import org.apache.falcon.oozie.coordinator.DATAOUT;
import org.apache.falcon.oozie.coordinator.DATASETS;
import org.apache.falcon.oozie.coordinator.OUTPUTEVENTS;
import org.apache.falcon.oozie.coordinator.SYNCDATASET;
import org.apache.falcon.oozie.coordinator.WORKFLOW;
import org.apache.hadoop.fs.Path;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Properties;
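/*
 * For orientation: a sketch of the kind of coordinator definition this builder
 * marshals. Element names follow the Oozie coordinator schema; the attribute
 * values below are illustrative, not taken from any real feed:
 *
 *   <coordinator-app name="..." start="2016-01-01T00:00Z" end="2017-01-01T00:00Z"
 *                    frequency="${coord:hours(1)}" timezone="UTC">
 *     <datasets>...</datasets>
 *     <output-events>...</output-events>
 *     <action>
 *       <workflow>
 *         <app-path>...</app-path>
 *         <configuration>...</configuration>
 *       </workflow>
 *     </action>
 *   </coordinator-app>
 */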
/**
 * Builds Oozie coordinator for database import.
 */
public class FeedImportCoordinatorBuilder extends OozieCoordinatorBuilder<Feed> {

    public static final String IMPORT_DATASET_NAME = "import-dataset";
    public static final String IMPORT_DATAOUT_NAME = "import-output";

    private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(FeedImportCoordinatorBuilder.class);

    public FeedImportCoordinatorBuilder(Feed entity) {
        super(entity, LifeCycle.IMPORT);
    }

    @Override
    public List<Properties> buildCoords(Cluster cluster, Path buildPath) throws FalconException {
        LOG.info("Generating Feed IMPORT coordinator.");
        org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster((Feed) entity, cluster.getName());
        if (!FeedHelper.isImportEnabled(feedCluster)) {
            return null;
        }

        if (feedCluster.getValidity().getEnd().before(new Date())) {
            LOG.warn("Feed IMPORT is not applicable as Feed's end time for cluster {} is not in the future",
                    cluster.getName());
            return null;
        }

        COORDINATORAPP coord = new COORDINATORAPP();
        initializeCoordAttributes(coord, (Feed) entity, cluster);

        Properties props = createCoordDefaultConfiguration(getEntityName());
        initializeOutputPath(coord, cluster, props);
        props.putAll(FeedHelper.getUserWorkflowProperties(getLifecycle()));

        WORKFLOW workflow = new WORKFLOW();
        Path coordPath = getBuildPath(buildPath);
        Properties wfProp = OozieOrchestrationWorkflowBuilder.get(entity, cluster, Tag.IMPORT).build(cluster, coordPath);
        workflow.setAppPath(getStoragePath(wfProp.getProperty(OozieEntityBuilder.ENTITY_PATH)));
        props.putAll(wfProp);
        workflow.setConfiguration(getConfig(props));

        ACTION action = new ACTION();
        action.setWorkflow(workflow);
        coord.setAction(action);

        Path marshalPath = marshal(cluster, coord, coordPath);
        return Arrays.asList(getProperties(marshalPath, getEntityName()));
    }

    private void initializeOutputPath(COORDINATORAPP coord, Cluster cluster, Properties props) throws FalconException {
        if (coord.getDatasets() == null) {
            coord.setDatasets(new DATASETS());
        }
        if (coord.getOutputEvents() == null) {
            coord.setOutputEvents(new OUTPUTEVENTS());
        }

        Storage storage = FeedHelper.createStorage(cluster, (Feed) entity);
        SYNCDATASET syncdataset = createDataSet((Feed) entity, cluster, storage, IMPORT_DATASET_NAME, LocationType.DATA);
        if (syncdataset == null) {
            return;
        }
        coord.getDatasets().getDatasetOrAsyncDataset().add(syncdataset);

        DATAOUT dataout = createDataOut(entity);
        coord.getOutputEvents().getDataOut().add(dataout);
    }

    private DATAOUT createDataOut(Feed feed) {
        DATAOUT dataout = new DATAOUT();
        dataout.setName(IMPORT_DATAOUT_NAME);
        dataout.setDataset(IMPORT_DATASET_NAME);
        dataout.setInstance("${coord:current(0)}");
        return dataout;
    }
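    /*
     * The dataset and output event assembled above map onto a coordinator
     * fragment roughly like the following (a sketch: the dataset/data-out
     * names and the instance EL expression come from the constants and code
     * above, while the frequency, initial-instance, timezone, and
     * uri-template values are illustrative):
     *
     *   <datasets>
     *     <dataset name="import-dataset" frequency="${coord:hours(1)}"
     *              initial-instance="2016-01-01T00:00Z" timezone="UTC">
     *       <uri-template>hcat://metastore:9083/db/table/ds=${YEAR}-${MONTH}-${DAY}</uri-template>
     *       <done-flag></done-flag>
     *     </dataset>
     *   </datasets>
     *   <output-events>
     *     <data-out name="import-output" dataset="import-dataset">
     *       <instance>${coord:current(0)}</instance>
     *     </data-out>
     *   </output-events>
     */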
    /**
     * Creates the import dataset. The start instance is set to the current date if the merge
     * type is snapshot; otherwise, the feed cluster's validity start date is used as the
     * start instance.
     *
     * @param feed feed entity being imported into
     * @param cluster cluster entity the coordinator runs on
     * @param storage storage resolved for the feed on this cluster
     * @param datasetName name to assign to the dataset
     * @param locationType feed location type to build the URI template from
     * @return the dataset, or null if the storage has no URI template for the location type
     * @throws FalconException
     */
    private SYNCDATASET createDataSet(Feed feed, Cluster cluster, Storage storage, String datasetName,
                                      LocationType locationType) throws FalconException {
        SYNCDATASET syncdataset = new SYNCDATASET();
        syncdataset.setName(datasetName);
        syncdataset.setFrequency("${coord:" + feed.getFrequency().toString() + "}");

        String uriTemplate = storage.getUriTemplate(locationType);
        if (StringUtils.isBlank(uriTemplate)) {
            return null;
        }
        if (storage.getType() == Storage.TYPE.TABLE) {
            uriTemplate = uriTemplate.replace("thrift", "hcat"); // Oozie requires this!!!
        }
        syncdataset.setUriTemplate(uriTemplate);

        org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
        Date initialInstance = FeedHelper.getImportInitalInstance(feedCluster);
        syncdataset.setInitialInstance(SchemaHelper.formatDateUTC(initialInstance));
        syncdataset.setTimezone(feed.getTimezone().getID());

        if (StringUtils.isNotBlank(feed.getAvailabilityFlag())) {
            syncdataset.setDoneFlag(feed.getAvailabilityFlag());
        } else {
            syncdataset.setDoneFlag("");
        }
        return syncdataset;
    }

    /**
     * Initializes the coordinator attributes. The start date is the current date if the merge
     * type is snapshot; otherwise, the feed cluster's validity start is used as the coordinator
     * start date.
     *
     * @param coord coordinator app being built
     * @param feed feed entity being imported into
     * @param cluster cluster entity the coordinator runs on
     */
    private void initializeCoordAttributes(COORDINATORAPP coord, Feed feed, Cluster cluster) {
        coord.setName(getEntityName());
        // for feeds with snapshot layout, the start date will be the time of scheduling
        // since the import dumps the whole table
        org.apache.falcon.entity.v0.feed.Cluster feedCluster = FeedHelper.getCluster(feed, cluster.getName());
        Date initialInstance = FeedHelper.getImportInitalInstance(feedCluster);
        coord.setStart(SchemaHelper.formatDateUTC(initialInstance));
        coord.setEnd(SchemaHelper.formatDateUTC(feedCluster.getValidity().getEnd()));
        coord.setTimezone(entity.getTimezone().getID());
        coord.setFrequency("${coord:" + entity.getFrequency().toString() + "}");
    }
}
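/*
 * A minimal usage sketch, assuming a parsed Feed entity and its Cluster are
 * already in scope (the staging path below is hypothetical):
 *
 *   FeedImportCoordinatorBuilder builder = new FeedImportCoordinatorBuilder(feed);
 *   List<Properties> coordProps = builder.buildCoords(cluster, new Path("/falcon/staging"));
 *   // coordProps is null when import is not enabled for the feed on this
 *   // cluster, or when the feed's validity end time is already in the past.
 */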