/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.falcon.entity.parser;

import java.net.ConnectException;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;

import org.apache.falcon.FalconException;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.EntityUtil;
import org.apache.falcon.entity.FeedHelper;
import org.apache.falcon.entity.Storage;
import org.apache.falcon.entity.store.ConfigurationStore;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Inputs;
import org.apache.falcon.entity.v0.process.LateInput;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Outputs;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.hadoop.HadoopClientFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Concrete Parser which has XML parsing and validation logic for Process XML.
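 *
 * A minimal usage sketch (the {@code EntityParserFactory} lookup and the
 * {@code parseAndValidate} call are assumed from the surrounding codebase):
 * <pre>
 *   ProcessEntityParser parser =
 *           (ProcessEntityParser) EntityParserFactory.getParser(EntityType.PROCESS);
 *   Process process = parser.parseAndValidate(xmlStream); // parses the XML, then calls validate()
 * </pre>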
 */
public class ProcessEntityParser extends EntityParser<Process> {

    public ProcessEntityParser() {
        super(EntityType.PROCESS);
    }
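
    /**
     * Validates the process definition: defaults the timezone to UTC, verifies that
     * the referenced clusters and feeds exist, and cross-checks validity windows,
     * HDFS paths, input partitions, dataset names and late-input configuration.
     *
     * @param process process entity to validate
     * @throws FalconException if a dependent entity is missing or any check fails
     */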
    @Override
    public void validate(Process process) throws FalconException {
        if (process.getTimezone() == null) {
            process.setTimezone(TimeZone.getTimeZone("UTC"));
        }

        // check that dependent entities exist, validating each referenced cluster once
        Set<String> clusters = new HashSet<String>();
        for (org.apache.falcon.entity.v0.process.Cluster cluster : process.getClusters().getClusters()) {
            String clusterName = cluster.getName();
            if (!clusters.add(clusterName)) {
                throw new ValidationException("Cluster: " + clusterName
                        + " is defined more than once for process: " + process.getName());
            }
            validateEntityExists(EntityType.CLUSTER, clusterName);
            validateProcessValidity(cluster.getValidity().getStart(), cluster.getValidity().getEnd());
            validateHDFSPaths(process, clusterName);

            // cross-validate each input feed against this cluster
            if (process.getInputs() != null) {
                for (Input input : process.getInputs().getInputs()) {
                    validateEntityExists(EntityType.FEED, input.getFeed());
                    Feed feed = ConfigurationStore.get().get(EntityType.FEED, input.getFeed());
                    CrossEntityValidations.validateFeedDefinedForCluster(feed, clusterName);
                    CrossEntityValidations.validateFeedRetentionPeriod(input.getStart(), feed, clusterName);
                    CrossEntityValidations.validateInstanceRange(process, input, feed);
                    validateInputPartition(input, feed);
                    validateOptionalInputsForTableStorage(feed, input);
                }
            }

            // cross-validate each output feed against this cluster
            if (process.getOutputs() != null) {
                for (Output output : process.getOutputs().getOutputs()) {
                    validateEntityExists(EntityType.FEED, output.getFeed());
                    Feed feed = ConfigurationStore.get().get(EntityType.FEED, output.getFeed());
                    CrossEntityValidations.validateFeedDefinedForCluster(feed, clusterName);
                    CrossEntityValidations.validateInstance(process, output, feed);
                }
            }
        }

        validateDatasetName(process.getInputs(), process.getOutputs());
        validateLateInputs(process);
    }

    /**
     * Validates that the user submitting this entity has access to the workflow and
     * lib paths on HDFS.
     *
     * @param process process entity being validated
     * @param clusterName cluster the process is materialized on
     * @throws FalconException if a path does not exist or the namenode is unreachable
     */
    private void validateHDFSPaths(Process process, String clusterName) throws FalconException {
        org.apache.falcon.entity.v0.cluster.Cluster cluster =
                ConfigurationStore.get().get(EntityType.CLUSTER, clusterName);
        if (!EntityUtil.responsibleFor(cluster.getColo())) {
            return;
        }

        String workflowPath = process.getWorkflow().getPath();
        String libPath = process.getWorkflow().getLib();
        String nameNode = getNameNode(cluster, clusterName);
        try {
            Configuration configuration = ClusterHelper.getConfiguration(cluster);
            FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(configuration);
            if (!fs.exists(new Path(workflowPath))) {
                throw new ValidationException(
                        "Workflow path: " + workflowPath + " does not exist in HDFS: " + nameNode);
            }
            if (libPath != null && !fs.exists(new Path(libPath))) {
                throw new ValidationException("Lib path: " + libPath + " does not exist in HDFS: " + nameNode);
            }
        } catch (ValidationException e) {
            throw e;
        } catch (ConnectException e) {
            throw new ValidationException(
                    "Unable to connect to Namenode: " + nameNode + " referenced in cluster: " + clusterName);
        } catch (Exception e) {
            throw new FalconException(e);
        }
    }
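
    /**
     * Returns the storage URL of the cluster's write interface, validating that it
     * carries a scheme. The cluster is never null here as it is validated when the
     * process is submitted.
     *
     * @param cluster cluster entity to read the storage URL from
     * @param clusterName cluster name, used in the error message
     * @throws ValidationException if the storage URL carries no valid scheme
     */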
    private String getNameNode(Cluster cluster, String clusterName) throws ValidationException {
        String storageUrl = ClusterHelper.getStorageUrl(cluster);
        if (new Path(storageUrl).toUri().getScheme() == null) {
            throw new ValidationException(
                    "Cannot get valid nameNode scheme from write interface of cluster: " + clusterName);
        }
        return storageUrl;
    }
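
    /**
     * Ensures the process validity window starts strictly before it ends.
     *
     * @param start start of the validity window
     * @param end end of the validity window
     * @throws FalconException if start is not before end
     */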
    private void validateProcessValidity(Date start, Date end) throws FalconException {
        try {
            if (!start.before(end)) {
                throw new ValidationException(
                        "Process start time: " + start + " should be before process end time: " + end);
            }
        } catch (ValidationException e) {
            throw e;
        } catch (Exception e) {
            throw new FalconException(e);
        }
    }
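
    /**
     * Validates the input partition expression, which is only meaningful for
     * filesystem storage; table storage does not support input partitions.
     *
     * @param input process input to check
     * @param feed feed backing the input
     * @throws FalconException if a partition is declared on a table-backed feed
     */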
    private void validateInputPartition(Input input, Feed feed) throws FalconException {
        if (input.getPartition() == null) {
            return;
        }

        final Storage.TYPE baseFeedStorageType = FeedHelper.getStorageType(feed);
        if (baseFeedStorageType == Storage.TYPE.FILESYSTEM) {
            CrossEntityValidations.validateInputPartition(input, feed);
        } else if (baseFeedStorageType == Storage.TYPE.TABLE) {
            throw new ValidationException("Input partitions are not supported for table storage: " + input.getName());
        }
    }
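
    /**
     * Ensures input and output names are unique across the process, since inputs
     * and outputs share a single dataset namespace.
     *
     * @param inputs process inputs, may be null
     * @param outputs process outputs, may be null
     * @throws ValidationException if an input or output name is reused
     */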
    private void validateDatasetName(Inputs inputs, Outputs outputs) throws ValidationException {
        Set<String> datasetNames = new HashSet<String>();
        if (inputs != null) {
            for (Input input : inputs.getInputs()) {
                if (!datasetNames.add(input.getName())) {
                    throw new ValidationException("Input name: " + input.getName() + " is already used");
                }
            }
        }

        if (outputs != null) {
            for (Output output : outputs.getOutputs()) {
                if (!datasetNames.add(output.getName())) {
                    throw new ValidationException("Output name: " + output.getName() + " is already used");
                }
            }
        }
    }
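
    /**
     * Validates the late-process configuration: every late input must refer to a
     * declared process input, and the feed backing that input must define a
     * late-arrival cut-off.
     *
     * @param process process entity being validated
     * @throws ValidationException if a late input is undeclared or its feed has no cut-off
     */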
    private void validateLateInputs(Process process) throws ValidationException {
        if (process.getLateProcess() == null) {
            return;
        }

        Map<String, String> feeds = new HashMap<String, String>();
        if (process.getInputs() != null) {
            for (Input in : process.getInputs().getInputs()) {
                feeds.put(in.getName(), in.getFeed());
            }
        }

        for (LateInput lp : process.getLateProcess().getLateInputs()) {
            if (!feeds.containsKey(lp.getInput())) {
                throw new ValidationException("Late Input: " + lp.getInput() + " is not specified in the inputs");
            }

            try {
                Feed feed = ConfigurationStore.get().get(EntityType.FEED, feeds.get(lp.getInput()));
                if (feed.getLateArrival() == null) {
                    throw new ValidationException(
                            "Late Input feed: " + lp.getInput() + " is not configured with late arrival cut-off");
                }
            } catch (FalconException e) {
                throw new ValidationException(e);
            }
        }
    }
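
    /**
     * Ensures an optional input is not backed by a table-storage feed, which is
     * unsupported.
     *
     * @param feed feed backing the input
     * @param input process input to check
     * @throws FalconException if an optional input is backed by table storage
     */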
    private void validateOptionalInputsForTableStorage(Feed feed, Input input) throws FalconException {
        if (input.isOptional() && FeedHelper.getStorageType(feed) == Storage.TYPE.TABLE) {
            throw new ValidationException("Optional input is not supported for feeds with table storage: "
                    + input.getName());
        }
    }
}