/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.entity.parser;
import org.apache.commons.lang.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.catalog.CatalogServiceFactory;
import org.apache.falcon.entity.*;
import org.apache.falcon.entity.store.ConfigurationStore;
import org.apache.falcon.entity.v0.Entity;
import org.apache.falcon.entity.v0.EntityGraph;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.feed.Cluster;
import org.apache.falcon.entity.v0.feed.ClusterType;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.entity.v0.feed.Location;
import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Process;
import org.apache.falcon.expression.ExpressionHelper;
import org.apache.falcon.group.FeedGroup;
import org.apache.falcon.group.FeedGroupMap;
import org.apache.falcon.security.CurrentUser;
import org.apache.falcon.security.SecurityUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import java.util.TimeZone;
import java.util.List;
/**
* Parser that parses feed entity definition.
*/
public class FeedEntityParser extends EntityParser<Feed> {

    private static final Logger LOG = LoggerFactory.getLogger(FeedEntityParser.class);

    public FeedEntityParser() {
        super(EntityType.FEED);
    }

    /**
     * Validates a feed entity end to end: defaults the timezone, runs per-cluster
     * checks (existence, validity window, registry, retention vs. cut-off),
     * validates storage, paths, partition expressions and groups, and — when the
     * feed already exists in the config store — verifies that every dependent
     * process remains compatible with the new definition.
     *
     * @param feed the feed entity to validate
     * @throws FalconException if any validation fails
     */
    @Override
    public void validate(Feed feed) throws FalconException {
        if (feed.getTimezone() == null) {
            // Feeds without an explicit timezone are interpreted as UTC.
            feed.setTimezone(TimeZone.getTimeZone("UTC"));
        }

        if (feed.getClusters() == null) {
            throw new ValidationException("Feed should have at least one cluster");
        }

        for (Cluster cluster : feed.getClusters().getClusters()) {
            validateEntityExists(EntityType.CLUSTER, cluster.getName());
            validateClusterValidity(cluster.getValidity().getStart(), cluster.getValidity().getEnd(),
                    cluster.getName());
            validateClusterHasRegistry(feed, cluster);
            validateFeedCutOffPeriod(feed, cluster);
        }

        // validateFeedStorage() also performs the user and ACL checks, so a
        // separate validateUser(feed) call here would be redundant.
        validateFeedStorage(feed);
        validateFeedPath(feed);
        validateFeedPartitionExpression(feed);
        validateFeedGroups(feed);

        // Seems like a good enough entity object for a new one
        // But is this an update ?
        Feed oldFeed = ConfigurationStore.get().get(EntityType.FEED, feed.getName());
        if (oldFeed == null) {
            return; // Not an update case
        }

        // Is actually an update. Need to iterate over all the processes
        // depending on this feed and see if they are valid with the new
        // feed reference
        EntityGraph graph = EntityGraph.get();
        Set<Entity> referenced = graph.getDependents(oldFeed);
        Set<Process> processes = findProcesses(referenced);
        if (processes.isEmpty()) {
            return;
        }

        ensureValidityFor(feed, processes);
    }

    /**
     * Ensures the feed's ACL owner matches the currently authenticated user.
     *
     * @param feed the feed whose ACL owner is checked
     * @throws ValidationException if the ACL owner differs from the current user
     */
    protected void validateUser(Feed feed) throws ValidationException {
        String owner = feed.getACL().getOwner();
        if (!owner.equals(CurrentUser.getUser())) {
            throw new ValidationException("Entity's owner " + owner + " is not same as current user "
                    + CurrentUser.getUser());
        }
    }

    /**
     * Filters the given dependent entities down to processes.
     *
     * @param referenced entities depending on the feed being updated
     * @return the subset of {@code referenced} that are processes
     */
    private Set<Process> findProcesses(Set<Entity> referenced) {
        Set<Process> processes = new HashSet<Process>();
        for (Entity entity : referenced) {
            if (entity.getEntityType() == EntityType.PROCESS) {
                processes.add((Process) entity);
            }
        }
        return processes;
    }

    /**
     * Validates that every cluster's data path shares the feed's default date
     * pattern and that the feed is admissible into each group it declares.
     *
     * @param feed the feed being validated
     * @throws FalconException if a cluster path or group constraint is violated
     */
    private void validateFeedGroups(Feed feed) throws FalconException {
        String[] groupNames = feed.getGroups() != null ? feed.getGroups().split(",") : new String[]{};
        final Storage storage = FeedHelper.createStorage(feed);
        String defaultPath = storage.getUriTemplate(LocationType.DATA);
        for (Cluster cluster : feed.getClusters().getClusters()) {
            final String uriTemplate = FeedHelper.createStorage(cluster, feed).getUriTemplate(LocationType.DATA);
            // All clusters must use the same date pattern as the feed default;
            // otherwise group membership semantics would be ambiguous.
            if (!FeedGroup.getDatePattern(uriTemplate).equals(
                    FeedGroup.getDatePattern(defaultPath))) {
                throw new ValidationException("Feeds default path pattern: "
                        + storage.getUriTemplate(LocationType.DATA)
                        + ", does not match with cluster: "
                        + cluster.getName()
                        + " path pattern: "
                        + uriTemplate);
            }
        }
        for (String groupName : groupNames) {
            FeedGroup group = FeedGroupMap.get().getGroupsMapping().get(groupName);
            // A group that does not exist yet is created implicitly elsewhere,
            // so only an existing, incompatible group is an error here.
            if (group != null && !group.canContainFeed(feed)) {
                throw new ValidationException(
                        "Feed " + feed.getName() + "'s frequency: " + feed.getFrequency().toString()
                                + ", path pattern: " + storage
                                + " does not match with group: " + group.getName() + "'s frequency: "
                                + group.getFrequency()
                                + ", date pattern: " + group.getDatePattern());
            }
        }
    }

    /**
     * Verifies each dependent process against the new feed definition, wrapping
     * any failure with the offending process name for context.
     */
    private void ensureValidityFor(Feed newFeed, Set<Process> processes) throws FalconException {
        for (Process process : processes) {
            try {
                ensureValidityFor(newFeed, process);
            } catch (FalconException e) {
                throw new ValidationException(
                        "Process " + process.getName() + " is not compatible with changes to feed "
                                + newFeed.getName(), e);
            }
        }
    }

    /**
     * Verifies a single process against the new feed definition: for every
     * cluster of the process, all inputs/outputs referencing this feed must
     * still satisfy cross-entity constraints (cluster coverage, retention,
     * instance ranges, partitions).
     */
    private void ensureValidityFor(Feed newFeed, Process process) throws FalconException {
        for (org.apache.falcon.entity.v0.process.Cluster cluster : process.getClusters().getClusters()) {
            String clusterName = cluster.getName();
            if (process.getInputs() != null) {
                for (Input input : process.getInputs().getInputs()) {
                    if (!input.getFeed().equals(newFeed.getName())) {
                        continue; // input references a different feed
                    }
                    CrossEntityValidations.validateFeedDefinedForCluster(newFeed, clusterName);
                    CrossEntityValidations.validateFeedRetentionPeriod(input.getStart(), newFeed, clusterName);
                    CrossEntityValidations.validateInstanceRange(process, input, newFeed);
                    validateInputPartition(newFeed, input);
                }
            }

            if (process.getOutputs() != null) {
                for (Output output : process.getOutputs().getOutputs()) {
                    if (!output.getFeed().equals(newFeed.getName())) {
                        continue; // output references a different feed
                    }
                    CrossEntityValidations.validateFeedDefinedForCluster(newFeed, clusterName);
                    CrossEntityValidations.validateInstance(process, output, newFeed);
                }
            }
        }
        // Log once per process (previously this was emitted once per cluster).
        LOG.debug("Verified and found {} to be valid for new definition of {}",
                process.getName(), newFeed.getName());
    }

    /**
     * Validates an input partition expression. Partitions are only meaningful
     * for filesystem storage; table storage encodes partitions in the table URI.
     */
    private void validateInputPartition(Feed newFeed, Input input) throws FalconException {
        if (input.getPartition() == null) {
            return;
        }

        final Storage.TYPE baseFeedStorageType = FeedHelper.getStorageType(newFeed);
        if (baseFeedStorageType == Storage.TYPE.FILESYSTEM) {
            CrossEntityValidations.validateInputPartition(input, newFeed);
        } else if (baseFeedStorageType == Storage.TYPE.TABLE) {
            throw new ValidationException("Input partitions are not supported for table storage: " + input.getName());
        }
    }

    /**
     * Ensures the cluster's validity window is well-formed (start before end).
     *
     * @throws ValidationException if start is after end
     * @throws FalconException if the dates cannot be compared (e.g. missing)
     */
    private void validateClusterValidity(Date start, Date end, String clusterName) throws FalconException {
        try {
            if (start.after(end)) {
                throw new ValidationException("Feed start time: " + start + " cannot be after feed end time: " + end
                        + " for cluster: " + clusterName);
            }
        } catch (ValidationException e) {
            // Propagate unchanged; re-wrapping via new ValidationException(e)
            // would mangle the user-facing message.
            throw e;
        } catch (Exception e) {
            // e.g. NPE when a validity date is absent from the definition.
            throw new FalconException(e);
        }
    }

    /**
     * Ensures the feed's retention limit on the given cluster is not shorter
     * than its late-arrival cut-off; otherwise late data could be evicted
     * before it can be processed.
     */
    private void validateFeedCutOffPeriod(Feed feed, Cluster cluster) throws FalconException {
        ExpressionHelper evaluator = ExpressionHelper.get();
        String feedRetention = cluster.getRetention().getLimit().toString();
        long retentionPeriod = evaluator.evaluate(feedRetention, Long.class);

        if (feed.getLateArrival() == null) {
            LOG.debug("Feed's late arrival cut-off not set");
            return;
        }

        String feedCutoff = feed.getLateArrival().getCutOff().toString();
        long feedCutOffPeriod = evaluator.evaluate(feedCutoff, Long.class);
        if (retentionPeriod < feedCutOffPeriod) {
            throw new ValidationException(
                    "Feed's retention limit: " + feedRetention + " of referenced cluster " + cluster.getName()
                            + " should be more than feed's late arrival cut-off period: " + feedCutoff + " for feed: "
                            + feed.getName());
        }
    }

    /**
     * Validates partition expressions across clusters: no duplicate clusters,
     * at least one source when targets exist, partition expressions present
     * where replication between multiple clusters requires them, and partition
     * counts not exceeding the feed's declared partitions.
     */
    private void validateFeedPartitionExpression(Feed feed) throws FalconException {
        int numSourceClusters = 0;
        int numTrgClusters = 0;
        Set<String> clusters = new HashSet<String>();
        for (Cluster cl : feed.getClusters().getClusters()) {
            if (!clusters.add(cl.getName())) {
                throw new ValidationException("Cluster: " + cl.getName()
                        + " is defined more than once for feed: " + feed.getName());
            }
            if (cl.getType() == ClusterType.SOURCE) {
                numSourceClusters++;
            } else if (cl.getType() == ClusterType.TARGET) {
                numTrgClusters++;
            }
        }

        if (numTrgClusters >= 1 && numSourceClusters == 0) {
            throw new ValidationException("Feed: " + feed.getName()
                    + " should have at least one source cluster defined");
        }

        int feedParts = feed.getPartitions() != null ? feed.getPartitions().getPartitions().size() : 0;

        for (Cluster cluster : feed.getClusters().getClusters()) {
            if (cluster.getType() == ClusterType.SOURCE && numSourceClusters > 1 && numTrgClusters >= 1) {
                // With multiple sources replicating to targets, each source must
                // carry a partition expression to disambiguate its data.
                String part = FeedHelper.normalizePartitionExpression(cluster.getPartition());
                if (StringUtils.split(part, '/').length == 0) {
                    throw new ValidationException(
                            "Partition expression has to be specified for cluster " + cluster.getName()
                                    + " as there are more than one source clusters");
                }
                validateClusterExpDefined(cluster);

            } else if (cluster.getType() == ClusterType.TARGET) {

                for (Cluster src : feed.getClusters().getClusters()) {
                    if (src.getType() == ClusterType.SOURCE) {
                        String part = FeedHelper.normalizePartitionExpression(src.getPartition(),
                                cluster.getPartition());
                        int numParts = StringUtils.split(part, '/').length;
                        if (numParts > feedParts) {
                            throw new ValidationException(
                                    "Partition for " + src.getName() + " and " + cluster.getName()
                                            + " clusters is more than the number of partitions defined in feed");
                        }
                    }
                }

                if (numTrgClusters > 1 && numSourceClusters >= 1) {
                    validateClusterExpDefined(cluster);
                }
            }
        }
    }

    /**
     * Ensures that a declared partition expression contains at least one
     * cluster-expression tag (i.e. it does not evaluate to itself).
     */
    private void validateClusterExpDefined(Cluster cl) throws FalconException {
        if (cl.getPartition() == null) {
            return;
        }

        org.apache.falcon.entity.v0.cluster.Cluster cluster = EntityUtil.getEntity(EntityType.CLUSTER, cl.getName());
        String part = FeedHelper.normalizePartitionExpression(cl.getPartition());
        if (FeedHelper.evaluateClusterExp(cluster, part).equals(part)) {
            throw new ValidationException(
                    "At least one of the partition tags has to be a cluster expression for cluster " + cl.getName());
        }
    }

    /**
     * Ensure table is already defined in the catalog registry.
     * Does not matter for FileSystem storage.
     */
    private void validateFeedStorage(Feed feed) throws FalconException {
        validateUser(feed);
        validateACL(feed);

        final Storage.TYPE baseFeedStorageType = FeedHelper.getStorageType(feed);
        validateMultipleSourcesExist(feed, baseFeedStorageType);
        validateUniformStorageType(feed, baseFeedStorageType);
        validatePartitions(feed, baseFeedStorageType);
        validateStorageExists(feed);
    }

    /**
     * Table-storage feeds support at most one source cluster; filesystem
     * feeds are exempt from this check.
     */
    private void validateMultipleSourcesExist(Feed feed, Storage.TYPE baseFeedStorageType) throws FalconException {
        if (baseFeedStorageType == Storage.TYPE.FILESYSTEM) {
            return;
        }

        // validate that there is only one source cluster
        int numberOfSourceClusters = 0;
        for (Cluster cluster : feed.getClusters().getClusters()) {
            if (cluster.getType() == ClusterType.SOURCE) {
                numberOfSourceClusters++;
            }
        }

        if (numberOfSourceClusters > 1) {
            throw new ValidationException("Multiple sources are not supported for feed with table storage: "
                    + feed.getName());
        }
    }

    /**
     * Every cluster of the feed must resolve to the same storage type as the
     * feed's base storage type.
     */
    private void validateUniformStorageType(Feed feed, Storage.TYPE feedStorageType) throws FalconException {
        for (Cluster cluster : feed.getClusters().getClusters()) {
            Storage.TYPE feedClusterStorageType = FeedHelper.getStorageType(feed, cluster);

            if (feedStorageType != feedClusterStorageType) {
                throw new ValidationException("The storage type is not uniform for cluster: " + cluster.getName());
            }
        }
    }

    /**
     * A cluster hosting a table-storage feed must expose a registry (catalog)
     * interface endpoint.
     */
    private void validateClusterHasRegistry(Feed feed, Cluster cluster) throws FalconException {
        Storage.TYPE feedClusterStorageType = FeedHelper.getStorageType(feed, cluster);

        if (feedClusterStorageType != Storage.TYPE.TABLE) {
            return;
        }

        org.apache.falcon.entity.v0.cluster.Cluster clusterEntity = EntityUtil.getEntity(EntityType.CLUSTER,
                cluster.getName());
        if (ClusterHelper.getRegistryEndPoint(clusterEntity) == null) {
            throw new ValidationException("Cluster should have registry interface defined: " + clusterEntity.getName());
        }
    }

    /**
     * Table-storage feeds must not declare feed-level partitions; partitions
     * belong in the table URI instead.
     */
    private void validatePartitions(Feed feed, Storage.TYPE storageType) throws FalconException {
        if (storageType == Storage.TYPE.TABLE && feed.getPartitions() != null) {
            throw new ValidationException("Partitions are not supported for feeds with table storage. "
                    + "It should be defined as part of the table URI. "
                    + feed.getName());
        }
    }

    /**
     * For table storage, verifies the referenced table exists in the catalog of
     * every cluster this server is responsible for. All missing tables are
     * collected and reported in a single exception.
     */
    private void validateStorageExists(Feed feed) throws FalconException {
        StringBuilder buffer = new StringBuilder();
        for (Cluster cluster : feed.getClusters().getClusters()) {
            org.apache.falcon.entity.v0.cluster.Cluster clusterEntity =
                    EntityUtil.getEntity(EntityType.CLUSTER, cluster.getName());
            if (!EntityUtil.responsibleFor(clusterEntity.getColo())) {
                continue; // another colo's server owns this cluster
            }

            final Storage storage = FeedHelper.createStorage(cluster, feed);
            // this is only true for table, filesystem always returns true
            if (storage.getType() == Storage.TYPE.FILESYSTEM) {
                continue;
            }

            CatalogStorage catalogStorage = (CatalogStorage) storage;
            String metaStorePrincipal = ClusterHelper.getPropertyValue(clusterEntity,
                    SecurityUtil.HIVE_METASTORE_PRINCIPAL);
            if (!CatalogServiceFactory.getCatalogService().tableExists(catalogStorage.getCatalogUrl(),
                    catalogStorage.getDatabase(), catalogStorage.getTable(), metaStorePrincipal)) {
                buffer.append("Table [")
                        .append(catalogStorage.getTable())
                        .append("] does not exist for feed: ")
                        .append(feed.getName())
                        .append(" in cluster: ")
                        .append(cluster.getName());
            }
        }

        if (buffer.length() > 0) {
            throw new ValidationException(buffer.toString());
        }
    }

    /**
     * Validates the feed's ACL (owner, group, permission) against the storage of
     * every cluster this server is responsible for.
     */
    private void validateACL(Feed feed) throws FalconException {
        for (Cluster cluster : feed.getClusters().getClusters()) {
            org.apache.falcon.entity.v0.cluster.Cluster clusterEntity =
                    EntityUtil.getEntity(EntityType.CLUSTER, cluster.getName());
            if (!EntityUtil.responsibleFor(clusterEntity.getColo())) {
                continue;
            }

            final Storage storage = FeedHelper.createStorage(cluster, feed);
            try {
                storage.validateACL(feed.getACL().getOwner(), feed.getACL().getGroup(), feed.getACL().getPermission());
            } catch (FalconException e) {
                throw new ValidationException(e);
            }
        }
    }

    /**
     * Validate if FileSystem based feed contains location type data.
     *
     * @param feed Feed entity
     * @throws FalconException
     */
    private void validateFeedPath(Feed feed) throws FalconException {
        if (FeedHelper.getStorageType(feed) == Storage.TYPE.TABLE) {
            return;
        }

        for (Cluster cluster : feed.getClusters().getClusters()) {
            List<Location> locations = FeedHelper.getLocations(cluster, feed);
            Location dataLocation = FileSystemStorage.getLocation(locations, LocationType.DATA);

            if (dataLocation == null) {
                throw new ValidationException(feed.getName() + " is a FileSystem based feed "
                        + "but it doesn't contain location type - data in cluster " + cluster.getName());
            }
        }
    }
}