/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.falcon.entity;

import org.apache.commons.lang.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.Tag;
import org.apache.falcon.entity.common.FeedDataPath;
import org.apache.falcon.entity.common.FeedDataPath.VARS;
import org.apache.falcon.entity.v0.Entity;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.cluster.Property;
import org.apache.falcon.entity.v0.feed.CatalogTable;
import org.apache.falcon.entity.v0.feed.Cluster;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.Location;
import org.apache.falcon.entity.v0.feed.LocationType;
import org.apache.falcon.entity.v0.feed.Locations;
import org.apache.falcon.expression.ExpressionHelper;
import org.apache.falcon.resource.APIResult;
import org.apache.falcon.resource.FeedInstanceResult;
import org.apache.falcon.util.BuildProperties;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.URISyntaxException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Feed entity helper methods.
 */
public final class FeedHelper {

    private static final Logger LOG = LoggerFactory.getLogger(FeedHelper.class);

    /** Finest-grained date mask supported in feed paths: year down to minute. */
    public static final String FORMAT = "yyyyMMddHHmm";

    private FeedHelper() {}

    /**
     * Returns the feed's cluster definition with the given name, or {@code null}
     * if the feed does not reference that cluster.
     *
     * @param feed        feed entity
     * @param clusterName name of the cluster to look up
     * @return matching {@link Cluster} or {@code null}
     */
    public static Cluster getCluster(Feed feed, String clusterName) {
        for (Cluster cluster : feed.getClusters().getClusters()) {
            if (cluster.getName().equals(clusterName)) {
                return cluster;
            }
        }
        return null;
    }

    /**
     * Creates a storage instance from the feed's global definition:
     * file-system locations take precedence over a catalog table.
     *
     * @param feed feed entity
     * @return an implementation of {@link Storage}
     * @throws FalconException if neither locations nor a table is defined,
     *                         or the table URI is malformed
     */
    public static Storage createStorage(Feed feed) throws FalconException {
        final Locations feedLocations = feed.getLocations();
        if (feedLocations != null && feedLocations.getLocations().size() != 0) {
            return new FileSystemStorage(feed);
        }

        try {
            final CatalogTable table = feed.getTable();
            if (table != null) {
                return new CatalogStorage(feed);
            }
        } catch (URISyntaxException e) {
            throw new FalconException(e);
        }

        throw new FalconException("Both catalog and locations are not defined.");
    }

    /**
     * Creates a storage instance for the feed on the given cluster entity.
     *
     * @param clusterEntity cluster entity
     * @param feed          feed entity
     * @return an implementation of {@link Storage}
     * @throws FalconException if storage cannot be resolved
     */
    public static Storage createStorage(org.apache.falcon.entity.v0.cluster.Cluster clusterEntity,
                                        Feed feed) throws FalconException {
        return createStorage(getCluster(feed, clusterEntity.getName()), feed, clusterEntity);
    }

    /**
     * Creates a storage instance for the feed on the named cluster.
     *
     * @param clusterName name of the cluster
     * @param feed        feed entity
     * @return an implementation of {@link Storage}
     * @throws FalconException if storage cannot be resolved
     */
    public static Storage createStorage(String clusterName, Feed feed) throws FalconException {
        return createStorage(getCluster(feed, clusterName), feed);
    }

    /**
     * Creates a storage instance for the feed's cluster, resolving the
     * cluster entity from the config store.
     *
     * @param cluster feed cluster definition
     * @param feed    feed entity
     * @return an implementation of {@link Storage}
     * @throws FalconException if storage cannot be resolved
     */
    public static Storage createStorage(Cluster cluster, Feed feed) throws FalconException {
        final org.apache.falcon.entity.v0.cluster.Cluster clusterEntity =
                EntityUtil.getEntity(EntityType.CLUSTER, cluster.getName());
        return createStorage(cluster, feed, clusterEntity);
    }

    /**
     * Creates a storage instance, honoring cluster-level overrides of
     * locations/table over the feed-level definitions.
     *
     * @param cluster       feed cluster definition (may carry overrides)
     * @param feed          feed entity
     * @param clusterEntity cluster entity used for the storage URL
     * @return an implementation of {@link Storage}
     * @throws FalconException if neither locations nor a table is defined,
     *                         or the table URI is malformed
     */
    public static Storage createStorage(Cluster cluster, Feed feed,
                                        org.apache.falcon.entity.v0.cluster.Cluster clusterEntity)
        throws FalconException {

        final List<Location> locations = getLocations(cluster, feed);
        if (locations != null) {
            return new FileSystemStorage(ClusterHelper.getStorageUrl(clusterEntity), locations);
        }

        try {
            final CatalogTable table = getTable(cluster, feed);
            if (table != null) {
                return new CatalogStorage(clusterEntity, table);
            }
        } catch (URISyntaxException e) {
            throw new FalconException(e);
        }

        throw new FalconException("Both catalog and locations are not defined.");
    }

    /**
     * Factory method to dole out a storage instance used for replication source.
     *
     * @param clusterEntity cluster entity
     * @param feed feed entity
     * @return an implementation of Storage
     * @throws FalconException
     */
    public static Storage createReadOnlyStorage(org.apache.falcon.entity.v0.cluster.Cluster clusterEntity,
                                                Feed feed) throws FalconException {
        Cluster feedCluster = getCluster(feed, clusterEntity.getName());
        final List<Location> locations = getLocations(feedCluster, feed);
        if (locations != null) {
            // replication source is read with the read-only endpoint
            return new FileSystemStorage(ClusterHelper.getReadOnlyStorageUrl(clusterEntity), locations);
        }

        try {
            final CatalogTable table = getTable(feedCluster, feed);
            if (table != null) {
                return new CatalogStorage(clusterEntity, table);
            }
        } catch (URISyntaxException e) {
            throw new FalconException(e);
        }

        throw new FalconException("Both catalog and locations are not defined.");
    }

    /**
     * Creates a storage instance from a serialized type and URI template.
     *
     * @param type               storage type name ({@code FILESYSTEM} or {@code TABLE})
     * @param storageUriTemplate storage URI template
     * @return an implementation of {@link Storage}
     * @throws URISyntaxException       if the URI template is malformed
     * @throws IllegalArgumentException if the type is not a known storage type
     */
    public static Storage createStorage(String type, String storageUriTemplate)
        throws URISyntaxException {

        Storage.TYPE storageType = Storage.TYPE.valueOf(type);
        if (storageType == Storage.TYPE.FILESYSTEM) {
            return new FileSystemStorage(storageUriTemplate);
        } else if (storageType == Storage.TYPE.TABLE) {
            return new CatalogStorage(storageUriTemplate);
        }

        throw new IllegalArgumentException("Bad type: " + type);
    }

    /**
     * Determines the storage type from the feed's global definition.
     *
     * @param feed feed entity
     * @return the feed's {@link Storage.TYPE}
     * @throws FalconException if neither locations nor a table is defined
     */
    public static Storage.TYPE getStorageType(Feed feed) throws FalconException {
        final Locations feedLocations = feed.getLocations();
        if (feedLocations != null && feedLocations.getLocations().size() != 0) {
            return Storage.TYPE.FILESYSTEM;
        }

        final CatalogTable table = feed.getTable();
        if (table != null) {
            return Storage.TYPE.TABLE;
        }

        throw new FalconException("Both catalog and locations are not defined.");
    }

    /**
     * Determines the storage type for the feed on a cluster, honoring
     * cluster-level overrides.
     *
     * @param feed    feed entity
     * @param cluster feed cluster definition
     * @return the feed's {@link Storage.TYPE} on that cluster
     * @throws FalconException if neither locations nor a table is defined
     */
    public static Storage.TYPE getStorageType(Feed feed, Cluster cluster) throws FalconException {
        final List<Location> locations = getLocations(cluster, feed);
        if (locations != null) {
            return Storage.TYPE.FILESYSTEM;
        }

        final CatalogTable table = getTable(cluster, feed);
        if (table != null) {
            return Storage.TYPE.TABLE;
        }

        throw new FalconException("Both catalog and locations are not defined.");
    }

    /**
     * Determines the storage type for the feed on the given cluster entity.
     *
     * @param feed          feed entity
     * @param clusterEntity cluster entity
     * @return the feed's {@link Storage.TYPE} on that cluster
     * @throws FalconException if neither locations nor a table is defined
     */
    public static Storage.TYPE getStorageType(Feed feed,
                                              org.apache.falcon.entity.v0.cluster.Cluster clusterEntity)
        throws FalconException {
        Cluster feedCluster = getCluster(feed, clusterEntity.getName());
        return getStorageType(feed, feedCluster);
    }

    /**
     * Returns the effective locations for the feed on a cluster:
     * cluster-level locations override feed-level ones.
     *
     * @param cluster feed cluster definition
     * @param feed    feed entity
     * @return effective locations, or {@code null} if none are defined
     */
    public static List<Location> getLocations(Cluster cluster, Feed feed) {
        // check if locations are overridden in cluster
        final Locations clusterLocations = cluster.getLocations();
        if (clusterLocations != null && clusterLocations.getLocations().size() != 0) {
            return clusterLocations.getLocations();
        }

        final Locations feedLocations = feed.getLocations();
        return feedLocations == null ? null : feedLocations.getLocations();
    }

    /**
     * Returns the effective catalog table for the feed on a cluster:
     * a cluster-level table overrides the feed-level one.
     *
     * @param cluster feed cluster definition
     * @param feed    feed entity
     * @return effective table, or {@code null} if none is defined
     */
    protected static CatalogTable getTable(Cluster cluster, Feed feed) {
        // check if table is overridden in cluster
        if (cluster.getTable() != null) {
            return cluster.getTable();
        }

        return feed.getTable();
    }

    /**
     * Joins two partition expressions with '/', collapsing duplicate
     * separators and stripping leading/trailing slashes.
     *
     * @param part1 first partition expression (may be null/blank)
     * @param part2 second partition expression (may be null/blank)
     * @return normalized partition expression (possibly empty)
     */
    public static String normalizePartitionExpression(String part1, String part2) {
        String partExp = StringUtils.stripToEmpty(part1) + "/" + StringUtils.stripToEmpty(part2);
        partExp = partExp.replaceAll("//+", "/");
        partExp = StringUtils.stripStart(partExp, "/");
        partExp = StringUtils.stripEnd(partExp, "/");
        return partExp;
    }

    /**
     * Normalizes a single partition expression.
     *
     * @param partition partition expression (may be null/blank)
     * @return normalized partition expression (possibly empty)
     */
    public static String normalizePartitionExpression(String partition) {
        return normalizePartitionExpression(partition, null);
    }

    /**
     * Builds the property set exposed for cluster expression evaluation:
     * colo, name, and all declared cluster properties, under key "cluster".
     *
     * @param cluster cluster entity
     * @return properties containing a "cluster" variable map
     */
    public static Properties getClusterProperties(org.apache.falcon.entity.v0.cluster.Cluster cluster) {
        Properties properties = new Properties();
        Map<String, String> clusterVars = new HashMap<String, String>();
        clusterVars.put("colo", cluster.getColo());
        clusterVars.put("name", cluster.getName());
        if (cluster.getProperties() != null) {
            for (Property property : cluster.getProperties().getProperties()) {
                clusterVars.put(property.getName(), property.getValue());
            }
        }
        properties.put("cluster", clusterVars);
        return properties;
    }

    /**
     * Collects the feed's declared properties into a {@link Properties} object.
     *
     * @param feed feed entity
     * @return feed properties (empty if none are declared)
     */
    public static Properties getFeedProperties(Feed feed) {
        Properties feedProperties = new Properties();
        if (feed.getProperties() != null) {
            for (org.apache.falcon.entity.v0.feed.Property property
                    : feed.getProperties().getProperties()) {
                feedProperties.put(property.getName(), property.getValue());
            }
        }
        return feedProperties;
    }

    /**
     * Evaluates an EL expression against the cluster's properties.
     *
     * @param clusterEntity cluster entity supplying the variables
     * @param exp           expression to evaluate
     * @return the evaluated expression as a string
     * @throws FalconException if evaluation fails
     */
    public static String evaluateClusterExp(org.apache.falcon.entity.v0.cluster.Cluster clusterEntity,
                                            String exp) throws FalconException {
        Properties properties = getClusterProperties(clusterEntity);
        ExpressionHelper expHelp = ExpressionHelper.get();
        expHelp.setPropertiesForVariable(properties);
        return expHelp.evaluateFullExpression(exp, String.class);
    }

    /**
     * Builds the staging data path for a catalog-backed feed, including the
     * Oozie coord dated-partition expression.
     *
     * @param clusterEntity cluster entity
     * @param feed          feed entity
     * @param storage       catalog storage for the feed
     * @param tag           workflow tag
     * @param suffix        path suffix distinguishing the staging use
     * @return fully qualified staging data path
     */
    public static String getStagingPath(org.apache.falcon.entity.v0.cluster.Cluster clusterEntity,
                                        Feed feed, CatalogStorage storage, Tag tag, String suffix) {
        String stagingDirPath = getStagingDir(clusterEntity, feed, storage, tag);

        String datedPartitionKey = storage.getDatedPartitionKeys().get(0);
        String datedPartitionKeySuffix = datedPartitionKey + "=${coord:dataOutPartitionValue('output',"
                + "'" + datedPartitionKey + "')}";
        return stagingDirPath + "/"
                + datedPartitionKeySuffix + "/"
                + suffix + "/"
                + "data";
    }

    /**
     * Builds the staging directory for a catalog-backed feed under the
     * workflow's log path.
     *
     * @param clusterEntity cluster entity
     * @param feed          feed entity
     * @param storage       catalog storage for the feed
     * @param tag           workflow tag
     * @return fully qualified staging directory
     */
    public static String getStagingDir(org.apache.falcon.entity.v0.cluster.Cluster clusterEntity,
                                       Feed feed, CatalogStorage storage, Tag tag) {
        String workflowName = EntityUtil.getWorkflowName(
                tag, Arrays.asList(clusterEntity.getName()), feed).toString();

        // log path is created at scheduling wf and has 777 perms
        return ClusterHelper.getStorageUrl(clusterEntity)
                + EntityUtil.getLogPath(clusterEntity, feed) + "/"
                + workflowName + "/"
                + storage.getDatabase() + "/"
                + storage.getTable();
    }

    /**
     * Builds the user-workflow properties (name, engine, version) for the
     * given policy name.
     *
     * @param policy policy name; the workflow name is {@code policy + "-policy"}
     * @return workflow properties
     */
    public static Properties getUserWorkflowProperties(String policy) {
        Properties props = new Properties();
        props.put("userWorkflowName", policy + "-policy");
        props.put("userWorkflowEngine", "falcon");

        String version;
        try {
            version = BuildProperties.get().getProperty("build.version");
        } catch (Exception e) {
            // unfortunate that this is only available in prism/webapp
            version = "0.5";
        }
        props.put("userWorkflowVersion", version);
        return props;
    }

    /**
     * Replaces timed variables with corresponding time notations e.g., ${YEAR} with yyyy and so on.
     * @param templatePath - template feed path
     * @return time notations
     */
    public static String getDateFormatInPath(String templatePath) {
        String mask = extractDatePartFromPathMask(templatePath, templatePath);
        //yyyyMMddHHmm
        return mask.replaceAll(VARS.YEAR.regex(), "yyyy")
                .replaceAll(VARS.MONTH.regex(), "MM")
                .replaceAll(VARS.DAY.regex(), "dd")
                .replaceAll(VARS.HOUR.regex(), "HH")
                .replaceAll(VARS.MINUTE.regex(), "mm");
    }

    /**
     * Strips the non-date path fragments of {@code mask} out of {@code inPath},
     * leaving only the date portion.
     *
     * @param mask   template path whose literal (non-date-variable) fragments are removed
     * @param inPath path to strip the fragments from
     * @return the remaining (date) portion of {@code inPath}
     */
    private static String extractDatePartFromPathMask(String mask, String inPath) {
        String[] elements = FeedDataPath.PATTERN.split(mask);

        String out = inPath;
        for (String element : elements) {
            // Quote each literal fragment: path characters such as '.', '+' or '('
            // would otherwise be interpreted as regex metacharacters by replaceFirst.
            out = out.replaceFirst(Pattern.quote(element), "");
        }
        return out;
    }

    /**
     * Extracts date from the actual data path e.g., /path/2014/05/06 maps to 2014-05-06T00:00Z.
     * @param file - actual data path
     * @param templatePath - template path from feed definition
     * @param dateMask - path mask from getDateFormatInPath()
     * @param timeZone
     * @return date corresponding to the path
     */
    //consider just the first occurrence of the pattern
    public static Date getDate(Path file, String templatePath, String dateMask, String timeZone) {
        String path = extractDatePartFromPathMask(templatePath, file.toString());
        Map<VARS, String> map = getDatePartMap(path, dateMask);

        String errArg = file + "(" + templatePath + ")";
        if (map.isEmpty()) {
            LOG.warn("No date present in {}", errArg);
            return null;
        }

        // the date variables must be contiguous from YEAR downwards;
        // a gap (e.g. YEAR and DAY without MONTH) makes the path unparseable
        StringBuilder date = new StringBuilder();
        int ordinal = 0;
        for (Entry<VARS, String> entry : map.entrySet()) {
            if (ordinal++ == entry.getKey().ordinal()) {
                date.append(entry.getValue());
            } else {
                LOG.warn("Prior element to {} is missing {}", entry.getKey(), errArg);
                return null;
            }
        }

        try {
            // parse with only as much of yyyyMMddHHmm as the path provided
            DateFormat dateFormat = new SimpleDateFormat(FORMAT.substring(0, date.length()));
            dateFormat.setTimeZone(TimeZone.getTimeZone(timeZone));
            return dateFormat.parse(date.toString());
        } catch (ParseException e) {
            LOG.warn("Unable to parse date: {}, {}", date, errArg);
            return null;
        }
    }

    /**
     * Maps each date variable found in {@code mask} to the substring of
     * {@code path} at the same offsets. Only the first occurrence of each
     * variable is recorded; the {@link TreeMap} keeps them in VARS order.
     *
     * @param path actual path with literal fragments stripped
     * @param mask date mask with literal fragments stripped
     * @return ordered map of date variable to its value in the path
     */
    private static Map<VARS, String> getDatePartMap(String path, String mask) {
        Map<VARS, String> map = new TreeMap<VARS, String>();
        Matcher matcher = FeedDataPath.DATE_FIELD_PATTERN.matcher(mask);
        int start = 0;
        while (matcher.find(start)) {
            String subMask = mask.substring(matcher.start(), matcher.end());
            String subPath = path.substring(matcher.start(), matcher.end());
            VARS var = VARS.from(subMask);
            if (!map.containsKey(var)) {
                map.put(var, subPath);
            }
            start = matcher.start() + 1;
        }
        return map;
    }

    /**
     * Lists feed instances between {@code start} and {@code end} across all
     * clusters the feed is defined on in the current colos.
     *
     * @param entityObject feed entity (must be castable to {@link Feed})
     * @param start        start of the listing window
     * @param end          end of the listing window
     * @return listing result, one instance per feed instance found
     * @throws FalconException if storage resolution or listing fails
     */
    public static FeedInstanceResult getFeedInstanceListing(Entity entityObject,
                                                            Date start, Date end) throws FalconException {
        Set<String> clusters = EntityUtil.getClustersDefinedInColos(entityObject);
        FeedInstanceResult result = new FeedInstanceResult(APIResult.Status.SUCCEEDED, "Success");
        Feed feed = (Feed) entityObject; // loop-invariant cast, hoisted out of the cluster loop
        for (String cluster : clusters) {
            Storage storage = createStorage(cluster, feed);
            List<FeedInstanceStatus> feedListing =
                    storage.getListing(feed, cluster, LocationType.DATA, start, end);

            FeedInstanceResult.Instance[] instances =
                    new FeedInstanceResult.Instance[feedListing.size()];
            int index = 0;
            for (FeedInstanceStatus feedStatus : feedListing) {
                FeedInstanceResult.Instance instance = new FeedInstanceResult.Instance(
                        cluster, feedStatus.getInstance(), feedStatus.getStatus().name());
                instance.creationTime = feedStatus.getCreationTime();
                instance.uri = feedStatus.getUri();
                instance.size = feedStatus.getSize();
                instances[index++] = instance;
            }
            result.setInstances(instances);
        }
        return result;
    }
}