/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.falcon.entity; import org.apache.falcon.FalconException; import org.apache.falcon.entity.common.FeedDataPath; import org.apache.falcon.entity.v0.cluster.Cluster; import org.apache.falcon.entity.v0.cluster.Interfacetype; import org.apache.falcon.entity.v0.feed.CatalogTable; import org.apache.falcon.entity.v0.feed.Feed; import org.apache.falcon.entity.v0.feed.LocationType; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Date; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; /** * A catalog registry implementation of a feed storage. */ public class CatalogStorage implements Storage { public static final String PARTITION_SEPARATOR = ";"; public static final String PARTITION_KEYVAL_SEPARATOR = "="; public static final String INPUT_PATH_SEPARATOR = ":"; public static final String OUTPUT_PATH_SEPARATOR = "/"; public static final String PARTITION_VALUE_QUOTE = "'"; public static final String CATALOG_URL = "${hcatNode}"; private final String catalogUrl; private String database; private String table; private Map<String, String> partitions; protected CatalogStorage(Feed feed) throws URISyntaxException { this(CATALOG_URL, feed.getTable()); } public CatalogStorage(Cluster cluster, CatalogTable table) throws URISyntaxException { this(ClusterHelper.getInterface(cluster, Interfacetype.REGISTRY).getEndpoint(), table); } protected CatalogStorage(String catalogUrl, CatalogTable table) throws URISyntaxException { this(catalogUrl, table.getUri()); } protected CatalogStorage(String catalogUrl, String tableUri) throws URISyntaxException { if (catalogUrl == null || catalogUrl.length() == 0) { throw new IllegalArgumentException("Catalog Registry URL cannot be null or empty"); } this.catalogUrl = catalogUrl; parseFeedUri(tableUri); } /** * Validate URI to conform to catalog:$database:$table#$partitions. * scheme=catalog:database=$database:table=$table#$partitions * partitions=key=value;key=value * * @param catalogTableUri table URI to parse and validate * @throws URISyntaxException */ private void parseFeedUri(String catalogTableUri) throws URISyntaxException { final String processed = catalogTableUri.replaceAll(DOLLAR_EXPR_START_REGEX, DOLLAR_EXPR_START_NORMALIZED) .replaceAll("}", EXPR_CLOSE_NORMALIZED); URI tableUri = new URI(processed); if (!"catalog".equals(tableUri.getScheme())) { throw new URISyntaxException(tableUri.toString(), "catalog scheme is missing"); } final String schemeSpecificPart = tableUri.getSchemeSpecificPart(); if (schemeSpecificPart == null) { throw new URISyntaxException(tableUri.toString(), "Database and Table are missing"); } String[] paths = schemeSpecificPart.split(INPUT_PATH_SEPARATOR); if (paths.length != 2) { throw new URISyntaxException(tableUri.toString(), "URI path is not in expected format: database:table"); } database = paths[0]; table = paths[1]; if (database == null || database.length() == 0) { throw new URISyntaxException(tableUri.toString(), "DB name is missing"); } if (table == null || table.length() == 0) { throw new URISyntaxException(tableUri.toString(), "Table name is missing"); } String partRaw = tableUri.getFragment(); if (partRaw == null || partRaw.length() == 0) { throw new URISyntaxException(tableUri.toString(), "Partition details are missing"); } final String rawPartition = partRaw.replaceAll(DOLLAR_EXPR_START_NORMALIZED, DOLLAR_EXPR_START_REGEX) .replaceAll(EXPR_CLOSE_NORMALIZED, EXPR_CLOSE_REGEX); partitions = new LinkedHashMap<String, String>(); // preserve insertion order String[] parts = rawPartition.split(PARTITION_SEPARATOR); for (String part : parts) { if (part == null || part.length() == 0) { continue; } String[] keyVal = part.split(PARTITION_KEYVAL_SEPARATOR); if (keyVal.length != 2) { throw new URISyntaxException(tableUri.toString(), "Partition key value pair is not specified properly in (" + part + ")"); } partitions.put(keyVal[0], keyVal[1]); } } /** * Create an instance from the URI Template that was generated using * the getUriTemplate() method. * * @param uriTemplate the uri template from org.apache.falcon.entity.CatalogStorage#getUriTemplate * @throws URISyntaxException */ protected CatalogStorage(String uriTemplate) throws URISyntaxException { if (uriTemplate == null || uriTemplate.length() == 0) { throw new IllegalArgumentException("URI template cannot be null or empty"); } final String processed = uriTemplate.replaceAll(DOLLAR_EXPR_START_REGEX, DOLLAR_EXPR_START_NORMALIZED) .replaceAll("}", EXPR_CLOSE_NORMALIZED); URI uri = new URI(processed); this.catalogUrl = uri.getScheme() + "://" + uri.getAuthority(); parseUriTemplate(uri); } private void parseUriTemplate(URI uriTemplate) throws URISyntaxException { String path = uriTemplate.getPath(); String[] paths = path.split(OUTPUT_PATH_SEPARATOR); if (paths.length != 4) { throw new URISyntaxException(uriTemplate.toString(), "URI path is not in expected format: database:table"); } database = paths[1]; table = paths[2]; String partRaw = paths[3]; if (database == null || database.length() == 0) { throw new URISyntaxException(uriTemplate.toString(), "DB name is missing"); } if (table == null || table.length() == 0) { throw new URISyntaxException(uriTemplate.toString(), "Table name is missing"); } if (partRaw == null || partRaw.length() == 0) { throw new URISyntaxException(uriTemplate.toString(), "Partition details are missing"); } String rawPartition = partRaw.replaceAll(DOLLAR_EXPR_START_NORMALIZED, DOLLAR_EXPR_START_REGEX) .replaceAll(EXPR_CLOSE_NORMALIZED, EXPR_CLOSE_REGEX); partitions = new LinkedHashMap<String, String>(); String[] parts = rawPartition.split(PARTITION_SEPARATOR); for (String part : parts) { if (part == null || part.length() == 0) { continue; } String[] keyVal = part.split(PARTITION_KEYVAL_SEPARATOR); if (keyVal.length != 2) { throw new URISyntaxException(uriTemplate.toString(), "Partition key value pair is not specified properly in (" + part + ")"); } partitions.put(keyVal[0], keyVal[1]); } } public String getCatalogUrl() { return catalogUrl; } public String getDatabase() { return database; } public String getTable() { return table; } public Map<String, String> getPartitions() { return partitions; } /** * @param key partition key * @return partition value */ public String getPartitionValue(String key) { return partitions.get(key); } /** * @param key partition key * @return if partitions map includes the key or not */ public boolean hasPartition(String key) { return partitions.containsKey(key); } public List<String> getDatedPartitionKeys() { List<String> keys = new ArrayList<String>(); for (Map.Entry<String, String> entry : getPartitions().entrySet()) { Matcher matcher = FeedDataPath.PATTERN.matcher(entry.getValue()); if (matcher.find()) { keys.add(entry.getKey()); } } return keys; } /** * Convert the partition map to filter string. * Each key value pair is separated by ';'. * * @return filter string */ public String toPartitionFilter() { StringBuilder filter = new StringBuilder(); filter.append("("); for (Map.Entry<String, String> entry : partitions.entrySet()) { if (filter.length() > 1) { filter.append(PARTITION_SEPARATOR); } filter.append(entry.getKey()); filter.append(PARTITION_KEYVAL_SEPARATOR); filter.append(PARTITION_VALUE_QUOTE); filter.append(entry.getValue()); filter.append(PARTITION_VALUE_QUOTE); } filter.append(")"); return filter.toString(); } /** * Convert the partition map to path string. * Each key value pair is separated by '/'. * * @return path string */ public String toPartitionAsPath() { StringBuilder partitionFilter = new StringBuilder(); for (Map.Entry<String, String> entry : getPartitions().entrySet()) { partitionFilter.append(entry.getKey()) .append(PARTITION_KEYVAL_SEPARATOR) .append(entry.getValue()) .append(OUTPUT_PATH_SEPARATOR); } partitionFilter.setLength(partitionFilter.length() - 1); return partitionFilter.toString(); } @Override public TYPE getType() { return TYPE.TABLE; } /** * LocationType does NOT matter here. */ @Override public String getUriTemplate() { return getUriTemplate(LocationType.DATA); } /** * LocationType does NOT matter here. */ @Override public String getUriTemplate(LocationType locationType) { StringBuilder uriTemplate = new StringBuilder(); uriTemplate.append(catalogUrl); uriTemplate.append(OUTPUT_PATH_SEPARATOR); uriTemplate.append(database); uriTemplate.append(OUTPUT_PATH_SEPARATOR); uriTemplate.append(table); uriTemplate.append(OUTPUT_PATH_SEPARATOR); for (Map.Entry<String, String> entry : partitions.entrySet()) { uriTemplate.append(entry.getKey()); uriTemplate.append(PARTITION_KEYVAL_SEPARATOR); uriTemplate.append(entry.getValue()); uriTemplate.append(PARTITION_SEPARATOR); } uriTemplate.setLength(uriTemplate.length() - 1); return uriTemplate.toString(); } @Override public boolean isIdentical(Storage toCompareAgainst) throws FalconException { if (!(toCompareAgainst instanceof CatalogStorage)) { return false; } CatalogStorage catalogStorage = (CatalogStorage) toCompareAgainst; return !(getCatalogUrl() != null && !getCatalogUrl().equals(catalogStorage.getCatalogUrl())) && getDatabase().equals(catalogStorage.getDatabase()) && getTable().equals(catalogStorage.getTable()) && getPartitions().equals(catalogStorage.getPartitions()); } @Override public void validateACL(String owner, String group, String permissions) throws FalconException { // This is not supported in Hive today as authorization is not enforced on table and // partition listing } @Override public List<FeedInstanceStatus> getListing(Feed feed, String cluster, LocationType locationType, Date start, Date end) throws FalconException { throw new UnsupportedOperationException("getListing"); } @Override public String toString() { return "CatalogStorage{" + "catalogUrl='" + catalogUrl + '\'' + ", database='" + database + '\'' + ", table='" + table + '\'' + ", partitions=" + partitions + '}'; } }