/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.falcon.util;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.entity.common.FeedDataPath;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Falcon specific utilities for the Radix Tree.
*/
public class FalconRadixUtils {
/**
 * Strategy used to compare a radix tree node's key with an input string.
 *
 * Implementations choose how the comparison is carried out, e.g. a regular expression based
 * comparison or a character by character comparison.
 */
public interface INodeAlgorithm {

    /**
     * Tells whether the node's key and the input match.
     *
     * @param key key of the node
     * @param input input String to be matched against key.
     * @return true if key and input match.
     */
    boolean match(String key, String input);

    /**
     * Tells whether the node's key is a prefix of the input.
     *
     * @param key key of the node
     * @param input input String to be checked against key.
     * @return true if input starts with key.
     */
    boolean startsWith(String key, String input);

    /**
     * Picks, among currentNode's children, the node that the traversal should visit next.
     *
     * @param currentNode node of the RadixTree which has already been matched.
     * @param input input String to be searched.
     * @return Node to be traversed next.
     */
    RadixNode getNextCandidate(RadixNode currentNode, String input);

    /**
     * For the given node and input key, finds the remaining text that still has to be matched
     * against the node's child sub tree.
     *
     * @param currentNode node of the RadixTree which has already been matched.
     * @param key input key being searched.
     * @return the part of key left over for the child sub tree.
     */
    String getRemainingText(RadixNode currentNode, String key);
}
/**
 * This Algorithm does a plain string comparison for all
 * type of operations on a node.
 */
static class StringAlgorithm implements INodeAlgorithm {

    /**
     * Checks whether the node's key and the input are equal.
     *
     * @param key key of the node
     * @param input input String to be matched against key.
     * @return true if both strings are equal (compared with StringUtils.equals).
     */
    @Override
    public boolean match(String key, String input) {
        return StringUtils.equals(key, input);
    }

    /**
     * Checks whether the input begins with the node's key.
     *
     * @param nodeKey key of the node
     * @param inputKey input String to be checked.
     * @return true if inputKey starts with nodeKey.
     */
    @Override
    public boolean startsWith(String nodeKey, String inputKey) {
        return inputKey.startsWith(nodeKey);
    }

    /**
     * Finds the child of currentNode whose key begins with the first character of the input
     * that follows currentNode's key.
     *
     * @param currentNode node of the RadixTree which has already been matched.
     * @param input input String to be searched.
     * @return the first such child, or null if there is none, including when the input has no
     *         characters left after currentNode's key.
     */
    @Override
    public RadixNode getNextCandidate(RadixNode currentNode, String input) {
        int consumed = currentNode.getKey().length();
        // Nothing is left to match once the node's key is removed (or the input is shorter than the key):
        // report "no candidate" instead of failing on charAt(0) of an empty remainder.
        if (input.length() <= consumed) {
            return null;
        }
        char nextChar = input.charAt(consumed);
        List<RadixNode> children = currentNode.getChildren();
        for (RadixNode child : children) {
            String childKey = child.getKey();
            // an empty child key has no first character to compare, so it can never be a candidate
            if (StringUtils.isNotEmpty(childKey) && childKey.charAt(0) == nextChar) {
                return child;
            }
        }
        return null;
    }

    /**
     * Strips as many leading characters from key as currentNode's key is long.
     *
     * @param currentNode node of the RadixTree which has already been matched.
     * @param key input key being searched.
     * @return the part of key that follows currentNode's key.
     */
    @Override
    public String getRemainingText(RadixNode currentNode, String key) {
        return key.substring(currentNode.getKey().length());
    }
}
/**
 * Regular Expression Algorithm for the radix tree.
 *
 * It traverses the radix tree and matches expressions like ${YEAR} etc. with their allowable values e.g. 2014
 */
public static class FeedRegexAlgorithm implements INodeAlgorithm {

    /**
     * This function matches a feed path template with feed instance's path string.
     *
     * Key is assumed to be a feed's path template and inputString is assumed to be an instance's path string.
     * Variable/Regex parts of the feed's template are matched against the corresponding parts in inputString
     * using regular expression and for other parts a character by character match is performed.
     * e.g. Given templateString (/data/cas/${YEAR}/${MONTH}/${DAY}) and inputString (/data/cas/2014/09/09)
     * the function will return true.
     *
     * @param templateString Node's key (Feed's template path)
     * @param inputString inputString String to be matched against templateString(instance's path)
     * @return true if the templateString and inputString match, false otherwise. A blank templateString
     *         never matches.
     */
    @Override
    public boolean match(String templateString, String inputString) {
        if (StringUtils.isBlank(templateString)) {
            return false;
        }

        // Divide the templateString and inputString into parts of regex and character matches
        List<String> templateParts = getPartsInPathTemplate(templateString);
        List<String> inputStringParts = getCorrespondingParts(inputString, templateParts);

        // a different number of parts means the input is shorter or longer than the template allows
        if (inputStringParts.size() != templateParts.size()) {
            return false;
        }

        for (int i = 0; i < templateParts.size(); i++) {
            if (!matchPart(templateParts.get(i), inputStringParts.get(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Finds if the current node's key is a prefix of the given inputString or not.
     *
     * @param inputTemplate inputTemplate String
     * @param inputString inputString to be checked
     * @return true if inputString starts with inputTemplate, false otherwise. A blank inputString never
     *         has a prefix, and a blank inputTemplate is a prefix of every non blank inputString.
     */
    @Override
    public boolean startsWith(String inputTemplate, String inputString) {
        if (StringUtils.isBlank(inputString)) {
            return false;
        }
        if (StringUtils.isBlank(inputTemplate)) {
            return true;
        }

        // divide inputTemplate and inputString into corresponding parts of regex and character only strings
        List<String> templateParts = getPartsInPathTemplate(inputTemplate);
        List<String> remainingPattern = getCorrespondingParts(inputString, templateParts);
        if (templateParts.size() > remainingPattern.size()) {
            return false;
        }

        // compare part by part till the templateParts end, whatever is left of the input is ignored
        for (int i = 0; i < templateParts.size(); i++) {
            if (!matchPart(templateParts.get(i), remainingPattern.get(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Finds the child of currentNode to be traversed next for the given input.
     *
     * The part of the input covered by currentNode's key is skipped first. A child whose key begins with
     * a known variable (e.g. ${YEAR}) is picked if the text that follows is a valid value for that variable,
     * any other child is picked if its first character equals the first character of the text that follows.
     *
     * @param currentNode node of the RadixTree which has already been matched.
     * @param input input String to be searched.
     * @return the first suitable child, or null if there is none. Children with an empty key or with a
     *         key beginning with an unknown or unterminated ${...} expression are never picked.
     */
    @Override
    public RadixNode getNextCandidate(RadixNode currentNode, String input) {
        // replace the regex with pattern's length
        int consumed = getPatternsEffectiveLength(currentNode.getKey());
        // no text is left for any child to match, so there can not be a candidate
        if (input.length() <= consumed) {
            return null;
        }
        String remainingText = input.substring(consumed);

        List<RadixNode> children = currentNode.getChildren();
        for (RadixNode child : children) {
            String key = child.getKey();
            if (StringUtils.isEmpty(key)) {
                continue; // nothing to compare against
            }
            if (key.startsWith("${")) {
                // get the regex, this is an empty string when the closing brace is missing
                String regex = key.substring(0, key.indexOf("}") + 1);
                // Only known variables can be matched. matchPart takes care of cutting the value out of
                // remainingText and rejects a remainingText which is too short to hold the value.
                if (getMatchingRegex(regex) != null && matchPart(regex, remainingText)) {
                    return child;
                }
            } else if (key.charAt(0) == remainingText.charAt(0)) {
                return child;
            }
        }
        return null;
    }

    /**
     * Strips from inputString the leading characters covered by currentNode's key, where every variable in
     * the key counts with the size of its value.
     *
     * @param currentNode node of the RadixTree which has already been matched.
     * @param inputString input String being searched.
     * @return the part of inputString left over for currentNode's child sub tree.
     */
    @Override
    public String getRemainingText(RadixNode currentNode, String inputString) {
        // find the match length for current inputString
        return inputString.substring(getPatternsEffectiveLength(currentNode.getKey()));
    }

    /**
     * Computes the length of a string matching the given template, i.e. the length of the template after
     * every variable has been replaced by a value of that variable's size.
     *
     * @param templateString template, may be blank
     * @return length of a matching string, 0 for a blank template.
     */
    private int getPatternsEffectiveLength(String templateString) {
        if (StringUtils.isBlank(templateString)) {
            return 0;
        }

        // Since we are only interested in the length, can replace pattern with a random string
        String expanded = templateString;
        for (FeedDataPath.VARS var : FeedDataPath.VARS.values()) {
            expanded = expanded.replace("${" + var.name() + "}", RandomStringUtils.random(var.getValueSize()));
        }
        return expanded.length();
    }

    /**
     * Divide a given template string into parts of regex and character strings
     * e.g. /data/cas/${YEAR}/${MONTH}/${DAY} will be converted to
     * [/data/cas/, ${YEAR}, /, ${MONTH}, /, ${DAY}]
     *
     * The text in front of a variable is always added, even if it is empty (template begins with a variable
     * or two variables follow each other).
     *
     * @param templateString input string representing a feed's path template
     * @return unmodifiable list of parts in input templateString which are either completely regex
     *         or normal string.
     */
    private List<String> getPartsInPathTemplate(String templateString) {
        //divide the node's templateString in parts of regular expression and normal string
        List<String> parts = new ArrayList<String>();
        Matcher matcher = FeedDataPath.PATTERN.matcher(templateString);
        int currentIndex = 0;
        while (matcher.find()) {
            parts.add(templateString.substring(currentIndex, matcher.start()));
            parts.add(matcher.group());
            currentIndex = matcher.end();
        }
        // text after the last variable, or the whole template if it has no variable
        if (currentIndex != templateString.length()) {
            parts.add(templateString.substring(currentIndex));
        }
        return Collections.unmodifiableList(parts);
    }

    /**
     * Finds the variable which is written as the given expression.
     *
     * @param inputPart expression, something like ${YEAR}
     * @return the variable, or null if inputPart is not a known variable.
     */
    private FeedDataPath.VARS getMatchingRegex(String inputPart) {
        for (FeedDataPath.VARS var : FeedDataPath.VARS.values()) {
            if (inputPart.equals("${" + var.name() + "}")) {
                return var;
            }
        }
        return null;
    }

    /**
     * Divides a string into corresponding parts for the template to carry out comparison.
     * templateParts = [/data/cas/, ${YEAR}, /, ${MONTH}, /, ${DAY}]
     * inputString = /data/cas/2014/09/09
     * returns [/data/cas/, 2014, /, 09, /, 09]
     *
     * If the input ends before the template does, the last part is shorter than its template part and
     * fewer parts are returned. If the input is longer than the template, the non blank rest of the input
     * is added as one extra part.
     *
     * @param inputString normal string representing feed instance path
     * @param templateParts parts of feed's path template broken into regex and non-regex units.
     * @return a list of strings where each part of the list corresponds to a part in list of template parts.
     */
    private List<String> getCorrespondingParts(String inputString, List<String> templateParts) {
        List<String> stringParts = new ArrayList<String>();
        String remaining = inputString;
        int counter = 0;
        while (StringUtils.isNotBlank(remaining) && counter < templateParts.size()) {
            String currentTemplatePart = templateParts.get(counter);
            // never cut more than what is left of the input
            int length = Math.min(getPatternsEffectiveLength(currentTemplatePart), remaining.length());
            stringParts.add(remaining.substring(0, length));
            remaining = remaining.substring(length);
            counter++;
        }
        if (StringUtils.isNotBlank(remaining)) {
            stringParts.add(remaining);
        }
        return stringParts;
    }

    /**
     * Compare a pure regex or pure string part with a given string.
     *
     * @param template template part, which can either be a pure regex or pure non-regex string.
     * @param input input String to be matched against the template part.
     * @return true if the input string matches the template, in case of a regex component a regex comparison is
     *         made on the leading characters of input, else input has to begin with the template. A regex
     *         component which is not a known variable, or an input too short to hold the variable's value,
     *         does not match.
     */
    private boolean matchPart(String template, String input) {
        if (!template.startsWith("${")) {
            // do exact match with normal strings
            return input.startsWith(template);
        }

        // the part begins with ${ so it's a regex part, escape it the way the variables' regex is written
        String escapedTemplate = template.replace("${", "\\$\\{").replace("}", "\\}");
        for (FeedDataPath.VARS var : FeedDataPath.VARS.values()) { //find which regex is this
            if (StringUtils.equals(var.regex(), escapedTemplate)) { // regex found, do matching
                int valueSize = var.getValueSize();
                // a truncated input can not hold a complete value; without this check substring would throw
                if (input.length() < valueSize) {
                    return false;
                }
                //find part of the input string which should be matched against regex
                String desiredPart = input.substring(0, valueSize);
                return Pattern.compile(var.getValuePattern()).matcher(desiredPart).matches();
            }
        }
        return false;
    }
}
}