/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flume.formatter.output;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.flume.Clock;
import org.apache.flume.SystemClock;
import org.apache.flume.tools.TimestampRoundDownUtil;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class BucketPath {
/**
* These are useful to other classes which might want to search for tags in
* strings.
*/
public static final String TAG_REGEX = "%(\\w|%)|%\\{([\\w\\.-]+)\\}|%\\[(\\w+)\\]";
public static final Pattern tagPattern = Pattern.compile(TAG_REGEX);
private static Clock clock = new SystemClock();
/**
* Returns true if in contains a substring matching TAG_REGEX (i.e. of the
* form %{...} or %x.
*/
@VisibleForTesting
@Deprecated
public static boolean containsTag(String in) {
return tagPattern.matcher(in).find();
}
@VisibleForTesting
@Deprecated
public static String expandShorthand(char c) {
// It's a date
switch (c) {
case 'a':
return "weekday_short";
case 'A':
return "weekday_full";
case 'b':
return "monthname_short";
case 'B':
return "monthname_full";
case 'c':
return "datetime";
case 'd':
return "day_of_month_xx"; // two digit
case 'e':
return "day_of_month_x"; // 1 or 2 digit
case 'D':
return "date_short"; // "MM/dd/yy";
case 'H':
return "hour_24_xx";
case 'I':
return "hour_12_xx";
case 'j':
return "day_of_year_xxx"; // three digits
case 'k':
return "hour_24"; // 1 or 2 digits
case 'l':
return "hour_12"; // 1 or 2 digits
case 'm':
return "month_xx";
case 'n':
return "month_x"; // 1 or 2 digits
case 'M':
return "minute_xx";
case 'p':
return "am_pm";
case 's':
return "unix_seconds";
case 'S':
return "seconds_xx";
case 't':
// This is different from unix date (which would insert a tab character
// here)
return "unix_millis";
case 'y':
return "year_xx";
case 'Y':
return "year_xxxx";
case 'z':
return "timezone_delta";
default:
// LOG.warn("Unrecognized escape in event format string: %" + c);
return "" + c;
}
}
/**
* Hardcoded lookups for %x style escape replacement. Add your own!
*
* All shorthands are Date format strings, currently.
*
* Returns the empty string if an escape is not recognized.
*
* Dates follow the same format as unix date, with a few exceptions.
*
* <p>This static method will be REMOVED in a future version of Flume</p>
*
*/
@VisibleForTesting
@Deprecated
public static String replaceShorthand(char c, Map<String, String> headers) {
return replaceShorthand(c, headers, false, 0, 0);
}
/**
* A wrapper around
* {@link BucketPath#replaceShorthand(char, Map, TimeZone, boolean, int,
* int, boolean)}
* with the timezone set to the default.
*
* <p>This static method will be REMOVED in a future version of Flume</p>
*/
@VisibleForTesting
@Deprecated
public static String replaceShorthand(char c, Map<String, String> headers,
boolean needRounding, int unit, int roundDown) {
return replaceShorthand(c, headers, null, needRounding, unit, roundDown,
false);
}
/**
* Hardcoded lookups for %x style escape replacement. Add your own!
*
* All shorthands are Date format strings, currently.
*
* Returns the empty string if an escape is not recognized.
*
* Dates follow the same format as unix date, with a few exceptions.
*
* <p>This static method will be REMOVED in a future version of Flume</p>
*
* @param c - The character to replace.
* @param headers - Event headers
* @param timeZone - The timezone to use for formatting the timestamp
* @param needRounding - Should the timestamp be rounded down?
* @param unit - if needRounding is true, what unit to round down to. This
* must be one of the units specified by {@link java.util.Calendar} -
* HOUR, MINUTE or SECOND. Defaults to second, if none of these are present.
* Ignored if needRounding is false.
* @param roundDown - if needRounding is true,
* The time should be rounded to the largest multiple of this
* value, smaller than the time supplied, defaults to 1, if <= 0(rounds off
* to the second/minute/hour immediately lower than the timestamp supplied.
* Ignored if needRounding is false.
*
* @return
*/
@VisibleForTesting
@Deprecated
public static String replaceShorthand(char c, Map<String, String> headers,
TimeZone timeZone, boolean needRounding, int unit, int roundDown,
boolean useLocalTimestamp) {
long ts = 0;
if (useLocalTimestamp) {
ts = clock.currentTimeMillis();
}
return replaceShorthand(c, headers, timeZone, needRounding, unit,
roundDown, false, ts);
}
protected static final ThreadLocal<HashMap<String, SimpleDateFormat>> simpleDateFormatCache =
new ThreadLocal<HashMap<String, SimpleDateFormat>>() {
@Override
protected HashMap<String, SimpleDateFormat> initialValue() {
return new HashMap<String, SimpleDateFormat>();
}
};
protected static SimpleDateFormat getSimpleDateFormat(String string) {
HashMap<String, SimpleDateFormat> localCache = simpleDateFormatCache.get();
SimpleDateFormat simpleDateFormat = localCache.get(string);
if (simpleDateFormat == null) {
simpleDateFormat = new SimpleDateFormat(string);
localCache.put(string, simpleDateFormat);
simpleDateFormatCache.set(localCache);
}
return simpleDateFormat;
}
/**
* Not intended as a public API
*/
@VisibleForTesting
protected static String replaceStaticString(String key) {
String replacementString = "";
switch (key.toLowerCase()) {
case "localhost":
replacementString = InetAddressCache.hostName;
break;
case "ip":
replacementString = InetAddressCache.hostAddress;
break;
case "fqdn":
replacementString = InetAddressCache.canonicalHostName;
break;
default:
throw new RuntimeException("The static escape string '" + key + "'"
+ " was provided but does not match any of (localhost,IP,FQDN)");
}
return replacementString;
}
/**
* Not intended as a public API
*/
@VisibleForTesting
protected static String replaceShorthand(char c, Map<String, String> headers,
TimeZone timeZone, boolean needRounding, int unit, int roundDown,
boolean useLocalTimestamp, long ts) {
String timestampHeader = null;
try {
if (!useLocalTimestamp) {
timestampHeader = headers.get("timestamp");
Preconditions.checkNotNull(timestampHeader, "Expected timestamp in " +
"the Flume event headers, but it was null");
ts = Long.valueOf(timestampHeader);
} else {
timestampHeader = String.valueOf(ts);
}
} catch (NumberFormatException e) {
throw new RuntimeException("Flume wasn't able to parse timestamp header"
+ " in the event to resolve time based bucketing. Please check that"
+ " you're correctly populating timestamp header (for example using"
+ " TimestampInterceptor source interceptor).", e);
}
if (needRounding) {
ts = roundDown(roundDown, unit, ts, timeZone);
}
// It's a date
String formatString = "";
switch (c) {
case '%':
return "%";
case 'a':
formatString = "EEE";
break;
case 'A':
formatString = "EEEE";
break;
case 'b':
formatString = "MMM";
break;
case 'B':
formatString = "MMMM";
break;
case 'c':
formatString = "EEE MMM d HH:mm:ss yyyy";
break;
case 'd':
formatString = "dd";
break;
case 'e':
formatString = "d";
break;
case 'D':
formatString = "MM/dd/yy";
break;
case 'H':
formatString = "HH";
break;
case 'I':
formatString = "hh";
break;
case 'j':
formatString = "DDD";
break;
case 'k':
formatString = "H";
break;
case 'l':
formatString = "h";
break;
case 'm':
formatString = "MM";
break;
case 'M':
formatString = "mm";
break;
case 'n':
formatString = "M";
break;
case 'p':
formatString = "a";
break;
case 's':
return "" + (ts / 1000);
case 'S':
formatString = "ss";
break;
case 't':
// This is different from unix date (which would insert a tab character
// here)
return timestampHeader;
case 'y':
formatString = "yy";
break;
case 'Y':
formatString = "yyyy";
break;
case 'z':
formatString = "ZZZ";
break;
default:
// LOG.warn("Unrecognized escape in event format string: %" + c);
return "";
}
SimpleDateFormat format = getSimpleDateFormat(formatString);
if (timeZone != null) {
format.setTimeZone(timeZone);
} else {
format.setTimeZone(TimeZone.getDefault());
}
Date date = new Date(ts);
return format.format(date);
}
private static long roundDown(int roundDown, int unit, long ts, TimeZone timeZone) {
long timestamp = ts;
if (roundDown <= 0) {
roundDown = 1;
}
switch (unit) {
case Calendar.SECOND:
timestamp = TimestampRoundDownUtil.roundDownTimeStampSeconds(
ts, roundDown, timeZone);
break;
case Calendar.MINUTE:
timestamp = TimestampRoundDownUtil.roundDownTimeStampMinutes(
ts, roundDown, timeZone);
break;
case Calendar.HOUR_OF_DAY:
timestamp = TimestampRoundDownUtil.roundDownTimeStampHours(
ts, roundDown, timeZone);
break;
default:
timestamp = ts;
break;
}
return timestamp;
}
/**
* Replace all substrings of form %{tagname} with get(tagname).toString() and
* all shorthand substrings of form %x with a special value.
*
* Any unrecognized / not found tags will be replaced with the empty string.
*
* TODO(henry): we may want to consider taking this out of Event and into a
* more general class when we get more use cases for this pattern.
*/
public static String escapeString(String in, Map<String, String> headers) {
return escapeString(in, headers, false, 0, 0);
}
/**
* A wrapper around
* {@link BucketPath#escapeString(String, Map, TimeZone, boolean, int, int,
boolean)}
* with the timezone set to the default.
*/
public static String escapeString(String in, Map<String, String> headers,
boolean needRounding, int unit, int roundDown) {
return escapeString(in, headers, null, needRounding, unit, roundDown,
false);
}
/**
* Replace all substrings of form %{tagname} with get(tagname).toString() and
* all shorthand substrings of form %x with a special value.
*
* Any unrecognized / not found tags will be replaced with the empty string.
*
* TODO(henry): we may want to consider taking this out of Event and into a
* more general class when we get more use cases for this pattern.
*
* @param needRounding - Should the timestamp be rounded down?
* @param unit - if needRounding is true, what unit to round down to. This
* must be one of the units specified by {@link java.util.Calendar} -
* HOUR, MINUTE or SECOND. Defaults to second, if none of these are present.
* Ignored if needRounding is false.
* @param roundDown - if needRounding is true,
* The time should be rounded to the largest multiple of this
* value, smaller than the time supplied, defaults to 1, if <= 0(rounds off
* to the second/minute/hour immediately lower than the timestamp supplied.
* Ignored if needRounding is false.
* @return Escaped string.
*/
public static String escapeString(String in, Map<String, String> headers,
TimeZone timeZone, boolean needRounding, int unit, int roundDown,
boolean useLocalTimeStamp) {
long ts = clock.currentTimeMillis();
Matcher matcher = tagPattern.matcher(in);
StringBuffer sb = new StringBuffer();
while (matcher.find()) {
String replacement = "";
// Group 2 is the %{...} pattern
if (matcher.group(2) != null) {
replacement = headers.get(matcher.group(2));
if (replacement == null) {
replacement = "";
// LOG.warn("Tag " + matcher.group(2) + " not found");
}
// Group 3 is the %[...] pattern.
} else if (matcher.group(3) != null) {
replacement = replaceStaticString(matcher.group(3));
} else {
// The %x pattern.
// Since we know the match is a single character, we can
// switch on that rather than the string.
Preconditions.checkState(matcher.group(1) != null
&& matcher.group(1).length() == 1,
"Expected to match single character tag in string " + in);
char c = matcher.group(1).charAt(0);
replacement = replaceShorthand(c, headers, timeZone,
needRounding, unit, roundDown, useLocalTimeStamp, ts);
}
// The replacement string must have '$' and '\' chars escaped. This
// replacement string is pretty arcane.
//
// replace : '$' -> for java '\$' -> for regex "\\$"
// replacement: '\$' -> for regex '\\\$' -> for java "\\\\\\$"
//
// replace : '\' -> for java "\\" -> for regex "\\\\"
// replacement: '\\' -> for regex "\\\\" -> for java "\\\\\\\\"
// note: order matters
replacement = replacement.replaceAll("\\\\", "\\\\\\\\");
replacement = replacement.replaceAll("\\$", "\\\\\\$");
matcher.appendReplacement(sb, replacement);
}
matcher.appendTail(sb);
return sb.toString();
}
/**
* Instead of replacing escape sequences in a string, this method returns a
* mapping of an attribute name to the value based on the escape sequence
* found in the argument string.
*/
@VisibleForTesting
@Deprecated
public static Map<String, String> getEscapeMapping(String in,
Map<String, String> headers) {
return getEscapeMapping(in, headers, false, 0, 0);
}
@VisibleForTesting
@Deprecated
public static Map<String, String> getEscapeMapping(String in,
Map<String, String> headers, boolean needRounding,
int unit, int roundDown) {
Map<String, String> mapping = new HashMap<String, String>();
Matcher matcher = tagPattern.matcher(in);
while (matcher.find()) {
String replacement = "";
// Group 2 is the %{...} pattern
if (matcher.group(2) != null) {
replacement = headers.get(matcher.group(2));
if (replacement == null) {
replacement = "";
// LOG.warn("Tag " + matcher.group(2) + " not found");
}
mapping.put(matcher.group(2), replacement);
} else {
// The %x pattern.
// Since we know the match is a single character, we can
// switch on that rather than the string.
Preconditions.checkState(matcher.group(1) != null
&& matcher.group(1).length() == 1,
"Expected to match single character tag in string " + in);
char c = matcher.group(1).charAt(0);
replacement = replaceShorthand(c, headers,
needRounding, unit, roundDown);
mapping.put(expandShorthand(c), replacement);
}
}
return mapping;
}
/*
* May not be called from outside unit tests.
*/
@VisibleForTesting
public static void setClock(Clock clk) {
clock = clk;
}
/*
* May not be called from outside unit tests.
*/
@VisibleForTesting
public static Clock getClock() {
return clock;
}
private static final class InetAddressCache {
static String hostName = null;
static String hostAddress = null;
static String canonicalHostName = null;
static {
try {
InetAddress addr = InetAddress.getLocalHost();
hostName = addr.getHostName();
hostAddress = addr.getHostAddress();
canonicalHostName = addr.getCanonicalHostName();
} catch (UnknownHostException e) {
throw new RuntimeException("Unable to get localhost", e);
}
}
}
}