/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.source.extractor.utils;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.StringUtils;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Strings;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import gobblin.configuration.ConfigurationKeys;
import gobblin.source.extractor.watermark.WatermarkType;
/**
 * Static helpers used by source extractors: column-name handling, JSON element removal,
 * {@link java.util.Date} / Joda-Time conversions, timing output and small parsing utilities.
 *
 * <p>The {@code java.util.Date} based helpers create a fresh {@link SimpleDateFormat} per call
 * (using the JVM default locale and time zone), so no formatter instance is shared between calls.
 */
public class Utils {
  private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
  private static final Gson GSON = new Gson();
  // Shared instance: building an ObjectMapper for every CSV record is needlessly expensive.
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
  private static final String CURRENT_DAY = "CURRENTDAY";
  private static final String CURRENT_HOUR = "CURRENTHOUR";
  private static final String CURRENT_DATE_FORMAT = "yyyyMMddHHmmss";
  private static final String SELECT_KEYWORD = "select ";
  private static final String FROM_KEYWORD = " from ";

  /**
   * Wrap a comma-separated column list in a {@code COALESCE(...)} expression.
   *
   * @param columnOrColumnList a single column name or a comma-separated list of column names
   * @return {@code null} for null/empty input, {@code COALESCE(<input>)} when the input contains a
   *         comma, otherwise the input unchanged
   */
  public static String getCoalesceColumnNames(String columnOrColumnList) {
    if (Strings.isNullOrEmpty(columnOrColumnList)) {
      return null;
    }
    if (columnOrColumnList.contains(",")) {
      return "COALESCE(" + columnOrColumnList + ")";
    }
    return columnOrColumnList;
  }

  /**
   * Remove the member {@code key} from every object in the given array.
   *
   * <p>The element objects are modified in place; the returned array is a new array holding the
   * same (mutated) objects. Every element must be a JSON object.
   *
   * @param inputJsonArray array of JSON objects
   * @param key member name to remove from each object
   * @return a new array containing the objects of {@code inputJsonArray} with {@code key} removed
   */
  public static JsonArray removeElementFromJsonArray(JsonArray inputJsonArray, String key) {
    JsonArray outputJsonArray = new JsonArray();
    for (JsonElement element : inputJsonArray) {
      outputJsonArray.add(removeElementFromJsonObject(element.getAsJsonObject(), key));
    }
    return outputJsonArray;
  }

  /**
   * Remove the member {@code key} from the given object, in place.
   *
   * @param jsonObject object to modify; may be {@code null}
   * @param key member name to remove
   * @return the same {@code jsonObject} instance, or {@code null} if it was {@code null}
   */
  public static JsonObject removeElementFromJsonObject(JsonObject jsonObject, String key) {
    if (jsonObject == null) {
      return null;
    }
    jsonObject.remove(key);
    return jsonObject;
  }

  /**
   * Re-format a date/time string from one {@link SimpleDateFormat} pattern to another.
   *
   * @param input date/time text matching {@code inputfmt}
   * @param inputfmt pattern used to parse {@code input}
   * @param outputfmt pattern used to render the result
   * @return {@code input} rendered with {@code outputfmt}
   * @throws IllegalArgumentException if {@code input} cannot be parsed with {@code inputfmt}, or a
   *         pattern is invalid
   */
  public static String toDateTimeFormat(String input, String inputfmt, String outputfmt) {
    Date date;
    try {
      date = new SimpleDateFormat(inputfmt).parse(input);
    } catch (ParseException e) {
      // Fail with context here; letting a null Date reach format() would only produce an
      // uninformative NullPointerException.
      throw new IllegalArgumentException(
          "Cannot parse '" + input + "' using format '" + inputfmt + "'", e);
    }
    return new SimpleDateFormat(outputfmt).format(date);
  }

  /**
   * Parse {@code input} with {@code inputfmt}, then truncate it to the precision of
   * {@code outputfmt} by formatting and re-parsing it with that pattern.
   *
   * @param input date/time text matching {@code inputfmt}
   * @param inputfmt pattern used to parse {@code input}
   * @param outputfmt pattern whose precision the result is reduced to
   * @return the resulting date, or {@code null} if parsing fails (the failure is logged)
   */
  public static Date toDate(String input, String inputfmt, String outputfmt) {
    final SimpleDateFormat inputFormat = new SimpleDateFormat(inputfmt);
    final SimpleDateFormat outputFormat = new SimpleDateFormat(outputfmt);
    try {
      Date parsed = inputFormat.parse(input);
      return outputFormat.parse(outputFormat.format(parsed));
    } catch (ParseException e) {
      LOG.error("Parse to date failed", e);
      return null;
    }
  }

  /**
   * Format an epoch timestamp.
   *
   * @param epoch milliseconds since the epoch, as accepted by {@link Date#Date(long)}
   * @param format {@link SimpleDateFormat} pattern
   * @return the formatted timestamp
   */
  public static String epochToDate(long epoch, String format) {
    return new SimpleDateFormat(format).format(new Date(epoch));
  }

  /**
   * Parse a string as a {@code long}.
   *
   * @param value decimal text; may be null or empty
   * @return the parsed value, or {@code 0} for null/empty input
   * @throws NumberFormatException if {@code value} is non-empty and not a valid long
   */
  public static long getAsLong(String value) {
    if (Strings.isNullOrEmpty(value)) {
      return 0;
    }
    return Long.parseLong(value);
  }

  /**
   * Parse a string as an {@code int}.
   *
   * @param value decimal text; may be null or empty
   * @return the parsed value, or {@code 0} for null/empty input
   * @throws NumberFormatException if {@code value} is non-empty and not a valid int
   */
  public static int getAsInt(String value) {
    return getAsInt(value, 0);
  }

  /**
   * Interpret the decimal digits of {@code value} as a date written in {@code format}
   * (for example {@code 20200102} with {@code yyyyMMdd}).
   *
   * @param value number whose decimal representation is a formatted date
   * @param format {@link SimpleDateFormat} pattern describing that representation
   * @return the parsed date, or {@code null} if parsing fails (the failure is logged)
   */
  public static Date toDate(long value, String format) {
    try {
      return new SimpleDateFormat(format).parse(Long.toString(value));
    } catch (ParseException e) {
      LOG.error("Parse to date failed", e);
      return null;
    }
  }

  /**
   * Truncate a date to the precision of {@code format} by formatting and re-parsing it.
   *
   * @param date date to truncate
   * @param format {@link SimpleDateFormat} pattern defining the retained precision
   * @return the truncated date, or {@code null} if re-parsing fails (the failure is logged)
   */
  public static Date toDate(Date date, String format) {
    SimpleDateFormat fmt = new SimpleDateFormat(format);
    try {
      return fmt.parse(fmt.format(date));
    } catch (ParseException e) {
      LOG.error("Parse to date failed", e);
      return null;
    }
  }

  /**
   * Format a date.
   *
   * @param datetime date to format
   * @param format {@link SimpleDateFormat} pattern
   * @return the formatted date
   */
  public static String dateToString(Date datetime, String format) {
    return new SimpleDateFormat(format).format(datetime);
  }

  /**
   * @param datetime base date
   * @param days number of days to add (may be negative)
   * @return a new date {@code days} days after {@code datetime}
   */
  public static Date addDaysToDate(Date datetime, int days) {
    return addToDate(datetime, Calendar.DATE, days);
  }

  /**
   * @param datetime base date
   * @param hours number of hours to add (may be negative)
   * @return a new date {@code hours} hours after {@code datetime}
   */
  public static Date addHoursToDate(Date datetime, int hours) {
    return addToDate(datetime, Calendar.HOUR, hours);
  }

  /**
   * @param datetime base date
   * @param seconds number of seconds to add (may be negative)
   * @return a new date {@code seconds} seconds after {@code datetime}
   */
  public static Date addSecondsToDate(Date datetime, int seconds) {
    return addToDate(datetime, Calendar.SECOND, seconds);
  }

  /** Add {@code amount} units of the given {@link Calendar} field using a default-zone calendar. */
  private static Date addToDate(Date datetime, int calendarField, int amount) {
    Calendar calendar = Calendar.getInstance();
    calendar.setTime(datetime);
    calendar.add(calendarField, amount);
    return calendar.getTime();
  }

  /**
   * @param watermarkType watermark type to test; may be {@code null}
   * @return {@code true} only if {@code watermarkType} is {@link WatermarkType#SIMPLE}
   */
  public static boolean isSimpleWatermark(WatermarkType watermarkType) {
    return watermarkType == WatermarkType.SIMPLE;
  }

  /**
   * Render the time difference {@code end - start} as minutes, seconds and milliseconds.
   *
   * @param start start time in milliseconds
   * @param end end time in milliseconds
   * @return text of the form {@code "<m> min, <s> sec, <ms> millis"}
   */
  public static String printTiming(long start, long end) {
    long totalMillis = end - start;
    long mins = TimeUnit.MILLISECONDS.toMinutes(totalMillis);
    long secs = TimeUnit.MILLISECONDS.toSeconds(totalMillis) - TimeUnit.MINUTES.toSeconds(mins);
    long millis = totalMillis - TimeUnit.MINUTES.toMillis(mins) - TimeUnit.SECONDS.toMillis(secs);
    return String.format("%d min, %d sec, %d millis", mins, secs, millis);
  }

  /**
   * Get the column list from a user-provided query, to build a schema with those columns.
   *
   * <p>The text between the first {@code "select "} and the first {@code " from "} (matched
   * case-insensitively) is lower-cased, stripped of spaces and split on commas.
   *
   * @param query the query text
   * @return the lower-cased column expressions, or {@code null} if the query is null/empty or does
   *         not contain {@code "select "} followed by {@code " from "}
   */
  public static List<String> getColumnListFromQuery(String query) {
    if (Strings.isNullOrEmpty(query)) {
      return null;
    }
    // Locale.ROOT keeps the result independent of the JVM default locale (e.g. Turkish 'I').
    String queryLowerCase = query.toLowerCase(Locale.ROOT);
    int selectIndex = queryLowerCase.indexOf(SELECT_KEYWORD);
    int fromIndex = queryLowerCase.indexOf(FROM_KEYWORD);
    // Test the raw indexOf results: once the keyword length is added a miss is no longer negative.
    if (selectIndex < 0 || fromIndex < 0) {
      return null;
    }
    int startIndex = selectIndex + SELECT_KEYWORD.length();
    if (startIndex > fromIndex) {
      // " from " occurs before the end of "select "; there is no column list to extract.
      return null;
    }
    String[] inputQueryColumns = queryLowerCase.substring(startIndex, fromIndex).replace(" ", "").split(",");
    return Arrays.asList(inputQueryColumns);
  }

  /**
   * Convert a CSV record to a {@link JsonObject} keyed by the header's column names.
   *
   * @param bulkRecordHeader column names
   * @param record column values, in the same order as {@code bulkRecordHeader}
   * @param columnCount number of leading columns to convert; must not exceed the size of either list
   * @return a JSON object mapping each of the first {@code columnCount} column names to its value
   */
  public static JsonObject csvToJsonObject(List<String> bulkRecordHeader, List<String> record, int columnCount) {
    Map<String, String> resultInfo = new HashMap<>();
    for (int i = 0; i < columnCount; i++) {
      resultInfo.put(bulkRecordHeader.get(i), record.get(i));
    }
    JsonNode json = OBJECT_MAPPER.valueToTree(resultInfo);
    return GSON.fromJson(json.toString(), JsonObject.class);
  }

  /**
   * Parse a string as an {@code int}, with a fallback for missing input.
   *
   * @param value decimal text; may be null or empty
   * @param defaultValue value returned for null/empty input
   * @return the parsed value, or {@code defaultValue} for null/empty input
   * @throws NumberFormatException if {@code value} is non-empty and not a valid int
   */
  public static int getAsInt(String value, int defaultValue) {
    return Strings.isNullOrEmpty(value) ? defaultValue : Integer.parseInt(value);
  }

  /**
   * Read a property as a boolean.
   *
   * @param properties properties to read from
   * @param key property name
   * @param defaultValue textual value used when the property is absent
   * @return {@code true} only if the resolved text equals {@code "true"} ignoring case
   */
  public static boolean getPropAsBoolean(Properties properties, String key, String defaultValue) {
    return Boolean.parseBoolean(properties.getProperty(key, defaultValue));
  }

  /**
   * Replace special characters in a column name or table name.
   *
   * @param columnName name to process
   * @param escapeChars comma-separated list of character sequences to replace
   * @param character replacement for each listed sequence
   * @return {@code null} for null/empty {@code columnName}; {@code columnName} unchanged when
   *         {@code escapeChars} is null/empty; otherwise the name with every listed sequence replaced
   */
  public static String escapeSpecialCharacters(String columnName, String escapeChars, String character) {
    if (Strings.isNullOrEmpty(columnName)) {
      return null;
    }
    if (StringUtils.isEmpty(escapeChars)) {
      return columnName;
    }
    for (String specialChar : escapeChars.split(",")) {
      // An empty token (e.g. from a leading comma) would make replace() insert the replacement
      // between every character of the name.
      if (specialChar.isEmpty()) {
        continue;
      }
      columnName = columnName.replace(specialChar, character);
    }
    return columnName;
  }

  /**
   * Resolve a value such as {@code CURRENTDAY-1} or {@code CURRENTHOUR-1} to a number in the form
   * {@code yyyyMMddHHmmss}.
   *
   * <p>The keyword is matched case-insensitively. The single character following the keyword is
   * skipped without validation and the remainder is parsed as the number of days/hours to
   * subtract from the current time in {@code timezone}. Any other value is parsed as a plain long.
   *
   * @param value keyword expression or numeric text; may be null or empty
   * @param timezone time zone id; {@link ConfigurationKeys#DEFAULT_SOURCE_TIMEZONE} is used when null
   * @return {@code 0} for null/empty {@code value}, otherwise the resolved number
   * @throws NumberFormatException if the offset, or a non-keyword value, is not numeric
   * @throws StringIndexOutOfBoundsException if nothing follows the keyword
   * @throws IllegalArgumentException if {@code timezone} is not recognized
   */
  public static long getLongWithCurrentDate(String value, String timezone) {
    if (Strings.isNullOrEmpty(value)) {
      return 0;
    }
    DateTime time = getCurrentTime(timezone);
    DateTimeFormatter dtFormatter = DateTimeFormat.forPattern(CURRENT_DATE_FORMAT).withZone(time.getZone());
    String upperCaseValue = value.toUpperCase(Locale.ROOT);
    if (upperCaseValue.startsWith(CURRENT_DAY)) {
      int days = Integer.parseInt(value.substring(CURRENT_DAY.length() + 1));
      return Long.parseLong(dtFormatter.print(time.minusDays(days)));
    }
    if (upperCaseValue.startsWith(CURRENT_HOUR)) {
      int hours = Integer.parseInt(value.substring(CURRENT_HOUR.length() + 1));
      return Long.parseLong(dtFormatter.print(time.minusHours(hours)));
    }
    return Long.parseLong(value);
  }

  /**
   * Convert joda time to a string in the given format.
   *
   * @param input timestamp
   * @param format expected format
   * @param timezone time zone id used for rendering;
   *        {@link ConfigurationKeys#DEFAULT_SOURCE_TIMEZONE} is used when null
   * @return string format of timestamp
   * @throws IllegalArgumentException if {@code timezone} is not recognized
   */
  public static String dateTimeToString(DateTime input, String format, String timezone) {
    DateTimeFormatter outputDtFormat = DateTimeFormat.forPattern(format).withZone(resolveTimeZone(timezone));
    return outputDtFormat.print(input);
  }

  /**
   * Get current time - joda.
   *
   * @param timezone time zone id of current time;
   *        {@link ConfigurationKeys#DEFAULT_SOURCE_TIMEZONE} is used when null
   * @return current datetime in the given timezone
   * @throws IllegalArgumentException if {@code timezone} is not recognized
   */
  public static DateTime getCurrentTime(String timezone) {
    return new DateTime(resolveTimeZone(timezone));
  }

  /**
   * Convert timestamp in a string format to joda time.
   *
   * @param input timestamp
   * @param format timestamp format
   * @param timezone time zone id of timestamp;
   *        {@link ConfigurationKeys#DEFAULT_SOURCE_TIMEZONE} is used when null
   * @return joda time
   * @throws IllegalArgumentException if {@code timezone} is not recognized or {@code input} does
   *         not match {@code format}
   */
  public static DateTime toDateTime(String input, String format, String timezone) {
    DateTimeZone dateTimeZone = resolveTimeZone(timezone);
    DateTimeFormatter inputDtFormat = DateTimeFormat.forPattern(format).withZone(dateTimeZone);
    return inputDtFormat.parseDateTime(input).withZone(dateTimeZone);
  }

  /**
   * Convert timestamp in a long format to joda time.
   *
   * @param input timestamp whose decimal representation is written in {@code format}
   * @param format timestamp format
   * @param timezone time zone id of timestamp;
   *        {@link ConfigurationKeys#DEFAULT_SOURCE_TIMEZONE} is used when null
   * @return joda time
   */
  public static DateTime toDateTime(long input, String format, String timezone) {
    return toDateTime(Long.toString(input), format, timezone);
  }

  /** Resolve a possibly-null time zone id, defaulting to the configured source time zone. */
  private static DateTimeZone resolveTimeZone(String timezone) {
    return getTimeZone(StringUtils.defaultString(timezone, ConfigurationKeys.DEFAULT_SOURCE_TIMEZONE));
  }

  /**
   * Get time zone of time zone id.
   *
   * @param id timezone id
   * @return timezone
   * @throws IllegalArgumentException if {@code id} is not recognized
   */
  private static DateTimeZone getTimeZone(String id) {
    try {
      return DateTimeZone.forID(id);
    } catch (IllegalArgumentException e) {
      // Keep the original exception as the cause so the underlying reason is not lost.
      throw new IllegalArgumentException("TimeZone " + id + " not recognized", e);
    }
  }
}