/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.source.extractor.utils;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang3.StringUtils;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Strings;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

import gobblin.configuration.ConfigurationKeys;
import gobblin.source.extractor.watermark.WatermarkType;


/**
 * Static helpers used by source extractors: date/time conversion, simple query parsing,
 * JSON manipulation and lenient string-to-number conversion.
 */
public class Utils {

  private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
  private static final Gson GSON = new Gson();
  // Shared instance: it is never reconfigured after construction, so one mapper serves every record.
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
  private static final String CURRENT_DAY = "CURRENTDAY";
  private static final String CURRENT_HOUR = "CURRENTHOUR";
  private static final String CURRENT_DATE_FORMAT = "yyyyMMddHHmmss";
  private static final String SELECT_KEYWORD = "select ";
  private static final String FROM_KEYWORD = " from ";

  /**
   * Get coalesce of columns if there are multiple comma-separated columns.
   *
   * @param columnOrColumnList a single column name or a comma-separated list of column names
   * @return {@code null} for a null/empty input, {@code COALESCE(<input>)} when the input contains
   *         a comma, otherwise the input unchanged
   */
  public static String getCoalesceColumnNames(String columnOrColumnList) {
    if (Strings.isNullOrEmpty(columnOrColumnList)) {
      return null;
    }
    if (columnOrColumnList.contains(",")) {
      return "COALESCE(" + columnOrColumnList + ")";
    }
    return columnOrColumnList;
  }

  /**
   * Remove a member from every object of a JSON array.
   *
   * <p>The objects held by {@code inputJsonArray} are modified in place; the returned array is a new
   * container holding those same objects.
   *
   * @param inputJsonArray array whose elements are all JSON objects
   * @param key name of the member to remove from each object
   * @return a new array containing the stripped objects in their original order
   */
  public static JsonArray removeElementFromJsonArray(JsonArray inputJsonArray, String key) {
    JsonArray outputJsonArray = new JsonArray();
    for (JsonElement element : inputJsonArray) {
      outputJsonArray.add(removeElementFromJsonObject(element.getAsJsonObject(), key));
    }
    return outputJsonArray;
  }

  /**
   * Remove a member from a JSON object, modifying the object in place.
   *
   * @param jsonObject object to modify; may be {@code null}
   * @param key name of the member to remove
   * @return the same {@code jsonObject} instance, or {@code null} when {@code jsonObject} is null
   */
  public static JsonObject removeElementFromJsonObject(JsonObject jsonObject, String key) {
    if (jsonObject == null) {
      return null;
    }
    jsonObject.remove(key);
    return jsonObject;
  }

  /**
   * Re-format a timestamp string from one {@link SimpleDateFormat} pattern to another.
   *
   * @param input timestamp text
   * @param inputfmt pattern describing {@code input}
   * @param outputfmt pattern of the returned text
   * @return {@code input} rendered with {@code outputfmt}
   * @throws IllegalArgumentException if {@code input} cannot be parsed with {@code inputfmt}
   */
  public static String toDateTimeFormat(String input, String inputfmt, String outputfmt) {
    Date date;
    try {
      date = new SimpleDateFormat(inputfmt).parse(input);
    } catch (ParseException e) {
      // Fail with a descriptive error instead of formatting a null date further down.
      throw new IllegalArgumentException(
          "Unable to parse '" + input + "' using format '" + inputfmt + "'", e);
    }
    return new SimpleDateFormat(outputfmt).format(date);
  }

  /**
   * Parse a timestamp string and round-trip it through {@code outputfmt}, so any field that
   * {@code outputfmt} does not carry is dropped from the returned date.
   *
   * @param input timestamp text
   * @param inputfmt pattern describing {@code input}
   * @param outputfmt pattern the parsed date is formatted with and parsed back from
   * @return the resulting date, or {@code null} if either parse step fails (the failure is logged)
   */
  public static Date toDate(String input, String inputfmt, String outputfmt) {
    final SimpleDateFormat inputFormat = new SimpleDateFormat(inputfmt);
    final SimpleDateFormat outputFormat = new SimpleDateFormat(outputfmt);
    try {
      Date parsed = inputFormat.parse(input);
      return outputFormat.parse(outputFormat.format(parsed));
    } catch (ParseException e) {
      LOG.error("Parse to date failed", e);
      return null;
    }
  }

  /**
   * Format an epoch value as text.
   *
   * @param epoch value handed to {@link Date#Date(long)}, i.e. milliseconds since the epoch
   * @param format {@link SimpleDateFormat} pattern of the result
   * @return formatted timestamp
   */
  public static String epochToDate(long epoch, String format) {
    return new SimpleDateFormat(format).format(new Date(epoch));
  }

  /**
   * Parse a string as a long, treating null/empty as zero.
   *
   * @param value decimal text, may be null or empty
   * @return parsed value, or {@code 0} for a null/empty input
   * @throws NumberFormatException if {@code value} is non-empty and not a valid long
   */
  public static long getAsLong(String value) {
    if (Strings.isNullOrEmpty(value)) {
      return 0;
    }
    return Long.parseLong(value);
  }

  /**
   * Parse a string as an int, treating null/empty as zero.
   *
   * @param value decimal text, may be null or empty
   * @return parsed value, or {@code 0} for a null/empty input
   * @throws NumberFormatException if {@code value} is non-empty and not a valid int
   */
  public static int getAsInt(String value) {
    return getAsInt(value, 0);
  }

  /**
   * Interpret the decimal digits of a long as a timestamp in the given pattern
   * (for example {@code 20150101} with {@code yyyyMMdd}).
   *
   * @param value number whose decimal representation is the timestamp text
   * @param format {@link SimpleDateFormat} pattern describing that text
   * @return parsed date, or {@code null} if parsing fails (the failure is logged)
   */
  public static Date toDate(long value, String format) {
    try {
      return new SimpleDateFormat(format).parse(Long.toString(value));
    } catch (ParseException e) {
      LOG.error("Parse to date failed", e);
      return null;
    }
  }

  /**
   * Round-trip a date through the given pattern, dropping any field the pattern does not carry.
   *
   * @param date date to convert
   * @param format {@link SimpleDateFormat} pattern used to format and re-parse {@code date}
   * @return the re-parsed date, or {@code null} if re-parsing fails (the failure is logged)
   */
  public static Date toDate(Date date, String format) {
    SimpleDateFormat fmt = new SimpleDateFormat(format);
    try {
      return fmt.parse(fmt.format(date));
    } catch (ParseException e) {
      LOG.error("Parse to date failed", e);
      return null;
    }
  }

  /**
   * Format a date as text.
   *
   * @param datetime date to format
   * @param format {@link SimpleDateFormat} pattern of the result
   * @return formatted date
   */
  public static String dateToString(Date datetime, String format) {
    return new SimpleDateFormat(format).format(datetime);
  }

  /**
   * Add a (possibly negative) number of days to a date.
   *
   * @param datetime starting date
   * @param days number of days to add
   * @return shifted date
   */
  public static Date addDaysToDate(Date datetime, int days) {
    return addToDate(datetime, Calendar.DATE, days);
  }

  /**
   * Add a (possibly negative) number of hours to a date.
   *
   * @param datetime starting date
   * @param hours number of hours to add
   * @return shifted date
   */
  public static Date addHoursToDate(Date datetime, int hours) {
    return addToDate(datetime, Calendar.HOUR, hours);
  }

  /**
   * Add a (possibly negative) number of seconds to a date.
   *
   * @param datetime starting date
   * @param seconds number of seconds to add
   * @return shifted date
   */
  public static Date addSecondsToDate(Date datetime, int seconds) {
    return addToDate(datetime, Calendar.SECOND, seconds);
  }

  /** Shift {@code datetime} by {@code amount} units of the given {@link Calendar} field. */
  private static Date addToDate(Date datetime, int calendarField, int amount) {
    Calendar calendar = Calendar.getInstance();
    calendar.setTime(datetime);
    calendar.add(calendarField, amount);
    return calendar.getTime();
  }

  /**
   * @param watermarkType watermark type to test, may be null
   * @return {@code true} only when {@code watermarkType} is {@link WatermarkType#SIMPLE}
   */
  public static boolean isSimpleWatermark(WatermarkType watermarkType) {
    return watermarkType == WatermarkType.SIMPLE;
  }

  /**
   * Print time difference in minutes, seconds and milliseconds.
   *
   * @param start start time in milliseconds
   * @param end end time in milliseconds
   * @return text of the form {@code "<m> min, <s> sec, <ms> millis"}
   */
  public static String printTiming(long start, long end) {
    long totalMillis = end - start;
    long mins = TimeUnit.MILLISECONDS.toMinutes(totalMillis);
    long secs = TimeUnit.MILLISECONDS.toSeconds(totalMillis) - TimeUnit.MINUTES.toSeconds(mins);
    long millis = totalMillis - TimeUnit.MINUTES.toMillis(mins) - TimeUnit.SECONDS.toMillis(secs);
    return String.format("%d min, %d sec, %d millis", mins, secs, millis);
  }

  /**
   * Get column list from the user provided query to build schema with the respective columns.
   *
   * <p>The text between the first {@code "select "} and the following {@code " from "} is
   * lower-cased, stripped of spaces and split on commas. No SQL parsing is attempted, so commas
   * inside expressions also split.
   *
   * @param query user provided query
   * @return list of lower-cased column expressions, or {@code null} when the query is null/empty or
   *         does not contain {@code "select "} followed by {@code " from "}
   */
  public static List<String> getColumnListFromQuery(String query) {
    if (Strings.isNullOrEmpty(query)) {
      return null;
    }
    String queryLowerCase = query.toLowerCase(Locale.ROOT);

    // Test the raw indexOf result: adding the keyword length first would hide the -1 "not found" value.
    int selectIndex = queryLowerCase.indexOf(SELECT_KEYWORD);
    if (selectIndex < 0) {
      return null;
    }
    int startIndex = selectIndex + SELECT_KEYWORD.length();
    int endIndex = queryLowerCase.indexOf(FROM_KEYWORD, startIndex);
    if (endIndex < 0) {
      return null;
    }

    // Slice the lower-cased copy so the indices always refer to the string they were computed on.
    String[] inputQueryColumns = queryLowerCase.substring(startIndex, endIndex).replace(" ", "").split(",");
    return Arrays.asList(inputQueryColumns);
  }

  /**
   * Convert CSV record(List&lt;String&gt;) to JsonObject using header(column names).
   *
   * @param bulkRecordHeader column names
   * @param record column values, positionally matching {@code bulkRecordHeader}
   * @param columnCount number of leading columns to convert; both lists must hold at least this many
   * @return JsonObject mapping each of the first {@code columnCount} header names to its value
   */
  public static JsonObject csvToJsonObject(List<String> bulkRecordHeader, List<String> record, int columnCount) {
    Map<String, String> resultInfo = new HashMap<>();
    for (int i = 0; i < columnCount; i++) {
      resultInfo.put(bulkRecordHeader.get(i), record.get(i));
    }

    JsonNode json = OBJECT_MAPPER.valueToTree(resultInfo);
    JsonElement element = GSON.fromJson(json.toString(), JsonObject.class);
    return element.getAsJsonObject();
  }

  /**
   * Parse a string as an int, falling back to a default for null/empty input.
   *
   * @param value decimal text, may be null or empty
   * @param defaultValue value returned for a null/empty input
   * @return parsed value or {@code defaultValue}
   * @throws NumberFormatException if {@code value} is non-empty and not a valid int
   */
  public static int getAsInt(String value, int defaultValue) {
    return Strings.isNullOrEmpty(value) ? defaultValue : Integer.parseInt(value);
  }

  /**
   * Read a property as a boolean.
   *
   * @param properties properties to read from
   * @param key property name
   * @param defaultValue text used when the property is absent
   * @return {@code true} only if the resolved text equals {@code "true"} ignoring case
   */
  public static boolean getPropAsBoolean(Properties properties, String key, String defaultValue) {
    return Boolean.parseBoolean(properties.getProperty(key, defaultValue));
  }

  /**
   * Escape characters in column name or table name by replacing each listed sequence.
   *
   * @param columnName column or table name
   * @param escapeChars comma-separated list of character sequences to replace
   * @param character replacement text
   * @return {@code null} for a null/empty {@code columnName}; {@code columnName} unchanged when
   *         {@code escapeChars} is null/empty; otherwise the name with every listed sequence replaced
   */
  public static String escapeSpecialCharacters(String columnName, String escapeChars, String character) {
    if (Strings.isNullOrEmpty(columnName)) {
      return null;
    }
    if (StringUtils.isEmpty(escapeChars)) {
      return columnName;
    }

    String escaped = columnName;
    for (String specialChar : escapeChars.split(",")) {
      escaped = escaped.replace(specialChar, character);
    }
    return escaped;
  }

  /**
   * Helper method for getting a value containing CURRENTDAY-1 or CURRENTHOUR-1 in the form yyyyMMddHHmmss.
   *
   * <p>The single character that follows the keyword is skipped without being inspected; the rest
   * is parsed as the number of days/hours to subtract from the current time.
   *
   * @param value a plain long, or {@code CURRENTDAY<sep><n>} / {@code CURRENTHOUR<sep><n>} (case-insensitive)
   * @param timezone time zone id used for the current time; {@code null} selects the default source time zone
   * @return {@code 0} for a null/empty value, the shifted current time as a {@code yyyyMMddHHmmss}
   *         number for keyword values, otherwise {@code value} parsed as a long
   */
  public static long getLongWithCurrentDate(String value, String timezone) {
    if (Strings.isNullOrEmpty(value)) {
      return 0;
    }

    DateTime time = getCurrentTime(timezone);
    DateTimeFormatter dtFormatter = DateTimeFormat.forPattern(CURRENT_DATE_FORMAT).withZone(time.getZone());
    String upperCaseValue = value.toUpperCase(Locale.ROOT);

    if (upperCaseValue.startsWith(CURRENT_DAY)) {
      int days = Integer.parseInt(value.substring(CURRENT_DAY.length() + 1));
      return Long.parseLong(dtFormatter.print(time.minusDays(days)));
    }
    if (upperCaseValue.startsWith(CURRENT_HOUR)) {
      int hours = Integer.parseInt(value.substring(CURRENT_HOUR.length() + 1));
      return Long.parseLong(dtFormatter.print(time.minusHours(hours)));
    }
    return Long.parseLong(value);
  }

  /**
   * Convert joda time to a string in the given format.
   *
   * @param input timestamp
   * @param format expected format
   * @param timezone time zone the timestamp is rendered in; {@code null} selects the default source time zone
   * @return string format of timestamp
   * @throws IllegalArgumentException if the time zone id is not recognized
   */
  public static String dateTimeToString(DateTime input, String format, String timezone) {
    String tz = StringUtils.defaultString(timezone, ConfigurationKeys.DEFAULT_SOURCE_TIMEZONE);
    DateTimeFormatter outputDtFormat = DateTimeFormat.forPattern(format).withZone(getTimeZone(tz));
    return outputDtFormat.print(input);
  }

  /**
   * Get current time - joda.
   *
   * @param timezone time zone of current time; {@code null} selects the default source time zone
   * @return current datetime in the given timezone
   * @throws IllegalArgumentException if the time zone id is not recognized
   */
  public static DateTime getCurrentTime(String timezone) {
    String tz = StringUtils.defaultString(timezone, ConfigurationKeys.DEFAULT_SOURCE_TIMEZONE);
    return new DateTime(getTimeZone(tz));
  }

  /**
   * Convert timestamp in a string format to joda time.
   *
   * @param input timestamp
   * @param format timestamp format
   * @param timezone time zone of timestamp; {@code null} selects the default source time zone
   * @return joda time
   * @throws IllegalArgumentException if the time zone id is not recognized
   */
  public static DateTime toDateTime(String input, String format, String timezone) {
    String tz = StringUtils.defaultString(timezone, ConfigurationKeys.DEFAULT_SOURCE_TIMEZONE);
    DateTimeZone dateTimeZone = getTimeZone(tz);
    DateTimeFormatter inputDtFormat = DateTimeFormat.forPattern(format).withZone(dateTimeZone);
    return inputDtFormat.parseDateTime(input).withZone(dateTimeZone);
  }

  /**
   * Convert timestamp in a long format to joda time.
   *
   * @param input timestamp whose decimal representation matches {@code format}
   * @param format timestamp format
   * @param timezone time zone of timestamp; {@code null} selects the default source time zone
   * @return joda time
   * @throws IllegalArgumentException if the time zone id is not recognized
   */
  public static DateTime toDateTime(long input, String format, String timezone) {
    return toDateTime(Long.toString(input), format, timezone);
  }

  /**
   * Get time zone of time zone id.
   *
   * @param id timezone id
   * @return timezone
   * @throws IllegalArgumentException if {@code id} is not recognized; the original failure is kept as the cause
   */
  private static DateTimeZone getTimeZone(String id) {
    try {
      return DateTimeZone.forID(id);
    } catch (IllegalArgumentException e) {
      throw new IllegalArgumentException("TimeZone " + id + " not recognized", e);
    }
  }
}