/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * <p> * http://www.apache.org/licenses/LICENSE-2.0 * <p> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.pinterest.secor.parser; import com.google.protobuf.Timestamp; import com.google.protobuf.util.Timestamps; import com.pinterest.secor.common.SecorConfig; import com.pinterest.secor.message.Message; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.text.SimpleDateFormat; import java.util.Date; import java.util.List; public abstract class TimestampedMessageParser extends MessageParser implements Partitioner { private static final Logger LOG = LoggerFactory.getLogger(TimestampedMessageParser.class); private static final long HOUR_IN_MILLIS = 3600L * 1000L; private static final long DAY_IN_MILLIS = 3600L * 24 * 1000L; private static final long MINUTE_IN_MILLIS = 60L * 1000L; /* * IMPORTANT * SimpleDateFormat are NOT thread-safe. * Each parser needs to have their own local SimpleDateFormat or it'll cause race condition. */ protected final SimpleDateFormat mDtFormatter; protected final SimpleDateFormat mHrFormatter; protected final SimpleDateFormat mDtHrFormatter; protected final int mFinalizerDelaySeconds; protected final SimpleDateFormat mDtHrMinFormatter; protected final SimpleDateFormat mMinFormatter; protected final String mDtFormat; protected final String mHrFormat; protected final String mMinFormat; protected final String mDtPrefix; protected final String mHrPrefix; protected final String mMinPrefix; protected final boolean mUsingHourly; protected final boolean mUsingMinutely; public TimestampedMessageParser(SecorConfig config) { super(config); mUsingHourly = usingHourly(config); mUsingMinutely = usingMinutely(config); mDtFormat = usingDateFormat(config); mHrFormat = usingHourFormat(config); mMinFormat = usingMinuteFormat(config); mDtPrefix = usingDatePrefix(config); mHrPrefix = usingHourPrefix(config); mMinPrefix = usingMinutePrefix(config); LOG.info("UsingHourly: {}", mUsingHourly); LOG.info("UsingMin: {}", mUsingMinutely); mFinalizerDelaySeconds = config.getFinalizerDelaySeconds(); LOG.info("FinalizerDelaySeconds: {}", mFinalizerDelaySeconds); mDtFormatter = new SimpleDateFormat(mDtFormat); mDtFormatter.setTimeZone(config.getTimeZone()); mHrFormatter = new SimpleDateFormat(mHrFormat); mHrFormatter.setTimeZone(config.getTimeZone()); mMinFormatter = new SimpleDateFormat(mMinFormat); mMinFormatter.setTimeZone(config.getTimeZone()); mDtHrFormatter = new SimpleDateFormat(mDtFormat+ "-" + mHrFormat); mDtHrFormatter.setTimeZone(config.getTimeZone()); mDtHrMinFormatter = new SimpleDateFormat(mDtFormat+ "-" + mHrFormat + "-" + mMinFormat); mDtHrMinFormatter.setTimeZone(config.getTimeZone()); } static boolean usingHourly(SecorConfig config) { return config.getBoolean("partitioner.granularity.hour", false); } static boolean usingMinutely(SecorConfig config) { return config.getBoolean("partitioner.granularity.minute", false); } static String usingDateFormat(SecorConfig config) { return config.getString("partitioner.granularity.date.format", "yyyy-MM-dd"); } static String usingHourFormat(SecorConfig config) { return config.getString("partitioner.granularity.hour.format", "HH"); } static String usingMinuteFormat(SecorConfig config) { return config.getString("partitioner.granularity.min.format", "mm"); } static String usingDatePrefix(SecorConfig config) { return config.getString("partitioner.granularity.date.prefix", "dt="); } static String usingHourPrefix(SecorConfig config) { return config.getString("partitioner.granularity.hour.prefix", "hr="); } static String usingMinutePrefix(SecorConfig config) { return config.getString("partitioner.granularity.minute.prefix", "min="); } protected static long toMillis(final long timestamp) { final long nanosecondDivider = (long) Math.pow(10, 9 + 9); final long microsecondDivider = (long) Math.pow(10, 9 + 6); final long millisecondDivider = (long) Math.pow(10, 9 + 3); long timestampMillis; if (timestamp / nanosecondDivider > 0L) { timestampMillis = timestamp / (long) Math.pow(10, 6); } else if (timestamp / microsecondDivider > 0L) { timestampMillis = timestamp / (long) Math.pow(10, 3); } else if (timestamp / millisecondDivider > 0L) { timestampMillis = timestamp; } else { // assume seconds timestampMillis = timestamp * 1000L; } return timestampMillis; } public abstract long extractTimestampMillis(final Message message) throws Exception; protected String[] generatePartitions(long timestampMillis, boolean usingHourly, boolean usingMinutely) throws Exception { Date date = new Date(timestampMillis); String dt = mDtPrefix + mDtFormatter.format(date); String hr = mHrPrefix + mHrFormatter.format(date); String min = mMinPrefix + mMinFormatter.format(date); if (usingMinutely) { return new String[]{dt, hr, min}; } else if (usingHourly) { return new String[]{dt, hr}; } else { return new String[]{dt}; } } protected long parsePartitions(String[] partitions) throws Exception { String dtValue = partitions[0].split("=")[1]; String hrValue = partitions.length > 1 ? partitions[1].split("=")[1] : "00"; String minValue = partitions.length > 2 ? partitions[2].split("=")[1] : "00"; String value = dtValue + "-" + hrValue + "-" + minValue; Date date = mDtHrMinFormatter.parse(value); return date.getTime(); } @Override public String[] extractPartitions(Message message) throws Exception { // Date constructor takes milliseconds since epoch. long timestampMillis = extractTimestampMillis(message); return generatePartitions(timestampMillis, mUsingHourly, mUsingMinutely); } private long getFinalizedTimestampMillis(Message lastMessage, Message committedMessage) throws Exception { long lastTimestamp = extractTimestampMillis(lastMessage); long committedTimestamp = extractTimestampMillis(committedMessage); long now = System.currentTimeMillis(); if (lastTimestamp == committedTimestamp && (now - lastTimestamp) > mFinalizerDelaySeconds * 1000) { LOG.info("No new message coming, use the current time: " + now); return now; } return committedTimestamp; } @Override public String[] getFinalizedUptoPartitions(List<Message> lastMessages, List<Message> committedMessages) throws Exception { if (lastMessages == null || committedMessages == null) { LOG.error("Either: {} and {} is null", lastMessages, committedMessages); return null; } assert lastMessages.size() == committedMessages.size(); long minMillis = Long.MAX_VALUE; for (int i = 0; i < lastMessages.size(); i++) { long millis = getFinalizedTimestampMillis(lastMessages.get(i), committedMessages.get(i)); if (millis < minMillis) { LOG.info("partition {}, time {}", i, millis); minMillis = millis; } } if (minMillis == Long.MAX_VALUE) { LOG.error("No valid timestamps among messages: {} and {}", lastMessages, committedMessages); return null; } // add the safety lag for late-arrival messages minMillis -= mFinalizerDelaySeconds * 1000L; LOG.info("adjusted millis {}", minMillis); return generatePartitions(minMillis, mUsingHourly, mUsingMinutely); } @Override public String[] getPreviousPartitions(String[] partitions) throws Exception { long millis = parsePartitions(partitions); boolean usingHourly = mUsingHourly; boolean usingMinutely = mUsingMinutely; if (mUsingMinutely && millis % HOUR_IN_MILLIS == 0) { if (partitions.length == 3) { usingMinutely = false; if (millis % DAY_IN_MILLIS == 0) { millis -= DAY_IN_MILLIS; } else { millis -= HOUR_IN_MILLIS; usingHourly = true; } } else if (partitions.length == 2) { millis += HOUR_IN_MILLIS; millis -= MINUTE_IN_MILLIS; usingMinutely = true; } else { millis += DAY_IN_MILLIS; millis -= HOUR_IN_MILLIS; usingMinutely = false; usingHourly = true; } } else if (mUsingHourly && millis % DAY_IN_MILLIS == 0) { // On the day boundary, if the current partition is [dt=07-07, hr=00], the previous // one is dt=07-06; If the current one is [dt=07-06], the previous one is // [dt=07-06, hr-23] // So we would return in the order of: // dt=07-07, hr=01 // dt=07-07, hr=00 // dt=07-06 // dt=07-06, hr=23 if (partitions.length == 2) { usingHourly = false; millis -= DAY_IN_MILLIS; } else { usingHourly = true; millis += DAY_IN_MILLIS; millis -= HOUR_IN_MILLIS; } } else { long delta = mUsingHourly ? HOUR_IN_MILLIS : DAY_IN_MILLIS; if (mUsingMinutely) { delta = MINUTE_IN_MILLIS; } millis -= delta; } return generatePartitions(millis, usingHourly, usingMinutely); } }