package com.thinkbiganalytics.ingest;
/*-
* #%L
* thinkbig-nifi-core-processors
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.joda.time.DateTime;
import org.joda.time.DateTimeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;
/**
* Provides support for incremental
*/
public class GetTableDataSupport {
public static Logger logger = LoggerFactory.getLogger(GetTableDataSupport.class);
private Connection conn;
private int timeout;
public GetTableDataSupport(Connection conn, int timeout) {
Validate.notNull(conn);
this.conn = conn;
this.timeout = timeout;
}
protected static Date maxAllowableDateFromUnit(Date fromDate, UnitSizes unit) {
DateTime jodaDate = new DateTime(fromDate);
switch (unit) {
case HOUR:
return jodaDate.hourOfDay().roundFloorCopy().toDate();
case DAY:
return jodaDate.withHourOfDay(0).hourOfDay().roundFloorCopy().toDate();
case WEEK:
return jodaDate.weekOfWeekyear().roundFloorCopy().toDate();
case MONTH:
return jodaDate.monthOfYear().roundFloorCopy().toDate();
case YEAR:
return jodaDate.withMonthOfYear(1).withDayOfMonth(1).withHourOfDay(0).hourOfDay().roundFloorCopy().toDate();
}
return fromDate;
}
private String selectStatement(String[] selectFields) {
return StringUtils.join(selectFields, ",");
}
/**
* Performs a full extract of the data for the specified table
*/
public ResultSet selectFullLoad(String tableName, String[] selectFields) throws SQLException {
final Statement st = conn.createStatement();
st.setQueryTimeout(timeout);
String select = selectStatement(selectFields);
StringBuffer sb = new StringBuffer();
sb.append("SELECT ").append(select).append(" FROM ").append(tableName);
logger.info("Executing full GetTableData query {}", sb.toString());
st.setQueryTimeout(timeout);
return st.executeQuery(sb.toString());
}
/**
* Provides an incremental select based on a date field and last status. The overlap time will be subtracted from
* the last load date. This will cause duplicate records but also pickup records that were missed on the last scan
* due to long-running transactions.
*
* @param tableName the table
* @param dateField the name of the field containing last modified date used to perform the incremental load
* @param overlapTime the number of seconds to overlap with the last load status
* @param lastLoadDate the last batch load date
*/
public ResultSet selectIncremental(String tableName, String[] selectFields, String dateField, int overlapTime, Date lastLoadDate, int backoffTime, UnitSizes unit) throws SQLException {
ResultSet rs = null;
logger.info("selectIncremental tableName {} dateField {} overlapTime {} lastLoadDate {} backoffTime {} unit {}", tableName, dateField, overlapTime, lastLoadDate, backoffTime, unit.toString());
final Date now = new Date(DateTimeUtils.currentTimeMillis());
DateRange range = new DateRange(lastLoadDate, now, overlapTime, backoffTime, unit);
logger.info("Load range with min {} max {}", range.getMinDate(), range.getMaxDate());
StringBuffer sb = new StringBuffer();
String select = selectStatement(selectFields);
sb.append("select ").append(select).append(" from ").append(tableName).append(" WHERE " + dateField + " > ? and " + dateField + " < ?");
if (range.getMinDate().before(range.getMaxDate())) {
PreparedStatement ps = conn.prepareStatement(sb.toString());
ps.setQueryTimeout(timeout);
ps.setTimestamp(1, new java.sql.Timestamp(range.getMinDate().getTime()));
ps.setTimestamp(2, new java.sql.Timestamp(range.getMaxDate().getTime()));
logger.info("Executing incremental GetTableData query {}", ps);
rs = ps.executeQuery();
}
return rs;
}
/**
* Output format for table rows.
*/
public enum OutputType {
/**
* Columns are separated by a delimiter
*/
DELIMITED,
/**
* Each row is written as an Avro record
*/
AVRO
}
public enum UnitSizes {
NONE,
HOUR,
DAY,
WEEK,
MONTH,
YEAR
}
protected static class DateRange {
private Date minDate;
private Date maxDate;
public DateRange(Date lastLoadDate, Date currentDate, int overlapTime, int backoffTime, UnitSizes unit) {
lastLoadDate = (lastLoadDate == null ? new Date(0L) : lastLoadDate);
this.minDate = new Date(lastLoadDate.getTime() - (Math.abs(overlapTime) * 1000L));
// Calculate the max date
Date maxLoadDate = (currentDate == null ? new Date() : currentDate);
maxLoadDate = new Date(maxLoadDate.getTime() - Math.abs(backoffTime) * 1000L);
this.maxDate = maxAllowableDateFromUnit(maxLoadDate, unit);
}
public Date getMaxDate() {
return maxDate;
}
public Date getMinDate() {
return minDate;
}
public String toString() {
return "min (" + minDate + ") max(" + maxDate + ")";
}
}
}