/**
*
*/
package org.jhuapl.edu.sages.etl.strategy;
import java.io.File;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Savepoint;
import java.sql.Types;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.jhuapl.edu.sages.etl.ETLProperties;
import org.jhuapl.edu.sages.etl.SagesEtlException;
import org.jhuapl.edu.sages.etl.opencsvpods.DumbTestOpenCsvJar;
/**
* {@link ETLPostgresqlStrategy} is the Postgresql specific strategy for the ETL processing logic. Transaction handling,
* SQL syntax nuances, and error codes are for Postgresql.
*
* @author POKUAM1
* @created Nov 1, 2011
*/
public class ETLMySQLStrategy extends ETLStrategyTemplate {
private static final Logger log = Logger.getLogger(ETLMySQLStrategy.class);
private String src_table_name;
private String dst_table_name;
private static final
String
msgFatal =
"ETL_LOGGER: error did occur for this file, but data is rolled back to last good state.";
public static final Set<String> mysqlIgnorableErrorCodes = new HashSet<String>() {
/**
*
*/
private static final long serialVersionUID = 1L;
{
// "42P07" => relation "oevisit_etl_cleanse_table" already exists
// "42P07" => relation "oevisit_etl_staging_db" already exists
//add("42P07");
//add("25P02"); // column "etl_flag" of relation "oevisit_etl_staging_db" already exists
// "42701" => column "etl_flag" of relation "oevisit_etl_cleanse_table" already exists
// "42701" => column "etl_flag" of relation "oevisit_etl_staging_db" already exists
//add("42701");
// "42P01" => relation "oevisit_etl_cleanse_table" does not exist
// "42P01" => relation "oevisit_etl_staging_db" does not exist
//add("42P01");
// "1050" => relation "oevisit_etl_cleansing_table" already exists
// "1050" => relation "oevisit_etl_staging_table" already exists
add("42S02");
add("42S01");
add("42S21");
add("CODE");
add("CODE");
add("CODE");
}
};
public ETLMySQLStrategy(SagesOpenCsvJar socj) {
super();
m_socj = socj;
super.m_sqlStateHandler.setIgnorableErrorCodes(mysqlIgnorableErrorCodes);
}
public ETLMySQLStrategy() {
super.m_sqlStateHandler.setIgnorableErrorCodes(mysqlIgnorableErrorCodes);
}
/* (non-Javadoc)
* @see org.jhuapl.edu.sages.etl.strategy.ETLStrategy#buildStagingTable(java.sql.Connection, org.jhuapl.edu.sages.etl.opencsvpods.SagesOpenCsvJar, java.sql.Savepoint)
*/
@Override
public Savepoint buildStagingTable(Connection c, SagesOpenCsvJar socj,
Savepoint save1) throws SQLException, SagesEtlException {
/***************************************************************************
* build ETL_STAGING_TABLE
***************************************************************************
* SQL: "CREATE TABLE..."
* - all columns have sql-datatype identical to FINAL DESTINATION TABLE
* - column definitions build from metadata of FINAL DESTINATION TABLE
***************************************************************************/
/** get metadata for FINAL DESTINATION TABLE and use it to build STAGING */
ResultSetMetaData
rsmd =
c.prepareStatement("SELECT * FROM " + socj.props_etlconfig.getProperty("productionTableName"))
.getMetaData();
this.dst_table_name = socj.props_etlconfig.getProperty("dbprefix_dst") + "_" + SagesOpenCsvJar.ETL_STAGING_DB;
this.dst_table_name = this.dst_table_name.toLowerCase();
String destTableStr = "";
socj.dst_table_name = socj.props_etlconfig.getProperty("dbprefix_dst") + "_" + SagesOpenCsvJar.ETL_STAGING_DB;
socj.DEST_COLTYPE_MAP = new LinkedHashMap<String, String>();
socj.DEST_SQLTYPE_MAP = new LinkedHashMap<String, Integer>();
int zdst = 1;
/**
* figure out the columns in the target destination table, auto-inc columns are skipped
* additionally populate these maps:
* DEST_COLTYPE_MAP[colname:coltype]
* DEST_SQLTYPE_MAP[colname:colSQLtype]
* PARAMINDX_DST[colname:sqlparamindex]
*/
for (int i = rsmd.getColumnCount(); i > 0; i--) {
/**http://stackoverflow.com/questions/1870022/java-resultset-hasnext
//http://www.herongyang.com/JDBC/sqljdbc-jar-Column-List.html */
String colLength = "120";
String colName = rsmd.getColumnName(i);
String colType = rsmd.getColumnTypeName(i);
int colSqlType = rsmd.getColumnType(i);
String isAutoInc = "";
if (colType.equalsIgnoreCase("VARCHAR")) {
colType += "(" + colLength + ")";
}
try {
isAutoInc = rsmd.isAutoIncrement(i) ? "YES" : "NO"; /**YES, NO, or "" */
} catch (SQLException e) {
if ("S0022".equals(e.getSQLState())) { // MS Access specific
log.debug("ETL_LOGGER:"
+ "this database does not support IS_AUTOINCREMENT result meta data column. safe to ignore");
}
}
if ("serial".equalsIgnoreCase(colType) || "COUNTER".equalsIgnoreCase(colType) || "YES"
.equalsIgnoreCase(isAutoInc)) {
continue;
}
String colDef = colName + " " + colType + ",";
destTableStr += colDef + "\n";
log.debug(colName + "=" + colType + "(" + colSqlType + ")");
/** TODO: make sure using this staging table map <colName:colType> **/
socj.DEST_COLTYPE_MAP.put(colName, colType);
socj.DEST_SQLTYPE_MAP.put(colName, colSqlType);
socj.PARAMINDX_DST.put(colName, zdst);
zdst++;
}
/** the built "CREATE TABLE STAGING_TABLE..." string */
Savepoint createStagingSavepoint = c.setSavepoint("createStagingSavePoint");
ResultSet etl_status = c.prepareStatement("SHOW TABLES LIKE '" + this.dst_table_name + "';").executeQuery();
if (etl_status.next()) {
return createStagingSavepoint;
}
/** remove trailing ',' **/
destTableStr = StringUtils.substringBeforeLast(destTableStr, ",");
String createStagingStmt = "CREATE TABLE " + socj.dst_table_name + "\n(\n" + destTableStr + "\n);";
log.info(createStagingStmt);
PreparedStatement ps_CREATE_STAGING = c.prepareStatement(createStagingStmt);
try {
/** execute CREATE STAGING_TABLE sql */
ps_CREATE_STAGING.execute();
} catch (Exception e) {
m_sqlStateHandler.sqlExceptionHandlerBuildStagingTable(c, socj, save1, createStagingSavepoint, e);
}
return createStagingSavepoint;
}
@Override
public String buildInsertIntoCleansingTableSql(Connection c, SagesOpenCsvJar socj) throws SQLException {
/************************************************
* build reusable 'INSERT INTO CLEANSING_TABLE'
************************************************
* SQL: "INSERT INTO SRC_TABLE..."
* Example SQL: "INSERT INTO src_table_name VALUES (?, ?, ?, ?, ?,...)"
* data will be inserted as text sql-datatype
*
*/
String insertStmt_src = "INSERT INTO " + socj.src_table_name + " VALUES (";
for (int h = 0; h < socj.header_src.length + 1; h++) {
insertStmt_src = insertStmt_src + "?,";
}
/***
* MS Access specific
* 2351 - Microsoft Access can't represent an implicit VALUES clause in the query design grid.
* Edit this in SQL view.
*
* (this is for the "etl_flag" column that was added after table creation
*/
if (socj.dbms.equals(ETLProperties.dbid_msaccess)) {
insertStmt_src = insertStmt_src + "?,";
}
/** remove trailing ',' TODO CLEAN UP WITH StringUtils.join() */
insertStmt_src = StringUtils.substringBeforeLast(insertStmt_src, ",") + ");";
log.debug("ETL_LOGGER\ninsertstmt_src: " + insertStmt_src);
return insertStmt_src;
}
@Override
public void setAndExecuteInsertIntoCleansingTablePreparedStatement(
Connection c, SagesOpenCsvJar socj, ArrayList<String[]> entries_rawdata, Savepoint save2,
PreparedStatement ps_INSERT_CLEANSE) throws SQLException {
/** set values for the ? parameters, NOTE all values have text sql-datatype */
for (int e = 0; e < entries_rawdata.size(); e++) {
String[] entry = entries_rawdata.get(e);
String log_insertStmt = "VALUES:";
for (int p = 0; p < entry.length; p++) {
ps_INSERT_CLEANSE.setString(p + 1, entry[p]);
log_insertStmt += "'" + entry[p] + "',";
}
/***
* MS Access specific
* 2351 - Microsoft Access can't represent an implicit VALUES clause in the query design grid.
* Edit this in SQL view.
*
*/
if (socj.dbms.equals(ETLProperties.dbid_msaccess) || socj.dbms.equals(ETLProperties.dbid_mysql)) {
ps_INSERT_CLEANSE.setString(entry.length + 1, "no flag");
log_insertStmt += "'no flag'";
}
log.debug("ETL_LOGGER:(ps_INSERT_CLEANSE)= " + ps_INSERT_CLEANSE.toString());
log.debug("ETL_LOGGER: " + log_insertStmt);
try {
ps_INSERT_CLEANSE.execute();
} catch (Exception e1) {
if (m_sqlStateHandler
.errorCleanup(m_socj, save2, c, socj.getCurrentFile(), socj.getFaileddir_csvfiles(), e1)
== 2) {
break;
}
}
}
}
@Override
public void copyFromCleanseToStaging(Connection c, SagesOpenCsvJar socj, Savepoint save2)
throws SQLException, SagesEtlException {
PreparedStatement ps_SELECT_CLEANSING = c.prepareStatement("SELECT * FROM " + socj.src_table_name);
ResultSet rs_SELECT_CLEANSING = ps_SELECT_CLEANSING.executeQuery();
PreparedStatement ps_SELECT_STAGING = c.prepareStatement("SELECT * FROM " + socj.dst_table_name);
ResultSet rs_SELECT_STAGING = ps_SELECT_STAGING.executeQuery();
ResultSetMetaData rsmd = rs_SELECT_STAGING.getMetaData();
//ResultSetMetaData rsmd = rs_SELECT_CLEANSING.getMetaData();
String[] rsColsHERE = new String[rsmd.getColumnCount()];
int numberOfColumns = rsmd.getColumnCount();
for (int x = 1; x < numberOfColumns + 1; x++) {
log.debug("LABEL: " + rsmd.getColumnLabel(x) + ", TYPE: " + rsmd.getColumnTypeName(x));
}
for (Entry<String, String> dmap : socj.DEST_COLTYPE_MAP.entrySet()) {
log.debug("LABEL: " + dmap.getKey() + ", TYPE: " + dmap.getValue());
}
for (int m = 0; m < rsmd.getColumnCount(); m++) {
rsColsHERE[m] = rsmd.getColumnLabel(m + 1);
}
// build the "INSERT INTO ETL_STAGING_DB" as reusable prepared statement
String stagingInsertStmt = "INSERT INTO " + socj.dst_table_name;
String stagingColStmt = "(";
String stagingValuesStmt = " VALUES (";
for (String colHead : rsColsHERE) {
//createStmt += colHead + " " + sourceColTypeMap.get(colHead) + ",\n";
stagingColStmt += colHead + ",\n";
}
stagingColStmt = StringUtils.substringBeforeLast(stagingColStmt, ",\n") + "\n)";
for (int h = 0; h < rsColsHERE.length; h++) {
stagingValuesStmt = stagingValuesStmt + "?,";
}
stagingValuesStmt = StringUtils.substringBeforeLast(stagingValuesStmt, ",") + ");";
stagingInsertStmt = stagingInsertStmt + stagingColStmt + stagingValuesStmt;
log.debug("!!!!stagingInsertStmt!!!!: \n" + stagingInsertStmt);
/** Reusable Prepared Statement */
PreparedStatement ps_INSERT_STAGING = c.prepareStatement(stagingInsertStmt);
int z_colCount = rs_SELECT_CLEANSING.getMetaData().getColumnCount();
ResultSetMetaData rsmd2 = rs_SELECT_CLEANSING.getMetaData();
Map<String, Integer> z_indexMap = new HashMap<String, Integer>();
ArrayList<String> alist = new ArrayList<String>(socj.MAPPING_MAP.values());
Map<String, Integer> REV_INDX_MAPPING_MAP = new HashMap<String, Integer>();
int zIndx = -1;
for (int z = 0; z < z_colCount; z++) {
String currentColName = rsmd2.getColumnLabel(z + 1);
log.debug("currentColName: " + currentColName);
if (socj.MAPPING_MAP.get(currentColName) != null) {
String destColName = socj.MAPPING_MAP.get(currentColName);
log.debug("destColName: " + destColName);
zIndx = alist.indexOf(destColName);
z_indexMap.put(destColName, new Integer(zIndx));
}
}
int recNum = 0;
try {
while (rs_SELECT_CLEANSING.next()) {
// for (int z = 0; z<z_colCount; z++){
recNum++;
Set<Integer> masterindices_dst = new HashSet<Integer>(socj.PARAMINDX_DST.values());
for (Entry<String, Integer> z_indexEntry : z_indexMap.entrySet()) {
//String currentColName = rs_SELECT_CLEANSING.
//if (!MAPPING_MAP.containsKey(currentColName)) continue;
String destColName = z_indexEntry.getKey();
String sourcColName = socj.MAPPING_REV_MAP.get(destColName);
log.debug("destcolNAME: " + destColName);
Integer destIndx = z_indexEntry.getValue();
log.debug("destINDX: " + destIndx); //TODO: verify this zIndex make sure it's right
Object VALUE = null;
VALUE = rs_SELECT_CLEANSING.getObject(sourcColName);
log.debug("THE VALUE AWAITED: " + VALUE);
Integer SQL_TYPE = socj.DEST_SQLTYPE_MAP.get(destColName);
if (SQL_TYPE == null) {
log.fatal("'" + destColName
+ "' does not exist as a destination column in the production table, but was used in the src-to-dst-column-mappings.properties file. Check the mapping file for mistakes.");
throw SagesOpenCsvJar.abort("'" + destColName
+ "' does not exist as a destination column in the production table, but was used in the src-to-dst-column-mappings.properties file. Check the mapping file for mistakes."
+ destColName, new NullPointerException());
}
if (VALUE.equals("")) {
VALUE = null;
ps_INSERT_STAGING.setObject(socj.PARAMINDX_DST.get(destColName), VALUE, SQL_TYPE);
log.debug("SET NON DATE-" + VALUE);
masterindices_dst.remove(socj.PARAMINDX_DST.get(destColName));
} else if (SQL_TYPE == Types.DATE) {
/** http://postgresql.1045698.n5.nabble.com/insert-from-a-select-td3279325.html */
log.debug("date handling now occurring");
DateFormat formatter;
Date date = null;
java.sql.Date sqlDate = null;
String
formatToUse =
socj.props_dateformats
.getProperty(sourcColName); //i.e. "yyyy-MM-dd HH:mm:ss", "dd.MM.yyyy"
if (formatToUse == null) {
log.fatal("Date formatter was defined for a column '" + sourcColName
+ "' that does not exist in the .csv input files. Check dateformats.properties");
throw SagesOpenCsvJar.abort("Date formatter was defined for a column '" + sourcColName
+ "' that does not exist in the .csv input files: "
+ sourcColName, new NullPointerException());
} else {
formatToUse.trim();
}
formatter = new SimpleDateFormat(formatToUse);//grab configured date format
try {
if (VALUE.equals("")) { //"" => null date handling
} else {
date = (Date) formatter.parse(VALUE.toString());
sqlDate = new java.sql.Date(date.getTime());
log.debug(sqlDate.toString());
}
ps_INSERT_STAGING.setDate(socj.PARAMINDX_DST.get(destColName), sqlDate);
log.debug("SET THE DATE STUFF-" + sqlDate);
masterindices_dst.remove(socj.PARAMINDX_DST.get(destColName));
} catch (ParseException e1) {
log.fatal("ERROR: Check your date pattern in the file dateformats.properties:\n\t" +
sourcColName + "=" + socj.props_dateformats.getProperty(sourcColName) + "\n");
e1.printStackTrace();
throw new SagesEtlException(e1.getMessage(), e1);
}
} else {
// ps_INSERT_STAGING.setObject(destIndx+1, VALUE, SQL_TYPE);
ps_INSERT_STAGING.setObject(socj.PARAMINDX_DST.get(destColName), VALUE, SQL_TYPE);
log.debug("SET NON DATE-" + VALUE);
masterindices_dst.remove(socj.PARAMINDX_DST.get(destColName));
}
}
/** set NULLS for parameters with no values */
for (Integer nullparamindx : masterindices_dst) {
ps_INSERT_STAGING.setNull(nullparamindx, rsmd.getColumnType(nullparamindx));
}
/** THIS IS FOR THE COLUMN ETL_FLAG, which is at (1 + totalCols),
* it gets set to NULL in the STAGING TABLE **/
ps_INSERT_STAGING.setNull(socj.PARAMINDX_DST.size() + 1, Types.VARCHAR);
ps_INSERT_STAGING.executeUpdate();
}
} catch (SQLException se) {
log.fatal(msgFatal);
log.fatal("Line " + recNum + " caused an error.");
throw SagesOpenCsvJar.abort(se.getMessage(), se);
} catch (Exception e) {
log.fatal(msgFatal);
log.fatal("Line " + recNum + " caused an error.");
throw SagesOpenCsvJar.abort(e.getMessage(), e);
}
}
@Override
public void truncateCleanseAndStagingTables(DumbTestOpenCsvJar socj_dumb, Connection c, File file,
Savepoint baseLine)
throws SagesEtlException, SQLException {
log.info("--TRUNCATE CLEANSE & STAGING TABLES--");
socj_dumb.src_table_name =
socj_dumb.props_etlconfig.getProperty("dbprefix_src") + "_" + SagesOpenCsvJar.ETL_CLEANSE_TABLE;
socj_dumb.dst_table_name =
socj_dumb.props_etlconfig.getProperty("dbprefix_dst") + "_" + SagesOpenCsvJar.ETL_STAGING_DB;
/**
* Altered this for compatibility with MySQL/MariaDB
* The TRUNCATE*Table Prepared statements caused implicate commits of transactions when executed
* Fixed this issue by checking to see if the table exists before attempting to truncate
* http://dev.mysql.com/doc/refman/5.0/en/implicit-commit.html
*/
try {
int
sizeSrc =
c.prepareStatement("SHOW TABLES LIKE '" + socj_dumb.src_table_name + "'").executeQuery().getRow();
int
sizeDst =
c.prepareStatement("SHOW TABLES LIKE '" + socj_dumb.dst_table_name + "'").executeQuery().getRow();
if (!(sizeSrc == 0)) {
PreparedStatement
ps_TRUNCATECleanseTable =
c.prepareStatement("TRUNCATE TABLE " + socj_dumb.src_table_name);
ps_TRUNCATECleanseTable.execute();
c.commit();
}
if (!(sizeDst == 0)) {
PreparedStatement
ps_TRUNCATEStagingTable =
c.prepareStatement("TRUNCATE TABLE " + socj_dumb.dst_table_name);
ps_TRUNCATEStagingTable.execute();
c.commit();
}
} catch (Exception e) {
m_sqlStateHandler.sqlExceptionHandlerTruncateCleanseAndStagingTables(socj_dumb, c, baseLine, e);
}
}
@Override
public Savepoint buildEtlStatusTable(Connection c, SagesOpenCsvJar socj, Savepoint save1)
throws SagesEtlException, SQLException {
ResultSet etl_status = c.prepareStatement("SHOW TABLES LIKE 'etl_status'").executeQuery();
if (!etl_status.next()) {
return super.buildEtlStatusTable(c, socj, save1);
}
return c.setSavepoint("createEtlStatusSavepoint");
}
@Override
public Savepoint buildCleanseTable(Connection c, SagesOpenCsvJar socj, Savepoint save1)
throws SagesEtlException, SQLException {
this.src_table_name =
socj.props_etlconfig.getProperty("dbprefix_src") + "_" + SagesOpenCsvJar.ETL_CLEANSE_TABLE;
this.src_table_name = this.src_table_name.toLowerCase();
ResultSet etl_status = c.prepareStatement("SHOW TABLES LIKE '" + this.src_table_name + "'").executeQuery();
if (!etl_status.next()) {
return super.buildCleanseTable(c, socj, save1);
}
getHeaderColumns(socj);
return c.setSavepoint("createEtlCleanseSavepoint");
}
@Override
public void alterCleanseTableAddFlagColumn(Connection c, Savepoint save1, Savepoint createCleanseSavepoint)
throws SQLException, SagesEtlException {
ResultSet
columns =
c.prepareStatement("SHOW COLUMNS FROM " + this.src_table_name + " WHERE field='etl_flag';")
.executeQuery();
boolean flag_exists = (columns.next());
if (!flag_exists) {
super.alterCleanseTableAddFlagColumn(c, save1, createCleanseSavepoint);
}
}
@Override
public void alterStagingTableAddFlagColumn(Connection c, Savepoint save1, Savepoint createCleanseSavepoint)
throws SQLException, SagesEtlException {
ResultSet
columns =
c.prepareStatement("SHOW COLUMNS FROM " + this.dst_table_name + " WHERE field='etl_flag';")
.executeQuery();
if (!(columns.next())) {
super.alterStagingTableAddFlagColumn(c, save1, createCleanseSavepoint);
}
}
}