/** * */ package org.jhuapl.edu.sages.etl.strategy; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Savepoint; import java.util.ArrayList; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.Map.Entry; import java.util.Set; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; import org.jhuapl.edu.sages.etl.ETLProperties; import org.jhuapl.edu.sages.etl.SagesEtlException; import org.jhuapl.edu.sages.etl.SqlStateHandler; import org.jhuapl.edu.sages.etl.opencsvpods.DumbTestOpenCsvJar; import au.com.bytecode.opencsv.CSVReader; /** * Design Pattern: Strategy {@link ETLStrategyTemplate} defines the various methods that the SAGES ETL process will need * to fulfill. The actual implementation can vary depending on the target database (i.e. Postgresql supports Savepoints, * MSAccess does not; i.e. Variations in SQL syntax) * * @author POKUAM1 * @created Nov 1, 2011 */ public abstract class ETLStrategyTemplate { private static final Logger log = Logger.getLogger(ETLStrategyTemplate.class); protected SagesOpenCsvJar m_socj; protected SqlStateHandler m_sqlStateHandler; static protected Set<String> ignorableErrorCodes = new HashSet<String>() { private static final long serialVersionUID = 1L; { add("CODE"); } }; /** * */ public ETLStrategyTemplate() { m_sqlStateHandler = new SqlStateHandler(); m_sqlStateHandler.setIgnorableErrorCodes(ignorableErrorCodes); } // /** // * @param e {@link SQLException} // * @return boolean if sql state code is in the ignorable set of codes // */ // protected boolean isSqlStateIgnorable(SQLException e) { // return ignorableErrorCodes.contains(e.getSQLState()); // } /** * extract headers from CSV file * */ public void extractHeaderColumns(SagesOpenCsvJar socj) throws FileNotFoundException, IOException { /** header columns used to define the CLEANSE table schema */ socj.header_src = new String[0]; /** determine header columns **/ File csvinputDir = new File(socj.inputdir_csvfiles); if (csvinputDir.isDirectory()) { socj.csvFiles = csvinputDir.listFiles(); if (socj.csvFiles.length == 0) { log.info("Input directory has no files. Nothing to load into database. Exiting."); log.info("Your input directory is located here: " + socj.inputdir_csvfiles); System.exit(0); } socj.header_src = determineHeaderColumns(socj.csvFiles[0]); } else { log.fatal(csvinputDir.getName() + " is not valid csv input directory. Exiting."); System.exit(-1); } } /** * determining the headers from CSV file * * @return headers */ public String[] determineHeaderColumns(File file) throws FileNotFoundException, IOException { CSVReader reader_rawdata = new CSVReader(new FileReader(file)); ArrayList<String[]> currentEntries = (ArrayList<String[]>) reader_rawdata.readAll(); String[] headerColumns = currentEntries.get(0); /** set header from the first csv file */ reader_rawdata.close(); //currentEntries.remove(0); /** remove the header row */ //master_entries_rawdata.addAll(currentEntries); return headerColumns; } /** * Adds ETL_FLAG column to a table * * * @param tableToModify name of table to modify * @return alter table statement */ public String addFlagColumn(String tableToModify) { String sqlaltertableAddColumn = "ALTER TABLE " + tableToModify + " ADD COLUMN etl_flag varchar(255)"; return sqlaltertableAddColumn; } /** * truncate cleansing & staging tables * * @param socj_dumb {@link DumbTestOpenCsvJar} * @param c {@link Connection} * @param file csv file being processed * @param baseLine {@link Savepoint} savepoint to rollback to if error occurs * @throws SQLException * */ public void truncateCleanseAndStagingTables(DumbTestOpenCsvJar socj_dumb, Connection c, File file, Savepoint baseLine) throws SagesEtlException, SQLException { log.info("--TRUNCATE CLEANSE & STAGING TABLES--"); socj_dumb.src_table_name = socj_dumb.props_etlconfig.getProperty("dbprefix_src") + "_" + SagesOpenCsvJar.ETL_CLEANSE_TABLE; socj_dumb.dst_table_name = socj_dumb.props_etlconfig.getProperty("dbprefix_dst") + "_" + SagesOpenCsvJar.ETL_STAGING_DB; try { PreparedStatement ps_TRUNCATECleanseTable = c.prepareStatement("TRUNCATE TABLE " + socj_dumb.src_table_name); PreparedStatement ps_TRUNCATEStagingTable = c.prepareStatement("TRUNCATE TABLE " + socj_dumb.dst_table_name); ps_TRUNCATECleanseTable.execute(); ps_TRUNCATEStagingTable.execute(); c.commit(); } catch (Exception e) { m_sqlStateHandler.sqlExceptionHandlerTruncateCleanseAndStagingTables(socj_dumb, c, baseLine, e); } } /** creating etl status table **/ /** * ********************************* build ETL_STATUS_TABLE ********************************** SQL: "CREATE TABLE..." * * ********************************** */ public Savepoint buildEtlStatusTable(Connection c, SagesOpenCsvJar socj, Savepoint save1) throws SagesEtlException, SQLException { Savepoint createEtlStatusSavepoint = c.setSavepoint("createEtlStatusSavepoint"); // CREATE etl_status table String createSql = ""; if (socj.dbms.equals(ETLProperties.dbid_mysql)) { createSql = "CREATE TABLE etl_status (" + "`filename` VARCHAR(500)," + "`filepath` VARCHAR(500)," + "`outcome` VARCHAR(255)," + "`processtime` TIMESTAMP)"; } else if (socj.dbms.equals(ETLProperties.dbid_postgresql)) { createSql = "CREATE TABLE etl_status (" + "filename character varying(500)," + "filepath character varying(500)," + "outcome character varying(255)," + "processtime timestamp without time zone)"; } try { PreparedStatement ps_CREATEFILESTATUS = c.prepareStatement(createSql); ps_CREATEFILESTATUS.execute(); c.commit(); } catch (Exception e) { m_sqlStateHandler.sqlExceptionHandlerBuildEtlStatusTable(c, socj, save1, createEtlStatusSavepoint, e); } return createEtlStatusSavepoint; } /** creating cleanse table **/ /** * ******************************** build ETL_CLEANSE_TABLE ********************************** SQL: "CREATE TABLE..." - * all columns have text sql-datatype - column definitions built from csv file header ************************** */ public Savepoint buildCleanseTable(Connection c, SagesOpenCsvJar socj, Savepoint save1) throws SagesEtlException, SQLException { //http://postgresql.1045698.n5.nabble.com/25P02-current-transaction-is-aborted-commands-ignored-until-end-of-transaction-block-td2174290.html socj.src_table_name = socj.props_etlconfig.getProperty("dbprefix_src") + "_" + SagesOpenCsvJar.ETL_CLEANSE_TABLE; String createStmt_src = "CREATE TABLE " + socj.src_table_name + " \n(\n"; /** * build schema definition & map column to its index for later use with ResultSet and PreparedStatment */ int zsrc = 1; for (String colHead_src : socj.header_src) { if (socj.dbms.equals(ETLProperties.dbid_msaccess) && "time".equals(colHead_src)) { colHead_src = "accessetl_time"; } createStmt_src += colHead_src + " varchar(255),\n"; socj.PARAMINDX_SRC.put(colHead_src, zsrc); zsrc++; } /**remove trailing ',' **/ createStmt_src = StringUtils.substringBeforeLast(createStmt_src, ",\n") + "\n);"; log.info("\ncreatestmt:\n" + createStmt_src); PreparedStatement PS_create_CLEANSE = c.prepareStatement(createStmt_src); Savepoint createCleanseSavepoint = c.setSavepoint("createCleanseSavepoint"); try { PS_create_CLEANSE.execute(); //TODO: pgadmin wanted a pk for me to edit thru gui // setHeaderColumns(socj); } catch (Exception e) { // getHeaderColumns(socj); m_sqlStateHandler.sqlExceptionHandlerBuildCleanseTable(c, socj, save1, createCleanseSavepoint, e); } return createCleanseSavepoint; } /** * Adds ETL_FLAG column to cleanse table * */ public void alterCleanseTableAddFlagColumn(Connection c, Savepoint save1, Savepoint createCleanseSavepoint) throws SQLException, SagesEtlException { String sqlaltertableAddColumn = addFlagColumn(m_socj.src_table_name); PreparedStatement PS_addcolumn_Flag = c.prepareStatement(sqlaltertableAddColumn); log.info("ALTER STATEMENT: " + sqlaltertableAddColumn); try { PS_addcolumn_Flag.execute(); } catch (Exception e) { m_sqlStateHandler.sqlExceptionHandlerAlterCleanseTableAddFlagColumn(c, m_socj, save1, createCleanseSavepoint, e); } } /** * create staging table * */ abstract Savepoint buildStagingTable(Connection c, SagesOpenCsvJar socj, Savepoint save1) throws SQLException, SagesEtlException; /** * Adds ETL_FLAG column to existing staging table * * @param c - {@link Connection} * @param save1 - {@link Savepoint} * @param createCleanseSavepoint - {@link Savepoint} * * * */ public void alterStagingTableAddFlagColumn(Connection c, Savepoint save1, Savepoint createCleanseSavepoint) throws SQLException, SagesEtlException { String sqlaltertableAddColumn; PreparedStatement PS_addcolumn_Flag; sqlaltertableAddColumn = addFlagColumn(m_socj.dst_table_name); PS_addcolumn_Flag = c.prepareStatement(sqlaltertableAddColumn); log.debug("ALTER STATEMENT: " + sqlaltertableAddColumn); try { PS_addcolumn_Flag.execute(); } catch (Exception e) { m_sqlStateHandler .sqlExceptionHandlerAlterStagingTableAddFlagColumn(c, m_socj, save1, createCleanseSavepoint, e); } } /** * determining source-to-destination column mappings * * @param socj {@link SagesOpenCsvJar} */ public void generateSourceDestMappings(SagesOpenCsvJar socj) { socj.MAPPING_MAP = new LinkedHashMap<String, String>(); socj.MAPPING_REV_MAP = new LinkedHashMap<String, String>(); for (Entry<Object, Object> e : socj.props_mappings.entrySet()) { String key = ((String) e.getKey()).trim(); String value = ((String) e.getValue()).trim(); socj.MAPPING_MAP.put(key, value); socj.MAPPING_REV_MAP.put(value, key); } } /** * building SQL for inserting into cleansing table * */ abstract String buildInsertIntoCleansingTableSql(Connection c, SagesOpenCsvJar socj) throws SQLException; /** * set & execute SQL for inserting into cleansing table * */ abstract void setAndExecuteInsertIntoCleansingTablePreparedStatement(Connection c, SagesOpenCsvJar socj, ArrayList<String[]> entries_rawdata, Savepoint save2, PreparedStatement ps_INSERT_CLEANSE) throws SQLException; /** * building & executing SQL for copying from cleansing to staging table */ abstract void copyFromCleanseToStaging(Connection c, SagesOpenCsvJar socj, Savepoint save2) throws SQLException, SagesEtlException; public int errorCleanup(SagesOpenCsvJar socj, Savepoint savepoint, Connection connection, File currentCsv, String failedDirPath, Exception e) { return m_sqlStateHandler.errorCleanup(socj, savepoint, connection, currentCsv, failedDirPath, e); } protected void getHeaderColumns(SagesOpenCsvJar socj) { ArrayList<String> header_names = new ArrayList<String>(); try { String currentLine; BufferedReader columns = new BufferedReader(new FileReader(socj.props_etlconfig.getProperty("csvcolumnheaders"))); while((currentLine = columns.readLine()) != null) { header_names.add(currentLine); } } catch (Exception e) { //TODO figure out what to do } socj.header_src = header_names.toArray(socj.header_src); } protected void setHeaderColumns(SagesOpenCsvJar socj) { BufferedWriter columns; try { String currentLine; columns = new BufferedWriter(new FileWriter(new File(socj.props_etlconfig.getProperty("csvcolumnheaders")).getAbsoluteFile())); for (String column: socj.header_src) { columns.write(column+"\n"); } columns.close(); } catch (IOException e) { } } }