/** * */ package org.jhuapl.edu.sages.etl.strategy; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Savepoint; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Properties; import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.jhuapl.edu.sages.etl.ConnectionFactory; import org.jhuapl.edu.sages.etl.ETLProperties; import org.jhuapl.edu.sages.etl.PropertiesLoader; import org.jhuapl.edu.sages.etl.SagesEtlException; import org.jhuapl.edu.sages.etl.opencsvpods.DumbTestOpenCsvJar; /** * {@link SagesOpenCsvJar} is the domain class and controls the execution of the overall ETL process. It contains an * {@link ETLStrategyTemplate} object that implements most of the ETL processing logic. * * @author POKUAM1 * @created Nov 1, 2011 */ public abstract class SagesOpenCsvJar { private static org.apache.log4j.Logger log = Logger.getLogger(SagesOpenCsvJar.class); private static org.apache.log4j.Logger outcomeLog = Logger.getLogger("FileProcessingOutcome"); /** * The {@link ETLStrategyTemplate} object * */ protected ETLStrategyTemplate etlStrategy; /** * The savepoints * */ protected Map<String, Savepoint> savepoints; /** * List of files that ETL loads into the production table * */ protected File[] csvFiles; /** * The current file that is being processed * */ private File currentFile; /** * Current file records that the ETL will load into the production table via SQL statements * */ protected ArrayList<String[]> currentEntries; /** * Flag used to determine whether a file should be moved to directory that signifies successful processing * */ private boolean success; /** * TODO: not used yet. The current file marked for deletion due to an error in processing * */ protected File fileMarkedForDeletion; /** * TODO: not used yet * */ protected int currentRecNum; /** * TODO: not used yet. List of files ETL encountered failure while processing * */ private static List<File> failedCsvFiles; /** * SimpleDateFormat to use for file processing timestamp * */ private static SimpleDateFormat sdf; /** csv files are loaded from inputdir and moved to outputdir after being processed successfully */ /** * ETL looks at the input directory for files to process * */ protected String inputdir_csvfiles; /** * ETL moves successfully processed files to the output directory * */ protected String outputdir_csvfiles; /** * ETL moves unsucessfully processed files to the failed directory * */ private String faileddir_csvfiles; protected static final String ETL_CLEANSE_TABLE = "ETL_CLEANSE_TABLE"; protected static final String ETL_STAGING_DB = "ETL_STAGING_DB"; protected String src_table_name; protected String dst_table_name; protected String prod_table_name; /** * maps the destination columns to their sql-datatype qualifier for generating the schema */ protected Map<String, String> DEST_COLTYPE_MAP; //http://download.oracle.com/javase/6/docs/api/constant-values.html#java.sql.Types.TIME /** * maps the destination columns to their java.sql.Types for setting ? parameters on prepared statements */ protected Map<String, Integer> DEST_SQLTYPE_MAP; /** * maps the source:destination columns */ protected Map<String, String> MAPPING_MAP; /** * maps the destination:source columns */ protected Map<String, String> MAPPING_REV_MAP; /** * properties holders */ protected Properties props_etlconfig; protected Properties props_mappings; protected Properties props_dateformats; protected Properties props_customsql_cleanse; protected Properties props_customsql_staging; protected Properties props_customsql_final_to_prod; /** * target database connection settings */ protected String dbms; protected int portNumber; protected String serverName; protected String dbName; protected String userName; protected String password; /** * maps source column name to its parameter index in the source table, indexing starts at 1 */ protected Map<String, Integer> PARAMINDX_SRC = new HashMap<String, Integer>(); /** * maps destination column name to its parameter index in the destination table, indexing starts at 1 */ protected Map<String, Integer> PARAMINDX_DST = new HashMap<String, Integer>(); /** * header columns used to define the CLEANSE table schema */ protected String[] header_src = new String[0]; //TODO use this to get the list of expected column headers /** * errorFlag to control what to do on certain errors */ protected int errorFlag = 0; /** * * @throws SagesEtlException */ public SagesOpenCsvJar() throws SagesEtlException { super(); PropertiesLoader etlProperties = new ETLProperties(); etlProperties.loadEtlProperties(); initializeProperties((ETLProperties) etlProperties); setFailedCsvFiles(new ArrayList<File>()); savepoints = new LinkedHashMap<String, Savepoint>(); setSimpleDateFormat(new SimpleDateFormat("yyyy-MM-dd-HHmmssZ")); } public void setEtlStrategy(ETLStrategyTemplate strategy) { etlStrategy = strategy; } public void extractHeaderColumns(SagesOpenCsvJar socj) throws FileNotFoundException, IOException { etlStrategy.extractHeaderColumns(socj); } ; String[] determineHeaderColumns(File file) throws FileNotFoundException, IOException { return etlStrategy.determineHeaderColumns(file); } ; public Savepoint buildCleanseTable(Connection c, SagesOpenCsvJar socj, Savepoint save1) throws SQLException, SagesEtlException { return etlStrategy.buildCleanseTable(c, socj, save1); } ; public Savepoint buildEtlStatusTable(Connection c, SagesOpenCsvJar socj, Savepoint save1) throws SQLException, SagesEtlException { return etlStrategy.buildEtlStatusTable(c, socj, save1); } ; public void truncateCleanseAndStagingTables(DumbTestOpenCsvJar socj_dumb, Connection c, File file, Savepoint baseLine) throws SagesEtlException, SQLException { etlStrategy.truncateCleanseAndStagingTables(socj_dumb, c, file, baseLine); } public Savepoint buildStagingTable(Connection c, SagesOpenCsvJar socj, Savepoint save1) throws SQLException, SagesEtlException { return etlStrategy.buildStagingTable(c, socj, save1); } ; public void generateSourceDestMappings(SagesOpenCsvJar socj) { etlStrategy.generateSourceDestMappings(socj); } public void setAndExecuteInsertIntoCleansingTablePreparedStatement( Connection c, SagesOpenCsvJar socj, ArrayList<String[]> entries_rawdata, Savepoint save2, PreparedStatement ps_INSERT_CLEANSE) throws SQLException { etlStrategy.setAndExecuteInsertIntoCleansingTablePreparedStatement(c, socj, entries_rawdata, save2, ps_INSERT_CLEANSE); } public String buildInsertIntoCleansingTableSql(Connection c, SagesOpenCsvJar socj) throws SQLException { return etlStrategy.buildInsertIntoCleansingTableSql(c, socj); } public void copyFromCleanseToStaging(Connection c, SagesOpenCsvJar socj, Savepoint save2) throws SQLException, SagesEtlException { etlStrategy.copyFromCleanseToStaging(c, socj, save2); } public int errorCleanup(SagesOpenCsvJar socj, Savepoint savepoint, Connection connection, File currentCsv, String failedDirPath, Exception e) { return etlStrategy.errorCleanup(socj, savepoint, connection, currentCsv, failedDirPath, e); } String addFlagColumn(String tableToModify) { return etlStrategy.addFlagColumn(tableToModify); } ; public void alterCleanseTableAddFlagColumn(Connection c, Savepoint save1, Savepoint createCleanseSavepoint) throws SQLException, SagesEtlException { etlStrategy.alterCleanseTableAddFlagColumn(c, save1, createCleanseSavepoint); } ; public void alterStagingTableAddFlagColumn(Connection c, Savepoint save1, Savepoint createCleanseSavepoint) throws SQLException, SagesEtlException { etlStrategy.alterStagingTableAddFlagColumn(c, save1, createCleanseSavepoint); } /** Public/protected helper methods **/ /** * Initializes the {@link SagesOpenCsvJar}' s etl properties * * @param etlProperties - these are configured by updating the set of ETL properties files * @throws SagesEtlException - if property doesn't exist, or issue loading properties file occurs */ protected void initializeProperties(ETLProperties etlProperties) throws SagesEtlException { this.props_etlconfig = etlProperties.getProps_etlconfig(); this.props_mappings = etlProperties.getProps_mappings(); this.props_dateformats = etlProperties.getProps_dateformats(); this.props_customsql_cleanse = etlProperties.getProps_customsql_cleanse(); this.props_customsql_staging = etlProperties.getProps_customsql_staging(); this.props_customsql_final_to_prod = etlProperties.getProps_customsql_final_to_prod(); this.dbms = etlProperties.getDbms(); this.portNumber = etlProperties.getPortNumber(); this.userName = etlProperties.getUserName(); this.password = etlProperties.getPassword(); this.serverName = etlProperties.getServerName(); this.dbName = etlProperties.getDbName(); this.inputdir_csvfiles = props_etlconfig.getProperty("csvinputdir"); this.outputdir_csvfiles = props_etlconfig.getProperty("csvoutputdir"); this.setFaileddir_csvfiles(props_etlconfig.getProperty("csvfaileddir")); } /** * Establishes database connection to the target database * * @return Connection */ public Connection getConnection() throws SagesEtlException { try { return ConnectionFactory.createConnection(this.dbms, this.serverName, this.dbName, this.userName, this.password, this.portNumber); } catch (SQLException e) { throw abort("Sorry, failed to establish database connection", e); } } /** * Copies file to the designated destination directory, and then deletes it from its original location. On failure, * copied to FAILED directory, On success copied to OUT directory */ protected static File etlMoveFile(File file, File destinationDir) throws IOException { Date date = new Date(); SimpleDateFormat sdf = SagesOpenCsvJar.getSimpleDateFormat(); String output = sdf.format(date); File newFileLocation = new File(destinationDir, output + "_" + file.getName()); /** Move file to new dir */ FileUtils.copyFile(file, newFileLocation); FileUtils.forceDelete(file); return newFileLocation; } /** * @param c * @throws SQLException */ protected static void runCustomSql(Connection c, Properties customSql, String targetTableName) throws SQLException { //Properties customCleanseSqlprops = socj_dumb.props_customsql_cleanse; Properties customCleanseSqlprops = customSql; int numSql = customCleanseSqlprops.size(); for (int i = 1; i <= numSql; i++) { String sql = customCleanseSqlprops.getProperty(String.valueOf(i)); sql = sql.replace("$table", targetTableName); log.debug("CUSTOM SQL: " + sql); PreparedStatement ps = c.prepareStatement(sql); ps.execute(); } } /** * @param socj TODO */ public static void logFileOutcome(SagesOpenCsvJar socj, Connection c, File file, String outcome) throws SagesEtlException { String sql = "INSERT INTO etl_status(filename,filepath,outcome,processtime) VALUES(?,?,?,?)"; String canonicalPath = "?"; try { canonicalPath = file.getCanonicalPath(); } catch (IOException e1) { canonicalPath = file.getAbsolutePath(); e1.printStackTrace(); } String fileName = file.getName(); Timestamp processtime = new Timestamp(new Date().getTime()); if (c != null) { boolean exceptionOcurred = false; Exception eTmp = null; /* // CREATE etl_status table String createSql = "CREATE TABLE etl_status (" + "filename character varying(500)," + "filepath character varying(500)," + "outcome character varying(255)," + "processtime timestamp without time zone)"; try { PreparedStatement ps_CREATEFILESTATUS = c.prepareStatement(createSql); ps_CREATEFILESTATUS.execute(); c.commit(); } catch (SQLException e) { if (!"42P07".equals(e.getSQLState())){ // TODO make dbms agnostic exceptionOcurred = true; outcomeLog.warn("Unable to create 'etl_status' table for file processing statistics. Please Investigate. \n" + e.getMessage()); eTmp = new SQLException(e); } } */ // INSERT stats into etl_status table try { PreparedStatement ps_INSERTFILESTATUS = c.prepareStatement(sql); ps_INSERTFILESTATUS.setString(1, fileName); ps_INSERTFILESTATUS.setString(2, canonicalPath); ps_INSERTFILESTATUS.setString(3, outcome); ps_INSERTFILESTATUS.setTimestamp(4, processtime); ps_INSERTFILESTATUS.execute(); } catch (SQLException e2) { exceptionOcurred = true; outcomeLog .warn("Unable to write file processing statistics to database. Statistics were still written" + " to the log files. Please Investigate. \n" + e2.getMessage()); eTmp = new SQLException(e2); } finally { /** TODO MAKE THIS GO INTO A SPECIAL LOG FILE NOT JUST CONSOLE **/ outcomeLog.info("--------File Processing Results--------"); outcomeLog.info("Filename: " + fileName); outcomeLog.info("Filepath: " + canonicalPath); outcomeLog.info("Processed Time: " + processtime); outcomeLog.info("Outcome: " + outcome); if (exceptionOcurred) { outcomeLog.warn("ALERT. Unable to write these statistics to database. " + "Please Investigate. \n" + eTmp.getMessage()); // NOT A TRUE ERROR, SO DON'T MARK FILES AS FAILED--FILE IS null AND SO IS THE FAILED CSV DIR // socj.errorCleanup(socj, socj.savepoints.get("finalCommit"), c, null, null, eTmp); //TODO OMG THIS IS GROSS FIX ASAP socj.etlStrategy.m_sqlStateHandler .errorCleanup(socj, socj.savepoints.get("finalCommit"), c, null, null, eTmp); } } } } /** * @param c database connection */ public static void closeDbConnection(Connection c) { /** CLOSE CONNECTION TO DATABASE **/ try { if (c != null) { c.close(); log.info("Closed Database Connection. Good-bye."); } } catch (SQLException e) { log.fatal("THIS IS BAD. PROBLEM OCURRED TRYING TO CLOSE DB CONNECTION."); } } /** * returns a SagesEtlException that wraps the original exception * * @param msg SAGES ETL message to display * @param e the original exception * @return SagesEtlException */ public static SagesEtlException abort(String msg, Throwable e) { return new SagesEtlException(e.getMessage(), e); } public String getFaileddir_csvfiles() { return faileddir_csvfiles; } public void setFaileddir_csvfiles(String faileddir_csvfiles) { this.faileddir_csvfiles = faileddir_csvfiles; } public File getCurrentFile() { return currentFile; } public void setCurrentFile(File currentFile) { this.currentFile = currentFile; } public static List<File> getFailedCsvFiles() { return failedCsvFiles; } public static void setFailedCsvFiles(List<File> failedCsvFiles) { SagesOpenCsvJar.failedCsvFiles = failedCsvFiles; } public boolean isSuccess() { return success; } public void setSuccess(boolean success) { this.success = success; } public static SimpleDateFormat getSimpleDateFormat() { return sdf; } private static void setSimpleDateFormat(SimpleDateFormat sdf) { SagesOpenCsvJar.sdf = sdf; } }