package org.jhuapl.edu.sages.etl.opencsvpods;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Savepoint;
import java.util.ArrayList;
import java.util.Properties;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.jhuapl.edu.sages.etl.ETLProperties;
import org.jhuapl.edu.sages.etl.SagesEtlException;
import org.jhuapl.edu.sages.etl.strategy.ETLMySQLStrategy;
import org.jhuapl.edu.sages.etl.strategy.ETLPostgresqlStrategy;
import org.jhuapl.edu.sages.etl.strategy.SagesOpenCsvJar;
import au.com.bytecode.opencsv.CSVReader;
/**
* This is the main class that runs the ETL
*
* @author POKUAM1
* @created Oct 4, 2011
*/
public class DumbTestOpenCsvJar extends SagesOpenCsvJar {
private static Logger log = Logger.getLogger(DumbTestOpenCsvJar.class);
private static Logger prettyPrintLog = Logger.getLogger("HeadingPrinter");
private static Logger triageLog = Logger.getLogger("TriageLogger");
/**
* @throws SagesEtlException
*/
public DumbTestOpenCsvJar() throws SagesEtlException {
super();
}
private static boolean hasProperHeaders(String[] expectedHeaders, String[] actualHeaders) {
boolean hasProperHeaders = true;
if (expectedHeaders.length != actualHeaders.length || expectedHeaders.length <= 1 || actualHeaders.length <= 1) {
hasProperHeaders = false;
}
for (int i = 1; i < expectedHeaders.length && hasProperHeaders; i++) {
if (expectedHeaders[i].compareToIgnoreCase(actualHeaders[i]) != 0) {
hasProperHeaders = false;
}
}
return hasProperHeaders;
}
public Savepoint makeSavePoint(Connection c, File file, String name) {
Savepoint save = null;
try {
save = c.setSavepoint(name);
this.savepoints.put(name, save);
} catch (SQLException e) {
log.debug("Making the savepoint " + name + " failed.");
e.printStackTrace();
this.errorCleanup(this, save, c, file, this.getFaileddir_csvfiles(), e);
System.exit(-1);
}
return save;
}
/**
* @param args
* @throws IOException
* @throws Exception
*/
public static void main(String[] args) throws SagesEtlException, IOException {
/**
* 1. LOAD PROPERTIES
* 2. ESTABLISH DB CONNECTION
* 3. TOGGLE PROPS:
* TRANSACTION, LOGGING, STATS, ETC...
* 4. SET ETLSTRATEGY (*dpattern)
* 5. LOAD CSV FILES
* 6. ETL per CSV FILE
* (on success: copy out, delete from in ) COMMIT
* (on failure: copy fail, delete from in) ROLLBACK
*/
/** LOAD PROPERTIES *************************************************************/
DumbTestOpenCsvJar socj_dumb = new DumbTestOpenCsvJar();
// log.debug("Debug");
// log.info("Info");
// log.warn("Warn");
// log.error("Error");
// log.fatal("Fatal");
/** ESTABLISH DB CONNECTION *****************************************************/
Connection c = null;
c = socj_dumb.getConnection();
/** SET ETL Strategy *************************************************************/
if (socj_dumb.dbms.equals(ETLProperties.dbid_postgresql)) {
socj_dumb.setEtlStrategy(new ETLPostgresqlStrategy(socj_dumb));
} else if (socj_dumb.dbms.equals(ETLProperties.dbid_mysql)) {
socj_dumb.setEtlStrategy(new ETLMySQLStrategy(socj_dumb));
try {
PreparedStatement
useDatabase =
c.prepareStatement("USE " + socj_dumb.props_etlconfig.getProperty("dbName") + ";");
PreparedStatement startTransaction = c.prepareStatement("START TRANSACTION;");
c.setAutoCommit(false);
/** execute CREATE STAGING_TABLE sql */
useDatabase.execute();
startTransaction.execute();
} catch (Exception e) {
log.fatal("Could not connect to the Database");
e.printStackTrace();
System.exit(-1);
}
} else {
log.fatal("Unknown type of Database in etlconfig.properties: " + socj_dumb.dbms + " is not a known type.");
System.exit(-1);
}
ArrayList<String[]> master_entries_rawdata = new ArrayList<String[]>();
/**
* Extract header from the file
*/
try {
socj_dumb.extractHeaderColumns(socj_dumb);
} catch (FileNotFoundException e) {
log.fatal("problem occurred trying to process csv files:" + e.getMessage());
} catch (IOException e) {
log.fatal("problem occurred trying to process csv files:" + e.getMessage());
} catch (Exception e) {
log.fatal("problem occurred trying to process csv files:" + e.getMessage());
} finally {
//probaly should exit at this point
}
String DEBUGheader_src = StringUtils.join(socj_dumb.header_src);
log.debug("header_src:\n" + DEBUGheader_src + "\n");
/**
* LOAD CSV FILES
* *************************************************************** -
* load into memory all csv files in the inputdir - each iteration is
* new transaction so we can isolate erroneous files
*/
// build CLEANSE
// alter CLEANSE
// build STAGING
// alter STAGING
// build MAPPINGS
// insert into CLEANSE
// select insert into STAGING
// select insert into FINAL PRODUCTION
// --CLEANUP--
// copy file to OUT|FAILED
// delete file from IN
// COMMIT
log.info("-- STEP 1 -- FILES NOW BEING PROCESSED SEQUENTIALLY");
for (File file : socj_dumb.csvFiles) {
int step = 1;
/***********************************
* SAVEPOINT baseLine
***********************************
* Truncating tables
***********************************/
Savepoint baseLine = socj_dumb.makeSavePoint(c, file, "baseLine");
prettyPrintLog.info("--STEP " + (step++) + "-- TRUNCATING CLEANSE & STAGING TABLES");
try {
socj_dumb.truncateCleanseAndStagingTables(socj_dumb, c, null, baseLine);
} catch (SQLException e) {
log.fatal("the rollbacks failed.");
e.printStackTrace();
socj_dumb.errorCleanup(socj_dumb, baseLine, c, null, socj_dumb.getFaileddir_csvfiles(), e);
closeDbConnection(c);
System.exit(-1);
}
prettyPrintLog.info("--STEP " + (step++) + "-- PROCESSING A FILE");
socj_dumb.setCurrentFile(file);
socj_dumb.setSuccess(true);
log.debug("FILE SEEN: " + file.getName() + "\n");
/** loading file into memory for ETL processing." **/
log.debug("loading " + file.getName() + " into memory for ETL processing.");
master_entries_rawdata.clear();
CSVReader reader_rawdata2 = new CSVReader(new FileReader(file));
socj_dumb.currentEntries = (ArrayList<String[]>) reader_rawdata2.readAll();
String[] columns = socj_dumb.currentEntries.get(0);
socj_dumb.currentEntries.remove(0);
/** remove the header row, already got it above */
master_entries_rawdata.addAll(socj_dumb.currentEntries);
ArrayList<String[]> entries_rawdata = master_entries_rawdata;
reader_rawdata2.close();
/***********************************
* SAVEPOINT #1
***********************************
* before CLEANSE & STAGING built
***********************************/
log.debug("build CLEANSE");
Savepoint save1 = socj_dumb.makeSavePoint(c, file, "save1");
Savepoint createEtlStatusSavepoint = null;
try {
createEtlStatusSavepoint = socj_dumb.buildEtlStatusTable(c, socj_dumb, save1);
socj_dumb.savepoints.put("createEtlStatusSavepoint", createEtlStatusSavepoint);
} catch (SQLException e) {
log.debug("the rollbacks failed.");
e.printStackTrace();
socj_dumb.errorCleanup(socj_dumb, save1, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
Savepoint createCleanseSavepoint = null;
try {
createCleanseSavepoint = socj_dumb.buildCleanseTable(c, socj_dumb, save1);
socj_dumb.savepoints.put("createCleanseSavepoint", createCleanseSavepoint);
} catch (SQLException e) {
log.debug("the rollbacks failed.");
e.printStackTrace();
socj_dumb.errorCleanup(socj_dumb, save1, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
log.debug("alter CLEANSE");
try {
socj_dumb.alterCleanseTableAddFlagColumn(c, save1, createCleanseSavepoint);
} catch (SQLException e) {
e.printStackTrace();
socj_dumb.errorCleanup(socj_dumb, save1, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
log.debug("build STAGING");
Savepoint createStagingSavepoint = null;
try {
createStagingSavepoint = socj_dumb.buildStagingTable(c, socj_dumb, save1);
socj_dumb.savepoints.put("createStagingSavepoint", createStagingSavepoint);
} catch (SQLException e) {
log.debug("rollback failed.");
e.printStackTrace();
socj_dumb.errorCleanup(socj_dumb, save1, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
log.debug("alter STAGING");
try {
socj_dumb.alterStagingTableAddFlagColumn(c, save1,
createStagingSavepoint);
} catch (SQLException e) {
log.debug("rollback failed.");
e.printStackTrace();
socj_dumb.errorCleanup(socj_dumb, save1, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
log.debug("build MAPPINGS");
/**
* This is defined in the file:
* "src-to-dst-column-mappings.properties" and it must be configured
* per installation
*/
socj_dumb.generateSourceDestMappings(socj_dumb);
log.debug("MAPPING KEYS: " + StringUtils.join(socj_dumb.MAPPING_MAP.keySet(), ","));
log.debug("MAPPING VALUES: " + StringUtils.join(socj_dumb.MAPPING_MAP.values(), ","));
if (!hasProperHeaders(socj_dumb.header_src, columns)) {
File fileRefForStatusLogging = null;
try {
if (socj_dumb.isSuccess()) {
log.debug("copy file to 'RETRY' DIRECTORY");
/** Destination dir */
File destinationDir = new File(socj_dumb.props_etlconfig.getProperty("csvretrydir")); //TODO change this to correct directory
log.debug("delete file from 'IN' DIRECTORY");
fileRefForStatusLogging = etlMoveFile(file, destinationDir);
} else {
log.debug("fyi, was an unsuccessful run. errorcleanup already occurred.");
}
} catch (IOException io) {
triageLog.fatal("ALERT PROBLEM DELETING FILE: " + io.getMessage());
logFileOutcome(socj_dumb, c, fileRefForStatusLogging, "FAILURE: " + io.getMessage());
closeDbConnection(c);
System.exit(-1);
}
try {
logFileOutcome(socj_dumb, c, fileRefForStatusLogging, "TRY AGAIN: FILE COLUMN HEADERS DO NOT MATCH EXPECTED");
} catch (SagesEtlException e2) {
//???? this bombed when trying to log file processing stats...
triageLog.error(e2.getMessage());
}
continue;
}
/*****************************************
* SAVEPOINT #2
*****************************************
* before INSERT csv records into CLEANSE
*****************************************/
Savepoint save2 = null;
try {
save2 = c.setSavepoint("save2");
socj_dumb.savepoints.put("save2", save2);
} catch (SQLException e1) {
log.debug("making savepoint 'save2' failed.");
e1.printStackTrace();
socj_dumb.errorCleanup(socj_dumb, save2, c, file, socj_dumb.getFaileddir_csvfiles(), e1);
}
log.debug("insert into CLEANSE");
PreparedStatement ps_INSERT_CLEANSE = null;
try {
ps_INSERT_CLEANSE = c.prepareStatement(socj_dumb.buildInsertIntoCleansingTableSql(c, socj_dumb));
} catch (SQLException e) {
log.debug("error creating prepared statement");
e.printStackTrace();
socj_dumb.errorCleanup(socj_dumb, save2, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
try {
socj_dumb.setAndExecuteInsertIntoCleansingTablePreparedStatement(c, socj_dumb, entries_rawdata, save2,
ps_INSERT_CLEANSE);
} catch (SQLException e) {
log.debug("error setting params and executing the insert into cleanse prep stmt.");
e.printStackTrace();
socj_dumb.errorCleanup(socj_dumb, save2, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
prettyPrintLog.info("--STEP " + (step++) + "-- RUNNING CUSTOM SQL AGAINST CLEANSING");
/**
*
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
* INJECT THE CUSTOM SQL AGAINST THE CLEANSE TABLE HERE
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
*
* */
try {
runCustomSql(c, socj_dumb.props_customsql_cleanse, socj_dumb.src_table_name);
} catch (Exception e) {
triageLog.debug("ETL_LOGGER: failure with custom sql against cleansing.");
socj_dumb.errorCleanup(socj_dumb, save2, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
log.debug("(copying from CLEANSE to STAGING) select insert into STAGING");
try {
// IF ERROR OCCURS INSIDE THIS GUY, COME OUT AND DO ERROR
// CLEANUP. FILE IS BAD. ROLLBACK. PROCEED.
socj_dumb.copyFromCleanseToStaging(c, socj_dumb, save2);
} catch (SQLException e) {
log.debug("error occured copying from CLEANSE to STAGING.");
e.printStackTrace();
socj_dumb.errorCleanup(socj_dumb, save2, c, file, socj_dumb.getFaileddir_csvfiles(), e);
} catch (SagesEtlException e) {
log.debug("error occured copying from CLEANSE to STAGING.");
socj_dumb.errorCleanup(socj_dumb, save2, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
log.debug("FINISHED MESSY STUFF. ALMOST DONE...\n\n");
prettyPrintLog.info("--STEP " + (step++) + "-- RUNNING CUSTOM SQL AGAINST STAGING");
/**
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
* INJECT THE CUSTOM SQL AGAINST THE STAGING TABLE HERE
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
**/
try {
runCustomSql(c, socj_dumb.props_customsql_staging, socj_dumb.dst_table_name);
} catch (Exception e) {
triageLog.debug("ETL_LOGGER: failure with custom sql against staging.");
socj_dumb.errorCleanup(socj_dumb, save2, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
log.debug("select insert into FINAL PRODUCTION");
prettyPrintLog.info("--STEP " + (step++) + "-- FINAL LOAD INTO THE PRODUCTION TABLE STARTING.");
/**
*
*
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
* INJECT THE CUSTOM TRANSFER SQL AGAINST THE FINAL PRODUCTION TABLE HERE
* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
*
* Takes the syntax:
* INSERT INTO $staging_table ($columnlist) SELECT $prodcolumnlist FROM $staging_table
**/
socj_dumb.prod_table_name = socj_dumb.props_etlconfig.getProperty("productionTableName");
String insertSelect_Production = "INSERT INTO " + socj_dumb.prod_table_name
+ " (_columnlist_) SELECT _columnlist_ FROM "
+ socj_dumb.dst_table_name;
// TODO: don't want to build this each time. enhance
String columnlist = StringUtils.join(socj_dumb.DEST_COLTYPE_MAP.keySet(), ",");
log.debug("ETL_LOGGER(columnList): " + columnlist);
insertSelect_Production = insertSelect_Production.replaceAll("_columnlist_", columnlist);
log.debug("ETL_LOGGER(insertSelect_Production): " + insertSelect_Production);
try {
PreparedStatement ps_finalToProd = c.prepareStatement(insertSelect_Production);
ps_finalToProd.execute();
} catch (Exception e) {
log.error("ETL_LOGGER: OOOOOOPS SOMETHING HAPPENED BAD IN THE END. SHUCKS");
socj_dumb.errorCleanup(socj_dumb, save2, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
try {
Properties customProdLoaderSqlprops = socj_dumb.props_customsql_final_to_prod;
int numSql3 = customProdLoaderSqlprops.size();
for (int i = 1; i <= numSql3; i++) {
String sql = customProdLoaderSqlprops.getProperty(String.valueOf(i));
sql = sql.replace("$table", socj_dumb.dst_table_name);
log.debug("CUSTOM SQL: " + sql);
PreparedStatement ps = c.prepareStatement(sql);
ps.execute();
}
} catch (Exception e) {
triageLog.debug("ETL_LOGGER: failure with custom sql final to prod: " + e.getMessage());
socj_dumb.errorCleanup(socj_dumb, save2, c, file, socj_dumb.getFaileddir_csvfiles(), e);
}
prettyPrintLog.info("--STEP " + (step++) + "---CLEANUP--");
File fileRefForStatusLogging = null;
try {
if (socj_dumb.isSuccess()) {
log.debug("copy file to 'OUT' DIRECTORY");
/** Destination dir */
File destinationDir = new File(socj_dumb.outputdir_csvfiles);
// ON FAILURE: destinationDir = faileddir_csvfiles -- this
// was done in the errorCleanup method
log.debug("delete file from 'IN' DIRECTORY");
fileRefForStatusLogging = etlMoveFile(file, destinationDir);
} else {
log.debug("fyi, was an unsuccessful run. errorcleanup already occurred.");
}
} catch (IOException io) {
triageLog.fatal("ALERT PROBLEM DELETING FILE: " + io.getMessage());
logFileOutcome(socj_dumb, c, fileRefForStatusLogging, "FAILURE: " + io.getMessage());
closeDbConnection(c);
System.exit(-1);
} finally {
try {
// is null if success=false
if (fileRefForStatusLogging != null) {
logFileOutcome(socj_dumb, c, fileRefForStatusLogging, "SUCCESS");
}
c.commit();
Savepoint finalCommit = c.setSavepoint("finalCommit");
socj_dumb.savepoints.put("finalCommit", finalCommit);
prettyPrintLog.info("--STEP " + (step++) + "-- COMMITED");
} catch (SQLException e) {
triageLog.fatal("UNEXPECTEDLY COULD NOT COMMIT CHANGES TO THE DATABASE. THIS IS BAD.");
try {
logFileOutcome(socj_dumb, c, fileRefForStatusLogging, "FAILURE: " + e.getMessage());
} catch (SagesEtlException e2) {
//???? this bombed when trying to log file processing stats...
triageLog.error(e2.getMessage());
}
closeDbConnection(c);
System.exit(-1);
}
}
}
closeDbConnection(c);
}
}