package org.jhuapl.edu.sages.etl.oldstuff; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.sql.Connection; import java.sql.SQLException; import java.util.ArrayList; import java.util.Properties; import org.apache.commons.io.FileUtils; import org.jhuapl.edu.sages.etl.ConnectionFactory; import org.jhuapl.edu.sages.etl.ETLProperties; import org.jhuapl.edu.sages.etl.SagesEtlException; import au.com.bytecode.opencsv.CSVReader; /** * @author POKUAM1 * @created Oct 4, 2011 */ public class EmptyTestOpenCsvJar { private File[] csvFiles; private File currentFile; private File fileMarkedForDeletion; private ArrayList<String[]> currentEntries; private int currentRecNum; //private static List<File> failedCsvFiles; private boolean success; /** csv files are loaded from inputdir and moved to outputdir after being processed successfully */ private String inputdir_csvfiles; private String outputdir_csvfiles; private String faileddir_csvfiles; /** properties holders */ private Properties props_etlconfig; private Properties props_mappings; private Properties props_dateformats; private Properties props_customsql_cleanse; private Properties props_customsql_staging; private Properties props_customsql_final_to_prod; /** target database connection settings*/ private String dbms; private int portNumber; private String serverName; private String dbName; private String userName; private String password; /** header columns used to define the CLEANSE table schema */ private static String[] header_src = new String[0]; /** errorFlag to control what to do on certain errors */ private static int errorFlag = 0; /** * @param dbms * @param portNumber * @param userName * @param password * @param serverName * @param dbName * @throws SagesEtlException */ public EmptyTestOpenCsvJar(String dbms, int portNumber, String userName, String password, String serverName, String dbName) throws SagesEtlException { super(); ETLProperties etlProperties = new ETLProperties(); etlProperties.loadEtlProperties(); initializeProperties(etlProperties); //override db connection settings this.dbms = dbms; this.portNumber = portNumber; this.userName = userName; this.password = password; this.serverName = serverName; this.dbName = dbName; } /** * @throws SagesEtlException */ public EmptyTestOpenCsvJar() throws SagesEtlException { super(); ETLProperties etlProperties = new ETLProperties(); etlProperties.loadEtlProperties(); initializeProperties(etlProperties); } /** * @throws SagesEtlException */ protected void initializeProperties(ETLProperties etlProperties) throws SagesEtlException { this.props_etlconfig = etlProperties.getProps_etlconfig(); this.props_mappings = etlProperties.getProps_mappings(); this.props_dateformats = etlProperties.getProps_dateformats(); this.props_customsql_cleanse = etlProperties.getProps_customsql_cleanse(); this.props_customsql_staging = etlProperties.getProps_customsql_staging(); this.props_customsql_final_to_prod = etlProperties.getProps_customsql_final_to_prod(); this.dbms = etlProperties.getDbms(); this.portNumber = etlProperties.getPortNumber(); this.userName = etlProperties.getUserName(); this.password = etlProperties.getPassword(); this.serverName = etlProperties.getServerName(); this.dbName = etlProperties.getDbName(); this.inputdir_csvfiles = props_etlconfig.getProperty("csvinputdir"); this.outputdir_csvfiles = props_etlconfig.getProperty("csvoutputdir"); this.faileddir_csvfiles = props_etlconfig.getProperty("csvfaileddir"); } /** * returns a SagesEtlException that wraps the original exception * @param msg SAGES ETL message to display * @param e the original exception * @return SagesEtlException */ public static SagesEtlException abort(String msg, Throwable e){ return new SagesEtlException(e.getMessage(), e); } /** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { Connection c = null; EmptyTestOpenCsvJar tocj = null; try { tocj = new EmptyTestOpenCsvJar(); } catch (SagesEtlException e){ throw abort("Unable to load properties",e ); } c = tocj.getConnection(); System.out.println("catalog: " + c.getCatalog()); // LOAD PROPERTIES // ESTABLISH DB CONNECTION // TOGGLE: TRANSACTIONS, LOGGING, ETC. // LOAD CSV FILES // ETL STEPS per CSV (move, delete on success) // UNLOAD FILES CSVReader reader_rawdata = new CSVReader(new FileReader("C:\\dev\\git-repositories\\sandbox\\sandbox\\unittests\\datachunks\\modded_formdata_oevisit11106-1715.csv")); /** csv files are loaded from inputdir and moved to outputdir after being processed successfully */ // String inputdir_csvfiles = tocj.props_etlconfig.getProperty("csvinputdir"); // String outputdir_csvfiles = tocj.props_etlconfig.getProperty("csvoutputdir"); // String faileddir_csvfiles = tocj.props_etlconfig.getProperty("csvfaileddir"); ArrayList<String[]> master_entries_rawdata = new ArrayList<String[]>(); extractHeaderColumns(tocj); /** load into memory all csv files in the inputdir * - each iteration is new transaction so we can isolate erroneous files **/ String pathin = tocj.inputdir_csvfiles; File dirin = new File(pathin); File[] filesin = dirin.listFiles(); for (File file : filesin){ tocj.success = false; master_entries_rawdata.clear(); //notused tocj.currentFile = file; //TODO currentEntries, currentRecNum /* if (tocj.fileMarkedForDeletion != null) { FileUtils.forceDelete(tocj.fileMarkedForDeletion); c.commit(); tocj.fileMarkedForDeletion = null; }*/ CSVReader reader_rawdata2 = new CSVReader(new FileReader(file)); tocj.currentEntries = (ArrayList<String[]>) reader_rawdata2.readAll(); reader_rawdata2.close(); tocj.currentEntries.remove(0); /** remove the header row */ master_entries_rawdata.addAll(tocj.currentEntries); ArrayList<String[]> entries_rawdata = master_entries_rawdata; FileUtils.forceDelete(file); /* try{ Date date = new Date(); long dtime = date.getTime(); *//** Destination dir *//* File dir = new File(outputdir_csvfiles); *//** Move file to new dir *//* FileUtils.copyFile(file, new File(dir, dtime + "_"+ file.getName())); //boolean success = file.renameTo(new File(dir, dtime + "_"+ file.getName())); Thread.sleep(2000); //deletethismess = new File(file.getPath()); //FileUtils.forceDelete(deletethismess); FileUtils.forceDelete(file); //c.commit(); } catch (IOException io){ System.out.println("CRAP THAT'S NOT GOOD COULDNT MOVE/DELETE FILE TO OUTPUT: " + io.getMessage()); }*/ } } /** * Establishes database connection to the target database * @return Connection * @throws SQLException */ public Connection getConnection() throws SagesEtlException { try { return ConnectionFactory.createConnection(this.dbms, this.serverName, this.dbName, this.userName, this.password, this.portNumber); } catch (SQLException e){ throw abort("Sorry, failed to establish database connection", e); } } /** * @param tocj * @return * @throws FileNotFoundException * @throws IOException */ protected static void extractHeaderColumns(EmptyTestOpenCsvJar tocj) throws FileNotFoundException, IOException { /** header columns used to define the CLEANSE table schema */ header_src = new String[0]; //failedCsvFiles = new ArrayList<File>(); //File[] readCsvFiles = new File[0]; /** determine header columns **/ File csvinputDir = new File(tocj.inputdir_csvfiles); if (csvinputDir.isDirectory()){ tocj.csvFiles = csvinputDir.listFiles(); if (tocj.csvFiles.length == 0){ System.out.println("Nothing to load into database. Exiting."); //TODO: LOGGING explain this happens System.exit(0); } header_src = determineHeaderColumns(tocj.csvFiles[0]); } else { System.out.println(csvinputDir.getName() + "is not valid csv input directory. Exiting."); //TODO: LOGGING explain this happens System.exit(0); } //return readCsvFiles; } /** * @param file * @throws FileNotFoundException * @throws IOException */ protected static String[] determineHeaderColumns(File file) throws FileNotFoundException, IOException { CSVReader reader_rawdata2 = new CSVReader(new FileReader(file)); ArrayList<String[]> currentEntries = (ArrayList<String[]>) reader_rawdata2.readAll(); String[] headerColumns = currentEntries.get(0); /** set header from the first csv file */ //currentEntries.remove(0); /** remove the header row */ //master_entries_rawdata.addAll(currentEntries); reader_rawdata2.close(); return headerColumns; } }