/* * Copyright (c) 2011 Patrick Meyer * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.itemanalysis.jmetrik.dao; import au.com.bytecode.opencsv.CSVReader; import com.itemanalysis.jmetrik.sql.DataTableName; import com.itemanalysis.jmetrik.sql.VariableTableName; import com.itemanalysis.jmetrik.utils.DerbyCSVWriter; import com.itemanalysis.jmetrik.workspace.ImportCommand; import com.itemanalysis.psychometrics.data.DataType; import com.itemanalysis.psychometrics.data.ItemType; import com.itemanalysis.psychometrics.data.VariableAttributes; import com.itemanalysis.psychometrics.data.VariableName; import com.itemanalysis.psychometrics.tools.StopWatch; import org.apache.log4j.Logger; import javax.swing.*; import java.io.*; import java.sql.*; import java.util.ArrayList; public class DerbyDelimitedFileImporter extends SwingWorker<String,Void> implements DelimitedFileImporter { private Connection conn = null; private ImportCommand command = null; private String tableNameString = ""; private DataTableName dataTableName = null; private VariableTableName variableTableName = null; private String fileName = ""; private char delimiter = ','; private String description = ""; private int numberOfVariables = 0; private boolean headerIncluded = true; private boolean display = true; private Throwable theException = null; private ArrayList<VariableAttributes> variables = null; private StopWatch timer = null; private boolean tablesCreated = false; private DerbyDatabaseAccessObject dao = null; static Logger logger = Logger.getLogger("jmetrik-logger"); static Logger scriptLogger = Logger.getLogger("jmetrik-script-logger"); public DerbyDelimitedFileImporter(Connection conn, ImportCommand command){ this.conn = conn; this.command = command; variables = new ArrayList<VariableAttributes>(); dao = new DerbyDatabaseAccessObject(); } public void parseCommand()throws IllegalArgumentException{ logger.info(command.paste()); headerIncluded = command.getSelectOneOption("header").getSelectedArgument().equals("included"); fileName = command.getFreeOption("file").getString(); String dl = command.getSelectOneOption("delimiter").getSelectedArgument(); delimiter = getDelimiter(dl); tableNameString = command.getPairedOptionList("data").getStringAt("table"); display = command.getSelectAllOption("options").isArgumentSelected("display"); description = command.getFreeOption("description").getString(); } public char getDelimiter(String delimiterName){ char d = ','; if(delimiterName.equals("tab")){ d='\t'; }else if(delimiterName.equals("semicolon")){ d=';'; }else if(delimiterName.equals("colon")){ d=':'; }else{ d=','; } return d; } public void importDelimitedFile() throws IOException, SQLException{ try{ //get variable names from header or create them processHeader(); //scan file to determine type of data in each column and eliminate white spaces File tempFile = writeTempFile(fileName); tempFile.deleteOnExit(); String strDelimiter = String.valueOf(delimiter).toString(); this.firePropertyChange("status", "", "Importing file..."); //create tables DerbyDatabaseAccessObject dbDao = new DerbyDatabaseAccessObject(); //get unique table name dataTableName = dbDao.getUniqueTableName(conn, tableNameString); variableTableName = new VariableTableName(dataTableName.toString()); dbDao.createTables(conn, dataTableName, variableTableName, variables); //import data into data table CallableStatement cs = conn.prepareCall("CALL SYSCS_UTIL.SYSCS_IMPORT_DATA(?, ?, ?, ?, ?, ?, ?, ?, ?)"); cs.setNull(1, Types.VARCHAR); cs.setString(2, dataTableName.getNameForDatabase()); cs.setNull(3, Types.VARCHAR); cs.setNull(4, Types.VARCHAR); cs.setString(5, tempFile.getAbsolutePath()); cs.setString(6, strDelimiter); cs.setNull(7, Types.CHAR); cs.setNull(8, Types.VARCHAR); cs.setInt(9, 1); cs.execute(); conn.commit(); cs.close(); conn.setAutoCommit(true);//close transaction setTableInformation(); }catch(SQLException ex){ conn.rollback(); conn.setAutoCommit(true); //delete tables and remove row count logger.fatal(ex.getMessage(), ex); throw new SQLException(ex.getMessage()); }catch(IOException ex){ conn.rollback(); conn.setAutoCommit(true); logger.fatal(ex.getMessage(), ex); throw new IOException(ex.getMessage()); } } /** * Process header and create list of VariableInfo objects. If the file includes a header, * variable names are taken from the header. Otherwise, variable names are created. * The first row of the data file establishes the number of variables in the data. * * @return * @throws IOException */ public String processHeader()throws IOException{ StringBuilder sb = new StringBuilder(); CSVReader reader = new CSVReader(new FileReader(fileName), delimiter); String[] header = reader.readNext();//first line will be header or data String tempVarName = ""; VariableName varName = null; for(int i=0;i<header.length;i++){ if(headerIncluded){ tempVarName = header[i].trim().replaceAll("\"", ""); }else{ tempVarName = "Var" + (i+1); } varName = new VariableName(tempVarName); VariableAttributes var = new VariableAttributes( varName.toString(), "", ItemType.NOT_ITEM, DataType.DOUBLE, (i+1), ""); if(var.getName().nameChanged() || !headerIncluded){ sb.append(var.getName().printNameChangeInformation()); sb.append("\n"); } variables.add(var); } reader.close(); numberOfVariables = variables.size(); return sb.toString(); } /** * Read file and determine the type of data in each field. Must be called prior to * writing the temporary file that will be imported so that doubles and strings * make correct use of quote characters. * * @param file * @throws IOException */ public void scanFile(File file) throws IOException{ CSVReader reader = new CSVReader(new FileReader(file), delimiter); int firstDimensionMismatch = 0; String[] line; String tempString = ""; VariableAttributes tempVar; int ncol = 0; int count = 0; int colWidth = 50; try{ while((line=reader.readNext())!=null){ ncol = line.length; if(firstDimensionMismatch==0 && ncol!=numberOfVariables){ firstDimensionMismatch = (count+1); logger.fatal("Import dimension mismatch: The number of columns in row " + firstDimensionMismatch + " of the data file " + "does not match the number of variables in the first row."); } if(headerIncluded && count>0){ //start processing first line of data for(int i=0;i<ncol;i++){ tempString = line[i].trim(); if("".equals(tempString)){ //White spaces found. Eliminate white spaces. //IMPORTANT NOTE: Empty strings and NAs will be imported as empty strings and not as null values // when the database column type is String. line[i] = tempString; }else{ //no white spaces (i.e. no missing data) try{ //default data type is double Double.parseDouble(tempString); }catch(NumberFormatException ex){ //string detected, change data type in VariableInfo tempVar = variables.get(i); //if number format exception is encountered, data type is set to string tempVar.getType().setDataType(DataType.STRING); //set varChar size to be between 50 and 255 colWidth = Math.max(tempVar.getVarcharSize(), tempString.length()); tempVar.setVarcharSize(Math.min(colWidth, 255)); } } }//end loop over columns }//end if count++; } }catch(IOException ex){ throw ex; }finally { if(reader!=null) reader.close(); } } /** * Determines the type of data in each column. It limits text data to a maximum of 255 characters. * It also creates a temp file that does not contains empty spaces. Eliminating empty spaces * is necessary for the Derby import. An empty space for a double throws an exception. * * @throws IOException */ public File writeTempFile(String scanFileName)throws IOException{ File tempFile = File.createTempFile("jmk-import-temp", ".txt"); tempFile.deleteOnExit(); scanFile(tempFile); CSVReader reader = new CSVReader(new FileReader(new File(scanFileName)), delimiter); DerbyCSVWriter writer = new DerbyCSVWriter(new FileWriter(tempFile), delimiter, variables); String[] line = null; int count = 0; int colWidth = 50; VariableAttributes tempVar = null; String tempString = ""; int ncol = 0; int firstDimensionMismatch = 0; try{ while((line=reader.readNext())!=null){ ncol = line.length; if(firstDimensionMismatch==0 && ncol!=numberOfVariables){ firstDimensionMismatch = (count+1); logger.fatal("Import dimension mismatch: The number of columns in row " + firstDimensionMismatch + " of the data file " + "does not match the number of variables in the first row."); } if(headerIncluded && count==0){ //do not write header. Header has already been processed with processHeader(). //Derby CallableStatement cannot be used with a file that has names in the first row. }else if((headerIncluded && count>0) || !headerIncluded){ //start processing first line of data for(int i=0;i<ncol;i++){ tempString = line[i].trim(); if("".equals(tempString) || "NA".equals(tempString)){ //White spaces found or missing data code NA found. Eliminate white spaces and missing data. //IMPORTANT NOTE: Empty strings and NAs will be imported as empty strings and not as null values // when the database column type is String. line[i] = ""; }else{ //no white spaces (i.e. no missing data) try{ //default data type is double Double.parseDouble(tempString); }catch(NumberFormatException ex){ //string detected, change data type in VariableInfo tempVar = variables.get(i); //if number format exception is encountered, data type is set to string tempVar.getType().setDataType(DataType.STRING); //set varChar size to be between 50 and 255 colWidth = Math.max(tempVar.getVarcharSize(), tempString.length()); tempVar.setVarcharSize(Math.min(colWidth, 255)); } } }//end loop over columns //write line to temp file writer.writeNext(line); } count++;//line count }//end loop over file return tempFile; }catch(IOException ex){ throw ex; }finally { if(reader!=null) reader.close(); if(writer!=null) writer.close(); } } private void setTableInformation()throws SQLException{ Statement stmt = null; ResultSet rs = null; try{ String QUERY = "SELECT COUNT(*) FROM " + dataTableName.getNameForDatabase(); stmt = conn.createStatement(); rs = stmt.executeQuery(QUERY); rs.next(); int rowCount = rs.getInt(1); rs.close(); if("".equals(description.trim())){ description = "Import of " + fileName; } dao.setTableInformation(conn, dataTableName, rowCount, description); }catch(SQLException ex){ throw(ex); }finally{ if(rs!=null) rs.close(); if(stmt!=null) stmt.close(); } } protected String doInBackground() throws Exception{ timer = new StopWatch(); try{ this.firePropertyChange("status", "", "Scanning file..."); this.firePropertyChange("progress-ind-on", null, null); parseCommand(); importDelimitedFile(); logger.info("Import complete: " + timer.getElapsedTime()); }catch(Throwable t){ theException=t; } return ""; } protected void done(){ if(tablesCreated) this.firePropertyChange("table-added", "", dataTableName);//will add node to tree if(theException==null){ this.firePropertyChange("status", "", "Ready");//will display status in statusBar if(display) this.firePropertyChange("import", "", dataTableName);//will display data table in dialogs scriptLogger.info(command.paste()); }else{ logger.fatal(theException.getMessage(), theException); this.firePropertyChange("error", "", "Error - Check log for details."); } firePropertyChange("progress-off", null, null); } }