/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
/**
*
* This is a superclass for all the classes that parse the XML from a given node to populate
* the tables in the relational database
*
* @author Gaurav Bhalotia
* @author Ariel Schwartz
*
*/
package org.erasmusmc.dataimport.Medline.xmlparsers;
import java.io.PrintStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Types;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Stack;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class NodeHandler extends DefaultHandler {
protected PreparedStatement pstmt = null;
protected static final Hashtable preparedStatements = new Hashtable();
protected int numColumns;
protected String tableName = null;
protected String xmlNodeName;
protected boolean updateChildren = true;
/* Names of the columns to be stored in the database */
protected String[] columnName;
/* Corresponding XML element names for each of the columns
* If element name is DATE and the type is Types.DATE then it
* is composed of Year, Month and Day
*/
protected String[] xmlElementName;
/* Corresponding the type for each of the columns */
protected int[] columnType;
/* The hashtable to store the values for each element obtained from the XML file */
protected Hashtable columnValues = null;
/* Hash Set to store column names for validating the parser */
protected HashSet columnNames = null;
/* The various columns needed for the record */
/* To store the current element and value in the parse stream from the GenericXMLParser */
protected String currentElement = null;
protected String currentValue = null;
static protected Stack elements = new Stack();
/* The ContentHandlers of the children nodes */
protected ArrayList childrenHandlers = new ArrayList();
/* The ContentHandlers of the parent nodes, e.g. Journal for Article */
protected ArrayList parentHandlers = new ArrayList();
/* The list of Columns Values that need to be returned from the corresponding parent handlers to
* this node, e.g. Journal returns ISSN to MedlineCitation */
protected ArrayList parentHandlerReturnColumns = new ArrayList();
protected String tagPrefix = "";
protected boolean ignoreDuplicateKeyError = true;
protected static final int DB2_DUPLICATE_ERROR = -803;
protected static final int MySQL_DUPLICATE_ERROR = 1062;
/** The constructor takes in the SAX event handler. Which is used to
* parse the elements in the authorlist subnode.
*
*/
public void initialize() throws Exception {
/* Initialize the number of columns */
numColumns = columnName.length;
/* Check if the statement is null then compile a statement */
if (pstmt == null) {
/* Now prepare the statement to be used for updating DB */
compileStatement();
}
/* Initialize the hash table */
columnValues = new Hashtable();
/* If parse validate option is on initialize the set object to hold the columnNames */
columnNames = new HashSet();
}
/** Compile a statement, later on while updating the DB we just need
* to supply the arguments*/
private void compileStatement() throws Exception {
try {
pstmt = (PreparedStatement) preparedStatements.get(tableName);
if (pstmt != null)
return;
Connection con = GenericXMLParser.dbConnection;
/* Create the parameter string */
if (numColumns < 1) {
throw new Exception("This table does not have any columns");
}
else {
String pString = "(";
String columnNameString = "";
for (int i = 0; i < numColumns - 1; i++) {
pString += "?,";
columnNameString += (columnName[i] + ",");
}
pString += "?)";
columnNameString += columnName[numColumns - 1];
pstmt = con.prepareStatement("REPLACE INTO " + tableName + " (" + columnNameString + ") VALUES " + pString);
preparedStatements.put(tableName, pstmt);
}
} catch (SQLException e) {
System.out.println("---" + e.getMessage());
throw new Exception("Problems with the connection to the database");
}
}
/** Function to handle the event where an element begins corresponding to the author stream
*
* @param namespaceURI The namespace information for this element
* @param localName The actual name of the element
* @param qName Combination of the Namespace alias and the localName
* @param atts Any attributes for the element
*/
@Override
public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
/* Assuming well formed XML, previous element ends before a new one starts */
currentElement = qName;
elements.push(qName);
tagPrefix += qName + ".";
/* Add an empty string for this element to the hashtable. to be later filled in the character()
* method
*/
currentValue = "";
/* Add the attributes with their values to the hashtable */
for (int att = 0; att < atts.getLength(); att++) {
String attName = atts.getQName(att);
/* Prepend the atribute names by the element name */
putColumnValue(tagPrefix + attName, atts.getValue(att));
}
}
/** Function to handle the end element event from the GenericXMLParser
*
* @param namespaceURI
* @param localName
* @param qName
*/
@Override
public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
/* Assuming here that the XML is well formed and the endElement are in correct order */
elements.pop();
if (qName.equals(currentElement)) {
tagPrefix = tagPrefix.substring(0, tagPrefix.length() - currentElement.length() - 1);
if (currentValue != null) {
/* Prefix the TAG with the ancestors (using the stack), e.g. DateCreated.Year */
/* For now just appending with the parents */
putColumnValue(tagPrefix + currentElement, currentValue);
}
try {
currentElement = (String) elements.peek();
currentValue = null;
} catch (Exception e) {
throw new SAXException("Empty stack. currentElement: " + currentElement + " qName: " + qName);
}
if (tagPrefix.equals("")) {
/* This just means the end of the Table type, e.g Author */
GenericXMLParser.removeChildHandler();
}
}
else if (currentElement != null) {
System.out.println(elements);
throw new SAXException("misformed XML currentElement: " + currentElement + " qName: " + qName);
}
}
/** Function to handle the characters that have been passed to this object from the main
* GenericXMLParser; The element these characters belong to has been set by the previous startElement
* event
*
* @param ch The character array containing the characters
* @param start The position where the characters corresponding to this element start
* @param length The length of the character string for the current element
*/
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
/* I assume that the XML is well formed, the characters coming now should correspond to the
* current element
*/
if (currentElement == null) {
throw new SAXException("misformed XML");
}
else {
currentValue += new String(ch, start, length);
}
}
/** Function to materialize the elements in this object to the database
* This method has to be implemented by the extending class
*/
public void updateDB() throws Exception {
String retColumnName = null;
String retColumnValue = null;
String pmid = null;
/* Execute updates for parent nodes */
for (int i = 0; i < parentHandlers.size(); i++) {
retColumnName = (String) parentHandlerReturnColumns.get(i);
if (retColumnName.toLowerCase().endsWith("pmid"))
pmid = retColumnValue;
if (retColumnName.equals("")) {
/* Do nothing */
}
else {
try {
/* Get the column value and put in this current node */
retColumnValue = ((NodeHandler) parentHandlers.get(i)).getColumnValue(retColumnName);
putColumnValue(retColumnName, retColumnValue);
} catch (NullPointerException e) {
System.out.println(retColumnName);
System.out.println(retColumnValue);
/* Also print the hashtable to know the context */
System.out.println(columnValues);
e.printStackTrace();
System.exit(1);
}
}
/* Update update for the parent */
((NodeHandler) parentHandlers.get(i)).updateDB();
}
try {
Object tempVal;
for (int i = 0; i < numColumns; i++) {
try {
if (columnType[i] == Types.DATE) {
/* handle this separately */
String date = getColumnValue(xmlElementName[i] + ".Year");
if (date == null) {
pstmt.setNull(i + 1, Types.DATE);
}
else {
date += "-" + getColumnValue(xmlElementName[i] + ".Month") + "-" + getColumnValue(xmlElementName[i] + ".Day");
/* Now create a date type from this */
pstmt.setString(i + 1, date); /* DB converts from string to DATE*/
}
}
else {
tempVal = getColumnValue(xmlElementName[i]);
if (tempVal == null) {
/* Set the parameter to be null */
pstmt.setNull(i + 1, columnType[i]);
}
else {
/* Set the parameter value with appropriate type */
pstmt.setObject(i + 1, tempVal, columnType[i]);
}
}
} catch (ArrayIndexOutOfBoundsException e) {
throw new Exception("Problem updating table " + tableName + " i: " + i + " columnType.length: " + columnType.length);
}
}
/* If parse validation is ON check if all the values in the hashtable have been used */
if (GenericXMLParser.parseValidate == true) {
if (columnNames.isEmpty()) {
/* Parse is good, all values are being used */
}
else {
/* There are some values that are not being used */
/* Print the current hashtable */
//System.out.println("Some unused values in the hashtable for " + this);
System.out.println(columnNames + "\n" + columnValues + "\n");
}
}
}
catch (SQLException e) {
System.out.println(e.getMessage());
e.printStackTrace();
System.exit(-1);
//throw new Exception("Problems with the prepared statement");
}
/* Now execute the update with the database */
try {
if (GenericXMLParser.toFile == false) {
// if(this instanceof Author) {
// Author that = (Author)this;
// Statement delete_stmt = GenericXMLParser.getDbConnection().createStatement();
// delete_stmt.execute("DELETE FROM " + tableName + " where pmid = " + that.getPMID());
// }
pstmt.executeUpdate();
}
else {
/* Write it to a file (Opened already) */
//GenericXMLParser.outfile.write(pstmt);
PrintStream out = new PrintStream(GenericXMLParser.outfile);
out.println(pstmt);
//System.out.println(pstmt);
}
} catch (SQLException e) {
if (!handleSQLException(e)) {
System.err.println(pstmt);
System.err.println(e.getMessage());
System.err.println(columnValues);
throw new Exception("Problem in inserting values into the " + tableName + " table");
}
}
/* Execute updates for children nodes */
if (updateChildren == true) {
for (int i = 0; i < childrenHandlers.size(); i++) {
((NodeHandler) childrenHandlers.get(i)).updateDB();
}
}
}
/**
* Handles SQLException. Should be overloaded by inheriting classes to handle special cases
* @returns true if the exception has been handled, false otherwise
*/
protected boolean handleSQLException(SQLException e) {
if (ignoreDuplicateKeyError && e.getErrorCode() == DB2_DUPLICATE_ERROR) {
updateChildren = false;
return true;
/* Don't do anything, the tuple for this primary key has already been inserted */
}
else if (ignoreDuplicateKeyError && e.getErrorCode() == MySQL_DUPLICATE_ERROR) {
updateChildren = false;
return true;
/* Don't do anything, the tuple for this primary key has already been inserted */
}
else {
System.err.println("ERROR CODE == " + e.getErrorCode());
return false;
}
}
/** Normal Content Handler where the handler being set has to be serialized
* to the DB after this node
*/
public void setContentHandler(NodeHandler childHandler, String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
if (this != childHandler) {
childrenHandlers.add(childHandler);
}
GenericXMLParser.addChildHandler(childHandler);
childHandler.startElement(namespaceURI, localName, qName, atts);
currentValue = null;
}
/** Content handler for a node that needs to be serialized to the DB before this node
*
* @param returnColumn Stores the column name that needs to be returned by the parent
* should be "" if no return is desired
*
*/
public void setContentHandlerParent(String returnColumn, NodeHandler childHandler, String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
if (this != childHandler) {
parentHandlers.add(childHandler);
parentHandlerReturnColumns.add(returnColumn);
}
GenericXMLParser.addChildHandler(childHandler);
childHandler.startElement(namespaceURI, localName, qName, atts);
}
/* Return the value of a column given its name, removing it from the hashtable */
protected String getColumnValue(String columnName) {
if (GenericXMLParser.parseValidate == true) {
/* remove the name from the hashset, as it has been used */
columnNames.remove(columnName);
}
return (String) columnValues.get(columnName);
}
/* Store the value of a column given its name */
protected void putColumnValue(String columnName, String columnValue) {
columnValues.put(columnName, columnValue);
if (GenericXMLParser.parseValidate == true) {
/* Store the column name in the set */
columnNames.add(columnName);
}
}
/** Adds an element to the element stack
* @param qName the element to be added
*/
public static void pushElement(Object qName) {
elements.push(qName);
}
}