/* * Copyright 2012 FundaciĆ³ Barcelona Media * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.barcelonamedia.uima.consumer.SQLConsumer; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.net.URL; import java.util.Hashtable; import java.util.logging.Level; import java.util.logging.Logger; import java.util.zip.Deflater; import java.util.zip.DeflaterOutputStream; import org.apache.uima.cas.CAS; import org.apache.uima.cas.FSIterator; import org.apache.uima.cas.impl.XmiCasSerializer; import org.apache.uima.collection.CasConsumer_ImplBase; import org.apache.uima.examples.SourceDocumentInformation; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.resource.ResourceProcessException; import org.apache.uima.util.ProcessTrace; import org.apache.uima.util.XMLSerializer; import org.barcelonamedia.uima.consumer.SQLConsumer.DAO.DAOException; import org.barcelonamedia.uima.consumer.SQLConsumer.DAO.DAOFactory; import org.barcelonamedia.uima.consumer.SQLConsumer.DAO.XMIDAO; import org.barcelonamedia.uima.consumer.SQLConsumer.DTO.XMIDTO; public class DBXMICASConsumer extends CasConsumer_ImplBase{ /** The logger object. */ private static final Logger logger = Logger.getLogger(DBXMICASConsumer.class.toString()); // Suported DBMS: ----------------------------------------- private static final String MySQL = "MySQL"; //---------------------------------------------------------- /** Correponds to a parameter that specifies DBMS to be used. * The value of this variable is 'DBMS' which is the name of * the parameter in the descriptor file that must be set. * @see "/DBXMICASConsumer/desc/DBXMICASConsumer.xml" **/ private static final String PARAM_DBMS = "DBMS"; /** Correponds to a parameter that specifies the server where DBMS is being hosted. * The value of this variable is 'Server' which is the name of * the parameter in the descriptor file that must be set. * @see "/DBXMICASConsumer/desc/DBXMICASConsumer.xml" **/ private static final String PARAM_SERVER = "Server"; /** Correponds to a parameter that specifies port to be used to connect to the specified DBMS. * The value of this variable is 'Port' which is the name of * the parameter in the descriptor file that must be set. * @see "/DBXMICASConsumer/desc/DBXMICASConsumer.xml" **/ private static final String PARAM_PORT = "Port"; /** Correponds to a parameter that specifies the name of the database to be used. * The value of this variable is 'Database' which is the name of * the parameter in the descriptor file that must be set. * @see "/DBXMICASConsumer/desc/DBXMICASConsumer.xml" **/ private static final String PARAM_DATABASE = "Database"; /** Correponds to a parameter that specifies the username fof the specified database. * The value of this variable is 'User' which is the name of * the parameter in the descriptor file that must be set. * @see "/DBXMICASConsumer/desc/DBXMICASConsumer.xml" **/ private static final String PARAM_USER = "User"; /** Correponds to a parameter that specifies the password fof the specified database. * The value of this variable is 'Password' which is the name of * the parameter in the descriptor file that must be set. * @see "/DBXMICASConsumer/desc/DBXMICASConsumer.xml" **/ private static final String PARAM_PASSWORD = "Password"; /** Correponds to a parameter that specifies the database table to be used. * The value of this variable is 'table' which is the name of * the parameter in the descriptor file that must be set. * @see "/DBXMICASConsumer/desc/DBXMICASConsumer.xml" **/ private static final String PARAM_TABLE = "table"; /** Correponds to a parameter that specifies the field of the specified database table to be used for inserting xmi id * The value of this variable is 'xmi_id_field' which is the name of * the parameter in the descriptor file that must be set. * @see "/DBXMICASConsumer/desc/DBXMICASConsumer.xml" **/ private static final String PARAM_XMI_ID_FIELD = "xmi_id_field"; /** Correponds to a parameter that specifies the field of the specified database table to be used for inserting xmi data * The value of this variable is 'xmi_data_field' which is the name of * the parameter in the descriptor file that must be set. * @see "/DBXMICASConsumer/desc/DBXMICASConsumer.xml" **/ private static final String PARAM_XMI_DATA_FIELD = "xmi_data_field"; /** Correponds to a parameter that specifies whether XMI is to be compressed or not before inserting it into database. * The value of this variable is 'compression' which is the name of * the parameter in the descriptor file that must be set. * @see "/DBXMICASConsumer/desc/DBXMICASConsumer.xml" **/ private static final String PARAM_DO_COMPRESSION = "compression"; /** Correponds to a parameter that specifies whether whole document URI or document name is to be used as document ID into the database. * The value of this variable is 'fullURI' which is the name of * the parameter in the descriptor file that must be set. * @see "/DBXMICASConsumer/desc/DBXMICASConsumer.xml" **/ private static final String FULL_URI = "fullURI"; /** DAO Factory object. */ private DAOFactory daoFactory; /** XMI DAO object. */ private XMIDAO xmiDAO; /** document number to use as id when no SourceDocumentInformation available */ private int mDocNum; /** XMI compression flag **/ private Boolean do_compression; /** Full URI flag **/ private Boolean fullURI; /** * Initialize the component. Retrieve the parameters and process them, * parsing the field descriptions and preparing the structures needed to * process the documents. * * @param aContext * The UIMA context. * * @throws ResourceInitializationException * If an error occurs with some resource. * * @see org.apache.uima.analysis_component.AnalysisComponent_ImplBase#initialize(org.apache.uima.UimaContext) */ public void initialize() throws ResourceInitializationException{ System.out.println("DBXMICASConsumer: initialize()..."); logger.info("initialize()..."); String dbms = (String) getUimaContext().getConfigParameterValue(PARAM_DBMS); String server = (String) getUimaContext().getConfigParameterValue(PARAM_SERVER); int port = (Integer) getUimaContext().getConfigParameterValue(PARAM_PORT); String database = (String) getUimaContext().getConfigParameterValue(PARAM_DATABASE); String user = (String) getUimaContext().getConfigParameterValue(PARAM_USER); String password = (String) getUimaContext().getConfigParameterValue(PARAM_PASSWORD); String table = (String) getUimaContext().getConfigParameterValue(PARAM_TABLE); String xmi_id_field = (String) getUimaContext().getConfigParameterValue(PARAM_XMI_ID_FIELD); String xmi_data_field = (String) getUimaContext().getConfigParameterValue(PARAM_XMI_DATA_FIELD); this.mDocNum = 0; this.do_compression = (Boolean) getUimaContext().getConfigParameterValue(PARAM_DO_COMPRESSION); this.fullURI = (Boolean) getUimaContext().getConfigParameterValue(FULL_URI); if((dbms == null || dbms.length() == 0) || (server == null || server.length() == 0) || (new Integer(port) == null) || (database == null || database.length() == 0) || (user == null || user.length() == 0) || (password == null || password.length() == 0) || (table == null || table.length() == 0) || (xmi_id_field == null || xmi_id_field.length() == 0) || (xmi_data_field == null || xmi_data_field.length() == 0)){ throw new ResourceInitializationException(); } logger.info("initialize() - dbms: " + dbms); logger.info("initialize() - server: " + server); logger.info("initialize() - port: " + port); logger.info("initialize() - database: " + database); logger.info("initialize() - user: " + user); logger.info("initialize() - password: " + password); logger.info("initialize() - table: " + table); logger.info("initialize() - xmi_id_field: " + xmi_id_field); logger.info("initialize() - xmi_data_field: " + xmi_data_field); if(dbms.equals(MySQL)){ System.out.println("DBXMICASConsumer: initialize() - Using MySQL as DBMS."); this.daoFactory = DAOFactory.getDAOFactory(DAOFactory.MYSQL); Hashtable<String, String> connectionParams = new Hashtable<String, String>(); connectionParams.put("server", server); connectionParams.put("port", String.valueOf(port)); connectionParams.put("database", database); connectionParams.put("user", user); connectionParams.put("password", password); this.xmiDAO = this.daoFactory.getXMIDAO(connectionParams); Hashtable<String, String> tableInfo = new Hashtable<String, String>(); tableInfo.put("table", table); tableInfo.put("xmi_id_field", xmi_id_field); tableInfo.put("xmi_data_field", xmi_data_field); this.xmiDAO.setTableInfo(tableInfo); try{ this.xmiDAO.init(); } catch(DAOException e){ throw new ResourceInitializationException(); } } logger.info("initialize() - Done."); } /** * Processes the CAS which was populated by the TextAnalysisEngines. <br> * In this case, the CAS is converted to XMI and written into the output file . * * @param aCAS * a CAS which has been populated by the TAEs * * @throws ResourceProcessException * if there is an error in processing the Resource * * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS) */ public void processCas(CAS cas) throws ResourceProcessException{ try{ ByteArrayOutputStream xmi_baos = new ByteArrayOutputStream(); XmiCasSerializer ser = new XmiCasSerializer(cas.getTypeSystem()); XMLSerializer xmlSer = new XMLSerializer(xmi_baos, false); ser.serialize(cas, xmlSer.getContentHandler()); // Retrieve XMI id String xmi_id = new String(); JCas jcas = cas.getJCas(); FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator(); if(it.hasNext()){ SourceDocumentInformation sdi = (SourceDocumentInformation) it.next(); if(this.fullURI){ xmi_id = sdi.getUri().toString(); } else{ xmi_id = new File(new URL(sdi.getUri()).getPath()).getName(); } if(sdi.getOffsetInSource() > 0 || !sdi.getLastSegment()){ xmi_id += ("_" + sdi.getOffsetInSource() + "_" + sdi.getDocumentSize()); } } if(xmi_id.length() == 0){ xmi_id = "doc" + this.mDocNum++; } /////////////////////////////////////////////////////////////////////////////////// XMIDTO xmidto = null; //XMI compression if(this.do_compression){ ByteArrayOutputStream compressed_xmi_baos = new ByteArrayOutputStream(); Deflater deflater = new Deflater(); DeflaterOutputStream deflaterOutputStream = new DeflaterOutputStream(compressed_xmi_baos, deflater); deflaterOutputStream.write(xmi_baos.toByteArray()); deflaterOutputStream.close(); xmidto = new XMIDTO(xmi_id, compressed_xmi_baos.toByteArray()); } else{ xmidto = new XMIDTO(xmi_id, xmi_baos.toByteArray()); } this.xmiDAO.insert(xmidto); } catch(Exception e){ logger.log(Level.SEVERE, e.getMessage()); throw new ResourceProcessException(e); } } /** * * */ public void collectionProcessComplete(ProcessTrace arg0) throws ResourceProcessException, IOException{ try{ this.xmiDAO.closeConnection(); } catch(DAOException e){ logger.log(Level.SEVERE, e.getMessage()); throw new ResourceProcessException(e); } System.out.println("DBXMICASConsumer: collectionProcessComplete()..."); logger.info("collectionProcessComplete() - Done."); } }