/* * eXist Open Source Native XML Database * Copyright (C) 2010-2012 The eXist Project * http://exist-db.org * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * $Id$ */ package org.exist.collections.triggers; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Stack; import javax.xml.XMLConstants; import javax.xml.namespace.QName; import org.exist.collections.Collection; import org.exist.dom.persistent.DocumentImpl; import org.exist.storage.DBBroker; import org.exist.storage.txn.Txn; import org.exist.xmldb.XmldbURI; import org.xml.sax.Attributes; import org.xml.sax.SAXException; /** * Extracts CSV data from an element into a number of new child elements * * Mainly designed to be used at STORE event, but should also be usable at UPDATE event * * Example configuration - * * <collection xmlns="http://exist-db.org/collection-config/1.0"> * <triggers> * <trigger event="store" class="org.exist.collections.triggers.CSVExtractingTrigger"> * * <parameter name="separator" value="|"/> * * <parameter name="path"> * <xpath>/content/properties/value[@key eq "product_model"]</xpath> * <extract index="0" element-name="product_name"/> * <extract index="1" element-name="product_code"/> * </parameter> * * </trigger> * </triggers> * </collection> * * Currently the configuration of this trigger only supports basic attribute predicates or a name eq value syntax. * * * So for example, when storing a Document with content like the following - * * <content> * <properties> * <value key="product_model">SomeName|SomeCode12345</value> * </properties> * </content> * * The document will be translated at insertion time into - * * <content> * <properties> * <value key="product_model"> * <product_name>SomeName</product_name> * <product_code>SomeCode12345</product_code> * </value> * </properties> * </content> * * @author Adam Retter <adam@exist-db.org> */ public class CSVExtractingTrigger extends FilteringTrigger { //the separator characted for CSV files private String separator; //key is the xpath to extract for, and value is the extractions to make from the value at that path private Map<String, Extraction> extractions = new HashMap<String, Extraction>(); //the current node path of the SAX stream private NodePath currentNodePath = new NodePath(); private boolean capture = false; //flag to indicate whether to buffer character data for extraction of csv values private StringBuilder charactersBuf = new StringBuilder(); //buffer for character data, which will then be parsed to extract csv values @Override public void configure(DBBroker broker, Collection parent, Map<String, List<?>> parameters) throws TriggerException { super.configure(broker, parent, parameters); //get the separator final List<String> separators = (List<String>)parameters.get("separator"); if(separators == null || separators.size() != 1) { throw new TriggerException("A separator parameter must be provided to the CSVExtractingTrigger configuration"); } else { this.separator = separators.get(0); } //get the extractions final List<Map<String, List>> paths = (List<Map<String, List>>)parameters.get("path"); for(final Map<String, List> path : paths){ final List<String> xpaths = path.get("xpath"); if(xpaths != null && xpaths.size() == 1) { String xpath = xpaths.get(0); //split out the path and preficate (if present) from the xpath String pathExpr; String attrPredicate = null; if(xpath.indexOf("[") > -1) { pathExpr = xpath.substring(0, xpath.indexOf("[")); if(xpath.indexOf("[@") > -1) { attrPredicate = xpath.substring(xpath.indexOf("[@")+2, xpath.indexOf("]")); } } else { pathExpr = xpath; } Extraction extraction = extractions.get(pathExpr); if(extraction == null) { extraction = new Extraction(); if(attrPredicate != null) { final String attrNameValueMatch[] = attrPredicate.split(" eq "); extraction.setMatchAttribute(attrNameValueMatch[0], attrNameValueMatch[1]); } } final List<Properties> extracts = path.get("extract"); if(extracts != null) { for(final Properties extract : extracts) { final ExtractEntry extractEntry = new ExtractEntry(Integer.parseInt(extract.getProperty("index")), extract.getProperty("element-name")); extraction.getExtractEntries().add(extractEntry); } } Collections.sort(extraction.getExtractEntries()); //pre sort extractions.put(pathExpr, extraction); } } } @Override public void startElement(String namespaceURI, String localName, String qname, Attributes attributes) throws SAXException { //skips nested elements or already extracted nodes (i.e. during update events) //TODO needs through testing during update phase if(capture == true) { capture = false; charactersBuf.delete(0, charactersBuf.length()); } super.startElement(namespaceURI, localName, qname, attributes); currentNodePath.add(namespaceURI, localName); final Extraction extraction = extractions.get(currentNodePath.toLocalPath()); if(extraction != null) { //do we have to match an attribute predicate from the xpath in the trigger config? if(extraction.mustMatchAttribute()){ //yes - so try and match for(int i = 0; i < attributes.getLength(); i++){ if(extraction.matchesAttribute(attributes.getLocalName(i), attributes.getValue(i))){ //matched the predicate, so staty capturing capture = true; break; } } } else { //no, so start capturing capture = true; } } } @Override public void characters(char[] ch, int start, int length) throws SAXException { if(capture){ charactersBuf.append(ch, start, length); } else { super.characters(ch, start, length); } } @Override public void endElement(String namespaceURI, String localName, String qname) throws SAXException { if(capture) { extractCSVValuesToElements(); capture = false; charactersBuf.delete(0, charactersBuf.length()); } super.endElement(namespaceURI, localName, qname); currentNodePath.removeLast(); } private void extractCSVValuesToElements() throws SAXException { //split the csv values final String seperatedValues[] = charactersBuf.toString().split(getEscapedSeparatorForRegExp()); //get the extractions for the current path final Extraction extraction = extractions.get(currentNodePath.toLocalPath()); for(final ExtractEntry extractEntry : extraction.getExtractEntries()) { //extract the value by index final int index = extractEntry.getIndex(); if(index < seperatedValues.length) { final char seperatedValue[] = seperatedValues[index].toCharArray(); //create a new element for the extracted value final String localName = extractEntry.getElementName(); super.startElement(XMLConstants.NULL_NS_URI, localName, localName, new EmptyAttributes()); super.characters(seperatedValue, 0, seperatedValue.length); super.endElement(XMLConstants.NULL_NS_URI, localName, localName); } } } private String getEscapedSeparatorForRegExp() { if(separator.length() == 1) { //escape the separator character if it is a java regexp character if("|".equals(separator) || ",".equals(separator) || "$".equals(separator) || "^".equals(separator)) { return "\\" + separator; } } return separator; } private static class NodePath { private Stack<QName> pathSegments = new Stack<QName>(); public void add(String namespaceUri, String localName) { pathSegments.push(new QName(namespaceUri, localName)); } public void removeLast() { pathSegments.pop(); } public int length() { return pathSegments.size(); } //TODO replace with qname path once we understand how to pass in qnames in the xpath parameter to the trigger public String toLocalPath() { final StringBuilder localPath = new StringBuilder(); localPath.append("/"); for(int i = 0; i < pathSegments.size(); i++) { localPath.append(pathSegments.get(i).getLocalPart()); if(i + 1 < pathSegments.size()) { localPath.append("/"); } } return localPath.toString(); } } /*** configuration data classes ***/ private static class Extraction { private List<ExtractEntry> extractEntries = new ArrayList<ExtractEntry>(); private String matchAttrName; private String matchAttrValue; public List<ExtractEntry> getExtractEntries() { return extractEntries; } public void setMatchAttribute(String attrName, String attrValue) { this.matchAttrName = attrName.trim(); this.matchAttrValue = attrValue.replaceAll("\"", "").trim(); } public boolean mustMatchAttribute() { return(this.matchAttrName != null && this.matchAttrValue != null); } public boolean matchesAttribute(String attrName, String attrValue) { //if there is no matching then return true if(!mustMatchAttribute()) { return true; } else { return this.matchAttrName.equals(attrName) && this.matchAttrValue.equals(attrValue); } } } private static class ExtractEntry implements Comparable<ExtractEntry> { private final int index; private final String elementName; public ExtractEntry(int index, String elementName) { this.index = index; this.elementName = elementName; } public int getIndex() { return index; } public String getElementName() { return elementName; } @Override public int compareTo(ExtractEntry other) { if(other == null) { return -1; } else { return other.getIndex() - this.getIndex(); } } } private static class EmptyAttributes implements Attributes { @Override public int getLength() { return 0; } @Override public String getURI(int index) { return null; } @Override public String getLocalName(int index) { return null; } @Override public String getQName(int index) { return null; } @Override public String getType(int index) { return null; } @Override public String getValue(int index) { return null; } @Override public int getIndex(String uri, String localName) { return -1; } @Override public int getIndex(String qName) { return -1; } @Override public String getType(String uri, String localName) { return null; } @Override public String getType(String qName) { return null; } @Override public String getValue(String uri, String localName) { return null; } @Override public String getValue(String qName) { return null; } } @Override public void beforeCreateDocument(DBBroker broker, Txn txn, XmldbURI uri) throws TriggerException { } @Override public void afterCreateDocument(DBBroker broker, Txn txn, DocumentImpl document) throws TriggerException { } @Override public void beforeUpdateDocument(DBBroker broker, Txn txn, DocumentImpl document) throws TriggerException { } @Override public void afterUpdateDocument(DBBroker broker, Txn txn, DocumentImpl document) throws TriggerException { } @Override public void beforeCopyDocument(DBBroker broker, Txn txn, DocumentImpl document, XmldbURI newUri) throws TriggerException { } @Override public void afterCopyDocument(DBBroker broker, Txn txn, DocumentImpl document, XmldbURI newUri) throws TriggerException { } @Override public void beforeMoveDocument(DBBroker broker, Txn txn, DocumentImpl document, XmldbURI newUri) throws TriggerException { } @Override public void afterMoveDocument(DBBroker broker, Txn txn, DocumentImpl document, XmldbURI newUri) throws TriggerException { } @Override public void beforeDeleteDocument(DBBroker broker, Txn txn, DocumentImpl document) throws TriggerException { } @Override public void afterDeleteDocument(DBBroker broker, Txn txn, XmldbURI uri) throws TriggerException { } @Override public void beforeUpdateDocumentMetadata(DBBroker broker, Txn txn, DocumentImpl document) throws TriggerException { } @Override public void afterUpdateDocumentMetadata(DBBroker broker, Txn txn, DocumentImpl document) throws TriggerException { } }