/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.operation.xml; import javax.xml.parsers.DocumentBuilder; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import cascading.flow.FlowProcess; import cascading.operation.Function; import cascading.operation.FunctionCall; import cascading.operation.OperationException; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; import org.apache.log4j.Logger; import org.w3c.dom.Document; import org.w3c.dom.NodeList; /** * XPathGenerator is a Generator function that will emit a new Tuple for every Node returned by * the given XPath expression. */ public class XPathGenerator extends XPathOperation implements Function<DocumentBuilder> { /** Field LOG */ private static final Logger LOG = Logger.getLogger( XPathGenerator.class ); /** * Constructor XPathGenerator creates a new XPathGenerator instance. * * @param fieldDeclaration of type Fields * @param namespaces of type String[][] * @param paths of type String... */ public XPathGenerator( Fields fieldDeclaration, String[][] namespaces, String... paths ) { super( 1, fieldDeclaration, namespaces, paths ); if( fieldDeclaration.size() != 1 ) throw new IllegalArgumentException( "only one field can be declared: " + fieldDeclaration.print() ); } /** @see Function#operate(cascading.flow.FlowProcess,cascading.operation.FunctionCall) */ public void operate( FlowProcess flowProcess, FunctionCall<DocumentBuilder> functionCall ) { TupleEntry input = functionCall.getArguments(); if( input.getObject( 0 ) == null || !( input.getObject( 0 ) instanceof String ) ) return; String value = (String) input.getString( 0 ); if( value.length() == 0 ) // intentionally not trim()ing this value return; Document document = parseDocument( functionCall.getContext(), value ); for( int i = 0; i < getExpressions().size(); i++ ) { try { NodeList nodeList = (NodeList) getExpressions().get( i ).evaluate( document, XPathConstants.NODESET ); if( LOG.isDebugEnabled() ) LOG.debug( "xpath: " + paths[ i ] + " was: " + ( nodeList != null && nodeList.getLength() != 0 ) ); if( nodeList == null ) continue; for( int j = 0; j < nodeList.getLength(); j++ ) functionCall.getOutputCollector().add( new Tuple( writeAsXML( nodeList.item( j ) ) ) ); } catch( XPathExpressionException exception ) { throw new OperationException( "could not evaluate xpath expression: " + paths[ i ], exception ); } } } }