/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.xmlinputsax;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.logging.LogChannelInterface;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.xml.XMLParserFactoryProducer;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.helpers.DefaultHandler;
/**
*
* @author Youssef
* @since 22-may-2006
*/
public class XMLInputSaxDataRetriever extends DefaultHandler {
XMLInputSaxMeta meta;
XMLInputSaxData data;
int[] position = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
// list of elements to the root element
private List<XMLInputSaxFieldPosition> pathToRootElement = new ArrayList<XMLInputSaxFieldPosition>();
// list of elements to the root element
private List<XMLInputSaxFieldPosition> _pathToRootElement = new ArrayList<XMLInputSaxFieldPosition>();
private List<XMLInputSaxField> fields = new ArrayList<XMLInputSaxField>();
private int fieldToFill = -1;
/** Empty row */
private Object[] emptyRow;
/** Temporary row of data */
private Object[] row;
/** List of datarows retreived from the xml */
private List<Object[]> rowSet = new ArrayList<Object[]>();
// count the deep to the current element in pathToStartElement
private int counter = 0;
// count the deep to the current element in xml file
private int _counter = -1;
// true when the root element is reached
private boolean rootFound = false;
// source xml file name
private String sourceFile;
private String tempVal;
private StringBuffer charactersBuffer;
private LogChannelInterface log;
/**
* Constructor of xmlDataRetreiver class.
*
* @param sourceFile
* The XML file containing data.
*
* @param meta
* The metadata to use
* @param data
* the (temporary) data to reference
*
*/
public XMLInputSaxDataRetriever( LogChannelInterface log, String sourceFile, XMLInputSaxMeta meta,
XMLInputSaxData data ) {
this.log = log;
this.meta = meta;
this.data = data;
charactersBuffer = new StringBuffer();
for ( int i = 0; i < meta.getInputPosition().length; i++ ) {
this.pathToRootElement.add( meta.getInputPosition()[i] );
}
for ( int i = 0; i < meta.getInputFields().length; i++ ) {
this.fields.add( meta.getInputFields()[i] );
}
this.sourceFile = sourceFile;
this.emptyRow = buildEmptyRow();
try {
this.row = new Object[emptyRow.length];
System.arraycopy( emptyRow, 0, this.row, 0, emptyRow.length );
} catch ( NullPointerException e ) {
throw e;
}
}
public void runExample() {
parseDocument();
}
private void parseDocument() {
// get a factory
SAXParserFactory spf = null;
try {
spf = XMLParserFactoryProducer.createSecureSAXParserFactory();
} catch ( SAXNotSupportedException | SAXNotRecognizedException | ParserConfigurationException ex ) {
log.logError( ex.getMessage() );
}
try {
// get a new instance of parser
SAXParser sp = spf.newSAXParser();
// parse the file and also register this class for call backs
sp.parse( sourceFile, this );
} catch ( SAXException se ) {
log.logError( Const.getStackTracker( se ) );
} catch ( ParserConfigurationException pce ) {
log.logError( Const.getStackTracker( pce ) );
} catch ( IOException ie ) {
log.logError( Const.getStackTracker( ie ) );
}
}
private XMLInputSaxFieldPosition[] pathFromRoot() {
int s = _pathToRootElement.size() - pathToRootElement.size();
if ( s > 0 ) {
XMLInputSaxFieldPosition[] ret = new XMLInputSaxFieldPosition[s];
for ( int i = 0; i < s; i++ ) {
ret[i] = pathToRootElement.get( i + pathToRootElement.size() );
}
return ret;
}
return null;
}
private String naming( XMLInputSaxFieldPosition[] path ) {
String ret = "";
for ( int i = 0; i < path.length; i++ ) {
String name;
if ( path[i].getType() == XMLInputSaxFieldPosition.XML_ELEMENT_ATT ) {
name = path[i].getAttributeValue();
} else {
name = path[i].getName() + path[i].getElementNr();
}
if ( i > 0 ) {
ret += "_" + name;
} else {
ret += name;
}
}
return ret;
}
/**
* Build an empty row based on the meta-data...
*
* @return
*/
private Object[] buildEmptyRow() {
XMLInputSaxField[] fields = meta.getInputFields();
Object[] row = RowDataUtil.allocateRowData( fields.length );
return row;
}
private void counterUp() {
if ( counter == pathToRootElement.size() - 1 ) {
rootFound = true;
counter++;
} else {
counter++;
}
}
private boolean comparePaths( int count ) {
for ( int i = 0; i <= count; i++ ) {
if ( !_pathToRootElement.get( i ).equals( pathToRootElement.get( i ) ) ) {
return false;
}
}
return true;
}
private void counterDown() {
if ( ( counter - 1 == _counter ) && comparePaths( _counter ) ) {
_pathToRootElement.remove( _counter );
counter--;
_counter--;
if ( rootFound ) {
rootFound = false;
rowSet.add( row );
this.row = new Object[emptyRow.length];
System.arraycopy( emptyRow, 0, this.row, 0, emptyRow.length );
}
} else {
_pathToRootElement.remove( _counter );
_counter--;
}
}
// Event Handlers
public void startElement( String uri, String localName, String qName, Attributes attributes ) throws SAXException {
// set the _counter level
position[_counter + 1] += 1;
_counter++;
try {
if ( !rootFound ) {
XMLInputSaxFieldPosition el = pathToRootElement.get( counter );
if ( ( counter == _counter ) && qName.equalsIgnoreCase( el.getName() ) ) {
if ( el.getType() == XMLInputSaxFieldPosition.XML_ELEMENT_ATT ) {
String att1 = attributes.getValue( el.getAttribute() );
String att2 = el.getAttributeValue();
if ( att1.equals( att2 ) ) {
_pathToRootElement.add( new XMLInputSaxFieldPosition( qName, el.getAttribute(), el
.getAttributeValue() ) );
if ( counter == pathToRootElement.size() - 1 ) {
int i = 0;
while ( i < attributes.getLength() ) {
XMLInputSaxFieldPosition tempP =
new XMLInputSaxFieldPosition(
attributes.getQName( i ), XMLInputSaxFieldPosition.XML_ATTRIBUTE, i + 1 );
XMLInputSaxField tempF =
new XMLInputSaxField( tempP.getName(), new XMLInputSaxFieldPosition[] { tempP } );
int p = fields.indexOf( tempF );
if ( p >= 0 ) {
setValueToRow( attributes.getValue( i ), p );
}
i++;
}
}
counterUp();
} else {
_pathToRootElement.add( new XMLInputSaxFieldPosition(
qName, XMLInputSaxFieldPosition.XML_ELEMENT_POS, position[_counter] + 1 ) );
}
} else {
_pathToRootElement.add( new XMLInputSaxFieldPosition(
qName, XMLInputSaxFieldPosition.XML_ELEMENT_POS, position[_counter] + 1 ) );
counterUp();
}
// normal attributes in root
if ( rootFound && attributes.getLength() > 0 ) {
int i = 0;
while ( i < attributes.getLength() ) {
int attributeID = meta.getDefiningAttributeNormalID( attributes.getQName( i ) );
if ( attributeID >= 0 ) {
setValueToRow( attributes.getValue( i ), attributeID );
}
i++;
}
}
} else {
_pathToRootElement.add( new XMLInputSaxFieldPosition(
qName, XMLInputSaxFieldPosition.XML_ELEMENT_POS, position[_counter] + 1 ) );
}
} else {
XMLInputSaxField tempF = null;
if ( attributes.getLength() == 0 ) {
_pathToRootElement.add( new XMLInputSaxFieldPosition(
qName, XMLInputSaxFieldPosition.XML_ELEMENT_POS, position[_counter] + 1 ) );
XMLInputSaxFieldPosition[] path = pathFromRoot();
tempF = new XMLInputSaxField( naming( path ), path );
} else {
String attribute = meta.getDefiningAttribute( qName );
_pathToRootElement
.add( new XMLInputSaxFieldPosition( qName, attribute, attributes.getValue( attribute ) ) );
XMLInputSaxFieldPosition[] path = pathFromRoot();
tempF = new XMLInputSaxField( naming( path ), path );
}
int p = fields.indexOf( tempF );
if ( p >= 0 ) {
this.fieldToFill = p;
}
}
} catch ( KettleValueException e ) {
throw new RuntimeException( e ); // signal error to the transformation don't ignore it
}
}
public void characters( char[] ch, int start, int length ) throws SAXException {
tempVal = new String( ch, start, length );
if ( tempVal != null ) {
charactersBuffer.append( tempVal );
}
}
public void endElement( String uri, String localName, String qName ) throws SAXException {
tempVal = charactersBuffer.toString();
charactersBuffer = new StringBuffer(); // start again.
try {
if ( this.fieldToFill >= 0 ) {
if ( tempVal == null ) {
tempVal = "";
}
setValueToRow( tempVal, fieldToFill );
}
fieldToFill = -1;
} catch ( KettleValueException e ) {
throw new RuntimeException( e ); // signal error to the transformation don't ignore it
}
position[_counter + 1] = -1;
counterDown();
}
private void setValueToRow( String value, int fieldnr ) throws KettleValueException {
XMLInputSaxField xmlInputField = fields.get( fieldnr );
switch ( xmlInputField.getTrimType() ) {
case XMLInputSaxField.TYPE_TRIM_LEFT:
value = Const.ltrim( value );
break;
case XMLInputSaxField.TYPE_TRIM_RIGHT:
value = Const.rtrim( value );
break;
case XMLInputSaxField.TYPE_TRIM_BOTH:
value = Const.trim( value );
break;
default:
break;
}
// DO CONVERSIONS...
ValueMetaInterface targetValueMeta = data.outputRowMeta.getValueMeta( fieldnr );
ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta( fieldnr );
row[fieldnr] = targetValueMeta.convertData( sourceValueMeta, value );
// Do we need to repeat this field if it is null?
if ( xmlInputField.isRepeated() ) {
if ( row[fieldnr] == null && data.previousRow != null ) {
Object previous = data.previousRow[fieldnr];
row[fieldnr] = previous;
}
}
}
public boolean hasNext() {
synchronized ( rowSet ) {
return !rowSet.isEmpty();
}
}
public Object[] getNext() {
synchronized ( rowSet ) {
if ( !rowSet.isEmpty() ) {
Object[] ret = rowSet.get( 0 );
rowSet.remove( 0 );
return ret;
} else {
return null;
}
}
}
/*
* public static void main(String[] args){ XMLvInputFieldPosition[] path=new XMLvInputFieldPosition[3]; try {
* path[0]=new XMLvInputFieldPosition("Ep=raml"); path[1]=new XMLvInputFieldPosition("Ep=cmData"); path[2]=new
* XMLvInputFieldPosition("Ea=managedObject/class:BTS"); } catch (KettleValueException e) { // TODO Auto-generated
* catch block LogWriter.getInstance().logError(toString(), Const.getStackTracker(e)); } //System.out.println(new
* xmlElement("hello","hello","hello").equals(new xmlElement("hello","hello","hello"))); XMLvSaxFieldRetreiver spe =
* new XMLvSaxFieldRetreiver("D:\\NOKIA\\Project\\Ressources\\CASA-1.XML",path,"name");
*
* ArrayList l=spe.getFields(); XMLvInputData data=new XMLvInputData(); XMLvInputMeta meta=new XMLvInputMeta();
*
* XMLvInputField [] a=new XMLvInputField[l.size()]; for(int i=0;i<l.size();i++){ XMLvInputField
* f=(XMLvInputField)l.get(i); XMLvInputField field=new XMLvInputField(); field.setName(f.getName()); try {
* field.setFieldPosition(f.getFieldPositionsCode(path.length)); } catch (KettleException e) {
* LogWriter.getInstance().logError(toString(), Const.getStackTracker(e)); } a[i]=field; }
*
* meta.setInputFields(a); System.out.println(a.length); meta.setInputPosition(path);
*
* XMLvSaxDataRetreiver r=new XMLvSaxDataRetreiver("D:\\NOKIA\\Project\\Ressources\\CASA-1.XML",meta,data,"name");
* r.runExample(); }
*/
}