/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.http;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.httpclient.Credentials;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.util.URIUtil;
import org.json.simple.JSONObject;
import org.pentaho.di.cluster.SlaveConnectionManager;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.exception.KettleValueException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* Retrieves values from a database by calling database stored procedures or functions
*
* @author Matt
* @since 26-apr-2003
*/
public class HTTP extends BaseStep implements StepInterface {
private static Class<?> PKG = HTTPMeta.class; // for i18n purposes, needed by Translator2!! $NON-NLS-1$
private HTTPMeta meta;
private HTTPData data;
public HTTP( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
private Object[] execHttp( RowMetaInterface rowMeta, Object[] row ) throws KettleException {
if ( first ) {
first = false;
data.argnrs = new int[meta.getArgumentField().length];
for ( int i = 0; i < meta.getArgumentField().length; i++ ) {
data.argnrs[i] = rowMeta.indexOfValue( meta.getArgumentField()[i] );
if ( data.argnrs[i] < 0 ) {
logError( BaseMessages.getString( PKG, "HTTP.Log.ErrorFindingField" ) + meta.getArgumentField()[i] + "]" );
throw new KettleStepException( BaseMessages.getString( PKG, "HTTP.Exception.CouldnotFindField", meta
.getArgumentField()[i] ) );
}
}
}
return callHttpService( rowMeta, row );
}
private Object[] callHttpService( RowMetaInterface rowMeta, Object[] rowData ) throws KettleException {
String url = determineUrl( rowMeta, rowData );
try {
if ( isDetailed() ) {
logDetailed( BaseMessages.getString( PKG, "HTTP.Log.Connecting", url ) );
}
// Prepare HTTP get
//
HttpClient httpClient = SlaveConnectionManager.getInstance().createHttpClient();
HttpMethod method = new GetMethod( url );
// Set timeout
if ( data.realConnectionTimeout > -1 ) {
httpClient.getHttpConnectionManager().getParams().setConnectionTimeout( data.realConnectionTimeout );
}
if ( data.realSocketTimeout > -1 ) {
httpClient.getHttpConnectionManager().getParams().setSoTimeout( data.realSocketTimeout );
}
if ( !Utils.isEmpty( data.realHttpLogin ) ) {
httpClient.getParams().setAuthenticationPreemptive( true );
Credentials defaultcreds = new UsernamePasswordCredentials( data.realHttpLogin, data.realHttpPassword );
httpClient.getState().setCredentials( AuthScope.ANY, defaultcreds );
}
HostConfiguration hostConfiguration = new HostConfiguration();
if ( !Utils.isEmpty( data.realProxyHost ) ) {
hostConfiguration.setProxy( data.realProxyHost, data.realProxyPort );
}
// Add Custom HTTP headers
if ( data.useHeaderParameters ) {
for ( int i = 0; i < data.header_parameters_nrs.length; i++ ) {
method.addRequestHeader( data.headerParameters[i].getName(), data.inputRowMeta.getString( rowData,
data.header_parameters_nrs[i] ) );
if ( isDebug() ) {
log.logDebug( BaseMessages.getString( PKG, "HTTPDialog.Log.HeaderValue",
data.headerParameters[i].getName(), data.inputRowMeta
.getString( rowData, data.header_parameters_nrs[i] ) ) );
}
}
}
InputStreamReader inputStreamReader = null;
Object[] newRow = null;
if ( rowData != null ) {
newRow = rowData.clone();
}
// Execute request
//
try {
// used for calculating the responseTime
long startTime = System.currentTimeMillis();
int statusCode = requestStatusCode( method, hostConfiguration, httpClient );
// calculate the responseTime
long responseTime = System.currentTimeMillis() - startTime;
if ( log.isDetailed() ) {
log.logDetailed( BaseMessages.getString( PKG, "HTTP.Log.ResponseTime", responseTime, url ) );
}
String body = null;
String headerString = null;
// The status code
if ( isDebug() ) {
logDebug( BaseMessages.getString( PKG, "HTTP.Log.ResponseStatusCode", "" + statusCode ) );
}
if ( statusCode != -1 ) {
if ( statusCode == 204 ) {
body = "";
} else {
// if the response is not 401: HTTP Authentication required
if ( statusCode != 401 ) {
// guess encoding
//
Header[] headers = searchForHeaders( method );
String encoding = meta.getEncoding();
// Try to determine the encoding from the Content-Type value
//
if ( Utils.isEmpty( encoding ) ) {
String contentType = method.getResponseHeader( "Content-Type" ).getValue();
if ( contentType != null && contentType.contains( "charset" ) ) {
encoding = contentType.replaceFirst( "^.*;\\s*charset\\s*=\\s*", "" ).replace( "\"", "" ).trim();
}
}
JSONObject json = new JSONObject();
for ( Header header : headers ) {
Object previousValue = json.get( header.getName() );
if ( previousValue == null ) {
json.put( header.getName(), header.getValue() );
} else if ( previousValue instanceof List ) {
List<String> list = (List<String>) previousValue;
list.add( header.getValue() );
} else {
ArrayList<String> list = new ArrayList<String>();
list.add( (String) previousValue );
list.add( (String) header.getValue() );
json.put( header.getName(), list );
}
}
headerString = json.toJSONString();
if ( isDebug() ) {
log.logDebug( toString(), BaseMessages.getString( PKG, "HTTP.Log.ResponseHeaderEncoding", encoding ) );
}
// the response
inputStreamReader = openStream( encoding, method );
StringBuilder bodyBuffer = new StringBuilder();
int c;
while ( ( c = inputStreamReader.read() ) != -1 ) {
bodyBuffer.append( (char) c );
}
inputStreamReader.close();
body = bodyBuffer.toString();
if ( isDebug() ) {
logDebug( "Response body: " + body );
}
} else { // the status is a 401
throw new KettleStepException( BaseMessages
.getString( PKG, "HTTP.Exception.Authentication", data.realUrl ) );
}
}
}
int returnFieldsOffset = rowMeta.size();
if ( !Utils.isEmpty( meta.getFieldName() ) ) {
newRow = RowDataUtil.addValueData( newRow, returnFieldsOffset, body );
returnFieldsOffset++;
}
if ( !Utils.isEmpty( meta.getResultCodeFieldName() ) ) {
newRow = RowDataUtil.addValueData( newRow, returnFieldsOffset, new Long( statusCode ) );
returnFieldsOffset++;
}
if ( !Utils.isEmpty( meta.getResponseTimeFieldName() ) ) {
newRow = RowDataUtil.addValueData( newRow, returnFieldsOffset, new Long( responseTime ) );
returnFieldsOffset++;
}
if ( !Utils.isEmpty( meta.getResponseHeaderFieldName() ) ) {
newRow = RowDataUtil.addValueData( newRow, returnFieldsOffset, headerString );
}
} finally {
if ( inputStreamReader != null ) {
inputStreamReader.close();
}
// Release current connection to the connection pool once you are done
method.releaseConnection();
if ( data.realcloseIdleConnectionsTime > -1 ) {
httpClient.getHttpConnectionManager().closeIdleConnections( data.realcloseIdleConnectionsTime );
}
}
return newRow;
} catch ( UnknownHostException uhe ) {
throw new KettleException( BaseMessages.getString( PKG, "HTTP.Error.UnknownHostException", uhe.getMessage() ) );
} catch ( Exception e ) {
throw new KettleException( BaseMessages.getString( PKG, "HTTP.Log.UnableGetResult", url ), e );
}
}
private String determineUrl( RowMetaInterface outputRowMeta, Object[] row ) throws KettleValueException,
KettleException {
try {
if ( meta.isUrlInField() ) {
// get dynamic url
data.realUrl = outputRowMeta.getString( row, data.indexOfUrlField );
}
StringBuilder url = new StringBuilder( data.realUrl ); // the base URL with variable substitution
for ( int i = 0; i < data.argnrs.length; i++ ) {
if ( i == 0 && url.indexOf( "?" ) < 0 ) {
url.append( '?' );
} else {
url.append( '&' );
}
url.append( URIUtil.encodeWithinQuery( meta.getArgumentParameter()[i] ) );
url.append( '=' );
String s = outputRowMeta.getString( row, data.argnrs[i] );
if ( s != null ) {
s = URIUtil.encodeWithinQuery( s );
}
url.append( s );
}
return url.toString();
} catch ( Exception e ) {
throw new KettleException( BaseMessages.getString( PKG, "HTTP.Log.UnableCreateUrl" ), e );
}
}
protected int requestStatusCode( HttpMethod method, HostConfiguration hostConfiguration, HttpClient httpClient ) throws IOException {
return httpClient.executeMethod( hostConfiguration, method );
}
protected InputStreamReader openStream( String encoding, HttpMethod method ) throws Exception {
if ( !Utils.isEmpty( encoding ) ) {
return new InputStreamReader( method.getResponseBodyAsStream(), encoding );
} else {
return new InputStreamReader( method.getResponseBodyAsStream() );
}
}
protected Header[] searchForHeaders( HttpMethod method ) {
return method.getResponseHeaders();
}
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
meta = (HTTPMeta) smi;
data = (HTTPData) sdi;
Object[] r = getRow(); // Get row from input rowset & set row busy!
if ( r == null ) { // no more input to be expected...
setOutputDone();
return false;
}
if ( first ) {
data.outputRowMeta = getInputRowMeta().clone();
data.inputRowMeta = getInputRowMeta();
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
if ( meta.isUrlInField() ) {
if ( Utils.isEmpty( meta.getUrlField() ) ) {
logError( BaseMessages.getString( PKG, "HTTP.Log.NoField" ) );
throw new KettleException( BaseMessages.getString( PKG, "HTTP.Log.NoField" ) );
}
// cache the position of the field
if ( data.indexOfUrlField < 0 ) {
String realUrlfieldName = environmentSubstitute( meta.getUrlField() );
data.indexOfUrlField = getInputRowMeta().indexOfValue( realUrlfieldName );
if ( data.indexOfUrlField < 0 ) {
// The field is unreachable !
logError( BaseMessages.getString( PKG, "HTTP.Log.ErrorFindingField", realUrlfieldName ) );
throw new KettleException( BaseMessages.getString( PKG, "HTTP.Exception.ErrorFindingField",
realUrlfieldName ) );
}
}
} else {
data.realUrl = environmentSubstitute( meta.getUrl() );
}
// check for headers
int nrHeaders = meta.getHeaderField().length;
if ( nrHeaders > 0 ) {
data.useHeaderParameters = true;
}
data.header_parameters_nrs = new int[nrHeaders];
data.headerParameters = new NameValuePair[nrHeaders];
// get the headers
for ( int i = 0; i < nrHeaders; i++ ) {
int fieldIndex = data.inputRowMeta.indexOfValue( meta.getHeaderField()[i] );
if ( fieldIndex < 0 ) {
logError( BaseMessages.getString( PKG,
"HTTP.Exception.ErrorFindingField" ) + meta.getHeaderField()[i] + "]" );
throw new KettleStepException( BaseMessages.getString( PKG, "HTTP.Exception.ErrorFindingField", meta
.getHeaderField()[i] ) );
}
data.header_parameters_nrs[i] = fieldIndex;
data.headerParameters[i] =
new NameValuePair( environmentSubstitute( meta.getHeaderParameter()[i] ), data.outputRowMeta.getString( r,
data.header_parameters_nrs[i] ) );
}
} // end if first
try {
Object[] outputRowData = execHttp( getInputRowMeta(), r ); // add new values to the row
putRow( data.outputRowMeta, outputRowData ); // copy row to output rowset(s);
if ( checkFeedback( getLinesRead() ) ) {
if ( isDetailed() ) {
logDetailed( BaseMessages.getString( PKG, "HTTP.LineNumber" ) + getLinesRead() );
}
}
} catch ( KettleException e ) {
boolean sendToErrorRow = false;
String errorMessage = null;
if ( getStepMeta().isDoingErrorHandling() ) {
sendToErrorRow = true;
errorMessage = e.toString();
} else {
logError( BaseMessages.getString( PKG, "HTTP.ErrorInStepRunning" ) + e.getMessage() );
setErrors( 1 );
stopAll();
setOutputDone(); // signal end to receiver(s)
return false;
}
if ( sendToErrorRow ) {
// Simply add this row to the error row
putError( getInputRowMeta(), r, 1, errorMessage, null, "HTTP001" );
}
}
return true;
}
public boolean init( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (HTTPMeta) smi;
data = (HTTPData) sdi;
if ( super.init( smi, sdi ) ) {
// get authentication settings once
data.realProxyHost = environmentSubstitute( meta.getProxyHost() );
data.realProxyPort = Const.toInt( environmentSubstitute( meta.getProxyPort() ), 8080 );
data.realHttpLogin = environmentSubstitute( meta.getHttpLogin() );
data.realHttpPassword = Utils.resolvePassword( variables, meta.getHttpPassword() );
data.realSocketTimeout = Const.toInt( environmentSubstitute( meta.getSocketTimeout() ), -1 );
data.realConnectionTimeout = Const.toInt( environmentSubstitute( meta.getSocketTimeout() ), -1 );
return true;
}
return false;
}
public void dispose( StepMetaInterface smi, StepDataInterface sdi ) {
meta = (HTTPMeta) smi;
data = (HTTPData) sdi;
super.dispose( smi, sdi );
}
}