/*!
* This program is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software
* Foundation.
*
* You should have received a copy of the GNU Lesser General Public License along with this
* program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html
* or from the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* Copyright (c) 2002-2016 Pentaho Corporation.. All rights reserved.
*/
package org.pentaho.platform.dataaccess.datasource.wizard.service.agile;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.SQLStatement;
import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleDatabaseException;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.row.value.ValueMetaBase;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransHopMeta;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepErrorMeta;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.steps.calculator.CalculatorMeta;
import org.pentaho.di.trans.steps.calculator.CalculatorMetaFunction;
import org.pentaho.di.trans.steps.dummytrans.DummyTransMeta;
import org.pentaho.di.trans.steps.tableoutput.TableOutputMeta;
import org.pentaho.metadata.model.concept.types.AggregationType;
import org.pentaho.metadata.model.concept.types.DataType;
import org.pentaho.platform.api.engine.IPentahoSession;
import org.pentaho.platform.dataaccess.datasource.wizard.models.ColumnInfo;
import org.pentaho.platform.dataaccess.datasource.wizard.models.CsvTransformGeneratorException;
import org.pentaho.platform.dataaccess.datasource.wizard.models.ModelInfo;
import org.pentaho.platform.dataaccess.datasource.wizard.service.messages.Messages;
import org.pentaho.platform.dataaccess.datasource.wizard.sources.csv.FileTransformStats;
import org.pentaho.platform.engine.core.system.PentahoBase;
public abstract class StagingTransformGenerator extends PentahoBase {
private static final long serialVersionUID = -185098401772609035L;
private static final String DUMMY = "dummy"; //$NON-NLS-1$
private static final String TABLE_OUTPUT = "output"; //$NON-NLS-1$
private static final String TRANS_SESSION_ATTR = "PDI_Trans"; //$NON-NLS-1$
private static final Log log = LogFactory.getLog( StagingTransformGenerator.class );
private DatabaseMeta targetDatabaseMeta;
private String tableName = null;
private FileTransformStats transformStats;
protected abstract StepMeta[] getSteps( TransMeta transMeta );
protected abstract String[] getIndexedColumnNames();
protected long errorRowCount;
protected long maxErrorRows = 100;
protected long csvErrorRowCount;
private ModelInfo modelInfo;
/**
* Default constructor that uses the JNDI datasource configured in the plugin.xml file.
*/
public StagingTransformGenerator() {
targetDatabaseMeta = AgileHelper.getDatabaseMeta();
}
/**
* Use this contructor if you want to specify a different datasource than the one configured in plugin.xml. Typically
* used for unit testing.
*
* @param databaseMeta
*/
public StagingTransformGenerator( DatabaseMeta databaseMeta ) {
this.targetDatabaseMeta = databaseMeta;
}
private static String getStackTraceAsString( Throwable aThrowable ) {
final Writer result = new StringWriter();
final PrintWriter printWriter = new PrintWriter( result );
aThrowable.printStackTrace( printWriter );
return result.toString();
}
public void preview( IPentahoSession session ) throws CsvTransformGeneratorException {
Trans trans = createTransform( false );
try {
prepareTransform( trans, session );
} catch ( Exception e ) {
error( "Preview Failed: transformation preparation", e ); //$NON-NLS-1$
throw new CsvTransformGeneratorException( "Preview Failed: transformation preparation: preview", e,
getStackTraceAsString( e ) ); //$NON-NLS-1$
}
String[] stepNames = trans.getTransMeta().getStepNames();
executeTransformSync( trans, stepNames[ stepNames.length - 1 ], session );
}
public void dropTable( String tableName ) throws CsvTransformGeneratorException {
if ( tableName == null ) {
throw new IllegalArgumentException( "Table Name cannot be null" ); //$NON-NLS-1$
}
String schemaTableName =
targetDatabaseMeta.getQuotedSchemaTableCombination( AgileHelper.getSchemaName(), tableName );
if ( checkTableExists( schemaTableName ) ) {
// TODO this should be dialected
String ddl = "DROP TABLE " + schemaTableName;
execSqlStatement( ddl, targetDatabaseMeta, null );
}
}
public void createOrModifyTable( IPentahoSession session )
throws CsvTransformGeneratorException, IllegalArgumentException {
if ( session == null ) {
throw new IllegalArgumentException( "IPentahoSession cannot be null" ); //$NON-NLS-1$
}
if ( tableName == null ) {
throw new IllegalArgumentException( "Table name cannot be null" ); //$NON-NLS-1$
}
TransMeta transMeta = createTransMeta( true );
// the table output is the last step
StepMeta[] steps = transMeta.getStepsArray();
StepMeta tableStepMeta = steps[ steps.length - 1 ];
TableOutputMeta meta = (TableOutputMeta) tableStepMeta.getStepMetaInterface();
meta.setDatabaseMeta( targetDatabaseMeta );
try {
executeSql( meta, tableStepMeta, transMeta );
} catch ( CsvTransformGeneratorException e ) {
if ( !e.getMessage().equalsIgnoreCase( "No SQL generated" ) ) { //$NON-NLS-1$
error( e.getMessage() );
throw new CsvTransformGeneratorException( "Could not create or modify table", e, //$NON-NLS-1$
getStackTraceAsString( e ), null, Messages.getString( "StagingTransformGenerator.ERROR_0001_UNABLE_TO_CREATE_OR_MODIFY_TABLE" ) ); //$NON-NLS-1$
}
}
}
/**
* Stages the data from a CSV file into a database table. As the table is loading, a {@link TransformStats} monitors
* the progress. This is placed in the supplied {@link IPentahoSession} to allow interrogation under the attribute key
* <code>FileTransformStats_<em>fileName</em></code>
*
* @param truncate
* @param session
* @throws CsvTransformGeneratorException
*/
public void loadTable( boolean truncate, IPentahoSession session, boolean async )
throws CsvTransformGeneratorException {
if ( session == null ) {
throw new IllegalArgumentException( "IPentahoSession cannot be null" ); //$NON-NLS-1$
}
if ( tableName == null ) {
throw new IllegalArgumentException( "Table name cannot be null" ); //$NON-NLS-1$
}
if ( transformStats != null ) {
transformStats.setRowsFinished( false );
transformStats.setRowsStarted( true );
transformStats.setTotalRecords( 0 );
transformStats.setRowsRejected( 0 );
}
Trans trans = createTransform( true );
// the table output is the last step
StepMeta[] steps = trans.getTransMeta().getStepsArray();
StepMeta tableStepMeta = steps[ steps.length - 1 ];
TableOutputMeta meta = (TableOutputMeta) tableStepMeta.getStepMetaInterface();
meta.setDatabaseMeta( targetDatabaseMeta );
meta.setTruncateTable( truncate );
try {
prepareTransform( trans, session );
} catch ( Exception e ) {
error( "Preview Failed: transformation preparation", e ); //$NON-NLS-1$
Throwable e2 = e.getCause();
e2 = e2 == null ? e : e2;
throw new CsvTransformGeneratorException( "Preview Failed: transformation preparation: loadTable", e2,
getStackTraceAsString( e2 ) ); //$NON-NLS-1$
}
StepInterface step = trans.findRunThread( TABLE_OUTPUT );
PdiTransListener listener = new PdiTransListener( trans, step, transformStats );
// start the listener in a thread
Thread listenerThread = new Thread( listener );
listenerThread.start();
session.setAttribute( TRANS_SESSION_ATTR, trans );
if ( async ) {
executeTransformAsync( trans );
} else {
executeTransformSync( trans, null, session );
}
}
public int createIndices( IPentahoSession session ) {
if ( transformStats != null ) {
transformStats.setIndexFinished( false );
transformStats.setIndexStarted( true );
}
String tableName = getTableName();
Database db = new Database( targetDatabaseMeta );
String[] indexed = getIndexedColumnNames();
List<String> commands = new ArrayList<String>();
// TODO base this on the input rows meta for the table output step?
for ( String columnName : indexed ) {
String indexSql = db.getCreateIndexStatement( tableName, columnName + "_idx", //$NON-NLS-1$
new String[] { columnName }, false, false, false, true );
commands.add( indexSql );
}
if ( transformStats != null ) {
transformStats.setIndexCount( commands.size() );
}
int indexDone = 0;
int indexSuccess = 0;
for ( String command : commands ) {
try {
execSqlStatement( command, targetDatabaseMeta, null );
indexSuccess++;
} catch ( CsvTransformGeneratorException e ) {
// failed to execute
}
indexDone++;
if ( transformStats != null ) {
transformStats.setIndexDone( indexDone );
}
}
if ( transformStats != null ) {
transformStats.setIndexFinished( true );
transformStats.setIndexStarted( false );
}
return indexSuccess;
}
public void cancelLoad( IPentahoSession session ) {
Trans trans = (Trans) session.getAttribute( TRANS_SESSION_ATTR );
trans.stopAll();
}
protected StepMeta addDummyStep( TransMeta transMeta, String stepName ) {
DummyTransMeta meta = new DummyTransMeta();
// meta.setID(2);
StepMeta stepMeta = new StepMeta( stepName, stepName, meta );
transMeta.addStep( stepMeta );
return stepMeta;
}
protected StepMeta addTableOutputStep( TransMeta transMeta, String tableOutputStepName, String modelName ) {
TableOutputMeta tableOutputMeta = new TableOutputMeta();
tableOutputMeta.setCommitSize( 1000 );
tableOutputMeta.setIgnoreErrors( true );
tableOutputMeta.setPartitioningEnabled( false );
tableOutputMeta.setSchemaName( AgileHelper.getSchemaName() );
tableOutputMeta.setTableName( getTableName() );
tableOutputMeta.setUseBatchUpdate( false );
StepMeta tableOutputStepMeta = new StepMeta( tableOutputStepName, tableOutputStepName, tableOutputMeta );
transMeta.addStep( tableOutputStepMeta );
return tableOutputStepMeta;
}
protected void createHop( StepMeta fromStep, StepMeta toStep, TransMeta transMeta ) {
TransHopMeta hopMeta = new TransHopMeta();
hopMeta.setFromStep( fromStep );
hopMeta.setToStep( toStep );
hopMeta.setEnabled( true );
transMeta.addTransHop( hopMeta );
}
protected void prepareTransform( Trans trans, final IPentahoSession session ) throws KettleException {
trans.prepareExecution( trans.getArguments() );
StepInterface tableOutputStep = trans.findRunThread( TABLE_OUTPUT );
if ( tableOutputStep != null ) {
StepErrorMeta tableOutputErrorMeta = new StepErrorMeta( trans.getTransMeta(), tableOutputStep.getStepMeta() ) {
public void addErrorRowData( Object[] row, int startIndex, long nrErrors, String errorDescriptions,
String fieldNames, String errorCodes ) {
// don't overwhelm the user with too many errors
if ( errorRowCount < maxErrorRows ) {
StringBuffer sb = new StringBuffer();
sb.append( "Rejected Row: " );
for ( Object rowData : row ) {
sb.append( rowData );
sb.append( ", " );
}
sb.append( "\r\n" );
if ( transformStats != null ) {
transformStats.getErrors().add( sb.toString() + errorDescriptions );
}
}
errorRowCount++;
transformStats.setErrorCount( errorRowCount );
super.addErrorRowData( row, startIndex, nrErrors, errorDescriptions, fieldNames, errorCodes );
}
};
StepMeta outputDummyStepMeta = addDummyStep( trans.getTransMeta(), "TableOutputErrorDummy" );
tableOutputErrorMeta.setTargetStep( outputDummyStepMeta );
tableOutputErrorMeta.setEnabled( true );
tableOutputStep.getStepMeta().setStepErrorMeta( tableOutputErrorMeta );
}
}
protected void executeTransformSync( Trans trans, String listenerStepName, IPentahoSession session )
throws CsvTransformGeneratorException {
PdiRowListener rowListener = new PdiRowListener();
if ( listenerStepName != null ) {
trans.getStepInterface( listenerStepName, 0 ).addRowListener( rowListener );
}
try {
trans.startThreads();
} catch ( Exception e ) {
error( "Preview Failed: starting threads", e ); //$NON-NLS-1$
throw new CsvTransformGeneratorException( "Preview Failed: starting threads", e,
getStackTraceAsString( e ) ); //$NON-NLS-1$
}
try {
trans.waitUntilFinished();
} catch ( Exception e ) {
error( "Preview Failed: running", e ); //$NON-NLS-1$
throw new CsvTransformGeneratorException( "Preview Failed: running", e,
getStackTraceAsString( e ) ); //$NON-NLS-1$
}
try {
trans.cleanup();
} catch ( Exception e ) {
error( "Preview Failed: ending", e ); //$NON-NLS-1$
throw new CsvTransformGeneratorException( "Preview Failed: ending", e, getStackTraceAsString( e ) ); //$NON-NLS-1$
}
if ( transformStats != null ) {
transformStats.setDataRows( rowListener.getWrittenRows() );
}
}
protected void executeTransformAsync( Trans trans ) throws CsvTransformGeneratorException {
try {
trans.startThreads();
} catch ( Exception e ) {
error( "Preview Failed: starting threads", e ); //$NON-NLS-1$
throw new CsvTransformGeneratorException( "Preview Failed: starting threads", e,
getStackTraceAsString( e ) ); //$NON-NLS-1$
}
}
protected Trans createTransform( boolean doOutput ) {
TransMeta transMeta = createTransMeta( doOutput );
return new Trans( transMeta );
}
private TransMeta createTransMeta( boolean doOutput ) {
TransMeta transMeta = new TransMeta();
StepMeta[] steps = getSteps( transMeta );
StepMeta lastStep = steps[ steps.length - 1 ];
// create the table step if necessary
StepMeta tableStepMeta = null;
if ( doOutput ) {
tableStepMeta = addTableOutputStep( transMeta, TABLE_OUTPUT, tableName );
createHop( lastStep, tableStepMeta, transMeta );
lastStep = tableStepMeta;
}
// we need to create a dummy step as a sink, otherwise the transform won't execute anything
StepMeta dummyStepMeta = null;
if ( lastStep != tableStepMeta ) {
dummyStepMeta = addDummyStep( transMeta, DUMMY );
createHop( lastStep, dummyStepMeta, transMeta );
}
return transMeta;
}
protected void executeSql( TableOutputMeta meta, StepMeta stepMeta, TransMeta transMeta )
throws CsvTransformGeneratorException {
try {
RowMetaInterface prev = transMeta.getPrevStepFields( TABLE_OUTPUT );
SQLStatement sqlStatement = meta.getSQLStatements( transMeta, stepMeta, prev, null, false, null );
if ( !sqlStatement.hasError() ) {
if ( sqlStatement.hasSQL() ) {
// now we can execute the SQL
String sqlScript = sqlStatement.getSQL();
execSqlStatement( sqlScript, meta.getDatabaseMeta(), null );
} else {
// No SQL was generated
error( "No SQL generated" ); //$NON-NLS-1$
throw new CsvTransformGeneratorException( "No SQL generated" ); //$NON-NLS-1$
}
} else {
error( sqlStatement.getError() );
throw new CsvTransformGeneratorException( sqlStatement.getError() );
}
} catch ( KettleException ke ) {
error( "Exception encountered", ke ); //$NON-NLS-1$
throw new CsvTransformGeneratorException( "Exception encountered", ke,
getStackTraceAsString( ke ) ); //$NON-NLS-1$
}
}
public void execSqlStatement( String sqlScript, DatabaseMeta ci, StringBuilder message )
throws IllegalArgumentException, CsvTransformGeneratorException {
if ( ci == null ) {
throw new IllegalArgumentException( "DatabaesMeta cannot be null" ); //$NON-NLS-1$
}
Database db = getDatabase( ci );
try {
db.connect( null );
// Multiple statements have to be split into parts
// We use the ";" to separate statements...
String all = sqlScript + Const.CR;
int from = 0;
int to = 0;
int length = all.length();
while ( to < length ) {
char c = all.charAt( to );
if ( c == '"' ) {
to++;
c = ' ';
while ( to < length && c != '"' ) {
c = all.charAt( to );
to++;
}
} else if ( c == '\'' ) { // skip until next '
to++;
c = ' ';
while ( to < length && c != '\'' ) {
c = all.charAt( to );
to++;
}
}
c = all.charAt( to );
if ( c == ';' || to >= length - 1 ) { // end of statement
if ( to >= length - 1 ) {
to++; // grab last char also!
}
String stat = all.substring( from, to );
String sql = Const.trim( stat );
try {
if ( !sql.equals( "" ) ) { //$NON-NLS-1$
db.execStatement( sql );
}
} catch ( Exception dbe ) {
error( "Error executing DDL", dbe ); //$NON-NLS-1$
throw new CsvTransformGeneratorException( dbe.getMessage(), dbe, getStackTraceAsString( dbe ) );
}
to++;
from = to;
} else {
to++;
}
}
} catch ( KettleDatabaseException dbe ) {
error( "Connection error", dbe ); //$NON-NLS-1$
throw new CsvTransformGeneratorException( "Connection error", dbe, getStackTraceAsString( dbe ) ); //$NON-NLS-1$
} finally {
db.disconnect();
}
}
Database getDatabase( final DatabaseMeta databaseMeta ) {
return new Database( databaseMeta );
}
protected int convertDataType( ColumnInfo ci ) {
if ( ci != null && ci.getDataType() != null ) {
switch ( ci.getDataType() ) {
case NUMERIC:
if ( ci.getPrecision() <= 0 ) {
return ValueMetaInterface.TYPE_INTEGER;
} else {
return ValueMetaInterface.TYPE_NUMBER;
}
default:
return ValueMetaBase.getType( ci.getDataType().getName() );
}
} else {
return ValueMetaInterface.TYPE_STRING;
}
}
@Override
public Log getLogger() {
return log;
}
public FileTransformStats getTransformStats() {
return transformStats;
}
public void setTransformStats( FileTransformStats transformStats ) {
this.transformStats = transformStats;
}
public String getTableName() {
return tableName;
}
public void setTableName( String tableName ) {
this.tableName = tableName;
}
protected StepMeta createCalcStep( TransMeta transMeta, String stepName, ColumnInfo[] columns ) {
CalculatorMeta meta = new CalculatorMeta();
List<CalculatorMetaFunction> funcs = new ArrayList<CalculatorMetaFunction>();
for ( ColumnInfo column : columns ) {
if ( column != null && !column.isIgnore() && column.getDataType() == DataType.DATE ) {
// see if we need to break out the date fields
int dateBreakOut = column.getDateFieldBreakout();
if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_YEAR ) > 0 ) {
CalculatorMetaFunction func =
createDateCalc( CalculatorMetaFunction.CALC_YEAR_OF_DATE, column.getTitle() + " (year)", column.getId(),
4 );
funcs.add( func );
}
if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_QUARTER ) > 0 ) {
CalculatorMetaFunction func =
createDateCalc( CalculatorMetaFunction.CALC_QUARTER_OF_DATE, column.getTitle() + " (qtr)", column.getId(),
4 );
funcs.add( func );
}
if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_MONTH ) > 0 ) {
CalculatorMetaFunction func =
createDateCalc( CalculatorMetaFunction.CALC_MONTH_OF_DATE, column.getTitle() + " (month)", column.getId(),
4 );
funcs.add( func );
}
if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_WEEK ) > 0 ) {
CalculatorMetaFunction func =
createDateCalc( CalculatorMetaFunction.CALC_WEEK_OF_YEAR, column.getTitle() + " (week)", column.getId(),
4 );
funcs.add( func );
}
if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_DAY ) > 0 ) {
CalculatorMetaFunction func =
createDateCalc( CalculatorMetaFunction.CALC_DAY_OF_MONTH, column.getTitle() + " (day)", column.getId(), 4 );
funcs.add( func );
}
if ( ( dateBreakOut & ColumnInfo.DATE_LEVEL_DAYOFWEEK ) > 0 ) {
CalculatorMetaFunction func =
createDateCalc( CalculatorMetaFunction.CALC_DAY_OF_WEEK, column.getTitle() + " (day of week)",
column.getId(), 4 );
funcs.add( func );
}
}
}
if ( funcs.size() == 0 ) {
return null;
}
meta.setCalculation( funcs.toArray( new CalculatorMetaFunction[ funcs.size() ] ) );
StepMeta stepMeta = new StepMeta( stepName, stepName, meta );
transMeta.addStep( stepMeta );
return stepMeta;
}
/**
* Creates a calculation. Used to break out date fields
*
* @param calcType
* @param fieldName
* @param fieldId
* @param valueLength
* @return
*/
protected CalculatorMetaFunction createDateCalc( int calcType, String fieldName, String fieldId, int valueLength ) {
String fieldB = null;
System.out.println( 99 );
String fieldC = null;
int valueType = ValueMetaInterface.TYPE_INTEGER;
int valuePrecision = 0;
boolean removedFromResult = false;
String conversionMask = ""; //$NON-NLS-1$
String decimalSymbol = ""; //$NON-NLS-1$
String groupingSymbol = ""; //$NON-NLS-1$
String currencySymbol = ""; //$NON-NLS-1$
CalculatorMetaFunction func = new CalculatorMetaFunction( fieldName, calcType, fieldId, fieldB, fieldC,
valueType, valueLength, valuePrecision,
removedFromResult, conversionMask, decimalSymbol, groupingSymbol, currencySymbol );
// update the model
ColumnInfo column = new ColumnInfo();
column.setAggregateType( AggregationType.NONE.toString() );
column.setDataType( DataType.NUMERIC );
column.setFieldType( ColumnInfo.FIELD_TYPE_DIMENSION );
column.setIgnore( false );
column.setId( fieldId );
column.setIndex( true );
column.setTitle( fieldName );
return func;
}
public ModelInfo getModelInfo() {
return modelInfo;
}
public void setModelInfo( ModelInfo modelInfo ) {
this.modelInfo = modelInfo;
}
private boolean checkTableExists( String tableName ) throws CsvTransformGeneratorException {
Database db = getDatabase( targetDatabaseMeta );
try {
db.connect( null );
try {
return db.checkTableExists( tableName );
} catch ( KettleDatabaseException dbe ) {
error( "Error executing DDL", dbe );
throw new CsvTransformGeneratorException( dbe.getMessage(), dbe, getStackTraceAsString( dbe ) );
}
} catch ( KettleDatabaseException dbe ) {
error( "Connection error", dbe );
throw new CsvTransformGeneratorException( "Connection error", dbe, getStackTraceAsString( dbe ) );
} finally {
db.disconnect();
}
}
}