/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.ui.trans.steps.textfileinput;
import java.io.InputStreamReader;
import java.lang.reflect.InvocationTargetException;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.jface.dialogs.ProgressMonitorDialog;
import org.eclipse.jface.operation.IRunnableWithProgress;
import org.eclipse.swt.widgets.Shell;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.util.Utils;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogChannel;
import org.pentaho.di.core.logging.LogChannelInterface;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.core.util.StringEvaluationResult;
import org.pentaho.di.core.util.StringEvaluator;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.steps.textfileinput.EncodingType;
import org.pentaho.di.trans.steps.textfileinput.InputFileMetaInterface;
import org.pentaho.di.trans.steps.textfileinput.TextFileInput;
import org.pentaho.di.trans.steps.textfileinput.TextFileInputField;
import org.pentaho.di.trans.steps.textfileinput.TextFileInputMeta;
import org.pentaho.di.trans.steps.textfileinput.TextFileLine;
import org.pentaho.di.ui.core.dialog.ErrorDialog;
/**
* Takes care of displaying a dialog that will handle the wait while we're finding out what tables, views etc we can
* reach in the database.
*
* @author Matt
* @since 07-apr-2005
* @deprecated replaced by implementation in the ...steps.fileinput.text package
*/
public class TextFileCSVImportProgressDialog {
private static Class<?> PKG = TextFileInputMeta.class; // for i18n purposes, needed by Translator2!!
private Shell shell;
private InputFileMetaInterface meta;
private int samples;
private boolean replaceMeta;
private String message;
private String debug;
private long rownumber;
private InputStreamReader reader;
private TransMeta transMeta;
private LogChannelInterface log;
private EncodingType encodingType;
/**
* Creates a new dialog that will handle the wait while we're finding out what tables, views etc we can reach in the
* database.
*/
public TextFileCSVImportProgressDialog( Shell shell, InputFileMetaInterface meta, TransMeta transMeta,
InputStreamReader reader, int samples, boolean replaceMeta ) {
this.shell = shell;
this.meta = meta;
this.reader = reader;
this.samples = samples;
this.replaceMeta = replaceMeta;
this.transMeta = transMeta;
message = null;
debug = "init";
rownumber = 1L;
this.log = new LogChannel( transMeta );
this.encodingType = EncodingType.guessEncodingType( reader.getEncoding() );
}
public String open() {
IRunnableWithProgress op = new IRunnableWithProgress() {
public void run( IProgressMonitor monitor ) throws InvocationTargetException, InterruptedException {
try {
message = doScan( monitor );
} catch ( Exception e ) {
e.printStackTrace();
throw new InvocationTargetException( e,
BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Exception.ErrorScanningFile",
"" + rownumber, debug, e.toString() ) );
}
}
};
try {
ProgressMonitorDialog pmd = new ProgressMonitorDialog( shell );
pmd.run( true, true, op );
} catch ( InvocationTargetException e ) {
new ErrorDialog( shell,
BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.ErrorScanningFile.Title" ),
BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.ErrorScanningFile.Message" ), e );
} catch ( InterruptedException e ) {
new ErrorDialog( shell,
BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.ErrorScanningFile.Title" ),
BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.ErrorScanningFile.Message" ), e );
}
return message;
}
private String doScan( IProgressMonitor monitor ) throws KettleException {
if ( samples > 0 ) {
monitor.beginTask(
BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Task.ScanningFile" ), samples + 1 );
} else {
monitor.beginTask( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Task.ScanningFile" ), 2 );
}
String line = "";
long fileLineNumber = 0;
DecimalFormatSymbols dfs = new DecimalFormatSymbols();
int nrfields = meta.getInputFields().length;
RowMetaInterface outputRowMeta = new RowMeta();
meta.getFields( outputRowMeta, null, null, null, transMeta, null, null );
// Remove the storage meta-data (don't go for lazy conversion during scan)
for ( ValueMetaInterface valueMeta : outputRowMeta.getValueMetaList() ) {
valueMeta.setStorageMetadata( null );
valueMeta.setStorageType( ValueMetaInterface.STORAGE_TYPE_NORMAL );
}
RowMetaInterface convertRowMeta = outputRowMeta.cloneToType( ValueMetaInterface.TYPE_STRING );
// How many null values?
int[] nrnull = new int[nrfields]; // How many times null value?
// String info
String[] minstr = new String[nrfields]; // min string
String[] maxstr = new String[nrfields]; // max string
boolean[] firststr = new boolean[nrfields]; // first occ. of string?
// Date info
boolean[] isDate = new boolean[nrfields]; // is the field perhaps a Date?
int[] dateFormatCount = new int[nrfields]; // How many date formats work?
boolean[][] dateFormat = new boolean[nrfields][Const.getDateFormats().length]; // What are the date formats that
// work?
Date[][] minDate = new Date[nrfields][Const.getDateFormats().length]; // min date value
Date[][] maxDate = new Date[nrfields][Const.getDateFormats().length]; // max date value
// Number info
boolean[] isNumber = new boolean[nrfields]; // is the field perhaps a Number?
int[] numberFormatCount = new int[nrfields]; // How many number formats work?
boolean[][] numberFormat = new boolean[nrfields][Const.getNumberFormats().length]; // What are the number format
// that work?
double[][] minValue = new double[nrfields][Const.getDateFormats().length]; // min number value
double[][] maxValue = new double[nrfields][Const.getDateFormats().length]; // max number value
int[][] numberPrecision = new int[nrfields][Const.getNumberFormats().length]; // remember the precision?
int[][] numberLength = new int[nrfields][Const.getNumberFormats().length]; // remember the length?
for ( int i = 0; i < nrfields; i++ ) {
TextFileInputField field = meta.getInputFields()[i];
if ( log.isDebug() ) {
debug = "init field #" + i;
}
if ( replaceMeta ) { // Clear previous info...
field.setName( meta.getInputFields()[i].getName() );
field.setType( meta.getInputFields()[i].getType() );
field.setFormat( "" );
field.setLength( -1 );
field.setPrecision( -1 );
field.setCurrencySymbol( dfs.getCurrencySymbol() );
field.setDecimalSymbol( "" + dfs.getDecimalSeparator() );
field.setGroupSymbol( "" + dfs.getGroupingSeparator() );
field.setNullString( "-" );
field.setTrimType( ValueMetaInterface.TRIM_TYPE_NONE );
}
nrnull[i] = 0;
minstr[i] = "";
maxstr[i] = "";
firststr[i] = true;
// Init data guess
isDate[i] = true;
for ( int j = 0; j < Const.getDateFormats().length; j++ ) {
dateFormat[i][j] = true;
minDate[i][j] = Const.MAX_DATE;
maxDate[i][j] = Const.MIN_DATE;
}
dateFormatCount[i] = Const.getDateFormats().length;
// Init number guess
isNumber[i] = true;
for ( int j = 0; j < Const.getNumberFormats().length; j++ ) {
numberFormat[i][j] = true;
minValue[i][j] = Double.MAX_VALUE;
maxValue[i][j] = -Double.MAX_VALUE;
numberPrecision[i][j] = -1;
numberLength[i][j] = -1;
}
numberFormatCount[i] = Const.getNumberFormats().length;
}
InputFileMetaInterface strinfo = (InputFileMetaInterface) meta.clone();
for ( int i = 0; i < nrfields; i++ ) {
strinfo.getInputFields()[i].setType( ValueMetaInterface.TYPE_STRING );
}
// Sample <samples> rows...
debug = "get first line";
StringBuilder lineBuffer = new StringBuilder( 256 );
int fileFormatType = meta.getFileFormatTypeNr();
// If the file has a header we overwrite the first line
// However, if it doesn't have a header, take a new line
//
line = TextFileInput.getLine( log, reader, encodingType, fileFormatType, lineBuffer );
fileLineNumber++;
int skipped = 1;
if ( meta.hasHeader() ) {
while ( line != null && skipped < meta.getNrHeaderLines() ) {
line = TextFileInput.getLine( log, reader, encodingType, fileFormatType, lineBuffer );
skipped++;
fileLineNumber++;
}
}
int linenr = 1;
List<StringEvaluator> evaluators = new ArrayList<StringEvaluator>();
// Allocate number and date parsers
DecimalFormat df2 = (DecimalFormat) NumberFormat.getInstance();
DecimalFormatSymbols dfs2 = new DecimalFormatSymbols();
SimpleDateFormat daf2 = new SimpleDateFormat();
boolean errorFound = false;
while ( !errorFound && line != null && ( linenr <= samples || samples == 0 ) && !monitor.isCanceled() ) {
monitor.subTask( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Task.ScanningLine", ""
+ linenr ) );
if ( samples > 0 ) {
monitor.worked( 1 );
}
if ( log.isDebug() ) {
debug = "convert line #" + linenr + " to row";
}
RowMetaInterface rowMeta = new RowMeta();
meta.getFields( rowMeta, "stepname", null, null, transMeta, null, null );
// Remove the storage meta-data (don't go for lazy conversion during scan)
for ( ValueMetaInterface valueMeta : rowMeta.getValueMetaList() ) {
valueMeta.setStorageMetadata( null );
valueMeta.setStorageType( ValueMetaInterface.STORAGE_TYPE_NORMAL );
}
String delimiter = transMeta.environmentSubstitute( meta.getSeparator() );
String enclosure = transMeta.environmentSubstitute( meta.getEnclosure() );
String escapeCharacter = transMeta.environmentSubstitute( meta.getEscapeCharacter() );
Object[] r =
TextFileInput.convertLineToRow(
log, new TextFileLine( line, fileLineNumber, null ), strinfo, null, 0, outputRowMeta,
convertRowMeta, meta.getFilePaths( transMeta )[0], rownumber, delimiter, enclosure, escapeCharacter,
null, false, false, false, false, false, false, false, false, null, null, false, null, null, null,
null, 0 );
if ( r == null ) {
errorFound = true;
continue;
}
rownumber++;
for ( int i = 0; i < nrfields && i < r.length; i++ ) {
StringEvaluator evaluator;
if ( i >= evaluators.size() ) {
evaluator = new StringEvaluator( true );
evaluators.add( evaluator );
} else {
evaluator = evaluators.get( i );
}
String string = rowMeta.getString( r, i );
if ( i == 0 ) {
System.out.println();
}
evaluator.evaluateString( string );
}
fileLineNumber++;
if ( r != null ) {
linenr++;
}
// Grab another line...
//
line = TextFileInput.getLine( log, reader, encodingType, fileFormatType, lineBuffer );
}
monitor.worked( 1 );
monitor.setTaskName( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Task.AnalyzingResults" ) );
// Show information on items using a dialog box
//
StringBuilder message = new StringBuilder();
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.ResultAfterScanning", ""
+ ( linenr - 1 ) ) );
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.HorizontalLine" ) );
for ( int i = 0; i < nrfields; i++ ) {
TextFileInputField field = meta.getInputFields()[i];
StringEvaluator evaluator = evaluators.get( i );
List<StringEvaluationResult> evaluationResults = evaluator.getStringEvaluationResults();
// If we didn't find any matching result, it's a String...
//
StringEvaluationResult result = evaluator.getAdvicedResult();
if ( evaluationResults.isEmpty() ) {
field.setType( ValueMetaInterface.TYPE_STRING );
field.setLength( evaluator.getMaxLength() );
}
if ( result != null ) {
// Take the first option we find, list the others below...
//
ValueMetaInterface conversionMeta = result.getConversionMeta();
field.setType( conversionMeta.getType() );
field.setTrimType( conversionMeta.getTrimType() );
field.setFormat( conversionMeta.getConversionMask() );
field.setDecimalSymbol( conversionMeta.getDecimalSymbol() );
field.setGroupSymbol( conversionMeta.getGroupingSymbol() );
field.setLength( conversionMeta.getLength() );
field.setPrecision( conversionMeta.getPrecision() );
nrnull[i] = result.getNrNull();
minstr[i] = result.getMin() == null ? "" : result.getMin().toString();
maxstr[i] = result.getMax() == null ? "" : result.getMax().toString();
}
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.FieldNumber", ""
+ ( i + 1 ) ) );
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.FieldName", field
.getName() ) );
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.FieldType", field
.getTypeDesc() ) );
switch ( field.getType() ) {
case ValueMetaInterface.TYPE_NUMBER:
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.EstimatedLength", ( field.getLength() < 0 ? "-" : ""
+ field.getLength() ) ) );
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.EstimatedPrecision", field.getPrecision() < 0 ? "-" : ""
+ field.getPrecision() ) );
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.NumberFormat", field
.getFormat() ) );
if ( !evaluationResults.isEmpty() ) {
if ( evaluationResults.size() > 1 ) {
message.append( BaseMessages
.getString( PKG, "TextFileCSVImportProgressDialog.Info.WarnNumberFormat" ) );
}
for ( StringEvaluationResult seResult : evaluationResults ) {
String mask = seResult.getConversionMeta().getConversionMask();
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.NumberFormat2", mask ) );
message.append( BaseMessages
.getString( PKG, "TextFileCSVImportProgressDialog.Info.TrimType", seResult
.getConversionMeta().getTrimType() ) );
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.NumberMinValue", seResult.getMin() ) );
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.NumberMaxValue", seResult.getMax() ) );
try {
df2.applyPattern( mask );
df2.setDecimalFormatSymbols( dfs2 );
double mn = df2.parse( seResult.getMin().toString() ).doubleValue();
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.NumberExample", mask, seResult.getMin(), Double
.toString( mn ) ) );
} catch ( Exception e ) {
if ( log.isDetailed() ) {
log.logDetailed( "This is unexpected: parsing ["
+ seResult.getMin() + "] with format [" + mask + "] did not work." );
}
}
}
}
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.NumberNrNullValues", "" + nrnull[i] ) );
break;
case ValueMetaInterface.TYPE_STRING:
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.StringMaxLength", ""
+ field.getLength() ) );
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.StringMinValue", minstr[i] ) );
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.StringMaxValue", maxstr[i] ) );
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.StringNrNullValues", "" + nrnull[i] ) );
break;
case ValueMetaInterface.TYPE_DATE:
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.DateMaxLength", field
.getLength() < 0 ? "-" : "" + field.getLength() ) );
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.DateFormat", field
.getFormat() ) );
if ( dateFormatCount[i] > 1 ) {
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.WarnDateFormat" ) );
}
if ( !Utils.isEmpty( minstr[i] ) ) {
for ( int x = 0; x < Const.getDateFormats().length; x++ ) {
if ( dateFormat[i][x] ) {
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.DateFormat2", Const.getDateFormats()[x] ) );
Date mindate = minDate[i][x];
Date maxdate = maxDate[i][x];
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.DateMinValue", mindate.toString() ) );
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.DateMaxValue", maxdate.toString() ) );
daf2.applyPattern( Const.getDateFormats()[x] );
try {
Date md = daf2.parse( minstr[i] );
message.append( BaseMessages.getString(
PKG, "TextFileCSVImportProgressDialog.Info.DateExample", Const.getDateFormats()[x],
minstr[i], md.toString() ) );
} catch ( Exception e ) {
if ( log.isDetailed() ) {
log.logDetailed( "This is unexpected: parsing ["
+ minstr[i] + "] with format [" + Const.getDateFormats()[x] + "] did not work." );
}
}
}
}
}
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.DateNrNullValues", ""
+ nrnull[i] ) );
break;
default:
break;
}
if ( nrnull[i] == linenr - 1 ) {
message.append( BaseMessages.getString( PKG, "TextFileCSVImportProgressDialog.Info.AllNullValues" ) );
}
message.append( Const.CR );
}
monitor.worked( 1 );
monitor.done();
return message.toString();
}
}