/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.blockingstep; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.SocketTimeoutException; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import org.apache.commons.vfs2.FileObject; import org.apache.commons.vfs2.FileSystemException; import org.pentaho.di.core.Const; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleFileException; import org.pentaho.di.core.row.RowMetaInterface; import org.pentaho.di.core.vfs.KettleVFS; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransMeta; import org.pentaho.di.trans.step.BaseStep; import org.pentaho.di.trans.step.StepDataInterface; import org.pentaho.di.trans.step.StepInterface; import org.pentaho.di.trans.step.StepMeta; import org.pentaho.di.trans.step.StepMetaInterface; /** * A step that blocks throughput until the input ends, then it will either output the last row or the complete input. */ public class BlockingStep extends BaseStep implements StepInterface { private static Class<?> PKG = BlockingStepMeta.class; // for i18n purposes, needed by Translator2!! private BlockingStepMeta meta; private BlockingStepData data; private Object[] lastRow; public BlockingStep( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, Trans trans ) { super( stepMeta, stepDataInterface, copyNr, transMeta, trans ); } private boolean addBuffer( RowMetaInterface rowMeta, Object[] r ) { if ( r != null ) { data.buffer.add( r ); // Save row } // Time to write to disk: buffer in core is full! if ( data.buffer.size() == meta.getCacheSize() // Buffer is full: dump to disk || ( data.files.size() > 0 && r == null && data.buffer.size() > 0 ) // No more records: join from disk ) { // Then write them to disk... DataOutputStream dos; GZIPOutputStream gzos; int p; try { FileObject fileObject = KettleVFS.createTempFile( meta.getPrefix(), ".tmp", environmentSubstitute( meta.getDirectory() ), getTransMeta() ); data.files.add( fileObject ); // Remember the files! OutputStream outputStream = KettleVFS.getOutputStream( fileObject, false ); if ( meta.getCompress() ) { gzos = new GZIPOutputStream( new BufferedOutputStream( outputStream ) ); dos = new DataOutputStream( gzos ); } else { dos = new DataOutputStream( outputStream ); gzos = null; } // How many records do we have? dos.writeInt( data.buffer.size() ); for ( p = 0; p < data.buffer.size(); p++ ) { // Just write the data, nothing else rowMeta.writeData( dos, data.buffer.get( p ) ); } // Close temp-file dos.close(); // close data stream if ( gzos != null ) { gzos.close(); // close gzip stream } outputStream.close(); // close file stream } catch ( Exception e ) { logError( "Error processing tmp-file: " + e.toString() ); return false; } data.buffer.clear(); } return true; } private Object[] getBuffer() { Object[] retval; // Open all files at once and read one row from each file... if ( data.files.size() > 0 && ( data.dis.size() == 0 || data.fis.size() == 0 ) ) { if ( log.isBasic() ) { logBasic( BaseMessages.getString( PKG, "BlockingStep.Log.Openfiles" ) ); } try { FileObject fileObject = data.files.get( 0 ); String filename = KettleVFS.getFilename( fileObject ); if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "BlockingStep.Log.Openfilename1" ) + filename + BaseMessages.getString( PKG, "BlockingStep.Log.Openfilename2" ) ); } InputStream fi = KettleVFS.getInputStream( fileObject ); DataInputStream di; data.fis.add( fi ); if ( meta.getCompress() ) { GZIPInputStream gzfi = new GZIPInputStream( new BufferedInputStream( fi ) ); di = new DataInputStream( gzfi ); data.gzis.add( gzfi ); } else { di = new DataInputStream( fi ); } data.dis.add( di ); // How long is the buffer? int buffersize = di.readInt(); if ( log.isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "BlockingStep.Log.BufferSize1" ) + filename + BaseMessages.getString( PKG, "BlockingStep.Log.BufferSize2" ) + buffersize + " " + BaseMessages.getString( PKG, "BlockingStep.Log.BufferSize3" ) ); } if ( buffersize > 0 ) { // Read a row from temp-file data.rowbuffer.add( data.outputRowMeta.readData( di ) ); } } catch ( Exception e ) { logError( BaseMessages.getString( PKG, "BlockingStepMeta.ErrorReadingFile" ) + e.toString() ); logError( Const.getStackTracker( e ) ); } } if ( data.files.size() == 0 ) { if ( data.buffer.size() > 0 ) { retval = data.buffer.get( 0 ); data.buffer.remove( 0 ); } else { retval = null; } } else { if ( data.rowbuffer.size() == 0 ) { retval = null; } else { retval = data.rowbuffer.get( 0 ); data.rowbuffer.remove( 0 ); // now get another FileObject file = data.files.get( 0 ); DataInputStream di = data.dis.get( 0 ); InputStream fi = data.fis.get( 0 ); GZIPInputStream gzfi = ( meta.getCompress() ) ? data.gzis.get( 0 ) : null; try { data.rowbuffer.add( 0, data.outputRowMeta.readData( di ) ); } catch ( SocketTimeoutException e ) { logError( BaseMessages.getString( PKG, "System.Log.UnexpectedError" ) + " : " + e.toString() ); logError( Const.getStackTracker( e ) ); setErrors( 1 ); stopAll(); } catch ( KettleFileException fe ) { // empty file or EOF mostly try { di.close(); fi.close(); if ( gzfi != null ) { gzfi.close(); } file.delete(); } catch ( IOException e ) { logError( BaseMessages.getString( PKG, "BlockingStepMeta.UnableDeleteFile" ) + file.toString() ); setErrors( 1 ); stopAll(); return null; } data.files.remove( 0 ); data.dis.remove( 0 ); data.fis.remove( 0 ); if ( gzfi != null ) { data.gzis.remove( 0 ); } } } } return retval; } public void dispose( StepMetaInterface smi, StepDataInterface sdi ) { if ( ( data.dis != null ) && ( data.dis.size() > 0 ) ) { for ( DataInputStream is : data.dis ) { BaseStep.closeQuietly( is ); } } // remove temp files for ( int f = 0; f < data.files.size(); f++ ) { FileObject fileToDelete = data.files.get( f ); try { if ( fileToDelete != null && fileToDelete.exists() ) { fileToDelete.delete(); } } catch ( FileSystemException e ) { logError( e.getLocalizedMessage(), e ); } } super.dispose( smi, sdi ); } public boolean init( StepMetaInterface smi, StepDataInterface sdi ) { meta = (BlockingStepMeta) smi; data = (BlockingStepData) sdi; if ( super.init( smi, sdi ) ) { // Add init code here. return true; } return false; } public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException { boolean err = true; Object[] r = getRow(); // Get row from input rowset & set row busy! // initialize if ( first && r != null ) { first = false; data.outputRowMeta = getInputRowMeta().clone(); } if ( !meta.isPassAllRows() ) { if ( r == null ) { // no more input to be expected... if ( lastRow != null ) { putRow( data.outputRowMeta, lastRow ); } setOutputDone(); return false; } lastRow = r; return true; } else { // The mode in which we pass all rows to the output. err = addBuffer( getInputRowMeta(), r ); if ( !err ) { setOutputDone(); // signal receiver we're finished. return false; } if ( r == null ) { // no more input to be expected... // Now we can start the output! r = getBuffer(); while ( r != null && !isStopped() ) { if ( log.isRowLevel() ) { logRowlevel( "Read row: " + getInputRowMeta().getString( r ) ); } putRow( data.outputRowMeta, r ); // copy row to possible alternate rowset(s). r = getBuffer(); } setOutputDone(); // signal receiver we're finished. return false; } return true; } } }