/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.core.hash; import org.pentaho.di.core.exception.KettleValueException; import org.pentaho.di.core.row.RowMeta; import org.pentaho.di.core.row.RowMetaInterface; public class ByteArrayHashIndex { private static final int STANDARD_INDEX_SIZE = 512; private static final float STANDARD_LOAD_FACTOR = 0.78f; private RowMetaInterface keyRowMeta; private ByteArrayHashIndexEntry[] index; private int size; private int resizeThresHold; /** * Create a Byte array hash index to store row * * @param rowMeta */ public ByteArrayHashIndex( RowMetaInterface keyRowMeta, int size ) { this.keyRowMeta = keyRowMeta; // Find a suitable capacity being a factor of 2: int factor2Size = 1; while ( factor2Size < size ) { factor2Size <<= 1; // Multiply by 2 } this.size = factor2Size; this.resizeThresHold = (int) ( factor2Size * STANDARD_LOAD_FACTOR ); index = new ByteArrayHashIndexEntry[factor2Size]; } public ByteArrayHashIndex( RowMetaInterface keyRowMeta ) { this( keyRowMeta, STANDARD_INDEX_SIZE ); } public int getSize() { return size; } public boolean isEmpty() { return size == 0; } public byte[] get( byte[] key ) throws KettleValueException { int hashCode = generateHashCode( key, keyRowMeta ); int indexPointer = hashCode & ( index.length - 1 ); ByteArrayHashIndexEntry check = index[indexPointer]; while ( check != null ) { if ( check.hashCode == hashCode && check.equalsKey( key ) ) { return check.value; } check = check.nextEntry; } return null; } public void put( byte[] key, byte[] value ) throws KettleValueException { int hashCode = generateHashCode( key, keyRowMeta ); int indexPointer = hashCode & ( index.length - 1 ); // First see if there is an entry on that pointer... // boolean searchEmptySpot = false; ByteArrayHashIndexEntry check = index[indexPointer]; ByteArrayHashIndexEntry previousCheck = null; while ( check != null ) { searchEmptySpot = true; // If there is an identical entry in there, we replace the value. // And then we just return... // if ( check.hashCode == hashCode && check.equalsKey( key ) ) { check.value = value; return; } previousCheck = check; check = check.nextEntry; } // If we are still here, that means that we are ready to put the value down... // Where do we need to search for an empty spot in the index? // while ( searchEmptySpot ) { indexPointer++; if ( indexPointer >= size ) { indexPointer = 0; } if ( index[indexPointer] == null ) { searchEmptySpot = false; } } // OK, now that we know where to put the entry, insert it... // index[indexPointer] = new ByteArrayHashIndexEntry( hashCode, key, value, index[indexPointer] ); // Don't forget to link to the previous check entry if there was any... // if ( previousCheck != null ) { previousCheck.nextEntry = index[indexPointer]; } // If required, resize the table... // resize(); } private final void resize() { // Increase the size of the index... // size++; // See if we've reached our resize threshold... // if ( size >= resizeThresHold ) { ByteArrayHashIndexEntry[] oldIndex = index; // Double the size to keep the size of the index a factor of 2... // Allocate the new array... // int newSize = 2 * index.length; ByteArrayHashIndexEntry[] newIndex = new ByteArrayHashIndexEntry[newSize]; // Loop over the old index and re-distribute the entries // We want to make sure that the calculation // entry.hashCode & ( size - 1) // ends up in the right location after re-sizing... // for ( int i = 0; i < oldIndex.length; i++ ) { ByteArrayHashIndexEntry entry = oldIndex[i]; if ( entry != null ) { oldIndex[i] = null; entry.nextEntry = null; // we assume there is plenty of room in the new index... // Make sure we follow all the linked entries... // TODO This is a lot of extra work, see how we can avoid it! // int newIndexPointer = entry.hashCode & ( newSize - 1 ); // Make sure on this new index pointer, we have room to put the entry // ByteArrayHashIndexEntry check = newIndex[newIndexPointer]; if ( check == null ) { // Yes, plenty of room // newIndex[newIndexPointer] = check; } else { // No, we need to look for a nice spot to put the hash entry... // ByteArrayHashIndexEntry previousCheck = null; while ( check != null ) { previousCheck = check; check = check.nextEntry; } while ( newIndex[newIndexPointer] != null ) { newIndexPointer++; if ( newIndexPointer >= newSize ) { newIndexPointer = 0; } } // OK, now that we have a nice spot to put the entry, link the previous check entry to this one... // previousCheck.nextEntry = entry; newIndex[newIndexPointer] = entry; } newIndex[newIndexPointer] = entry; } } // Replace the old index with the new one we just created... // index = newIndex; // Also change the resize threshold... // resizeThresHold = (int) ( newSize * STANDARD_LOAD_FACTOR ); } } public static int generateHashCode( byte[] key, RowMetaInterface rowMeta ) throws KettleValueException { Object[] rowData = RowMeta.getRow( rowMeta, key ); return rowMeta.hashCode( rowData ); } private static final class ByteArrayHashIndexEntry { private int hashCode; private byte[] key; private byte[] value; private ByteArrayHashIndexEntry nextEntry; /** * @param hashCode * @param key * @param value * @param nextEntry */ public ByteArrayHashIndexEntry( int hashCode, byte[] key, byte[] value, ByteArrayHashIndexEntry nextEntry ) { this.hashCode = hashCode; this.key = key; this.value = value; this.nextEntry = nextEntry; } public boolean equalsKey( byte[] cmpKey ) { return equalsByteArray( key, cmpKey ); } /** * The row is the same if the value is the same The data types are the same so no error is made here. */ @Override public boolean equals( Object obj ) { ByteArrayHashIndexEntry e = (ByteArrayHashIndexEntry) obj; return equalsValue( e.value ); } public boolean equalsValue( byte[] cmpValue ) { return equalsByteArray( value, cmpValue ); } public static final boolean equalsByteArray( byte[] value, byte[] cmpValue ) { if ( value.length != cmpValue.length ) { return false; } for ( int i = value.length - 1; i >= 0; i-- ) { if ( value[i] != cmpValue[i] ) { return false; } } return true; } } }