ColGroupRLE.java example

Explorer
incubator-systemml-master
- dev
  - release
    - src
      - test
        java
        org
        apache
        sysml
        validation
        Constants.java
        Utility.java
        ValidateLicAndNotice.java
- src
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.compress;

import java.util.Arrays;
import java.util.Iterator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.compress.utils.ConverterUtils;
import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
import org.apache.sysml.runtime.functionobjects.Builtin;
import org.apache.sysml.runtime.functionobjects.KahanFunction;
import org.apache.sysml.runtime.functionobjects.KahanPlus;
import org.apache.sysml.runtime.instructions.cp.KahanObject;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.Pair;
import org.apache.sysml.runtime.matrix.operators.ScalarOperator;


/** A group of columns compressed with a single run-length encoded bitmap. */
public class ColGroupRLE extends ColGroupOffset 
{
	private static final long serialVersionUID = 7450232907594748177L;

	private static final Log LOG = LogFactory.getLog(ColGroupRLE.class.getName());
	
	public ColGroupRLE() {
		super();
	}
	
	/**
	 * Main constructor. Encodes the offset list of every distinct value of
	 * the given uncompressed bitmap as an RLE bitmap and stores all bitmaps
	 * in linearized form.
	 * 
	 * @param colIndices
	 *            indices (within the block) of the columns included in this
	 *            column group
	 * @param numRows
	 *            total number of rows in the parent block
	 * @param ubm
	 *            Uncompressed bitmap representation of the block
	 */
	public ColGroupRLE(int[] colIndices, int numRows, UncompressedBitmap ubm) 
	{
		super(colIndices, numRows, ubm);
		
		//encode one RLE bitmap per distinct value and track the total length
		final int nvals = ubm.getNumValues();
		final char[][] encoded = new char[nvals][];
		int encodedLen = 0;
		int vix = 0;
		while( vix < nvals ) {
			char[] rle = BitmapEncoder.genRLEBitmap(
				ubm.getOffsetsList(vix).extractValues(), ubm.getNumOffsets(vix));
			encoded[vix] = rle;
			encodedLen += rle.length;
			vix++;
		}
		
		//compact bitmaps to linearized representation
		createCompressedBitmaps(nvals, encodedLen, encoded);
		
		//debug output: warn if the group is larger than its dense equivalent
		final double denseSize = MatrixBlock.estimateSizeDenseInMemory(numRows, colIndices.length);
		if( estimateInMemorySize() > denseSize )
			LOG.warn("RLE group larger than UC dense: "+estimateInMemorySize()+" "+denseSize);
	}

	/**
	 * Constructs a group from already encoded bitmaps. The given bitmap and
	 * offset arrays are referenced directly, not copied.
	 * 
	 * @param colIndices indices of the columns included in this column group
	 * @param numRows total number of rows
	 * @param zeros zero indicator passed through to the superclass
	 * @param values value tuples passed through to the superclass
	 * @param bitmaps linearized RLE bitmaps of all values
	 * @param bitmapOffs start offsets of the individual bitmaps within {@code bitmaps}
	 */
	public ColGroupRLE(int[] colIndices, int numRows, boolean zeros, double[] values, char[] bitmaps, int[] bitmapOffs) {
		super(colIndices, numRows, zeros, values);
		_ptr = bitmapOffs;
		_data = bitmaps;
	}
	
	/**
	 * @return the compression type of this column group, which is always
	 *         {@code CompressionType.RLE_BITMAP}
	 */
	@Override
	public CompressionType getCompType() {
		final CompressionType type = CompressionType.RLE_BITMAP;
		return type;
	}

	/**
	 * Creates a decoder over the RLE bitmap of the k-th value.
	 * 
	 * @param k index of the value
	 * @return iterator backed by a {@code BitmapDecoderRLE} over the bitmap of value k
	 */
	@Override
	public Iterator<Integer> getDecodeIterator(int k) {
		final int bitmapOff = _ptr[k];
		final int bitmapLen = len(k);
		return new BitmapDecoderRLE(_data, bitmapOff, bitmapLen);
	}
	
	/**
	 * Decompresses the rows [rl, ru) of this group into the target block by
	 * appending every non-zero cell via {@code target.appendValue}.
	 * <p>
	 * If LOW_LEVEL_OPT is enabled and the group has more than one value, the
	 * RLE lists of all values are scanned in row blocks of 128K rows, keeping
	 * the scan position of every value across blocks. Otherwise the call is
	 * delegated to the superclass implementation.
	 * 
	 * @param target output block that receives the appended values
	 * @param rl lower row index (inclusive)
	 * @param ru upper row index (exclusive)
	 */
	@Override
	public void decompressToBlock(MatrixBlock target, int rl, int ru) 
	{
		if( LOW_LEVEL_OPT && getNumValues() > 1 )
		{
			final int blksz = 128 * 1024;
			final int numCols = getNumCols();
			final int numVals = getNumValues();
			
			//position and start offset arrays (per value: index into its 
			//RLE list and row position at the end of the last consumed run)
			int[] astart = new int[numVals];
			int[] apos = skipScan(numVals, rl, astart);
			
			//cache conscious append via horizontal scans 
			for( int bi=rl; bi<ru; bi+=blksz ) {
				int bimax = Math.min(bi+blksz, ru);					
				for (int k=0, off=0; k < numVals; k++, off+=numCols) {
					int boff = _ptr[k];
					int blen = len(k);
					int bix = apos[k];
					int start = astart[k];
					//consume runs as long as the previous run ended before the block end;
					//each RLE entry is a pair (gap since end of previous run, run length)
					for( ; bix<blen & start<bimax; bix+=2) {
						start += _data[boff + bix];
						int len = _data[boff + bix+1];
						//a run is emitted completely (clipped to [rl,ru) only), 
						//even if it extends beyond the current block
						for( int i=Math.max(rl,start); i<Math.min(start+len,ru); i++ )
							for( int j=0; j<numCols; j++ )
								if( _values[off+j]!=0 )
									target.appendValue(i, _colIndexes[j], _values[off+j]);
						start += len;
					}
					//remember scan state for the next row block
					apos[k] = bix;	
					astart[k] = start;
				}
			}
		}
		else
		{
			//call generic decompression with decoder
			super.decompressToBlock(target, rl, ru);
		}
	}

	/**
	 * Decompresses all rows of this group into the target block, mapping each
	 * column of the group to the target column given by {@code colixTargets}.
	 * Only non-zero cells are appended.
	 * <p>
	 * If LOW_LEVEL_OPT is disabled or the group has at most one value, the
	 * call is delegated to the superclass implementation.
	 * 
	 * @param target output block that receives the appended values
	 * @param colixTargets target column index for every source column index
	 */
	@Override
	public void decompressToBlock(MatrixBlock target, int[] colixTargets) 
	{
		//call generic decompression with decoder
		if( !LOW_LEVEL_OPT || getNumValues() <= 1 ) {
			super.decompressToBlock(target, colixTargets);
			return;
		}
		
		final int blockRows = 128 * 1024;
		final int ncol = getNumCols();
		final int nvals = getNumValues();
		final int nrow = getNumRows();
		
		//per value: index into its RLE list and row position after the last consumed run
		final int[] runPos = new int[nvals];
		final int[] rowPos = new int[nvals];
		
		//prepare target col indexes
		final int[] targetCols = new int[ncol];
		for( int j=0; j<ncol; j++ )
			targetCols[j] = colixTargets[_colIndexes[j]];
		
		//cache conscious append via horizontal scans
		for( int blockStart=0; blockStart<nrow; blockStart+=blockRows ) {
			final int blockEnd = Math.min(blockStart+blockRows, nrow);
			for( int k=0; k<nvals; k++ ) {
				final int valOff = k * ncol;
				final int bitmapOff = _ptr[k];
				final int bitmapLen = len(k);
				int pos = runPos[k];
				if( pos >= bitmapLen )
					continue; //RLE list of this value fully consumed
				int row = rowPos[k];
				while( pos<bitmapLen && row<blockEnd ) {
					//entry pair: gap since end of previous run, run length
					row += _data[bitmapOff + pos];
					final int runEnd = row + _data[bitmapOff + pos + 1];
					for( ; row<runEnd; row++ ) {
						for( int j=0; j<ncol; j++ ) {
							final double v = _values[valOff+j];
							if( v != 0 )
								target.appendValue(row, targetCols[j], v);
						}
					}
					pos += 2;
				}
				runPos[k] = pos;
				rowPos[k] = row;
			}
		}
	}

	/**
	 * Decompresses a single column of this group into the dense block of the
	 * target and sets the number of non-zeros of the target.
	 * <p>
	 * Row positions are used directly as indexes into the dense block, i.e.,
	 * the target is treated as a dense vector with at least
	 * {@code getNumRows()} entries (assumption derived from the indexing
	 * below; not validated here).
	 * 
	 * @param target output block with allocated dense block
	 * @param colpos position of the column within this group
	 */
	@Override
	public void decompressToBlock(MatrixBlock target, int colpos) 
	{
		final int blksz = 128 * 1024;
		final int numCols = getNumCols();
		final int numVals = getNumValues();
		final int n = getNumRows();
		double[] c = target.getDenseBlock();
		
		//position and start offset arrays
		int[] astart = new int[numVals];
		int[] apos = allocIVector(numVals, true);
		
		//reset the entire output once up front; runs are not aligned to row 
		//blocks, so a per-block reset would wipe cells of runs that were 
		//already written beyond the end of the previous block
		Arrays.fill(c, 0, n, 0);
		
		//cache conscious append via horizontal scans 
		int nnz = 0;
		for( int bi=0; bi<n; bi+=blksz ) {
			int bimax = Math.min(bi+blksz, n);
			for (int k=0, off=0; k < numVals; k++, off+=numCols) {
				int boff = _ptr[k];
				int blen = len(k);
				int bix = apos[k];
				if( bix >= blen )
					continue;
				int start = astart[k];
				double val = _values[off+colpos];
				for( ; bix<blen && start<bimax; bix+=2) {
					//entry pair: gap since end of previous run, run length
					start += _data[boff + bix];
					int len = _data[boff + bix+1];
					Arrays.fill(c, start, start+len, val);
					//only cells with a non-zero value count as non-zeros
					if( val != 0 )
						nnz += len;
					start += len;
				}
				apos[k] = bix;	
				astart[k] = start;
			}
		}
		target.setNonZeros(nnz);
	}
	
	/**
	 * Computes, for every value, the number of rows covered by its RLE
	 * bitmap, i.e., the sum of all its run lengths.
	 * 
	 * @return array with one row count per value
	 */
	@Override 
	public int[] getCounts() {
		final int nvals = getNumValues();
		final int[] rowsPerValue = new int[nvals];
		
		for( int k=0; k<nvals; k++ ) {
			final int bitmapOff = _ptr[k];
			final int bitmapLen = len(k);
			int total = 0;
			//every entry pair is (gap, run length); only the lengths contribute
			for( int p=0; p<bitmapLen; p+=2 )
				total += _data[bitmapOff+p+1];
			rowsPerValue[k] = total;
		}
		
		return rowsPerValue;
	}
	
	/**
	 * Multiplies this column group with the given vector from the right and
	 * adds the contribution for rows [rl, ru) to the dense result.
	 * <p>
	 * For every value, the value tuple is first aggregated with the relevant
	 * vector entries into a single scalar, which is then added to all result
	 * rows covered by the runs of that value.
	 * 
	 * @param vector right-hand-side vector, indexed by column index
	 * @param result output block with allocated dense block, indexed by row
	 * @param rl lower row index (inclusive)
	 * @param ru upper row index (exclusive)
	 * @throws DMLRuntimeException declared by the overridden method
	 */
	@Override
	public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru)
			throws DMLRuntimeException 
	{
		double[] b = ConverterUtils.getDenseVector(vector);
		double[] c = result.getDenseBlock();
		final int numCols = getNumCols();
		final int numVals = getNumValues();
		
		//prepare reduced rhs w/ relevant values
		double[] sb = new double[numCols];
		for (int j = 0; j < numCols; j++) {
			sb[j] = b[_colIndexes[j]];
		}
		
		if( LOW_LEVEL_OPT && numVals > 1 
			&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ )
		{
			//L3 cache alignment, see comment rightMultByVector OLE column group 
			//core difference of RLE to OLE is that runs are not segment aligned,
			//which requires care of handling runs crossing cache-buckets
			final int blksz = ColGroupOffset.WRITE_CACHE_BLKSZ; 
			
			//step 1: prepare position and value arrays
			
			//current pos / values per RLE list
			int[] astart = new int[numVals];
			int[] apos = skipScan(numVals, rl, astart);
			double[] aval = preaggValues(numVals, sb);
			
			//step 2: cache conscious matrix-vector via horizontal scans 
			for( int bi=rl; bi<ru; bi+=blksz ) 
			{
				int bimax = Math.min(bi+blksz, ru);
					
				//horizontal segment scan, incl pos maintenance
				for (int k = 0; k < numVals; k++) {
					int boff = _ptr[k];
					int blen = len(k);
					double val = aval[k];
					int bix = apos[k];
					int start = astart[k];
					
					//compute partial results, not aligned
					while( bix<blen ) {
						//entry pair: gap since end of previous run, run length
						int lstart = _data[boff + bix];
						int llen = _data[boff + bix + 1];
						//add val to the part of the run inside [bi, bimax); the 
						//computed length is <= 0 if the run lies outside the block
						LinearAlgebraUtils.vectAdd(val, c, Math.max(bi, start+lstart), 
								Math.min(start+lstart+llen,bimax) - Math.max(bi, start+lstart));
						//run reaches the block end: keep it as current run so that 
						//its remainder is handled in the next block
						if(start+lstart+llen >= bimax)
							break;
						start += lstart + llen;
						bix += 2;
					}
					
					apos[k] = bix;	
					astart[k] = start;
				}
			}
		}
		else
		{
			for (int k = 0; k < numVals; k++) {
				int boff = _ptr[k];
				int blen = len(k);
				double val = sumValues(k, sb);
				int bix = 0;
				int start = 0;
				
				//scan to beginning offset if necessary 
				if( rl > 0 ) { //rl aligned with blksz	
					while( bix<blen ) {	
						int lstart = _data[boff + bix]; //start
						int llen = _data[boff + bix + 1]; //len
						//stop at the first run that ends at or after rl
						if( start+lstart+llen >= rl )
							break;
						start += lstart + llen;
						bix += 2;
					}
				}
				
				//compute partial results, not aligned
				while( bix<blen ) {
					int lstart = _data[boff + bix];
					int llen = _data[boff + bix + 1];
					//add val to the part of the run inside [rl, ru)
					LinearAlgebraUtils.vectAdd(val, c, Math.max(rl, start+lstart), 
							Math.min(start+lstart+llen,ru) - Math.max(rl, start+lstart));
					//no further run can contribute once a run reaches ru
					if(start+lstart+llen >= ru)
						break;
					start += lstart + llen;
					bix += 2;
				}
			}
		}
	}

	/**
	 * Multiplies the given row vector with this column group from the left
	 * and adds the contribution to the dense result, at the positions of the
	 * column indexes of this group.
	 * <p>
	 * For every value, the vector entries of all rows covered by the runs of
	 * that value are summed up; this sum is then scaled by each entry of the
	 * value tuple and added to the respective result column.
	 * 
	 * @param vector left-hand-side row vector, indexed by row
	 * @param result output block with allocated dense block, indexed by column
	 * @throws DMLRuntimeException declared by the overridden method
	 */
	@Override
	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result)
			throws DMLRuntimeException 
	{		
		double[] a = ConverterUtils.getDenseVector(vector);
		double[] c = result.getDenseBlock();
		final int numCols = getNumCols();
		final int numVals = getNumValues();
		final int n = getNumRows();
		
		if( LOW_LEVEL_OPT && numVals > 1 
			&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ ) 
		{
			final int blksz = ColGroupOffset.READ_CACHE_BLKSZ; 
			
			//step 1: prepare position and value arrays
			
			//current pos per RLE list / output values
			int[] astart = new int[numVals];
			int[] apos = allocIVector(numVals, true);
			double[] cvals = allocDVector(numVals, true);
			
			//step 2: cache conscious matrix-vector via horizontal scans 
			for( int ai=0; ai<n; ai+=blksz ) 
			{
				int aimax = Math.min(ai+blksz, n);
				
				//horizontal scan, incl pos maintenance
				for (int k = 0; k < numVals; k++) {
					int boff = _ptr[k];
					int blen = len(k);						
					int bix = apos[k];
					int start = astart[k];
					
					//compute partial results, not aligned
					//(a run is always summed completely, even if it extends 
					//beyond the end of the current block)
					while( bix<blen & start<aimax ) {
						//entry pair: gap since end of previous run, run length
						start += _data[boff + bix];
						int len = _data[boff + bix+1];
						cvals[k] += LinearAlgebraUtils.vectSum(a, start, len);
						start += len;
						bix += 2;
					}
					
					apos[k] = bix;	
					astart[k] = start;
				}
			}
			
			//step 3: scale partial results by values and write to global output
			for (int k = 0, valOff=0; k < numVals; k++, valOff+=numCols)
				for( int j = 0; j < numCols; j++ )
					c[ _colIndexes[j] ] += cvals[k] * _values[valOff+j];
			
		}
		else
		{
			//iterate over all values and their bitmaps
			for (int k=0, valOff=0; k<numVals; k++, valOff+=numCols) 
			{	
				int boff = _ptr[k];
				int blen = len(k);
				
				//sum of the vector entries over all runs of value k
				double vsum = 0;
				int curRunEnd = 0;
				for ( int bix = 0; bix < blen; bix+=2 ) {
					int curRunStartOff = curRunEnd + _data[boff+bix];
					int curRunLen = _data[boff+bix+1];
					vsum += LinearAlgebraUtils.vectSum(a, curRunStartOff, curRunLen);
					curRunEnd = curRunStartOff + curRunLen;
				}
				
				//scale partial results by values and write results
				for( int j = 0; j < numCols; j++ )
					c[ _colIndexes[j] ] += vsum * _values[ valOff+j ];
			}
		}
	}

	/**
	 * Multiplies a row vector, given as DDC column group, with this column
	 * group from the left and adds the contribution to the dense result.
	 * 
	 * @param a left-hand-side vector; entries are read via {@code a.getData(row, 0)}
	 * @param result output block with allocated dense block, indexed by column
	 * @throws DMLRuntimeException declared by the overridden method
	 */
	@Override
	public void leftMultByRowVector(ColGroupDDC a, MatrixBlock result)
			throws DMLRuntimeException 
	{
		//note: this method is only applicable for numrows < blocksize
		final double[] out = result.getDenseBlock();
		final int ncol = getNumCols();
		final int nvals = getNumValues();

		//iterate over all values and their bitmaps
		for( int k=0; k<nvals; k++ ) {
			final int bitmapOff = _ptr[k];
			final int bitmapLen = len(k);
			final int valOff = k * ncol;
			
			//sum up the vector entries of all rows covered by runs of value k
			double acc = 0;
			int row = 0;
			for( int p=0; p<bitmapLen; p+=2 ) {
				row += _data[bitmapOff+p];
				final int runEnd = row + _data[bitmapOff+p+1];
				while( row < runEnd ) {
					acc += a.getData(row, 0);
					row++;
				}
			}
			
			//scale partial results by values and write results
			for( int j=0; j<ncol; j++ )
				out[ _colIndexes[j] ] += acc * _values[ valOff+j ];
		}
	}
	
	/**
	 * Applies a scalar operation to this column group and returns the result
	 * as a new RLE column group.
	 * <p>
	 * If the operator is sparse-safe or maps 0 to 0, only the values change
	 * and the bitmaps are shared with this group. Otherwise, the rows not
	 * covered by any bitmap get an additional value tuple with its own RLE
	 * bitmap appended to a copy of the existing bitmaps.
	 * 
	 * @param op scalar operator
	 * @return new column group holding the result
	 * @throws DMLRuntimeException declared by the overridden method
	 */
	@Override
	public ColGroup scalarOperation(ScalarOperator op)
			throws DMLRuntimeException 
	{
		final double zeroResult = op.executeScalar(0);
		
		if( !op.sparseSafe && zeroResult != 0 ) {
			//slow path: sparse-unsafe operations (potentially create new bitmap)
			//note: for efficiency, we currently don't drop values that become 0
			final int[] zeroRows = computeOffsets(computeZeroIndicatorVector());
			
			if( zeroRows.length != 0 ) {
				final double[] newValues = applyScalarOp(op, zeroResult, getNumCols());
				final char[] zeroBitmap = BitmapEncoder.genRLEBitmap(zeroRows, zeroRows.length);
				
				//append the new bitmap behind the existing ones
				final int oldLen = _data.length;
				final char[] newData = Arrays.copyOf(_data, oldLen+zeroBitmap.length);
				System.arraycopy(zeroBitmap, 0, newData, oldLen, zeroBitmap.length);
				
				//extend the offsets by one entry pointing to the new end
				final int[] newPtr = Arrays.copyOf(_ptr, _ptr.length+1);
				newPtr[newPtr.length-1] = newData.length;
				
				return new ColGroupRLE(_colIndexes, _numRows, zeroRows.length<_numRows,
						newValues, newData, newPtr);
			}
			
			//empty offset list: go back to fast path
			return new ColGroupRLE(_colIndexes, _numRows, true,
					applyScalarOp(op), _data, _ptr);
		}
		
		//fast path: sparse-safe operations
		// Note that bitmaps don't change and are shallow-copied
		return new ColGroupRLE(_colIndexes, _numRows, _zeros,
				applyScalarOp(op), _data, _ptr);
	}

	/**
	 * Adds the sum over all cells of this group to the running sum and
	 * correction stored in cells (0,0) and (0,1) of the result.
	 * 
	 * @param result block holding sum and correction in its first row
	 * @param kplus aggregation function, applied per value as (value, count)
	 */
	@Override
	protected final void computeSum(MatrixBlock result, KahanFunction kplus)
	{
		final KahanObject acc = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
		
		final int ncol = getNumCols();
		final int nvals = getNumValues();
		
		for( int k=0, valOff=0; k<nvals; k++, valOff+=ncol ) {
			final int bitmapOff = _ptr[k];
			final int bitmapLen = len(k);
			
			//number of rows of value k = sum of its run lengths
			int rows = 0;
			for( int p=0; p<bitmapLen; p+=2 )
				rows += _data[bitmapOff+p+1];
			
			//scale counts by all values
			for( int j=0; j<ncol; j++ )
				kplus.execute3(acc, _values[ valOff+j ], rows);
		}
		
		result.quickSetValue(0, 0, acc._sum);
		result.quickSetValue(0, 1, acc._correction);
	}

	/**
	 * Adds the row sums of this group for rows [rl, ru) to the dense result,
	 * which holds two cells per row: the sum at {@code c[2*row]} and its
	 * correction at {@code c[2*row+1]}.
	 * <p>
	 * For every value, the value tuple is aggregated into a single scalar,
	 * which is then added to all result rows covered by runs of that value.
	 * Only rows inside [rl, ru) are updated, even if a run starts before rl
	 * or ends after ru.
	 * 
	 * @param result output block with allocated dense block
	 * @param kplus aggregation function used to aggregate the value tuples
	 * @param rl lower row index (inclusive)
	 * @param ru upper row index (exclusive)
	 */
	@Override
	protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru)
	{
		KahanObject kbuff = new KahanObject(0, 0);
		KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
		
		final int numVals = getNumValues();
		double[] c = result.getDenseBlock();
		
		if( ALLOW_CACHE_CONSCIOUS_ROWSUMS 
			&& LOW_LEVEL_OPT && numVals > 1 
			&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ )
		{
			final int blksz = ColGroupOffset.WRITE_CACHE_BLKSZ/2; 
			
			//step 1: prepare position and value arrays
			
			//current pos / values per RLE list
			int[] astart = new int[numVals];
			int[] apos = skipScan(numVals, rl, astart);
			double[] aval = sumAllValues(kplus, kbuff, false);
			
			//step 2: cache conscious matrix-vector via horizontal scans 
			for( int bi=rl; bi<ru; bi+=blksz ) 
			{
				int bimax = Math.min(bi+blksz, ru);
					
				//horizontal segment scan, incl pos maintenance
				for (int k = 0; k < numVals; k++) {
					int boff = _ptr[k];
					int blen = len(k);
					double val = aval[k];
					int bix = apos[k];
					int start = astart[k];
					
					//compute partial results, not aligned
					while( bix<blen ) {
						//entry pair: gap since end of previous run, run length
						int lstart = _data[boff + bix];
						int llen = _data[boff + bix + 1];
						//part of the run inside the current block [bi, bimax)
						int from = Math.max(bi, start+lstart);
						int to = Math.min(start+lstart+llen,bimax);
						for (int rix=from; rix<to; rix++) {
							kbuff.set(c[2*rix], c[2*rix+1]);
							kplus2.execute2(kbuff, val);
							c[2*rix] = kbuff._sum;
							c[2*rix+1] = kbuff._correction;
						}
						//run reaches the block end: keep it as current run so 
						//that its remainder is handled in the next block
						if(start+lstart+llen >= bimax)
							break;
						start += lstart + llen;
						bix += 2;
					}
					
					apos[k] = bix;	
					astart[k] = start;
				}
			}
		}
		else
		{
			for (int k = 0; k < numVals; k++) {
				int boff = _ptr[k];
				int blen = len(k);
				double val = sumValues(k, kplus, kbuff);
						
				if (val != 0.0) {
					//skip all runs that end before rl
					Pair<Integer,Integer> tmp = skipScanVal(k, rl);
					int bix = tmp.getKey();
					int curRunStartOff = tmp.getValue();
					int curRunEnd = tmp.getValue();
					for ( ; bix<blen && curRunEnd<ru; bix+=2) {
						curRunStartOff = curRunEnd + _data[boff+bix];
						curRunEnd = curRunStartOff + _data[boff+bix+1];
						//clip the run to [rl, ru): the first run after the skip scan 
						//may start before rl, and those rows must not be updated here
						for (int rix=Math.max(curRunStartOff, rl); rix<curRunEnd && rix<ru; rix++) {
							kbuff.set(c[2*rix], c[2*rix+1]);
							kplus2.execute2(kbuff, val);
							c[2*rix] = kbuff._sum;
							c[2*rix+1] = kbuff._correction;
						}
					}
				}
			}
		}
	}

	/**
	 * Adds the column sums of this group to the result, which holds the sums
	 * in row 0 and the corrections in row 1, at the column indexes of this
	 * group.
	 * 
	 * @param result block holding sums and corrections
	 * @param kplus aggregation function, applied per value as (value, count)
	 */
	@Override
	protected final void computeColSums(MatrixBlock result, KahanFunction kplus)
	{
		final KahanObject acc = new KahanObject(0, 0);
		
		final int ncol = getNumCols();
		final int nvals = getNumValues();
		
		for( int k=0, valOff=0; k<nvals; k++, valOff+=ncol ) {
			final int bitmapOff = _ptr[k];
			final int bitmapLen = len(k);
			
			//number of rows of value k = sum of its run lengths
			int rows = 0;
			for( int p=0; p<bitmapLen; p+=2 )
				rows += _data[bitmapOff+p+1];
			
			//scale counts by all values
			for( int j=0; j<ncol; j++ ) {
				final int col = _colIndexes[j];
				acc.set(result.quickGetValue(0, col), result.quickGetValue(1, col));
				kplus.execute3(acc, _values[ valOff+j ], rows);
				result.quickSetValue(0, col, acc._sum);
				result.quickSetValue(1, col, acc._correction);
			}
		}
	}

	/**
	 * Combines, for rows [rl, ru), the current result of every row with the
	 * aggregate of the value tuple covering that row, using the given builtin
	 * function. Only rows inside [rl, ru) are updated, even if a run starts
	 * before rl or ends after ru.
	 * 
	 * @param result output block with allocated dense block, indexed by row
	 * @param builtin builtin function used for aggregation
	 * @param rl lower row index (inclusive)
	 * @param ru upper row index (exclusive)
	 */
	@Override
	protected final void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru)
	{
		//NOTE: zeros handled once for all column groups outside
		final int numVals = getNumValues();
		double[] c = result.getDenseBlock();
		
		for (int k = 0; k < numVals; k++) {
			int boff = _ptr[k];
			int blen = len(k);
			double val = mxxValues(k, builtin);
			
			//skip all runs that end before rl
			Pair<Integer,Integer> tmp = skipScanVal(k, rl);
			int bix = tmp.getKey();
			int curRunStartOff = tmp.getValue();
			int curRunEnd = tmp.getValue();
			for(; bix < blen && curRunEnd < ru; bix+=2) {
				//entry pair: gap since end of previous run, run length
				curRunStartOff = curRunEnd + _data[boff+bix];
				curRunEnd = curRunStartOff + _data[boff+bix+1];
				//clip the run to [rl, ru): the first run after the skip scan 
				//may start before rl, and those rows must not be written here
				for (int rix=Math.max(curRunStartOff, rl); rix<curRunEnd && rix<ru; rix++)
					c[rix] = builtin.execute2(c[rix], val);
			}
		}
	}
	
	/**
	 * Computes an indicator vector that is true for every row that is not
	 * covered by a run of any value of this group.
	 * 
	 * @return boolean array with one entry per row
	 * @throws DMLRuntimeException declared in the signature; not thrown by
	 *         the visible code of this method
	 */
	public boolean[] computeZeroIndicatorVector()
		throws DMLRuntimeException 
	{	
		final boolean[] isZeroRow = new boolean[_numRows];
		final int nvals = getNumValues();

		//initially mark all rows as zero rows
		Arrays.fill(isZeroRow, true);
		
		//unmark all rows covered by any run
		for( int k=0; k<nvals; k++ ) {
			final int bitmapOff = _ptr[k];
			final int bitmapLen = len(k);
			
			int row = 0;
			for( int p=0; p<bitmapLen; p+=2 ) {
				final int runStart = row + _data[bitmapOff+p];
				row = runStart + _data[bitmapOff+p+1];
				Arrays.fill(isZeroRow, runStart, row, false);
			}
		}
		
		return isZeroRow;
	}
	
	/**
	 * Adds the number of columns of this group to the counter of every row
	 * in [rl, ru) that is covered by a run of any value.
	 * 
	 * @param rnnz per-row counters, indexed relative to rl (entry 0 is row rl)
	 * @param rl lower row index (inclusive)
	 * @param ru upper row index (exclusive)
	 */
	@Override
	protected void countNonZerosPerRow(int[] rnnz, int rl, int ru)
	{
		final int numVals = getNumValues();
		final int numCols = getNumCols();
		
		//current pos / start offsets per RLE list
		int[] astart = new int[numVals];
		int[] apos = skipScan(numVals, rl, astart);
		
		for (int k = 0; k < numVals; k++) {
			int boff = _ptr[k];
			int blen = len(k);
			int bix = apos[k];
			
			//resume at the row position reached by the skip scan; the gaps 
			//of the remaining entries are relative to this position
			int curRunStartOff = astart[k];
			int curRunEnd = astart[k];
			for( ; bix < blen && curRunStartOff<ru; bix+=2) {
				//entry pair: gap since end of previous run, run length
				curRunStartOff = curRunEnd + _data[boff+bix];
				curRunEnd = curRunStartOff + _data[boff+bix + 1];
				//count only the part of the run inside [rl, ru)
				for( int i=Math.max(curRunStartOff,rl); i<Math.min(curRunEnd, ru); i++ )
					rnnz[i-rl] += numCols;
			}
		}
	}
	
	/////////////////////////////////
	// internal helper functions

	
	/**
	 * Scans the RLE lists of all values up to the given lower row position.
	 * For every value, the scan stops at the first run that ends at or after
	 * rl. The index of this run in the RLE list is returned, and the row
	 * position at the end of the preceding run is written to the given array
	 * of start positions. For rl==0, the start positions are not modified.
	 * 
	 * @param numVals number of values
	 * @param rl lower row position
	 * @param astart start positions (output, one entry per value)
	 * @return array of positions for all values
	 */
	private int[] skipScan(int numVals, int rl, int[] astart) {
		final int[] apos = allocIVector(numVals, rl==0);
		
		//nothing to skip
		if( rl <= 0 )
			return apos;
		
		//rl aligned with blksz
		for( int k=0; k<numVals; k++ ) {
			final int bitmapOff = _ptr[k];
			final int bitmapLen = len(k);
			int pos = 0;
			int row = 0;
			for( ; pos<bitmapLen; pos+=2 ) {
				//entry pair: gap since end of previous run, run length
				final int runEnd = row + _data[bitmapOff + pos] + _data[bitmapOff + pos + 1];
				if( runEnd >= rl )
					break;
				row = runEnd;
			}
			apos[k] = pos;
			astart[k] = row;
		}
		
		return apos;
	}

	/**
	 * Scans the RLE list of a single value up to the given lower row
	 * position. The scan stops at the first run that ends at or after rl.
	 * 
	 * @param k index of the value
	 * @param rl lower row position
	 * @return pair of the index of that run in the RLE list (key) and the row
	 *         position at the end of the preceding run (value); (0,0) for rl==0
	 */
	private Pair<Integer,Integer> skipScanVal(int k, int rl) {
		int pos = 0;
		int row = 0;
		
		if( rl > 0 ) { //rl aligned with blksz
			final int bitmapOff = _ptr[k];
			final int bitmapLen = len(k);
			for( ; pos<bitmapLen; pos+=2 ) {
				//entry pair: gap since end of previous run, run length
				final int runEnd = row + _data[bitmapOff + pos] + _data[bitmapOff + pos + 1];
				if( runEnd >= rl )
					break;
				row = runEnd;
			}
		}
		
		return new Pair<Integer,Integer>(pos, row);
	}
}