/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.sysml.runtime.compress; import java.util.ArrayList; import org.apache.sysml.runtime.compress.utils.DblArray; import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap; import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap; import org.apache.sysml.runtime.compress.utils.IntArrayList; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.SparseBlock; /** * Static functions for encoding bitmaps in various ways. * */ public class BitmapEncoder { /** Size of the blocks used in a blocked bitmap representation. */ public static final int BITMAP_BLOCK_SZ = 65536; /** * Generate uncompressed bitmaps for a set of columns in an uncompressed * matrix block. * * @param colIndices * indexes (within the block) of the columns to extract * @param rawblock * an uncompressed matrix block; can be dense or sparse * @return uncompressed bitmap representation of the columns */ public static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawblock) { //note: no sparse column selection reader because low potential //single column selection if( colIndices.length==1 ) { return extractBitmap(colIndices[0], rawblock, !CompressedMatrixBlock.MATERIALIZE_ZEROS); } //multiple column selection (general case) else { ReaderColumnSelection reader = null; if( rawblock.isInSparseFormat() && CompressedMatrixBlock.TRANSPOSE_INPUT ) reader = new ReaderColumnSelectionSparse(rawblock, colIndices, !CompressedMatrixBlock.MATERIALIZE_ZEROS); else reader = new ReaderColumnSelectionDense(rawblock, colIndices, !CompressedMatrixBlock.MATERIALIZE_ZEROS); return extractBitmap(colIndices, rawblock, reader); } } public static UncompressedBitmap extractBitmapFromSample(int[] colIndices, MatrixBlock rawblock, int[] sampleIndexes) { //note: no sparse column selection reader because low potential //single column selection if( colIndices.length==1 ) { return extractBitmap(colIndices[0], rawblock, sampleIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS); } //multiple column selection (general case) else { return extractBitmap(colIndices, rawblock, new ReaderColumnSelectionDenseSample(rawblock, colIndices, sampleIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS)); } } /** * Encodes the bitmap as a series of run lengths and offsets. * <p> * <b>NOTE: This method must be kept in sync with {@link BitmapDecoderRLE} * !</b> * * @param offsets uncompressed offset list * @param len logical length of the given offset list * * @return compressed version of said bitmap */ public static char[] genRLEBitmap(int[] offsets, int len) { if( len == 0 ) return new char[0]; //empty list // Use an ArrayList for correctness at the expense of temp space ArrayList<Character> buf = new ArrayList<Character>(); // 1 + (position of last 1 in the previous run of 1's) // We add 1 because runs may be of length zero. int lastRunEnd = 0; // Offset between the end of the previous run of 1's and the first 1 in // the current run. Initialized below. int curRunOff; // Length of the most recent run of 1's int curRunLen = 0; // Current encoding is as follows: // Negative entry: abs(Entry) encodes the offset to the next lone 1 bit. // Positive entry: Entry encodes offset to next run of 1's. The next // entry in the bitmap holds a run length. // Special-case the first run to simplify the loop below. int firstOff = offsets[0]; // The first run may start more than a short's worth of bits in while (firstOff > Character.MAX_VALUE) { buf.add(Character.MAX_VALUE); buf.add((char) 0); firstOff -= Character.MAX_VALUE; lastRunEnd += Character.MAX_VALUE; } // Create the first run with an initial size of 1 curRunOff = firstOff; curRunLen = 1; // Process the remaining offsets for (int i = 1; i < len; i++) { int absOffset = offsets[i]; // 1 + (last position in run) int curRunEnd = lastRunEnd + curRunOff + curRunLen; if (absOffset > curRunEnd || curRunLen >= Character.MAX_VALUE) { // End of a run, either because we hit a run of 0's or because the // number of 1's won't fit in 16 bits. Add run to bitmap and start a new one. buf.add((char) curRunOff); buf.add((char) curRunLen); lastRunEnd = curRunEnd; curRunOff = absOffset - lastRunEnd; while (curRunOff > Character.MAX_VALUE) { // SPECIAL CASE: Offset to next run doesn't fit into 16 bits. // Add zero-length runs until the offset is small enough. buf.add(Character.MAX_VALUE); buf.add((char) 0); lastRunEnd += Character.MAX_VALUE; curRunOff -= Character.MAX_VALUE; } curRunLen = 1; } else { // Middle of a run curRunLen++; } } // Close out the last run if (curRunLen >= 1) { buf.add((char) curRunOff); buf.add((char) curRunLen); } // Convert wasteful ArrayList to packed array. char[] ret = new char[buf.size()]; for(int i = 0; i < buf.size(); i++ ) ret[i] = buf.get(i); return ret; } /** * Encodes the bitmap in blocks of offsets. Within each block, the bits are * stored as absolute offsets from the start of the block. * * @param offsets uncompressed offset list * @param len logical length of the given offset list * * @return compressed version of said bitmap */ public static char[] genOffsetBitmap(int[] offsets, int len) { int lastOffset = offsets[len - 1]; // Build up the blocks int numBlocks = (lastOffset / BITMAP_BLOCK_SZ) + 1; // To simplify the logic, we make two passes. // The first pass divides the offsets by block. int[] blockLengths = new int[numBlocks]; for (int ix = 0; ix < len; ix++) { int val = offsets[ix]; int blockForVal = val / BITMAP_BLOCK_SZ; blockLengths[blockForVal]++; } // The second pass creates the blocks. int totalSize = numBlocks; for (int block = 0; block < numBlocks; block++) { totalSize += blockLengths[block]; } char[] encodedBlocks = new char[totalSize]; int inputIx = 0; int blockStartIx = 0; for (int block = 0; block < numBlocks; block++) { int blockSz = blockLengths[block]; // First entry in the block is number of bits encodedBlocks[blockStartIx] = (char) blockSz; for (int i = 0; i < blockSz; i++) { encodedBlocks[blockStartIx + i + 1] = (char) (offsets[inputIx+i] % BITMAP_BLOCK_SZ); } inputIx += blockSz; blockStartIx += blockSz + 1; } return encodedBlocks; } private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, boolean skipZeros) { //probe map for distinct items (for value or value groups) DoubleIntListHashMap distinctVals = new DoubleIntListHashMap(); //scan rows and probe/build distinct items final int m = CompressedMatrixBlock.TRANSPOSE_INPUT ? rawblock.getNumColumns():rawblock.getNumRows(); if( rawblock.isInSparseFormat() //SPARSE && CompressedMatrixBlock.TRANSPOSE_INPUT ) { SparseBlock a = rawblock.getSparseBlock(); if( a != null && !a.isEmpty(colIndex) ) { int apos = a.pos(colIndex); int alen = a.size(colIndex); int[] aix = a.indexes(colIndex); double[] avals = a.values(colIndex); IntArrayList lstPtr0 = new IntArrayList(); //for 0 values int last = -1; //iterate over non-zero entries but fill in zeros for( int j=apos; j<apos+alen; j++ ) { //fill in zero values if( !skipZeros ) for( int k=last+1; k<aix[j]; k++ ) lstPtr0.appendValue(k); //handle non-zero value IntArrayList lstPtr = distinctVals.get(avals[j]); if( lstPtr == null ) { lstPtr = new IntArrayList(); distinctVals.appendValue(avals[j], lstPtr); } lstPtr.appendValue(aix[j]); last = aix[j]; } //fill in remaining zero values if( !skipZeros ) { for( int k=last+1; k<m; k++ ) lstPtr0.appendValue(k); if( lstPtr0.size()>0 ) distinctVals.appendValue(0, lstPtr0); } } else if( !skipZeros ) { //full 0 column IntArrayList lstPtr = new IntArrayList(); for( int i=0; i<m; i++ ) lstPtr.appendValue(i); distinctVals.appendValue(0, lstPtr); } } else //GENERAL CASE { for( int i=0; i<m; i++ ) { double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? rawblock.quickGetValue(colIndex, i): rawblock.quickGetValue(i, colIndex); if( val!=0 || !skipZeros ) { IntArrayList lstPtr = distinctVals.get(val); if( lstPtr == null ) { lstPtr = new IntArrayList(); distinctVals.appendValue(val, lstPtr); } lstPtr.appendValue(i); } } } return new UncompressedBitmap(distinctVals); } private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, int[] sampleIndexes, boolean skipZeros) { //note: general case only because anyway binary search for small samples //probe map for distinct items (for value or value groups) DoubleIntListHashMap distinctVals = new DoubleIntListHashMap(); //scan rows and probe/build distinct items final int m = sampleIndexes.length; for( int i=0; i<m; i++ ) { int rowIndex = sampleIndexes[i]; double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? rawblock.quickGetValue(colIndex, rowIndex) : rawblock.quickGetValue(rowIndex, colIndex); if( val!=0 || !skipZeros ) { IntArrayList lstPtr = distinctVals.get(val); if( lstPtr == null ) { lstPtr = new IntArrayList(); distinctVals.appendValue(val, lstPtr); } lstPtr.appendValue(i); } } return new UncompressedBitmap(distinctVals); } private static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawblock, ReaderColumnSelection rowReader) { //probe map for distinct items (for value or value groups) DblArrayIntListHashMap distinctVals = new DblArrayIntListHashMap(); //scan rows and probe/build distinct items DblArray cellVals = null; while ((cellVals = rowReader.nextRow()) != null) { IntArrayList lstPtr = distinctVals.get(cellVals); if (lstPtr == null) { //create new objects only on demand lstPtr = new IntArrayList(); distinctVals.appendValue(new DblArray(cellVals), lstPtr); } lstPtr.appendValue(rowReader.getCurrentRowIndex()); } return new UncompressedBitmap(distinctVals, colIndices.length); } }