/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.compress;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.Serializable;
import java.util.List;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
/**
 * Base class that stores information about a column group within a compressed
 * matrix block. There are subclasses specific to each compression type (see
 * {@link CompressionType}).
 *
 * <p>This class only holds the state common to all column groups: the
 * zero-based offsets of the covered columns and the number of rows. All
 * compression-specific operations are declared abstract.
 */
public abstract class ColGroup implements Serializable
{
    private static final long serialVersionUID = 2439785418908671481L;

    /** Compression formats a column group can be stored in. */
    public enum CompressionType {
        UNCOMPRESSED, //uncompressed sparse/dense
        RLE_BITMAP,   //RLE bitmap
        OLE_BITMAP,   //OLE bitmap
        DDC1,         //DDC 1 byte
        DDC2;         //DDC 2 byte
    }

    /**
     * Offsets of the columns that make up the column group. Zero-based, and
     * relative to the matrix block.
     */
    protected int[] _colIndexes;

    /** Number of rows in the matrix, for use by child classes. */
    protected int _numRows;

    /**
     * Main constructor.
     *
     * <p>Note: the given array is stored as-is (no defensive copy), so later
     * changes to it by the caller are visible to this column group and vice
     * versa.
     *
     * @param colIndices
     *            offsets of the columns in the matrix block that make up the
     *            group
     * @param numRows
     *            total number of rows in the parent block
     */
    protected ColGroup(int[] colIndices, int numRows) {
        _colIndexes = colIndices;
        _numRows = numRows;
    }

    /**
     * Convenience constructor for converting indices to a more compact format,
     * i.e., the boxed list entries are unboxed into a newly allocated
     * {@code int[]} in list iteration order.
     *
     * @param colIndicesList list of column indices
     * @param numRows total number of rows in the parent block
     */
    protected ColGroup(List<Integer> colIndicesList, int numRows) {
        _colIndexes = new int[colIndicesList.size()];
        int i = 0;
        for (Integer index : colIndicesList) {
            _colIndexes[i++] = index;
        }
        _numRows = numRows;
    }

    /**
     * Obtain the offsets of the columns in the matrix block that make up the group
     *
     * <p>Note: this returns the internal array itself, not a copy.
     *
     * @return offsets of the columns in the matrix block that make up the group
     */
    public int[] getColIndices() {
        return _colIndexes;
    }

    /**
     * Obtain a column index value.
     *
     * @param colNum column number, i.e., position within this column group
     * @return column index value
     */
    public int getColIndex(int colNum) {
        return _colIndexes[colNum];
    }

    /**
     * Obtain the number of rows this column group was created with.
     *
     * @return number of rows
     */
    public int getNumRows() {
        return _numRows;
    }

    /**
     * Obtain the number of columns in this column group.
     *
     * @return number of columns in this column group
     */
    public int getNumCols() {
        return _colIndexes.length;
    }

    /**
     * Obtain the compression type.
     *
     * @return How the elements of the column group are compressed.
     */
    public abstract CompressionType getCompType();

    /**
     * Shifts all column indices of this column group by the given offset.
     * The internal index array is modified in place, so the change is also
     * visible through any reference previously obtained via
     * {@link #getColIndices()} or passed to the main constructor.
     *
     * @param offset value added to every column index (may be negative)
     */
    public void shiftColIndices(int offset) {
        for( int i=0; i<_colIndexes.length; i++ ) {
            _colIndexes[i] += offset;
        }
    }

    /**
     * Note: Must be overridden by child classes to account for additional data
     * and metadata
     *
     * @return an upper bound on the number of bytes used to store this ColGroup
     *         in memory.
     */
    public long estimateInMemorySize() {
        // object (12B padded to factors of 8), int numRows (4B),
        // array reference colIndices (8B)
        //+ array object overhead if exists (32B) + 4B per element
        long size = 24;
        if( _colIndexes == null ) {
            return size;
        }
        // multiply in long arithmetic: 4 * length as int would silently
        // overflow for arrays with more than 2^29 elements
        return size + 32 + 4L * _colIndexes.length;
    }

    /**
     * Decompress the contents of this column group into the specified full
     * matrix block.
     *
     * @param target
     *            a matrix block where the columns covered by this column group
     *            have not yet been filled in.
     * @param rl row lower
     * @param ru row upper
     */
    public abstract void decompressToBlock(MatrixBlock target, int rl, int ru);

    /**
     * Decompress the contents of this column group into uncompressed packed
     * columns
     *
     * @param target
     *            a dense matrix block. The block must have enough space to hold
     *            the contents of this column group.
     * @param colIndexTargets
     *            array that maps column indices in the original matrix block to
     *            columns of target.
     */
    public abstract void decompressToBlock(MatrixBlock target, int[] colIndexTargets);

    /**
     * Decompress a single column of this column group.
     *
     * @param target dense output vector
     * @param colpos column to decompress, error if larger or equal numCols
     */
    public abstract void decompressToBlock(MatrixBlock target, int colpos);

    /**
     * Serializes column group to data output.
     *
     * @param out data output
     * @throws IOException if IOException occurs
     */
    public abstract void write(DataOutput out)
        throws IOException;

    /**
     * Deserializes column group from data input.
     *
     * @param in data input
     * @throws IOException if IOException occurs
     */
    public abstract void readFields(DataInput in)
        throws IOException;

    /**
     * Returns the exact serialized size of column group.
     * This can be used for example for buffer preallocation.
     *
     * @return exact serialized size for column group
     */
    public abstract long getExactSizeOnDisk();

    /**
     * Get the value at a global row/column position.
     *
     * @param r row
     * @param c column
     * @return value at the row/column position
     */
    public abstract double get(int r, int c);

    /**
     * Multiply the slice of the matrix that this column group represents by a
     * vector on the right.
     *
     * @param vector
     *            vector to multiply by (tall vector)
     * @param result
     *            accumulator for holding the result
     * @param rl row lower
     * @param ru row upper
     * @throws DMLRuntimeException
     *             if the internal SystemML code that performs the
     *             multiplication experiences an error
     */
    public abstract void rightMultByVector(MatrixBlock vector,
        MatrixBlock result, int rl, int ru) throws DMLRuntimeException;

    /**
     * Multiply the slice of the matrix that this column group represents by a
     * row vector on the left (the original column vector is assumed to be
     * transposed already i.e. its size now is 1xn).
     *
     * @param vector row vector
     * @param result matrix block result
     * @throws DMLRuntimeException if DMLRuntimeException occurs
     */
    public abstract void leftMultByRowVector(MatrixBlock vector,
        MatrixBlock result) throws DMLRuntimeException;

    /**
     * Perform the specified scalar operation directly on the compressed column
     * group, without decompressing individual cells if possible.
     *
     * @param op
     *            operation to perform
     * @return version of this column group with the operation applied
     * @throws DMLRuntimeException if DMLRuntimeException occurs
     */
    public abstract ColGroup scalarOperation(ScalarOperator op)
        throws DMLRuntimeException;

    /**
     * Perform the specified unary aggregate operation on this column group.
     * The exact way the outcome is stored in {@code result} is defined by the
     * implementing subclasses.
     *
     * @param op unary aggregate operation to perform
     * @param result matrix block that receives the aggregation output
     * @throws DMLRuntimeException if DMLRuntimeException occurs
     */
    public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result)
        throws DMLRuntimeException;

    /**
     * Count the number of non-zeros per row
     *
     * @param rnnz non-zeros per row
     * @param rl row lower bound, inclusive
     * @param ru row upper bound, exclusive
     */
    protected abstract void countNonZerosPerRow(int[] rnnz, int rl, int ru);
}