/**
* (C) Copyright IBM Corp. 2010, 2015
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.ibm.bi.dml.runtime.matrix.data;
import java.io.Serializable;
import java.util.Arrays;
import com.ibm.bi.dml.runtime.util.SortUtils;
/**
 * Sparse representation of a single matrix row, stored as two parallel arrays
 * of column indexes and values. Only the first {@link #size()} entries of both
 * arrays are valid; the remainder is spare capacity.
 *
 * All lookup-based methods ({@link #get(int)}, {@link #set(int, double)},
 * {@link #delete(int)}, the {@code searchIndexes*} methods and the range
 * operations) use binary search over the index array and therefore require
 * the valid entries to be sorted by ascending column index.
 * {@link #append(int, double)} does not enforce this order; call
 * {@link #sort()} before using lookups on rows built via unordered appends.
 */
public class SparseRow implements Serializable
{
    private static final long serialVersionUID = 5806895317005796456L;

    //initial capacity of any created sparse row
    //WARNING: be aware that this affects the core memory estimates (incl. implicit assumptions)!
    public static final int initialCapacity = 4;

    //growth doubles until this many slots are allocated (see newCapacity)
    private int estimatedNzs = initialCapacity;
    //upper bound applied to the 1.1x growth phase (see newCapacity)
    private int maxNzs = Integer.MAX_VALUE;
    //number of valid entries in values/indexes
    private int size = 0;
    private double[] values = null;
    private int[] indexes = null;

    /**
     * Creates an empty row with exactly the given capacity, which is also
     * used as the estimated number of non-zeros.
     *
     * @param capacity number of slots to allocate
     */
    public SparseRow(int capacity)
    {
        estimatedNzs = capacity;
        values = new double[capacity];
        indexes = new int[capacity];
    }

    /**
     * Creates an empty row. The allocated capacity is {@code estnnz} if it
     * lies strictly between 0 and {@link #initialCapacity}, and
     * {@link #initialCapacity} otherwise.
     *
     * @param estnnz estimated number of non-zeros; only recorded if larger
     *               than {@link #initialCapacity}
     * @param maxnnz maximum number of non-zeros used to bound capacity growth
     */
    public SparseRow(int estnnz, int maxnnz)
    {
        if( estnnz > initialCapacity ) {
            estimatedNzs = estnnz;
        }
        maxNzs = maxnnz;
        int capacity = (estnnz > 0 && estnnz < initialCapacity) ?
            estnnz : initialCapacity;
        values = new double[capacity];
        indexes = new int[capacity];
    }

    /**
     * Copy constructor. Copies the entries of the given row into newly
     * allocated arrays of capacity {@code max(initialCapacity, that.size())};
     * the estimated and maximum non-zero settings keep their defaults.
     *
     * @param that row to copy
     */
    public SparseRow(SparseRow that)
    {
        size = that.size;
        int cap = Math.max(initialCapacity, that.size);

        //allocate arrays and copy new values
        values = Arrays.copyOf(that.values, cap);
        indexes = Arrays.copyOf(that.indexes, cap);
    }

    /**
     * Shrinks the number of valid entries without touching the arrays.
     *
     * @param newsize new size, must be in {@code [0, size()]}
     * @throws RuntimeException if {@code newsize} is negative or larger than the current size
     */
    public void truncate(int newsize)
    {
        if( newsize>size || newsize<0 ) {
            throw new RuntimeException("truncate size: "+newsize+" should <= size: "+size+" and >=0");
        }
        size = newsize;
    }

    /**
     * @return number of valid entries
     */
    public int size()
    {
        return size;
    }

    /**
     * Sets the number of valid entries without any validation.
     *
     * @param newsize new number of valid entries
     */
    public void setSize(int newsize)
    {
        size = newsize;
    }

    /**
     * @return true if the row holds no entries
     */
    public boolean isEmpty()
    {
        return (size == 0);
    }

    /**
     * @return the internal value array (not a copy); only the first
     *         {@link #size()} entries are valid
     */
    public double[] getValueContainer()
    {
        return values;
    }

    /**
     * @return the internal column index array (not a copy); only the first
     *         {@link #size()} entries are valid
     */
    public int[] getIndexContainer()
    {
        return indexes;
    }

    /**
     * Replaces the internal value array; the array is used as is (no copy).
     *
     * @param d new value array
     */
    public void setValueContainer(double[] d) {
        values = d;
    }

    /**
     * Replaces the internal column index array; the array is used as is (no copy).
     *
     * @param i new column index array
     */
    public void setIndexContainer(int[] i) {
        indexes = i;
    }

    /**
     * @return number of allocated slots (length of the value array)
     */
    public int capacity()
    {
        return values.length;
    }

    /**
     * Overwrites the entries of this row with the entries of the given row.
     * The arrays are reallocated only if the current capacity is too small.
     *
     * @param that row to copy the entries from
     */
    public void copy(SparseRow that)
    {
        //note: no recap (if required) + copy, in order to prevent unnecessary copy of old values
        //in case we have to reallocate the arrays
        if( values.length < that.size ) {
            //reallocate arrays and copy new values
            values = Arrays.copyOf(that.values, that.size);
            indexes = Arrays.copyOf(that.indexes, that.size);
        }
        else {
            //copy new values into existing arrays
            System.arraycopy(that.values, 0, values, 0, that.size);
            System.arraycopy(that.indexes, 0, indexes, 0, that.size);
        }
        size = that.size;
    }

    /**
     * Logically clears the row (size 0) and updates the growth parameters;
     * the allocated arrays are kept.
     *
     * @param estnns new estimated number of non-zeros
     * @param maxnns new maximum number of non-zeros
     */
    public void reset(int estnns, int maxnns)
    {
        estimatedNzs = estnns;
        maxNzs = maxnns;
        size = 0;
    }

    /**
     * Grows the arrays to the given capacity while preserving their content;
     * does nothing if the current capacity is already sufficient.
     *
     * @param newCap requested capacity
     */
    public void recap(int newCap)
    {
        if( newCap<=values.length ) {
            return;
        }

        //reallocate arrays and copy old values
        values = Arrays.copyOf(values, newCap);
        indexes = Arrays.copyOf(indexes, newCap);
    }

    /**
     * Heuristic for resizing:
     * doubling before capacity reaches estimated nonzeros, then 1.1x after that
     * (bounded by the maximum number of nonzeros); for default behavior: always 1.1x
     * (both with exponential size increase for log N steps of reallocation and shifting)
     *
     * @return the capacity to use for the next reallocation
     */
    private int newCapacity()
    {
        final int oldCap = values.length;
        long newCap;
        if( oldCap < estimatedNzs ) {
            //long arithmetic prevents int overflow of the doubling for very large rows
            newCap = Math.min((long)estimatedNzs, 2L*oldCap);
        }
        else {
            newCap = (long) Math.min((double)maxNzs, Math.ceil(oldCap*1.1));
        }

        //both growth factors map a zero-length array to zero again, hence
        //guarantee at least one additional slot as long as the maximum allows it
        if( newCap <= oldCap && oldCap < maxNzs ) {
            newCap = oldCap + 1L;
        }
        return (int) newCap;
    }

    /**
     * In-place compaction of non-zero-entries; removes zero entries and
     * shifts non-zero entries to the left if necessary.
     */
    public void compact()
    {
        int nnz = 0;
        for( int i=0; i<size; i++ ) {
            if( values[i] != 0 ) {
                values[nnz] = values[i];
                indexes[nnz] = indexes[i];
                nnz++;
            }
        }
        size = nnz; //adjust row size
    }

    /**
     * Sets the value of the given column. Setting an existing entry to zero
     * removes it; setting a missing entry to zero is a no-op.
     *
     * @param col column index
     * @param v new value
     * @return true if the number of entries changed (entry inserted or
     *         deleted), false if an existing entry was overwritten or
     *         nothing was done
     */
    public boolean set(int col, double v)
    {
        //search for existing col index
        int index = Arrays.binarySearch(indexes, 0, size, col);
        if( index >= 0 ) {
            //delete/overwrite existing value (on value delete, we shift
            //left for (1) correct nnz maintenance, and (2) smaller size)
            if( v == 0 ) {
                shiftLeftAndDelete(index);
                return true; // nnz--
            }
            values[index] = v;
            return false;
        }

        //early abort on zero (if no overwrite)
        if( v==0.0 ) {
            return false;
        }

        //insert new index-value pair at the insertion point
        //(binary search encodes it as -(insertion point)-1)
        index = -(index+1);
        if( size==values.length ) {
            resizeAndInsert(index, col, v);
        }
        else {
            shiftRightAndInsert(index, col, v);
        }
        return true; // nnz++
    }

    /**
     * Appends an index-value pair at the end of the row in constant time
     * (apart from reallocation); zero values are ignored. The column order
     * is not checked.
     *
     * @param col column index
     * @param v value
     */
    public void append(int col, double v)
    {
        //early abort on zero
        if( v==0.0 ) {
            return;
        }

        //resize if required
        if( size==values.length ) {
            recap(newCapacity());
        }

        //append value at end
        values[size] = v;
        indexes[size] = col;
        size++;
    }

    /**
     * Looks up the value of the given column.
     *
     * @param col column index
     * @return the stored value, or 0 if the row has no entry for the column
     */
    public double get(int col)
    {
        //search for existing col index
        int index = Arrays.binarySearch(indexes, 0, size, col);
        return (index >= 0) ? values[index] : 0;
    }

    /**
     * Finds the position of the entry with the largest column index that is
     * less than or equal to the given column.
     *
     * @param col column index
     * @return position in the index/value arrays, or -1 if no such entry exists
     */
    public int searchIndexesFirstLTE(int col)
    {
        //search for existing col index
        int index = Arrays.binarySearch(indexes, 0, size, col);
        if( index >= 0 ) {
            return index;
        }

        //entry left of the insertion point (yields -1 for insertion point 0)
        int insertPos = -(index+1);
        return insertPos-1;
    }

    /**
     * Finds the position of the entry with the smallest column index that is
     * greater than or equal to the given column.
     *
     * @param col column index
     * @return position in the index/value arrays, or -1 if no such entry exists
     */
    public int searchIndexesFirstGTE(int col)
    {
        //search for existing col index
        int index = Arrays.binarySearch(indexes, 0, size, col);
        if( index >= 0 ) {
            return index;
        }

        //entry at the insertion point (if any)
        int insertPos = -(index+1);
        return (insertPos < size) ? insertPos : -1;
    }

    /**
     * Finds the position of the entry with the smallest column index that is
     * strictly greater than the given column.
     *
     * @param col column index
     * @return position in the index/value arrays, or -1 if no such entry exists
     */
    public int searchIndexesFirstGT(int col)
    {
        //search for existing col index
        int index = Arrays.binarySearch(indexes, 0, size, col);

        //successor of an exact match, otherwise entry at the insertion point
        int pos = (index >= 0) ? index+1 : -(index+1);
        return (pos < size) ? pos : -1;
    }

    /**
     * Removes the entry of the given column, if present.
     *
     * @param col column index
     */
    public void delete(int col)
    {
        //search for existing col index
        int index = Arrays.binarySearch(indexes, 0, size, col);
        if( index >= 0 ) {
            //shift following entries left by 1
            shiftLeftAndDelete(index);
        }
    }

    /**
     * Removes all entries with column index in {@code [lowerCol, upperCol]}.
     * An inverted range ({@code upperCol < lowerCol}) deletes nothing.
     *
     * @param lowerCol lower column index (inclusive)
     * @param upperCol upper column index (inclusive)
     */
    public void deleteIndexRange(int lowerCol, int upperCol)
    {
        int start = searchIndexesFirstGTE(lowerCol);
        if( start < 0 ) { //nothing to delete
            return;
        }

        int end = searchIndexesFirstGT(upperCol);
        if( end < 0 ) { //delete all remaining
            end = size;
        }

        //empty or inverted range: nothing to delete (an inverted range would
        //otherwise shift entries right and increase the size)
        if( end <= start ) {
            return;
        }

        //overlapping array copy (shift rhs values left)
        System.arraycopy(values, end, values, start, size-end);
        System.arraycopy(indexes, end, indexes, start, size-end);
        size -= (end-start);
    }

    /**
     * Inserts a dense vector into a column range; calling this methods helps to
     * avoid repeated shifting of remaining values/indexes for every set value.
     * Existing entries with column index in {@code [lowerCol, upperCol]} are
     * replaced; {@code v[vix+k]} is written to column {@code lowerCol+k} and
     * zero values are skipped.
     *
     * @param lowerCol lower column index (inclusive)
     * @param upperCol upper column index (inclusive)
     * @param v array holding the dense values
     * @param vix offset of the first value in {@code v}
     * @param len number of values to read from {@code v}
     *            (presumably {@code upperCol-lowerCol+1}; not checked here)
     */
    public void setIndexRange(int lowerCol, int upperCol, double[] v, int vix, int len)
    {
        int start = searchIndexesFirstGTE(lowerCol);
        if( start < 0 ) { //nothing to delete/shift
            for( int i=vix; i<vix+len; i++ ) {
                append(lowerCol+i-vix, v[i]);
            }
            return;
        }

        int end = searchIndexesFirstGT(upperCol);
        if( end < 0 ) { //delete all remaining
            size = start;
            for( int i=vix; i<vix+len; i++ ) {
                append(lowerCol+i-vix, v[i]);
            }
            return;
        }

        //determine input nnz
        int lnnz = 0;
        for( int i=vix; i<vix+len; i++ ) {
            lnnz += ( v[i] != 0 ) ? 1 : 0;
        }

        //prepare free space (allocate and shift); the shift distance is
        //negative if fewer values are inserted than entries are replaced
        int lsize = size+lnnz-(end-start);
        if( values.length < lsize ) {
            recap(lsize);
        }
        shiftRightByN(end, lnnz-(end-start));

        //insert values
        for( int i=vix, pos=start; i<vix+len; i++ ) {
            if( v[i] != 0 ) {
                values[ pos ] = v[i];
                indexes[ pos ] = lowerCol+i-vix;
                pos++;
            }
        }
    }

    /**
     * Inserts a new entry at the given position into newly allocated, larger
     * arrays; copies the entries left and right of the position separately in
     * order to avoid an additional shift.
     *
     * @param index insert position
     * @param col column index
     * @param v value
     */
    private void resizeAndInsert(int index, int col, double v)
    {
        //allocate new arrays
        int newCap = newCapacity();
        double[] oldvalues = values;
        int[] oldindexes = indexes;
        values = new double[newCap];
        indexes = new int[newCap];

        //copy lhs values to new array
        System.arraycopy(oldvalues, 0, values, 0, index);
        System.arraycopy(oldindexes, 0, indexes, 0, index);

        //insert new value
        indexes[index] = col;
        values[index] = v;

        //copy rhs values to new array
        System.arraycopy(oldvalues, index, values, index+1, size-index);
        System.arraycopy(oldindexes, index, indexes, index+1, size-index);
        size++;
    }

    /**
     * Inserts a new entry at the given position of the existing arrays,
     * which need at least one free slot.
     *
     * @param index insert position
     * @param col column index
     * @param v value
     */
    private void shiftRightAndInsert(int index, int col, double v)
    {
        //overlapping array copy (shift rhs values right by 1)
        System.arraycopy(values, index, values, index+1, size-index);
        System.arraycopy(indexes, index, indexes, index+1, size-index);

        //insert new value
        values[index] = v;
        indexes[index] = col;
        size++;
    }

    /**
     * Moves all entries starting at the given position by n slots and adjusts
     * the size accordingly; a negative n moves the entries to the left.
     *
     * @param index position of the first entry to move
     * @param n number of slots to shift by
     */
    private void shiftRightByN(int index, int n)
    {
        //overlapping array copy (shift rhs values right by n)
        System.arraycopy(values, index, values, index+n, size-index);
        System.arraycopy(indexes, index, indexes, index+n, size-index);
        size += n;
    }

    /**
     * Removes the entry at the given position by moving all following
     * entries one slot to the left.
     *
     * @param index position of the entry to remove
     */
    private void shiftLeftAndDelete(int index)
    {
        //overlapping array copy (shift rhs values left by 1)
        System.arraycopy(values, index+1, values, index, size-index-1);
        System.arraycopy(indexes, index+1, indexes, index, size-index-1);
        size--;
    }

    /**
     * In-place sort of column-index value pairs in order to allow binary search
     * after constant-time append was used for reading unordered sparse rows.
     * Rows with more than 100 entries are first checked for being sorted
     * already and only sorted if necessary in order to get O(n) bestcase;
     * smaller rows are sorted unconditionally.
     *
     * Note: In-place sort necessary in order to guarantee the memory estimate
     * for operations that implicitly read that data set.
     */
    public void sort()
    {
        if( size<=100 || !SortUtils.isSorted(0, size, indexes) ) {
            SortUtils.sortByIndex(0, size, indexes, values);
        }
    }

    /**
     * @return the entries as tab-terminated "index: value" pairs in storage order
     */
    @Override
    public String toString()
    {
        StringBuilder sb = new StringBuilder();
        for(int i=0; i<size; i++) {
            sb.append(indexes[i]);
            sb.append(": ");
            sb.append(values[i]);
            sb.append("\t");
        }

        return sb.toString();
    }
}