/*
* #%L
* gitools-core
* %%
* Copyright (C) 2013 Universitat Pompeu Fabra - Biomedical Genomics group
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
package org.gitools.matrix.model.compressmatrix;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import org.gitools.api.matrix.IMatrixLayer;
import org.gitools.matrix.model.AbstractMatrix;
import org.gitools.matrix.model.MatrixLayer;
import org.gitools.matrix.model.MatrixLayers;
import org.gitools.utils.MemoryUtils;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.zip.Inflater;
/**
 * A read-only matrix that keeps its rows compressed in memory.
 * <p/>
 * Rows are inflated on demand and the expanded form is kept in a bounded cache. The maximum
 * number of cached rows is computed once, in the constructor, from the memory reported as
 * available at that moment.
 */
public class CompressMatrix extends AbstractMatrix<MatrixLayers<? extends IMatrixLayer>, CompressDimension> {

    /**
     * Lower bound for the number of rows the cache may hold.
     */
    private static final int MIN_CACHE_ROWS = 40;

    private final byte[] dictionary;

    // Single Inflater instance reused for every row; uncompress() is synchronized so that
    // only one thread drives it at a time.
    private final Inflater decompresser = new Inflater();

    private final Map<Integer, CompressRow> values;

    // Row position -> expanded row, indexed as [column][layer]. Absent values are NaN.
    private final LoadingCache<Integer, double[][]> rowsCache;

    /**
     * Instantiates a new Compress matrix.
     *
     * @param rows       the rows
     * @param columns    the columns
     * @param dictionary the compression dictionary common for all compressed values
     * @param headers    the identifiers of the attributes
     * @param values     a map with the row position as a key and a {@link CompressRow} with
     *                   all the column values of that row
     */
    public CompressMatrix(CompressDimension rows, CompressDimension columns, byte[] dictionary, String[] headers, Map<Integer, CompressRow> values) {
        super(createMatrixLayers(headers), rows, columns);

        this.dictionary = dictionary;
        this.values = values;

        // Request a garbage collection before measuring the available memory
        Runtime.getRuntime().gc();

        // Use a maximum of 50% of the available memory
        long availableMemory = MemoryUtils.getAvailableMemory() / 2;

        // Estimate the uncompressed matrix size. The sum is accumulated in a long because the
        // total can exceed Integer.MAX_VALUE; an int accumulator would wrap to a negative
        // value and silently shrink the cache to its minimum size.
        long matrixSize = 0;
        for (CompressRow value : values.values()) {
            matrixSize += value.getNotCompressedLength() + 4L;
        }

        final int cacheSize = computeCacheSize(availableMemory, matrixSize, values.size());

        // Create the rows cache; a missing entry is produced by inflating the compressed row
        rowsCache = CacheBuilder.newBuilder()
                .maximumSize(cacheSize)
                .build(
                        new CacheLoader<Integer, double[][]>() {
                            @Override
                            public double[][] load(Integer row) {
                                return uncompress(CompressMatrix.this.values.get(row));
                            }
                        });

        // Fill the cache in background
        final int max = Math.min(values.size(), cacheSize);
        Runnable fillCache = new Runnable() {
            @Override
            public void run() {
                for (int i = 0; i < max; i++) {
                    rowsCache.getUnchecked(i);
                }
            }
        };

        // The prefetch is only an optimisation, so it must not keep the JVM alive on exit
        Thread cacheLoader = new Thread(fillCache, "LoadingCache");
        cacheLoader.setDaemon(true);
        cacheLoader.start();
    }

    /**
     * Computes how many expanded rows the cache may hold.
     * <p/>
     * The row count is scaled by the ratio between the memory budget and the estimated
     * uncompressed size. If that exceeds the row count the result is one and a half times the
     * row count; in every case the result is at least {@link #MIN_CACHE_ROWS}.
     *
     * @param availableMemory the memory budget in bytes
     * @param matrixSize      the estimated size in bytes of the whole uncompressed matrix
     * @param rowCount        the number of compressed rows
     * @return the maximum number of rows to cache
     */
    private static int computeCacheSize(long availableMemory, long matrixSize, int rowCount) {
        // Floating point division: a zero matrixSize yields Infinity/NaN instead of throwing,
        // and the int cast below maps those to Integer.MAX_VALUE/0 respectively.
        double fact = (double) availableMemory / (double) matrixSize;

        int cacheSize = (int) ((double) rowCount * fact);
        if (cacheSize > rowCount) {
            cacheSize = rowCount + (rowCount / 2);
        }
        return Math.max(cacheSize, MIN_CACHE_ROWS);
    }

    /**
     * Returns the value stored at the given row and column for a layer.
     *
     * @param layer       the layer to read
     * @param identifiers the row identifier followed by the column identifier
     * @return the value, or {@code null} if the row, column or layer is unknown or the stored
     * value is NaN
     */
    @Override
    public <T> T get(IMatrixLayer<T> layer, String... identifiers) {
        int rowIndex = getRows().indexOf(identifiers[0]);
        int columnIndex = getColumns().indexOf(identifiers[1]);
        int layerIndex = getLayers().indexOf(layer.getId());

        if (rowIndex == -1 || columnIndex == -1 || layerIndex == -1) {
            return null;
        }

        // The cache is who loads the row if it's not already loaded.
        double value = rowsCache.getUnchecked(rowIndex)[columnIndex][layerIndex];
        if (Double.isNaN(value)) {
            return null;
        }

        // Every layer is created with double.class in createMatrixLayers, so values are Doubles
        @SuppressWarnings("unchecked")
        T result = (T) Double.valueOf(value);
        return result;
    }

    /**
     * Always fails: this matrix cannot be modified.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public <T> void set(IMatrixLayer<T> layer, T value, String... identifiers) {
        throw new UnsupportedOperationException("Read only matrix");
    }

    /**
     * Expands a compressed row.
     * <p/>
     * Synchronized because all calls share the same {@link Inflater} instance.
     *
     * @param compressRow The compressed row
     * @return the row values indexed as {@code [column][layer]}; positions that are not present
     * in the compressed data are {@link Double#NaN}
     * @throws RuntimeException if the row cannot be inflated or parsed
     */
    private synchronized double[][] uncompress(CompressRow compressRow) {
        final int columnCount = getColumns().size();
        final int layerCount = getLayers().size();

        // Initialize all to NaN
        double[][] rowValues = new double[columnCount][layerCount];
        for (double[] columnValues : rowValues) {
            for (int j = 0; j < layerCount; j++) {
                columnValues[j] = Double.NaN;
            }
        }

        try {
            byte[] result = new byte[compressRow.getNotCompressedLength()];

            // Expand the row
            int resultLength;
            try {
                decompresser.setInput(compressRow.getContent());
                resultLength = decompresser.inflate(result);

                // A stream compressed with a preset dictionary produces no output until the
                // dictionary is supplied
                if (decompresser.needsDictionary()) {
                    decompresser.setDictionary(dictionary);
                    resultLength = decompresser.inflate(result);
                }
            } finally {
                // Always leave the shared inflater clean, otherwise one corrupt row would
                // break the decompression of every following row
                decompresser.reset();
            }

            // Read all the columns, parsing only the bytes that were actually inflated
            // [column position int],[one double per layer]
            try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(result, 0, resultLength))) {
                while (in.available() > 0) {
                    int column = in.readInt();
                    for (int i = 0; i < layerCount; i++) {
                        rowValues[column][i] = in.readDouble();
                    }
                }
            }

            return rowValues;
        } catch (Exception e) {
            throw new RuntimeException("Error extracting the matrix", e);
        }
    }

    /**
     * Discards every expanded row held in the cache. The compressed rows are not touched.
     *
     * @param topLayer not used by this implementation
     */
    public void detach(IMatrixLayer topLayer) {
        this.rowsCache.invalidateAll();
    }

    /**
     * @return the compression dictionary given at construction (the internal array, not a copy)
     */
    public byte[] getDictionary() {
        return dictionary;
    }

    /**
     * @return the map of compressed rows given at construction, keyed by row position
     */
    public Map<Integer, CompressRow> getCompressRows() {
        return values;
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean isChanged() {
        return false;
    }

    /**
     * Creates one layer per header.
     *
     * @param headers the layer identifiers
     * @return the layers, all of them declared with {@code double.class} as value type
     */
    private static MatrixLayers createMatrixLayers(String[] headers) {
        // We assume that all the attributes are doubles.
        List<MatrixLayer> matrixLayers = new ArrayList<>(headers.length);
        for (String header : headers) {
            matrixLayers.add(new MatrixLayer(header, double.class));
        }
        return new MatrixLayers<>(matrixLayers);
    }
}