/*
* #%L
* gitools-core
* %%
* Copyright (C) 2013 Universitat Pompeu Fabra - Biomedical Genomics group
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
package org.gitools.matrix.format;
import org.apache.commons.io.IOUtils;
import org.gitools.api.PersistenceException;
import org.gitools.api.analysis.IProgressMonitor;
import org.gitools.api.matrix.IMatrix;
import org.gitools.api.matrix.MatrixDimensionKey;
import org.gitools.api.resource.IResourceLocator;
import org.gitools.matrix.model.compressmatrix.AbstractCompressor;
import org.gitools.matrix.model.compressmatrix.CompressDimension;
import org.gitools.matrix.model.compressmatrix.CompressMatrix;
import org.gitools.matrix.model.compressmatrix.CompressRow;
import org.gitools.resource.AbstractResourceFormat;
import javax.enterprise.context.ApplicationScoped;
import java.io.*;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
/**
 * Resource format that reads and writes {@link CompressMatrix} instances using the binary
 * "cmatrix" layout.
 * <p>
 * Layout, as produced by {@link #writeResource} (all integers written with
 * {@link DataOutputStream#writeInt(int)}):
 * <ol>
 * <li>format version (int)</li>
 * <li>dictionary (length-prefixed byte buffer)</li>
 * <li>column labels (length-prefixed byte buffer)</li>
 * <li>row labels (length-prefixed byte buffer)</li>
 * <li>layer ids, called "headers" (length-prefixed byte buffer)</li>
 * <li>one record per compressed row: row position (int), length before compression (int),
 * compressed content (length-prefixed byte buffer)</li>
 * </ol>
 * When reading, label buffers are decoded as UTF-8 and split on tab characters.
 */
@ApplicationScoped
public class CmatrixMatrixFormat extends AbstractResourceFormat<IMatrix> {

    public static final String EXTENSION = "cmatrix";

    /** Version number written at the very beginning of every file. */
    private static final int FORMAT_VERSION = 0;

    /** Separator used to split a decoded label buffer into individual labels. */
    private static final Pattern TAB = Pattern.compile("\t");

    public CmatrixMatrixFormat() {
        super(EXTENSION, IMatrix.class);
    }

    /**
     * Reads a complete compress matrix from the given locator.
     *
     * @param resourceLocator the locator that provides the input stream
     * @param progressMonitor the monitor handed to the locator when opening the stream
     * @return the matrix built from the stored dictionary, labels, headers and compressed rows
     * @throws PersistenceException if any I/O error happens while reading
     */
    @Override
    protected CompressMatrix readResource(IResourceLocator resourceLocator, IProgressMonitor progressMonitor) throws PersistenceException {

        // try-with-resources guarantees the stream is closed even when a read fails half way
        try (DataInputStream in = new DataInputStream(resourceLocator.openInputStream(progressMonitor))) {

            // Format version: it must be consumed to advance the stream, but it is not interpreted
            in.readInt();

            byte[] dictionary = readBuffer(in);
            String[] columns = splitBuffer(readBuffer(in));
            String[] rows = splitBuffer(readBuffer(in));
            String[] headers = splitBuffer(readBuffer(in));

            // Values: exactly one record is read per row label.
            // NOTE(review): the writer emits one record per entry of getCompressRows(); this
            // assumes that map has as many entries as there are row labels -- confirm.
            Map<Integer, CompressRow> values = new HashMap<>(rows.length);
            for (int i = 0; i < rows.length; i++) {
                int row = in.readInt();
                int uncompressLength = in.readInt();
                values.put(row, new CompressRow(uncompressLength, readBuffer(in)));
            }

            CompressDimension rowDim = new CompressDimension(MatrixDimensionKey.ROWS, rows);
            CompressDimension colDim = new CompressDimension(MatrixDimensionKey.COLUMNS, columns);

            return new CompressMatrix(rowDim, colDim, dictionary, headers, values);

        } catch (IOException e) {
            throw new PersistenceException(e);
        }
    }

    /**
     * Writes a compress matrix to the given locator.
     *
     * @param resourceLocator the locator that provides the output stream
     * @param resource        the matrix to write; it must be a {@link CompressMatrix}
     * @param progressMonitor the monitor notified before each section and once per written row
     * @throws UnsupportedOperationException if the resource is not a {@link CompressMatrix}
     * @throws PersistenceException          if any I/O error happens while writing
     */
    @Override
    protected void writeResource(IResourceLocator resourceLocator, IMatrix resource, IProgressMonitor progressMonitor) throws PersistenceException {

        if (!(resource instanceof CompressMatrix)) {
            throw new UnsupportedOperationException("It is not possible to convert into a compress matrix");
        }

        CompressMatrix matrix = (CompressMatrix) resource;

        // try-with-resources guarantees the stream is closed even when a write fails half way
        try (DataOutputStream out = new DataOutputStream(resourceLocator.openOutputStream(progressMonitor))) {

            out.writeInt(FORMAT_VERSION);

            progressMonitor.begin("Writing dictionary...", 1);
            writeBuffer(out, matrix.getDictionary());

            progressMonitor.begin("Writing columns...", 1);
            writeBuffer(out, AbstractCompressor.stringToByteArray(matrix.getColumns().getLabels()));

            progressMonitor.begin("Writing rows...", 1);
            writeBuffer(out, AbstractCompressor.stringToByteArray(matrix.getRows().getLabels()));

            // The headers are the ids of the matrix layers, in layer order
            progressMonitor.begin("Writing headers...", 1);
            int layerCount = resource.getLayers().size();
            String[] headers = new String[layerCount];
            for (int i = 0; i < layerCount; i++) {
                headers[i] = resource.getLayers().get(i).getId();
            }
            writeBuffer(out, AbstractCompressor.stringToByteArray(headers));

            Map<Integer, CompressRow> compressRowMap = matrix.getCompressRows();
            progressMonitor.begin("Writing values...", compressRowMap.size());
            for (Map.Entry<Integer, CompressRow> value : compressRowMap.entrySet()) {
                progressMonitor.worked(1);

                CompressRow compressRow = value.getValue();

                // The row position
                out.writeInt(value.getKey());

                // The length of the row content before compression
                out.writeInt(compressRow.getNotCompressedLength());

                // The compressed content, prefixed with its own length
                writeBuffer(out, compressRow.getContent());
            }

        } catch (IOException e) {
            throw new PersistenceException(e);
        }
    }

    /**
     * Read a byte array that starts with an integer that contains the buffer length to read.
     *
     * @param in the input stream
     * @return the byte array
     * @throws IOException if the stream cannot be read or the stored length is negative
     */
    public static byte[] readBuffer(DataInputStream in) throws IOException {
        int length = in.readInt();

        // A negative length can only come from a corrupted or foreign file; report it as an
        // I/O problem so callers handle it like any other read failure.
        if (length < 0) {
            throw new IOException("Invalid buffer length: " + length);
        }

        return IOUtils.toByteArray(in, length);
    }

    /**
     * Write a byte array preceded by an integer with its length, the counterpart of
     * {@link #readBuffer(DataInputStream)}.
     *
     * @param out    the output stream
     * @param buffer the bytes to write
     * @throws IOException if the stream cannot be written
     */
    private static void writeBuffer(DataOutputStream out, byte[] buffer) throws IOException {
        out.writeInt(buffer.length);
        out.write(buffer);
    }

    /**
     * Decode a buffer as UTF-8 text and split it on tab characters.
     *
     * @param buffer the raw bytes
     * @return the tab separated fields
     * @throws UnsupportedEncodingException if the UTF-8 charset is not available
     */
    private static String[] splitBuffer(byte[] buffer) throws UnsupportedEncodingException {
        String line = new String(buffer, "UTF-8");
        return TAB.split(line);
    }

    /**
     * Read only the headers section of a cmatrix file, without loading the row values.
     * Files whose name ends with ".gz" are read through a {@link GZIPInputStream}.
     *
     * @param file the cmatrix file
     * @return the headers stored in the file
     * @throws PersistenceException if any I/O error happens while reading
     */
    public static String[] readHeader(File file) {

        boolean gzip = file.getName().endsWith(".gz");

        // The file stream is a resource of its own, so it is closed even when the
        // GZIPInputStream constructor fails (for instance when the file is not gzip data).
        try (InputStream raw = new FileInputStream(file);
             DataInputStream in = new DataInputStream(gzip ? new GZIPInputStream(raw) : raw)) {

            // Format version (consumed, not interpreted)
            in.readInt();

            // Dictionary, columns and rows precede the headers; they are read only to advance
            // the stream and are discarded without being decoded.
            readBuffer(in);
            readBuffer(in);
            readBuffer(in);

            return splitBuffer(readBuffer(in));

        } catch (IOException e) {
            throw new PersistenceException(e);
        }
    }
}