/*
* #%L
* gitools-core
* %%
* Copyright (C) 2013 Universitat Pompeu Fabra - Biomedical Genomics group
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
package org.gitools.matrix.format;
import edu.upf.bg.mtabix.MTabixConfig;
import edu.upf.bg.mtabix.MTabixIndex;
import edu.upf.bg.mtabix.compress.BlockCompressedStreamConstants;
import edu.upf.bg.mtabix.parse.DefaultKeyParser;
import org.apache.commons.io.IOUtils;
import org.gitools.api.PersistenceException;
import org.gitools.api.analysis.IProgressMonitor;
import org.gitools.api.matrix.IMatrix;
import org.gitools.api.matrix.IMatrixDimension;
import org.gitools.api.matrix.IMatrixLayer;
import org.gitools.api.matrix.IMatrixLayers;
import org.gitools.api.resource.IResourceLocator;
import org.gitools.matrix.model.MatrixLayer;
import org.gitools.matrix.model.MatrixLayers;
import org.gitools.matrix.model.hashmatrix.HashMatrix;
import org.gitools.matrix.model.mtabixmatrix.MTabixMatrix;
import org.gitools.utils.readers.text.CSVReader;
import org.gitools.utils.readers.text.RawFlatTextWriter;
import org.gitools.utils.translators.DoubleTranslator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.enterprise.context.ApplicationScoped;
import java.io.*;
import java.net.URISyntaxException;
import java.net.URL;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CancellationException;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import static com.google.common.collect.Lists.newArrayList;
import static org.gitools.api.matrix.MatrixDimensionKey.COLUMNS;
import static org.gitools.api.matrix.MatrixDimensionKey.ROWS;
/**
 * Matrix format handler for "tdm" files.
 * <p>
 * As written by {@link #writeCells}, a tdm file is a tab separated text file whose
 * header line is {@code column, row, <layer id>...} and whose remaining lines hold one
 * matrix cell each: the column identifier, the row identifier and one value per layer.
 * <p>
 * When an mtabix index ({@code <name>.gz.mtabix}) is available next to the data, the
 * matrix is served through that index; otherwise the whole file is loaded into memory.
 */
@ApplicationScoped
public class TdmMatrixFormat extends AbstractMatrixFormat {

    private static final Logger LOGGER = LoggerFactory.getLogger(TdmMatrixFormat.class);

    /**
     * File extension handled by this format. The identifier is misspelled, but it is
     * public and therefore kept unchanged so existing callers keep compiling.
     */
    public static final String EXSTENSION = "tdm";

    /** Suffix appended to the resource name to obtain the name of its mtabix index. */
    private static final String MTABIX_SUFFIX = ".gz.mtabix";

    /** Prefix of the temporary files used when the data lives inside a zip file. */
    private static final String TEMP_PREFIX = "gitools-cache-";

    /**
     * Creates the format and registers it under the {@link #EXSTENSION} extension.
     */
    public TdmMatrixFormat() {
        super(EXSTENSION);
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean isDefaultExtension() {
        return true;
    }

    /**
     * Reads a tdm matrix.
     * <p>
     * The header line is always parsed to discover the layers (every field after the
     * first two). If an mtabix index could be loaded, an index backed matrix is returned
     * without reading the body; otherwise every line is loaded into a {@link HashMatrix}.
     *
     * @param resourceLocator locator of the tdm data
     * @param progressMonitor monitor polled for cancellation while reading the body
     * @return the matrix, index backed or fully loaded in memory
     * @throws PersistenceException if the header is missing or has fewer than three
     *                              fields, or wrapping any other failure (including a
     *                              {@link CancellationException} raised on cancellation)
     */
    @Override
    protected IMatrix readResource(IResourceLocator resourceLocator, IProgressMonitor progressMonitor) throws PersistenceException {
        try {
            MTabixIndex index = readMtabixIndex(resourceLocator, progressMonitor);

            // try-with-resources: the stream is released on every exit path, including errors.
            try (InputStream in = resourceLocator.openInputStream(progressMonitor)) {
                CSVReader parser = new CSVReader(new InputStreamReader(in));

                String[] header = parser.readNext();
                // readNext() yields null at end of input, so an empty file is reported
                // as a format error instead of failing with a NullPointerException.
                if (header == null || header.length < 3) {
                    throw new PersistenceException("At least 3 fields expected on one line.");
                }

                // The first two header fields name the column and row identifiers;
                // each remaining field is the id of one layer.
                MatrixLayer<Double> layers[] = new MatrixLayer[header.length - 2];
                for (int i = 2; i < header.length; i++) {
                    layers[i - 2] = new MatrixLayer<>(header[i], Double.class);
                }

                if (index != null) {
                    // Values are fetched through the index, so the body is not read here.
                    return new MTabixMatrix(index, new MatrixLayers<MatrixLayer>(layers), ROWS, COLUMNS);
                }

                // No index available: load the whole matrix into memory.
                HashMatrix resultsMatrix = new HashMatrix(new MatrixLayers<MatrixLayer>(layers), ROWS, COLUMNS);

                String fields[];
                while ((fields = parser.readNext()) != null) {

                    if (progressMonitor.isCancelled()) {
                        throw new CancellationException();
                    }

                    checkLine(fields, header, parser.getLineNumber());

                    final String columnId = fields[0];
                    final String rowId = fields[1];

                    for (int i = 2; i < fields.length; i++) {
                        Double value = DoubleTranslator.get().stringToValue(fields[i]);
                        resultsMatrix.set(layers[i - 2], value, rowId, columnId);
                    }
                }

                return resultsMatrix;
            }
        } catch (PersistenceException e) {
            // Already the right type: rethrow as is so its message is not buried in a wrapper.
            throw e;
        } catch (Exception e) {
            throw new PersistenceException(e);
        }
    }

    /**
     * Tries to load the mtabix index that accompanies the data.
     * <p>
     * When the data URL path does not end in "zip", the index is looked up through the
     * reference locator named {@code <name>.gz.mtabix}. Otherwise the index entry and the
     * data are copied out of the zip into temporary files (deleted on JVM exit).
     *
     * @param resourceLocator locator of the tdm data
     * @param progressMonitor monitor handed to the locator when opening the data stream
     * @return the loaded index, or {@code null} if there is no index entry or index file
     * @throws IOException        if the zip, the temporary files or the index cannot be read
     * @throws URISyntaxException if a locator URL cannot be converted to a URI
     */
    private MTabixIndex readMtabixIndex(IResourceLocator resourceLocator, IProgressMonitor progressMonitor) throws IOException, URISyntaxException {

        URL dataURL = resourceLocator.getURL();
        String indexName = resourceLocator.getName() + MTABIX_SUFFIX;

        File dataFile;
        File indexFile;

        if (!dataURL.getPath().endsWith("zip")) {
            IResourceLocator mtabix = resourceLocator.getReferenceLocator(indexName);
            URL indexURL = mtabix.getURL();

            dataFile = new File(dataURL.toURI());
            indexFile = new File(indexURL.toURI());
        } else {
            // The temporary files are used directly as File objects. Going through the
            // deprecated File.toURL() and back via URL.toURI() fails with a
            // URISyntaxException when the temp directory contains e.g. a space.
            indexFile = extractIndexFromZip(resourceLocator, indexName);
            if (indexFile == null) {
                return null;
            }

            // Copy the data to a temporary file.
            dataFile = copyToTempFile(resourceLocator.getParentLocator().openInputStream(progressMonitor), "zip_bgz");
        }

        if (!indexFile.exists()) {
            return null;
        }

        // NOTE(review): (1, 0) presumably are the positions of the key fields within a
        // line; writeMtabixIndex uses the same values - confirm against DefaultKeyParser.
        MTabixConfig mtabixConfig = new MTabixConfig(dataFile, indexFile, new DefaultKeyParser(1, 0));
        MTabixIndex index = new MTabixIndex(mtabixConfig);
        index.loadIndex();

        return index;
    }

    /**
     * Copies the index entry out of the zip file behind the locator into a temporary file.
     *
     * @param resourceLocator locator whose read file is the zip file
     * @param indexName       name of the index entry inside the zip
     * @return the temporary index file, or {@code null} if the zip has no such entry
     * @throws IOException if the zip cannot be opened or the entry cannot be copied
     */
    private File extractIndexFromZip(IResourceLocator resourceLocator, String indexName) throws IOException {
        try (ZipFile zipFile = new ZipFile(resourceLocator.getReadFile())) {
            ZipEntry entry = zipFile.getEntry(indexName);
            if (entry == null) {
                return null;
            }
            return copyToTempFile(zipFile.getInputStream(entry), "zip_mtabix");
        }
    }

    /**
     * Copies a stream into a new temporary file that is deleted when the JVM exits.
     * The source stream is always closed, whether or not the copy succeeds.
     *
     * @param source stream to consume and close
     * @param suffix suffix of the temporary file name
     * @return the temporary file holding the copied bytes
     * @throws IOException if the file cannot be created or the copy fails
     */
    private static File copyToTempFile(InputStream source, String suffix) throws IOException {
        try (InputStream in = source) {
            File target = File.createTempFile(TEMP_PREFIX, suffix);
            target.deleteOnExit();
            try (OutputStream out = new FileOutputStream(target)) {
                IOUtils.copy(in, out);
            }
            return target;
        }
    }

    /**
     * Writes the matrix as tdm text and then tries to build its mtabix index.
     * <p>
     * Failing to write the data is an error; failing to build the index is only logged,
     * so the data file is still produced without an index.
     *
     * @param resourceLocator destination of the tdm data
     * @param results         matrix to save
     * @param monitor         progress monitor; one unit of work is reported per column
     * @throws PersistenceException wrapping any failure while writing the data
     */
    @Override
    protected void writeResource(IResourceLocator resourceLocator, IMatrix results, IProgressMonitor monitor) throws PersistenceException {

        monitor.begin("Saving results...", results.getColumns().size());

        int bufferSize = BlockCompressedStreamConstants.DEFAULT_UNCOMPRESSED_BLOCK_SIZE * 100;

        // try-with-resources: the writer (and the stream below it) is closed even when
        // writing fails or is cancelled, instead of leaking the open handle.
        try (Writer writer = new OutputStreamWriter(new BufferedOutputStream(resourceLocator.openOutputStream(monitor), bufferSize))) {
            writeCells(writer, results, monitor);
        } catch (Exception e) {
            throw new PersistenceException(e);
        }

        try {
            writeMtabixIndex(resourceLocator, results, monitor);
        } catch (Exception e) {
            // Deliberately best effort: the index is optional when reading.
            LOGGER.warn("Error creating mtabix index", e);
        }
    }

    /**
     * Builds the mtabix index of an already written data file into the
     * {@code <name>.gz.mtabix} reference locator.
     *
     * @param resourceLocator locator of the written data
     * @param results         matrix whose column and row identifiers are indexed
     * @param monitor         monitor handed to the index locator when closing it
     * @throws URISyntaxException       propagated from the locator or index calls
     * @throws IOException              propagated from the locator or index calls
     * @throws NoSuchAlgorithmException propagated from the locator or index calls
     */
    private void writeMtabixIndex(IResourceLocator resourceLocator, IMatrix results, IProgressMonitor monitor) throws URISyntaxException, IOException, NoSuchAlgorithmException {

        IResourceLocator mtabix = resourceLocator.getReferenceLocator(resourceLocator.getName() + MTABIX_SUFFIX);

        // Identifiers keyed by field position: 0 holds the columns and 1 the rows,
        // matching the order in which writeCells emits them on each line.
        Map<Integer, List<String>> identifiers = new HashMap<>(2);
        identifiers.put(0, newArrayList(results.getColumns()));
        identifiers.put(1, newArrayList(results.getRows()));

        MTabixConfig mtabixConfig = new MTabixConfig(
                resourceLocator.getWriteFile(),
                mtabix.getWriteFile(),
                new DefaultKeyParser(1, 0),
                identifiers);
        MTabixIndex index = new MTabixIndex(mtabixConfig);
        index.buildIndex();

        mtabix.close(monitor);
    }

    /**
     * Writes the header line and one line per non-empty cell.
     * <p>
     * Cells are visited column by column. A cell is skipped when its value is
     * {@code null} in every layer; otherwise each {@code null} layer value is written
     * as "-".
     *
     * @param writer          destination of the text
     * @param resultsMatrix   matrix to dump
     * @param progressMonitor receives one unit of work per column and is polled for
     *                        cancellation after each column
     * @throws CancellationException if the monitor reports a cancellation
     */
    private void writeCells(Writer writer, IMatrix resultsMatrix, IProgressMonitor progressMonitor) {

        RawFlatTextWriter out = new RawFlatTextWriter(writer, '\t', '"');

        // Header: "column", "row" and then the id of every layer.
        out.writeQuotedValue("column");
        out.writeSeparator();
        out.writeQuotedValue("row");

        for (IMatrixLayer layer : resultsMatrix.getLayers()) {
            out.writeSeparator();
            out.writeQuotedValue(layer.getId());
        }

        out.writeNewLine();

        IMatrixDimension columns = resultsMatrix.getColumns();
        IMatrixDimension rows = resultsMatrix.getRows();
        IMatrixLayers layers = resultsMatrix.getLayers();

        // Reused for every cell; each slot is overwritten before the line is emitted.
        String[] values = new String[layers.size()];

        for (String column : columns) {
            for (String row : rows) {

                boolean allNulls = true;
                for (int l = 0; l < layers.size(); l++) {
                    IMatrixLayer layer = layers.get(l);
                    Object value = resultsMatrix.get(layer, row, column);

                    //TODO Use IMatrixLayer translator
                    if (value instanceof Double) {
                        values[l] = DoubleTranslator.get().valueToString((Double) value);
                        allNulls = false;
                    } else if (value != null) {
                        values[l] = value.toString();
                        allNulls = false;
                    } else {
                        values[l] = "-";
                    }
                }

                if (!allNulls) {
                    out.writeValue(column);
                    out.writeSeparator();
                    out.writeValue(row);
                    for (int l = 0; l < layers.size(); l++) {
                        out.writeSeparator();
                        out.writeValue(values[l]);
                    }
                    out.writeNewLine();
                }
            }

            progressMonitor.worked(1);
            if (progressMonitor.isCancelled()) {
                throw new CancellationException();
            }
        }
    }

    /**
     * Reads the layer names from the header line of a tdm file, i.e. every header
     * field after the first two.
     *
     * @param file tdm file, optionally gzip compressed (name ending in ".gz")
     * @return the layer names, in header order
     * @throws PersistenceException if no file is given, the file is empty, the header has
     *                              fewer than three fields or the file cannot be read
     */
    @Deprecated
    public static String[] readHeader(File file) throws PersistenceException {

        if (file == null) {
            throw new PersistenceException("No file given to read the header from.");
        }

        // try-with-resources: the reader was previously never closed.
        try (Reader reader = openReader(file)) {

            CSVReader parser = new CSVReader(reader);
            String[] line = parser.readNext();

            // A null line means the file is empty.
            if (line == null || line.length < 3) {
                throw new PersistenceException("At least 3 columns expected.");
            }

            int numAttributes = line.length - 2;
            String[] matrixHeaders = new String[numAttributes];
            System.arraycopy(line, 2, matrixHeaders, 0, numAttributes);
            return matrixHeaders;

        } catch (PersistenceException e) {
            // Already the right type: rethrow as is so its message is not buried in a wrapper.
            throw e;
        } catch (Exception e) {
            throw new PersistenceException(e);
        }
    }

    /**
     * Opens a buffered text reader over a file, transparently decompressing it when its
     * name ends in ".gz".
     *
     * @param path file to open, may be {@code null}
     * @return the reader, or {@code null} if {@code path} is {@code null}
     * @throws IOException if the file cannot be opened or is not valid gzip data
     */
    private static Reader openReader(File path) throws IOException {

        if (path == null) {
            return null;
        }

        if (!path.getName().endsWith(".gz")) {
            return new BufferedReader(new FileReader(path));
        }

        FileInputStream raw = new FileInputStream(path);
        try {
            return new BufferedReader(new InputStreamReader(new GZIPInputStream(raw)));
        } catch (IOException | RuntimeException e) {
            // The GZIPInputStream constructor reads the gzip header and may fail;
            // release the file handle in that case instead of leaking it.
            try {
                raw.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }
}