/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.core.alignment;
import org.opencb.biodata.formats.alignment.io.AlignmentDataReader;
import org.opencb.biodata.formats.alignment.io.AlignmentRegionDataReader;
import org.opencb.biodata.formats.alignment.io.AlignmentRegionDataWriter;
import org.opencb.biodata.formats.alignment.sam.io.AlignmentBamDataReader;
import org.opencb.biodata.formats.io.FileFormatException;
import org.opencb.biodata.models.alignment.AlignmentRegion;
import org.opencb.biodata.tools.alignment.BamManager;
import org.opencb.biodata.tools.alignment.BamUtils;
import org.opencb.biodata.tools.alignment.tasks.AlignmentRegionCoverageCalculatorTask;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.io.DataWriter;
import org.opencb.commons.run.Runner;
import org.opencb.commons.run.Task;
import org.opencb.commons.utils.FileUtils;
import org.opencb.opencga.core.common.UriUtils;
import org.opencb.opencga.storage.core.StoragePipeline;
import org.opencb.opencga.storage.core.StorageEngine;
import org.opencb.opencga.storage.core.alignment.json.AlignmentCoverageJsonDataReader;
import org.opencb.opencga.storage.core.alignment.json.AlignmentCoverageJsonDataWriter;
import org.opencb.opencga.storage.core.alignment.json.AlignmentJsonDataReader;
import org.opencb.opencga.storage.core.alignment.json.AlignmentJsonDataWriter;
import org.opencb.opencga.storage.core.config.StorageConfiguration;
import org.opencb.opencga.storage.core.config.StorageEtlConfiguration;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.slf4j.LoggerFactory;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
/**
* Created by jacobo on 14/08/14.
*/
public abstract class AlignmentStorageEngineOld extends StorageEngine<AlignmentDBAdaptor> implements StoragePipeline {
protected StorageEtlConfiguration storageEtlConfiguration;
/**
 * Configuration options understood by the alignment storage engine.
 * <p>
 * Every constant pairs the key under which the option is looked up in the
 * options map with the value used when that key is not configured.
 * The declaration order of the constants is part of the enum contract and must be kept.
 */
public enum Options {
    MEAN_COVERAGE_SIZE_LIST("mean_coverage_size_list", Arrays.asList("200", "10000")),
    PLAIN("plain", false),
    TRANSFORM_REGION_SIZE("transform.region_size", 200000),
    TRANSFORM_COVERAGE_CHUNK_SIZE("transform.coverage_chunk_size", 1000),
    WRITE_COVERAGE("transform.write_coverage", true),
    STUDY("study", true),
    FILE_ID("fileId", ""),
    FILE_ALIAS("fileAlias", ""),
    WRITE_ALIGNMENTS("writeAlignments", false),
    INCLUDE_COVERAGE("includeCoverage", true),
    CREATE_BAM_INDEX("createBai", true),
    ADJUST_QUALITY("adjustQuality", false),
    ENCRYPT("encrypt", false),
    COPY_FILE("copy", false),
    DB_NAME("database.name", "opencga"),
    @Deprecated
    TOOLS_SAMTOOLS("tools.samtools", null);

    /** Name of the option inside the options map. */
    private final String key;
    /** Fallback used when the option is absent; may be {@code null}. */
    private final Object value;

    Options(String optionKey, Object fallback) {
        this.key = optionKey;
        this.value = fallback;
    }

    /**
     * @return the key under which this option is stored
     */
    public String key() {
        return this.key;
    }

    /**
     * Returns the default value, cast to whatever type the call site expects.
     * The cast is unchecked: asking for a type that does not match the declared
     * default fails with a {@link ClassCastException} at the call site.
     *
     * @param <T> type expected by the caller
     * @return the default value of this option, possibly {@code null}
     */
    @SuppressWarnings("unchecked")
    public <T> T defaultValue() {
        final T typedDefault = (T) this.value;
        return typedDefault;
    }
}
/**
 * Creates an engine without any configuration and binds the logger to this class.
 */
public AlignmentStorageEngineOld() {
    super();
    this.logger = LoggerFactory.getLogger(AlignmentStorageEngineOld.class);
}
/**
 * Creates an engine from a storage configuration and binds the logger to this class.
 *
 * @param configuration storage configuration handed to the parent constructor
 */
public AlignmentStorageEngineOld(StorageConfiguration configuration) {
    super(configuration);
    this.logger = LoggerFactory.getLogger(AlignmentStorageEngineOld.class);
}
/**
 * Creates an engine for a given storage engine id and binds the logger to this class.
 *
 * @param storageEngineId identifier handed to the parent constructor
 * @param configuration   storage configuration handed to the parent constructor
 */
public AlignmentStorageEngineOld(String storageEngineId, StorageConfiguration configuration) {
    super(storageEngineId, configuration);
    this.logger = LoggerFactory.getLogger(AlignmentStorageEngineOld.class);
}
/**
 * Stores the configuration in the parent and caches the alignment ETL section
 * of the selected storage engine for later option lookups.
 *
 * @param configuration   full storage configuration
 * @param storageEngineId id of the storage engine whose alignment section is used
 */
@Override
public void setConfiguration(StorageConfiguration configuration, String storageEngineId) {
    super.setConfiguration(configuration, storageEngineId);
    this.storageEtlConfiguration = configuration
            .getStorageEngine(storageEngineId)
            .getAlignment();
}
/**
* Extraction step of the pipeline. This implementation does nothing and
* hands back the input URI unchanged; the output URI is ignored.
*
* @param input URI of the file to extract
* @param ouput unused
* @return the same {@code input} URI
* @throws StorageEngineException declared by the pipeline contract; never thrown by this implementation
*/
@Override
public URI extract(URI input, URI ouput) throws StorageEngineException {
return input;
}
/**
 * Validates the input before it is transformed.
 * <p>
 * The URI is checked through {@code UriUtils.checkUri} (called with the "file" scheme)
 * and the file is then checked through {@code BamUtils.checkBamOrCramFile}; the
 * {@code true} flag presumably requests the sort-order check, as in {@code transform}.
 *
 * @param inputUri URI of the BAM/CRAM file to validate
 * @return the same {@code inputUri}
 * @throws IOException         if the file cannot be opened or read, or a check fails with an I/O error
 * @throws FileFormatException declared for format problems reported by the checks
 */
@Override
public URI preTransform(URI inputUri) throws IOException, FileFormatException {
    UriUtils.checkUri(inputUri, "input file", "file");
    Path input = Paths.get(inputUri.getPath());
    // The stream is opened here, so it is closed here: without try-with-resources the
    // file handle leaks whenever the check throws before closing its reader.
    try (FileInputStream inputStream = new FileInputStream(input.toFile())) {
        BamUtils.checkBamOrCramFile(inputStream, input.getFileName().toString(), true);
    }
    return inputUri;
}
/**
 * Transforms a sorted BAM/CRAM file into the auxiliary files used by the alignment storage.
 * <p>
 * All generated files are named after the input file name and are created inside the
 * output directory:
 * <ul>
 * <li>Encryption and copy of the input file are not implemented (pending).</li>
 * <li>if CREATE_BAM_INDEX: creates the index {@code <outputPath>/<inputFileName>.bai}.</li>
 * <li>if WRITE_ALIGNMENTS: writes the alignments as JSON using
 * {@code <outputPath>/<inputFileName>} as base name.</li>
 * <li>if INCLUDE_COVERAGE: calculates the coverage, plus one mean coverage per size in
 * MEAN_COVERAGE_SIZE_LIST, and writes them as JSON using the same base name.</li>
 * </ul>
 * The JSON writers receive {@code !PLAIN} as a flag, presumably to select gzip output;
 * the exact file suffixes are decided by the writers.
 *
 * @param inputUri  Sorted bam file
 * @param pedigree  Not used
 * @param outputUri Output path where files are created
 * @return {@code inputUri} when neither alignments nor coverage are written; otherwise
 * {@code outputUri} resolved against the file name reported by the alignment writer or,
 * if alignments are not written, by the coverage writer
 * @throws IOException            if the input file or output directory are not valid, or on I/O errors
 * @throws FileFormatException    declared for format problems reported by the BAM/CRAM check
 * @throws StorageEngineException declared by the pipeline contract
 */
@Override
public URI transform(URI inputUri, URI pedigree, URI outputUri)
        throws IOException, FileFormatException, StorageEngineException {
    Path input = Paths.get(inputUri.getPath());
    FileUtils.checkFile(input);
    Path output = Paths.get(outputUri.getPath());
    FileUtils.checkDirectory(output);

    // Check if a BAM file is passed and it is sorted.
    // Only binaries and sorted BAM files are accepted at this point.
    // The stream is opened here, so it is closed here even when the check fails.
    try (FileInputStream inputStream = new FileInputStream(input.toFile())) {
        BamUtils.checkBamOrCramFile(inputStream, input.getFileName().toString(), true);
    }

    ObjectMap options = storageEtlConfiguration.getOptions();
    boolean plain = options.getBoolean(Options.PLAIN.key(), Options.PLAIN.defaultValue());
    boolean createBai = options.getBoolean(Options.CREATE_BAM_INDEX.key(),
            Options.CREATE_BAM_INDEX.defaultValue());
    boolean includeCoverage = options.getBoolean(Options.INCLUDE_COVERAGE.key(),
            Options.INCLUDE_COVERAGE.defaultValue());
    boolean writeJsonAlignments = options.getBoolean(Options.WRITE_ALIGNMENTS.key(),
            Options.WRITE_ALIGNMENTS.defaultValue());
    int regionSize = options.getInt(Options.TRANSFORM_REGION_SIZE.key(),
            Options.TRANSFORM_REGION_SIZE.defaultValue());

    // 1 Encrypt: not implemented, see encrypt(...)

    // 2 Index (bai)
    if (createBai) {
        new BamManager(input).createIndex(output.resolve(input.getFileName().toString() + ".bai"));
    }

    // 3 Calculate coverage and transform
    List<Task<AlignmentRegion>> tasks = new LinkedList<>();

    // Reader and writer creation
    AlignmentDataReader reader = new AlignmentBamDataReader(input, null); // Read from sorted BAM file
    List<DataWriter<AlignmentRegion>> writers = new LinkedList<>();
    String jsonOutputFiles = output.resolve(input.getFileName()).toString();
    String outputFile = null;

    // We set the different coverage size regions
    if (includeCoverage) {
        AlignmentRegionCoverageCalculatorTask coverageCalculatorTask = new AlignmentRegionCoverageCalculatorTask();
        // Fall back to the declared default when the option is not configured, so the
        // calculators registered here agree with the writeMeanCoverage decision below,
        // which already applies that default.
        List<String> meanCoverageSizeList;
        if (options.containsKey(Options.MEAN_COVERAGE_SIZE_LIST.key())) {
            meanCoverageSizeList = options.getAsStringList(Options.MEAN_COVERAGE_SIZE_LIST.key());
        } else {
            meanCoverageSizeList = Options.MEAN_COVERAGE_SIZE_LIST.defaultValue();
        }
        meanCoverageSizeList.forEach(coverageCalculatorTask::addMeanCoverageCalculator);
        tasks.add(coverageCalculatorTask);
    }

    // TODO
    // This must be deleted, alignments are not stored any more in JSON
    if (writeJsonAlignments) {
        AlignmentJsonDataWriter alignmentDataWriter = new AlignmentJsonDataWriter(reader, jsonOutputFiles, !plain);
        writers.add(new AlignmentRegionDataWriter(alignmentDataWriter));
        outputFile = alignmentDataWriter.getAlignmentFilename();
    }

    if (includeCoverage) {
        boolean writeMeanCoverage = !options.getList(Options.MEAN_COVERAGE_SIZE_LIST.key(),
                Options.MEAN_COVERAGE_SIZE_LIST.defaultValue()).isEmpty();
        boolean writeCoverage = options.getBoolean(Options.WRITE_COVERAGE.key(),
                Options.WRITE_COVERAGE.defaultValue());
        AlignmentCoverageJsonDataWriter alignmentCoverageJsonDataWriter =
                new AlignmentCoverageJsonDataWriter(jsonOutputFiles, writeCoverage, writeMeanCoverage, !plain);
        alignmentCoverageJsonDataWriter.setChunkSize(
                options.getInt(Options.TRANSFORM_COVERAGE_CHUNK_SIZE.key(),
                        Options.TRANSFORM_COVERAGE_CHUNK_SIZE.defaultValue()));
        writers.add(alignmentCoverageJsonDataWriter);
        // The alignments file, when written, takes precedence as the returned file.
        if (outputFile == null) {
            outputFile = alignmentCoverageJsonDataWriter.getCoverageFilename();
        }
    }

    if (writers.isEmpty()) {
        logger.warn("No writers for transform-alignments!");
        return inputUri;
    }

    // Runner
    AlignmentRegionDataReader regionReader = new AlignmentRegionDataReader(reader);
    regionReader.setMaxSequenceSize(regionSize);
    Runner<AlignmentRegion> runner = new Runner<>(regionReader, writers, tasks, 1);

    logger.info("Transforming alignments...");
    long start = System.currentTimeMillis();
    runner.run();
    long end = System.currentTimeMillis();
    logger.info("end - start = " + (end - start) / 1000.0 + "s");

    // outputFile is non-null here: every branch that adds a writer also sets it.
    return outputUri.resolve(outputFile);
}
/**
* Post-transform step of the pipeline. This implementation does nothing and
* hands back the given URI unchanged.
*
* @param input URI produced by the transform step
* @return the same {@code input} URI
* @throws IOException declared by the pipeline contract; never thrown by this implementation
* @throws FileFormatException declared by the pipeline contract; never thrown by this implementation
*/
@Override
public URI postTransform(URI input) throws IOException, FileFormatException {
return input;
}
/**
* Connection test hook. This implementation is empty: it performs no check
* and therefore never fails.
*
* @throws StorageEngineException declared by the parent contract; never thrown by this implementation
*/
@Override
public void testConnection() throws StorageEngineException {
}
/**
* Returns the pipeline used to process files. This class implements
* {@link StoragePipeline} itself, so the same instance is returned on every
* call and the {@code connected} flag is ignored.
*
* @param connected unused
* @return this instance
*/
@Override
public StoragePipeline newStoragePipeline(boolean connected) {
return this;
}
/**
 * Optionally copies the BAM file into the output directory. Encryption is not implemented.
 * <p>
 * The destination file name is the name of {@code bamFile} when {@code fileName} is
 * null or empty, and {@code fileName + ".bam"} otherwise.
 *
 * @param encrypt  encryption mode. {@code "aes-256"} is recognised but not supported;
 *                 any other value, including {@code null}, means no encryption
 * @param bamFile  BAM file to copy
 * @param fileName optional base name (without ".bam") for the destination file
 * @param outdir   directory where the copy is created
 * @param copy     whether the file must be copied into {@code outdir}
 * @return the path of the copy when {@code copy} is true, {@code bamFile} otherwise
 * @throws IOException                   if the copy fails, e.g. because the destination already exists
 * @throws UnsupportedOperationException if {@code encrypt} is {@code "aes-256"}
 */
protected Path encrypt(String encrypt, Path bamFile, String fileName, Path outdir, boolean copy) throws IOException {
    logger.info("Copying file. Encryption : " + encrypt);
    long start = System.currentTimeMillis();

    String targetName;
    if (fileName == null || fileName.isEmpty()) {
        targetName = bamFile.getFileName().toString();
    } else {
        targetName = fileName + ".bam";
    }

    // Constant-first comparison: a switch on the String would throw
    // NullPointerException when no encryption mode is given.
    if ("aes-256".equals(encrypt)) {
        // TODO: write an encrypted copy to <outdir>/<targetName>.encrypt
        throw new UnsupportedOperationException("Encryption not supported");
    }

    Path destFile;
    if (copy) {
        destFile = outdir.resolve(targetName);
        Files.copy(bamFile, destFile);
    } else {
        logger.info("copy = false. Don't copy file.");
        destFile = bamFile;
    }

    long end = System.currentTimeMillis();
    logger.info("end - start = " + (end - start) / 1000.0 + "s");
    return destFile;
}
/**
 * Builds a reader for the JSON alignments and header files that belong to the given input.
 * <p>
 * Accepted inputs, all of them with the {@code file} scheme:
 * <ul>
 * <li>{@code *.bam}: the files {@code <input>.alignments.json} and {@code <input>.header.json}
 * are used when they exist, the {@code .gz} variants otherwise.</li>
 * <li>{@code *.alignments.json}: the header is the sibling {@code *.header.json}.</li>
 * <li>{@code *.alignments.json.gz}: the header is the sibling {@code *.header.json.gz}.</li>
 * </ul>
 *
 * @param input URI of the BAM file or of the JSON alignments file
 * @return a reader over the resolved alignments and header files
 * @throws FileNotFoundException if the resolved alignments or header file does not exist
 * @throws IOException           if the URI scheme is not {@code file} (or is missing),
 *                               or the file name has none of the accepted extensions
 */
protected AlignmentJsonDataReader getAlignmentJsonDataReader(URI input) throws IOException {
    // Constant-first comparison: getScheme() returns null for scheme-less URIs, which
    // must be rejected with the IOException below instead of a NullPointerException.
    if (!"file".equals(input.getScheme())) {
        throw new IOException("URI is not a valid path");
    }

    String baseFileName = input.getPath();
    String alignmentFile = baseFileName;
    String headerFile;
    if (baseFileName.endsWith(".bam")) {
        // Prefer the plain JSON files; fall back to the gzipped names when they are absent.
        alignmentFile = baseFileName + (Paths.get(baseFileName + ".alignments.json").toFile().exists()
                ? ".alignments.json"
                : ".alignments.json.gz");
        headerFile = baseFileName + (Paths.get(baseFileName + ".header.json").toFile().exists()
                ? ".header.json"
                : ".header.json.gz");
    } else if (baseFileName.endsWith(".alignments.json")) {
        headerFile = baseFileName.replaceFirst("alignments\\.json$", "header.json");
    } else if (baseFileName.endsWith(".alignments.json.gz")) {
        headerFile = baseFileName.replaceFirst("alignments\\.json\\.gz$", "header.json.gz");
    } else {
        throw new IOException("Invalid input file : " + input.toString());
    }

    if (!Paths.get(alignmentFile).toFile().exists()) {
        throw new FileNotFoundException(alignmentFile);
    }
    if (!Paths.get(headerFile).toFile().exists()) {
        throw new FileNotFoundException(headerFile);
    }
    return new AlignmentJsonDataReader(alignmentFile, headerFile);
}
/**
 * Builds a reader for the JSON coverage and mean-coverage files that belong to the given input.
 * <p>
 * For a {@code *.bam} input the plain {@code .coverage.json} / {@code .mean-coverage.json}
 * names are used when those files exist and the {@code .gz} names otherwise. For a
 * {@code *.coverage.json[.gz]} input the mean-coverage file name is derived from it.
 * The existence of the resolved files is not verified here.
 *
 * @param input path of the BAM file or of the JSON coverage file
 * @return a reader over the resolved files, or {@code null} when the file name has
 * none of the accepted extensions
 */
protected AlignmentCoverageJsonDataReader getAlignmentCoverageJsonDataReader(Path input) {
    final String path = input.toString();
    final String coverageFile;
    final String meanFile;

    if (path.endsWith(".bam")) {
        boolean plainCoverageExists = Paths.get(path + ".coverage.json").toFile().exists();
        coverageFile = path + (plainCoverageExists ? ".coverage.json" : ".coverage.json.gz");

        boolean plainMeanExists = Paths.get(path + ".mean-coverage.json").toFile().exists();
        meanFile = path + (plainMeanExists ? ".mean-coverage.json" : ".mean-coverage.json.gz");
    } else if (path.endsWith(".coverage.json")) {
        coverageFile = path;
        meanFile = path.replaceFirst("coverage\\.json$", "mean-coverage.json");
    } else if (path.endsWith(".coverage.json.gz")) {
        coverageFile = path;
        meanFile = path.replaceFirst("coverage\\.json\\.gz$", "mean-coverage.json.gz");
    } else {
        // Unknown extension: the caller is expected to handle the missing reader.
        return null;
    }

    return new AlignmentCoverageJsonDataReader(coverageFile, meanFile);
}
/**
* Returns the options of the alignment ETL configuration cached by
* {@code setConfiguration}.
* NOTE(review): {@code storageEtlConfiguration} is only assigned in {@code setConfiguration}
* within this class, so this presumably fails with a NullPointerException if the engine
* was never configured - confirm against the parent constructors.
*
* @return the options map held by {@code storageEtlConfiguration}
*/
public ObjectMap getOptions() {
return storageEtlConfiguration.getOptions();
}
}