package org.opencb.opencga.storage.core.alignment.local;
import ga4gh.Reads;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceRecord;
import org.apache.commons.lang3.time.StopWatch;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.ObjectWriter;
import org.ga4gh.models.ReadAlignment;
import org.opencb.biodata.models.alignment.RegionCoverage;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.tools.alignment.AlignmentOptions;
import org.opencb.biodata.tools.alignment.BamManager;
import org.opencb.biodata.tools.alignment.BamUtils;
import org.opencb.biodata.tools.alignment.filters.AlignmentFilters;
import org.opencb.biodata.tools.alignment.filters.SamRecordFilters;
import org.opencb.biodata.tools.alignment.stats.AlignmentGlobalStats;
import org.opencb.biodata.tools.commons.ChunkFrequencyManager;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryResult;
import org.opencb.commons.utils.FileUtils;
import org.opencb.opencga.storage.core.alignment.AlignmentDBAdaptor;
import org.opencb.opencga.storage.core.alignment.iterators.AlignmentIterator;
import org.opencb.opencga.storage.core.alignment.iterators.ProtoAlignmentIterator;
import org.opencb.opencga.storage.core.alignment.iterators.SamRecordAlignmentIterator;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* Created by pfurio on 26/10/16.
*/
public class LocalAlignmentDBAdaptor implements AlignmentDBAdaptor {
private int chunkSize;
private static final int MINOR_CHUNK_SIZE = 1000;
private static final int DEFAULT_CHUNK_SIZE = 1000;
private static final int DEFAULT_WINDOW_SIZE = 1000000;
private static final String COVERAGE_SUFFIX = ".coverage";
private static final String COVERAGE_DATABASE_NAME = "coverage.db";
public LocalAlignmentDBAdaptor() {
this(DEFAULT_CHUNK_SIZE);
}
public LocalAlignmentDBAdaptor(int chunkSize) {
this.chunkSize = chunkSize;
}
@Override
public QueryResult getAllAlignmentsByRegion(List<Region> regions, QueryOptions options) {
throw new UnsupportedOperationException();
}
@Override
public QueryResult getAllAlignmentsByGene(String gene, QueryOptions options) {
throw new UnsupportedOperationException();
}
@Override
public QueryResult getCoverageByRegion(Region region, QueryOptions options) {
throw new UnsupportedOperationException();
}
@Override
public QueryResult getAlignmentsHistogramByRegion(Region region, boolean histogramLogarithm, int histogramMax) {
throw new UnsupportedOperationException();
}
@Override
public QueryResult getAllIntervalFrequencies(Region region, QueryOptions options) {
throw new UnsupportedOperationException();
}
@Override
public QueryResult getAlignmentRegionInfo(Region region, QueryOptions options) {
throw new UnsupportedOperationException();
}
@Override
public QueryResult<ReadAlignment> get(Path path, Query query, QueryOptions options) {
try {
StopWatch watch = new StopWatch();
watch.start();
FileUtils.checkFile(path);
BamManager alignmentManager = new BamManager(path);
AlignmentOptions alignmentOptions = parseQueryOptions(options);
AlignmentFilters<SAMRecord> alignmentFilters = parseQuery(query);
Region region = parseRegion(query);
String queryResultId;
List<ReadAlignment> readAlignmentList;
if (region != null) {
readAlignmentList = alignmentManager.query(region, alignmentFilters, alignmentOptions, ReadAlignment.class);
queryResultId = region.toString();
} else {
readAlignmentList = alignmentManager.query(alignmentFilters, alignmentOptions, ReadAlignment.class);
queryResultId = "Get alignments";
}
// List<String> stringFormatList = new ArrayList<>(readAlignmentList.size());
// for (Reads.ReadAlignment readAlignment : readAlignmentList) {
// stringFormatList.add(readAlignment());
// }
// List<JsonFormat> list = alignmentManager.query(region, alignmentOptions, alignmentFilters, Reads.ReadAlignment.class);
watch.stop();
return new QueryResult(queryResultId, ((int) watch.getTime()), readAlignmentList.size(), readAlignmentList.size(), null, null,
readAlignmentList);
} catch (Exception e) {
e.printStackTrace();
return new QueryResult<>();
}
}
@Override
public ProtoAlignmentIterator iterator(Path path) {
return iterator(path, new Query(), new QueryOptions());
}
@Override
public ProtoAlignmentIterator iterator(Path path, Query query, QueryOptions options) {
return (ProtoAlignmentIterator) iterator(path, query, options, Reads.ReadAlignment.class);
}
@Override
public <T> AlignmentIterator<T> iterator(Path path, Query query, QueryOptions options, Class<T> clazz) {
try {
FileUtils.checkFile(path);
if (query == null) {
query = new Query();
}
if (options == null) {
options = new QueryOptions();
}
BamManager alignmentManager = new BamManager(path);
AlignmentFilters<SAMRecord> alignmentFilters = parseQuery(query);
AlignmentOptions alignmentOptions = parseQueryOptions(options);
Region region = parseRegion(query);
if (region != null) {
if (Reads.ReadAlignment.class == clazz) {
return (AlignmentIterator<T>) new ProtoAlignmentIterator(alignmentManager.iterator(region,
alignmentFilters, alignmentOptions, Reads.ReadAlignment.class));
} else if (SAMRecord.class == clazz) {
return (AlignmentIterator<T>) new SamRecordAlignmentIterator(alignmentManager.iterator(region,
alignmentFilters, alignmentOptions, SAMRecord.class));
}
} else {
if (Reads.ReadAlignment.class == clazz) {
return (AlignmentIterator<T>) new ProtoAlignmentIterator(alignmentManager.iterator(alignmentFilters,
alignmentOptions, Reads.ReadAlignment.class));
} else if (SAMRecord.class == clazz) {
return (AlignmentIterator<T>) new SamRecordAlignmentIterator(alignmentManager.iterator(alignmentFilters,
alignmentOptions, SAMRecord.class));
}
}
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
@Override
public QueryResult<Long> count(Path path, Query query, QueryOptions options) {
StopWatch watch = new StopWatch();
watch.start();
ProtoAlignmentIterator iterator = iterator(path, query, options);
long cont = 0;
while (iterator.hasNext()) {
iterator.next();
cont++;
}
watch.stop();
return new QueryResult<>("Get count", (int) watch.getTime(), 1, 1, "", "", Arrays.asList(cont));
}
@Override
public QueryResult<AlignmentGlobalStats> stats(Path path, Path workspace) throws Exception {
StopWatch watch = new StopWatch();
watch.start();
FileUtils.checkFile(path);
FileUtils.checkDirectory(workspace);
Path statsPath = workspace.resolve(path.getFileName() + ".stats");
AlignmentGlobalStats alignmentGlobalStats;
if (statsPath.toFile().exists()) {
// Read the file of stats
ObjectMapper objectMapper = new ObjectMapper();
alignmentGlobalStats = objectMapper.readValue(statsPath.toFile(), AlignmentGlobalStats.class);
} else {
BamManager alignmentManager = new BamManager(path);
alignmentGlobalStats = alignmentManager.stats();
ObjectMapper objectMapper = new ObjectMapper();
ObjectWriter objectWriter = objectMapper.typedWriter(AlignmentGlobalStats.class);
objectWriter.writeValue(statsPath.toFile(), alignmentGlobalStats);
}
watch.stop();
return new QueryResult<>("Get stats", (int) watch.getTime(), 1, 1, "", "", Arrays.asList(alignmentGlobalStats));
}
@Override
public QueryResult<AlignmentGlobalStats> stats(Path path, Path workspace, Query query, QueryOptions options) throws Exception {
StopWatch watch = new StopWatch();
watch.start();
if (options == null) {
options = new QueryOptions();
}
if (query == null) {
query = new Query();
}
if (options.size() == 0 && query.size() == 0) {
return stats(path, workspace);
}
FileUtils.checkFile(path);
AlignmentOptions alignmentOptions = parseQueryOptions(options);
AlignmentFilters alignmentFilters = parseQuery(query);
Region region = parseRegion(query);
BamManager alignmentManager = new BamManager(path);
AlignmentGlobalStats alignmentGlobalStats = alignmentManager.stats(region, alignmentFilters, alignmentOptions);
watch.stop();
return new QueryResult<>("Get stats", (int) watch.getTime(), 1, 1, "", "", Arrays.asList(alignmentGlobalStats));
}
@Override
public QueryResult<RegionCoverage> coverage(Path path, Path workspace) throws Exception {
QueryOptions options = new QueryOptions();
options.put(QueryParams.WINDOW_SIZE.key(), DEFAULT_WINDOW_SIZE);
options.put(QueryParams.CONTAINED.key(), false);
return coverage(path, workspace, new Query(), options);
}
@Override
public QueryResult<RegionCoverage> coverage(Path path, Path workspace, Query query, QueryOptions options) throws Exception {
StopWatch watch = new StopWatch();
watch.start();
FileUtils.checkFile(path);
if (query == null) {
query = new Query();
}
if (options == null) {
options = new QueryOptions();
}
AlignmentOptions alignmentOptions = parseQueryOptions(options);
AlignmentFilters alignmentFilters = parseQuery(query);
Region region = parseRegion(query);
String queryResultId;
int windowSize;
RegionCoverage coverage = null;
Path coverageDBPath = workspace.toAbsolutePath().resolve(COVERAGE_DATABASE_NAME);
if (!coverageDBPath.toFile().exists()
&& (region == null || (region.getEnd() - region.getStart() > 50 * MINOR_CHUNK_SIZE))) {
createDBCoverage(path, workspace);
}
ChunkFrequencyManager chunkFrequencyManager = new ChunkFrequencyManager(coverageDBPath, chunkSize);
ChunkFrequencyManager.ChunkFrequency chunkFrequency = null;
if (region != null) {
if (region.getEnd() - region.getStart() > 50 * MINOR_CHUNK_SIZE) {
// if region is too big then we calculate the mean. We need to protect this code!
// and query SQLite database
windowSize = options.getInt(QueryParams.WINDOW_SIZE.key(), DEFAULT_WINDOW_SIZE);
chunkFrequency = chunkFrequencyManager.query(region, path, windowSize);
} else {
// if region is small enough we calculate all coverage for all positions dynamically
// calling the biodata alignment manager
BamManager alignmentManager = new BamManager(path);
coverage = alignmentManager.coverage(region, alignmentFilters, alignmentOptions);
}
queryResultId = region.toString();
} else {
// if no region is given we set up the windowSize to default value,
// we should return a few thousands mean values
// and query SQLite database
windowSize = DEFAULT_WINDOW_SIZE;
SAMFileHeader fileHeader = BamUtils.getFileHeader(path);
SAMSequenceRecord seq = fileHeader.getSequenceDictionary().getSequences().get(0);
int arraySize = Math.min(50 * MINOR_CHUNK_SIZE, seq.getSequenceLength());
region = new Region(seq.getSequenceName(), 1, arraySize * MINOR_CHUNK_SIZE);
queryResultId = "Get coverage";
chunkFrequency = chunkFrequencyManager.query(region, path, windowSize);
}
if (coverage == null) {
coverage = new RegionCoverage(region, chunkFrequency.getWindowSize(), chunkFrequency.getValues());
}
watch.stop();
return new QueryResult(queryResultId, ((int) watch.getTime()), 1, 1, null, null, Arrays.asList(coverage));
}
private Region parseRegion(Query query) {
Region region = null;
String regionString = query.getString(QueryParams.REGION.key());
if (regionString != null && !regionString.isEmpty()) {
region = new Region(regionString);
}
return region;
}
private AlignmentFilters<SAMRecord> parseQuery(Query query) {
AlignmentFilters<SAMRecord> alignmentFilters = SamRecordFilters.create();
int minMapQ = query.getInt(QueryParams.MIN_MAPQ.key());
if (minMapQ > 0) {
alignmentFilters.addMappingQualityFilter(minMapQ);
}
return alignmentFilters;
}
private AlignmentOptions parseQueryOptions(QueryOptions options) {
AlignmentOptions alignmentOptions = new AlignmentOptions()
.setContained(options.getBoolean(QueryParams.CONTAINED.key()));
int windowSize = options.getInt(QueryParams.WINDOW_SIZE.key());
if (windowSize > 0) {
alignmentOptions.setWindowSize(windowSize);
}
int limit = options.getInt(QueryParams.LIMIT.key());
if (limit > 0) {
alignmentOptions.setLimit(limit);
}
return alignmentOptions;
}
private void createDBCoverage(Path filePath, Path workspace) throws IOException {
SAMFileHeader fileHeader = BamUtils.getFileHeader(filePath);
Path coverageDBPath = workspace.toAbsolutePath().resolve(COVERAGE_DATABASE_NAME);
ChunkFrequencyManager chunkFrequencyManager = new ChunkFrequencyManager(coverageDBPath);
List<String> chromosomeNames = new ArrayList<>();
List<Integer> chromosomeLengths = new ArrayList<>();
fileHeader.getSequenceDictionary().getSequences().forEach(
seq -> {
chromosomeNames.add(seq.getSequenceName());
chromosomeLengths.add(seq.getSequenceLength());
});
chunkFrequencyManager.init(chromosomeNames, chromosomeLengths);
Path coveragePath = workspace.toAbsolutePath().resolve(filePath.getFileName() + COVERAGE_SUFFIX);
AlignmentOptions options = new AlignmentOptions();
options.setContained(false);
BamUtils.createCoverageWigFile(filePath, coveragePath, 200);
chunkFrequencyManager.loadWigFile(coveragePath, filePath);
}
}