package org.opencb.opencga.analysis.execution.plugins.hist;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.execution.plugins.OpenCGAAnalysis;
import org.opencb.opencga.catalog.models.tool.Execution;
import org.opencb.opencga.catalog.models.tool.Manifest;
import org.opencb.opencga.catalog.models.tool.Option;
import org.opencb.opencga.storage.core.manager.variant.VariantStorageManager;
import org.opencb.opencga.storage.core.variant.adaptors.VariantField;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.file.Path;
import java.util.*;
import static java.util.Arrays.asList;
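/**
 * Analysis plugin that writes a tab-separated histogram of variant counts per
 * fixed-size genomic interval (columns: CHR, START, END, COUNT).
 *
 * Created on 15/03/17.
 *
 * Example usage:
 * <pre>
 * new PluginExecutor(catalogManager, sessionId)
 *         .execute(VariantHistogramAnalysis.class, "default", studyId, params);
 * </pre>
 *
 * @author Jacobo Coll &lt;jacobo167@gmail.com&gt;
 */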
public class VariantHistogramAnalysis extends OpenCGAAnalysis {

    /** Identifier of this plugin, as returned by {@link #getIdentifier()}. */
    public static final String PLUGIN_ID = "variant_histogram";
    /** Name of the output-directory option declared in the manifest. */
    public static final String OUTDIR = "outdir";
    /** Name of the optional parameter holding the output file name (resolved against the output directory). */
    public static final String FILENAME = "fileName";
    /** Name of the optional parameter holding the bin width. */
    public static final String INTERVAL = "interval";

    /** Bin width used when the {@link #INTERVAL} parameter is not provided. */
    private static final int DEFAULT_INTERVAL = 1000;

    private final Manifest manifest;

    /**
     * Builds the plugin manifest with a single execution named "default" that declares
     * the {@link #OUTDIR}, {@link #FILENAME} and {@link #INTERVAL} options.
     */
    public VariantHistogramAnalysis() {
        // NOTE(review): the boolean passed to Option is presumably the "required" flag — confirm against Option.
        manifest = new Manifest(null, "0.1.0", PLUGIN_ID, PLUGIN_ID, "", "", "", null, Collections.emptyList(),
                asList(
                        new Execution("default", "default", "",
                                Collections.emptyList(),
                                Collections.emptyList(),
                                OUTDIR,
                                asList(
                                        new Option(OUTDIR, "", true),
                                        new Option(FILENAME, "", false),
                                        new Option(INTERVAL, "", false)
                                ), Collections.emptyList(), null, null)
                ), null, null);
    }

    @Override
    public String getIdentifier() {
        return PLUGIN_ID;
    }

    @Override
    public Manifest getManifest() {
        return manifest;
    }

    /**
     * Plugin entry point: reads the variant query, the output file name and the bin width
     * from {@code params} and delegates to {@link #run(Query, int, Path, String)}.
     *
     * @param input  input files (not used by this analysis)
     * @param outdir output location
     * @param params execution parameters; {@link #INTERVAL} defaults to 1000 when absent
     * @return 0 on success
     * @throws Exception if the query or the output fails
     */
    @Override
    public int run(Map<String, Path> input, Path outdir, ObjectMap params) throws Exception {
        Query query = VariantStorageManager.getVariantQuery(params);
        String fileName = params.getString(FILENAME);
        return run(query, params.getInt(INTERVAL, DEFAULT_INTERVAL), outdir, fileName);
    }

    /**
     * Iterates over the variants matching {@code query} and writes one line per non-empty bin:
     * chromosome, bin start, bin end and number of variants, separated by tabs.
     *
     * <p>The output goes to standard output when {@code outdir} is an existing directory and
     * {@code fileName} is empty; to {@code outdir/fileName} when {@code fileName} is not empty;
     * otherwise to the path {@code outdir} itself.
     *
     * @param query    variant query
     * @param interval bin width; must be positive
     * @param outdir   output directory, or output file when it is not a directory and no file name is given
     * @param fileName name of the output file inside {@code outdir}; may be empty
     * @return 0 on success
     * @throws IllegalArgumentException if {@code interval} is not positive
     * @throws IOException              if writing the histogram fails
     * @throws Exception                if the variant query fails
     */
    protected int run(Query query, int interval, Path outdir, String fileName) throws Exception {
        // A zero interval would divide by zero inside the callback; a negative one yields inverted bins.
        if (interval <= 0) {
            throw new IllegalArgumentException("Histogram interval must be positive, got " + interval);
        }

        //ParallelTaskRunner<Variant, Pair<Region, Integer>> ?
        // Current bin. The empty chromosome guarantees that the first variant opens a new bin.
        Region region = new Region("", 0, 0);
        // Number of variants in the current bin (array so the callback can update it).
        int[] count = {0};

        File file = outdir.toAbsolutePath().toFile();
        boolean stdout = file.isDirectory() && StringUtils.isEmpty(fileName);
        PrintStream out;
        if (stdout) {
            out = System.out;
        } else {
            if (StringUtils.isNotEmpty(fileName)) {
                file = outdir.resolve(fileName).toFile();
            }
            out = new PrintStream(new BufferedOutputStream(new FileOutputStream(file)));
        }

        try {
            out.println("#CHR\tSTART\tEND\tCOUNT");
            // Sorted results are requested: a bin is closed as soon as a variant outside it is seen.
            QueryOptions options = new QueryOptions(QueryOptions.SORT, true)
                    .append(QueryOptions.EXCLUDE, Arrays.asList(VariantField.STUDIES, VariantField.ANNOTATION));
            getVariantStorageManager().iterable(getSessionId()).forEach(query, variant -> {
                if (checkVariant(variant)) {
                    if (region.overlaps(variant.getChromosome(), variant.getStart(), variant.getEnd())) {
                        count[0]++;
                    } else {
                        printBin(out, region, count[0]);
                        // Open the bin that contains the start of this variant.
                        // NOTE(review): the bin end is start + interval; if Region.overlaps treats the end as
                        // inclusive, a variant starting exactly at that position is counted in this bin rather
                        // than in the next one — confirm this is intended.
                        region.setChromosome(variant.getChromosome());
                        region.setStart(variant.getStart() / interval * interval);
                        region.setEnd(region.getStart() + interval);
                        count[0] = 1;
                    }
                }
            }, options);
            // The last bin is never followed by a non-overlapping variant, so it is written here.
            printBin(out, region, count[0]);
            // PrintStream does not throw on I/O errors; checkError() flushes and reports them.
            if (out.checkError()) {
                throw new IOException("Error writing variant histogram"
                        + (stdout ? " to standard output" : " to " + file));
            }
        } finally {
            // System.out is shared with the rest of the process and must stay open.
            if (!stdout) {
                out.close();
            }
        }
        return 0;
    }

    /** Writes one histogram line for {@code region}; bins without variants are skipped. */
    private static void printBin(PrintStream out, Region region, int count) {
        if (count <= 0) {
            return;
        }
        out.print(region.getChromosome());
        out.print('\t');
        out.print(region.getStart());
        out.print('\t');
        out.print(region.getEnd());
        out.print('\t');
        out.print(count);
        out.println();
    }

    /** Filter applied to every variant before counting; currently accepts all variants. */
    private boolean checkVariant(Variant variant) {
        return true;
    }
}