/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.core.tools.accession;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.ArrayUtils;
import org.opencb.biodata.formats.variant.vcf4.VcfRecord;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantAggregatedVcfFactory;
import org.opencb.biodata.models.variant.VariantSource;
import org.opencb.biodata.models.variant.VariantVcfFactory;
import org.opencb.commons.run.Task;
/**
 * Task that annotates every VCF record of a batch with the accession(s) of the
 * variants it describes.
 * <p>
 * Each record is converted into variants, every variant is given an accession
 * (a previously issued one if the same chromosome, start, reference and
 * alternate were seen recently, a freshly generated one otherwise) and the
 * resulting list is stored in the record as an {@code ACC=} INFO field.
 * Every accession is written as {@code globalPrefix + studyPrefix + generated part}.
 *
 * @author Cristina Yenyxe Gonzalez Garcia <cyenyxe@ebi.ac.uk>
 */
public class CreateAccessionTask extends Task<VcfRecord> {

    /** Number of characters in the generated part of an accession. */
    private static final int ACCESSION_LENGTH = 7;

    /** Value the cache of recent genomic positions is constructed with (presumably its capacity). */
    private static final int RECENT_POSITIONS = 10;

    /** Alphabet of the generated part: digits plus upper-case letters other than A, E, I, O and U. */
    private final Character[] validCharacters = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        'B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M',
        'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Y', 'Z'
    };

    private final VariantSource source;
    private final String globalPrefix;
    private final String studyPrefix;

    /**
     * Accessions issued recently, in case they need to be reused. The outer
     * key is "chromosome_start" (see {@link #getKey(Variant)}); the inner map
     * goes from "reference_alternate" (see {@link #getValue(Variant)}) to the
     * generated part of the accession.
     */
    private final LRUCache<String, Map<String, String>> currentAccessions;

    /** Generated part of the accession issued most recently, or the value given at construction. */
    private String lastAccession;

    private final CombinationIterator<Character> iterator;
    private final VariantVcfFactory variantFactory;

    /**
     * Creates a task with priority 0 and no previous accession.
     *
     * @param source       source passed to the variant factory for every record
     * @param globalPrefix text prepended to every accession; {@code null} is treated as empty
     * @param studyPrefix  text written between the global prefix and the generated part;
     *                     {@code null} is treated as empty
     */
    public CreateAccessionTask(VariantSource source, String globalPrefix, String studyPrefix) {
        this(source, globalPrefix, studyPrefix, 0);
    }

    /**
     * Creates a task with the given priority and no previous accession.
     *
     * @param source       source passed to the variant factory for every record
     * @param globalPrefix text prepended to every accession; {@code null} is treated as empty
     * @param studyPrefix  text written between the global prefix and the generated part;
     *                     {@code null} is treated as empty
     * @param priority     priority handed to the {@link Task} constructor
     */
    public CreateAccessionTask(VariantSource source, String globalPrefix, String studyPrefix, int priority) {
        this(source, globalPrefix, studyPrefix, null, priority);
    }

    /**
     * Creates a task with priority 0 that takes a previous accession into account.
     *
     * @param source        source passed to the variant factory for every record
     * @param globalPrefix  text prepended to every accession; {@code null} is treated as empty
     * @param studyPrefix   text written between the global prefix and the generated part;
     *                      {@code null} is treated as empty
     * @param lastAccession generated part of the last accession already in use; ignored
     *                      unless it is exactly 7 characters long
     */
    public CreateAccessionTask(VariantSource source, String globalPrefix, String studyPrefix, String lastAccession) {
        this(source, globalPrefix, studyPrefix, lastAccession, 0);
    }

    /**
     * Creates a fully configured task.
     *
     * @param source        source passed to the variant factory for every record
     * @param globalPrefix  text prepended to every accession; {@code null} is treated as empty
     * @param studyPrefix   text written between the global prefix and the generated part;
     *                      {@code null} is treated as empty
     * @param lastAccession generated part of the last accession already in use; ignored
     *                      unless it is exactly 7 characters long
     * @param priority      priority handed to the {@link Task} constructor
     */
    public CreateAccessionTask(VariantSource source, String globalPrefix, String studyPrefix, String lastAccession, int priority) {
        super(priority);
        this.source = source;
        this.globalPrefix = globalPrefix != null ? globalPrefix : "";
        // Same normalisation as globalPrefix: a null prefix would otherwise be
        // appended to every accession as the literal text "null".
        this.studyPrefix = studyPrefix != null ? studyPrefix : "";
        this.lastAccession = lastAccession;
        this.currentAccessions = new LRUCache<>(RECENT_POSITIONS);

        if (lastAccession != null && lastAccession.length() == ACCESSION_LENGTH) {
            // Hand the previous accession to the iterator (presumably so that
            // generation continues after it -- confirm in CombinationIterator).
            this.iterator = new CombinationIterator<>(ACCESSION_LENGTH, validCharacters,
                    ArrayUtils.toObject(lastAccession.toCharArray()));
        } else {
            this.iterator = new CombinationIterator<>(ACCESSION_LENGTH, validCharacters);
        }

        this.variantFactory = new VariantAggregatedVcfFactory(); // Do not even try to parse the samples, it's useless
    }

    /**
     * Adds an {@code ACC=} INFO field to every record of the batch, listing
     * the comma-separated accessions of the variants created from it.
     *
     * @param batch records to annotate; they are modified in place
     * @return always {@code true}
     * @throws IOException declared by the task contract; not thrown directly by this method
     */
    @Override
    public boolean apply(List<VcfRecord> batch) throws IOException {
        for (VcfRecord record : batch) {
            List<Variant> variants = variantFactory.create(source, record.toString());
            StringBuilder allAccessionsInRecord = new StringBuilder();
            for (Variant v : variants) {
                appendAccession(allAccessionsInRecord, resolveAccession(v));
            }
            // Set accession/s for this record (be it in a new genomic position or not)
            record.addInfoField("ACC=" + allAccessionsInRecord.toString());
        }
        return true;
    }

    /** Builds the outer cache key of a variant: "chromosome_start". */
    private String getKey(Variant v) {
        return v.getChromosome() + "_" + v.getStart();
    }

    /** Builds the inner cache key of a variant: "reference_alternate". */
    private String getValue(Variant v) {
        return v.getReference() + "_" + v.getAlternate();
    }

    /**
     * Returns the generated part of the accession for a variant, reusing the
     * cached one when the same position and alleles were seen recently and
     * generating (and caching) a new one otherwise.
     */
    private String resolveAccession(Variant v) {
        String positionKey = getKey(v);
        String allelesKey = getValue(v);

        Map<String, String> accessionsAtPosition = currentAccessions.get(positionKey);
        if (accessionsAtPosition != null) {
            String known = accessionsAtPosition.get(allelesKey);
            if (known != null) {
                return known;
            }
        }

        lastAccession = nextAccession();
        if (accessionsAtPosition == null) {
            accessionsAtPosition = new HashMap<>();
            currentAccessions.put(positionKey, accessionsAtPosition);
        }
        accessionsAtPosition.put(allelesKey, lastAccession);
        return lastAccession;
    }

    /** Takes the next combination from the iterator and joins its characters into a string. */
    private String nextAccession() {
        Character[] next = (Character[]) iterator.next();
        StringBuilder sb = new StringBuilder(next.length);
        for (Character c : next) {
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * Appends a prefixed accession to the list being built for a record,
     * preceded by a comma when the list already has content.
     */
    private void appendAccession(StringBuilder allAccessionsInRecord, String newAccession) {
        if (allAccessionsInRecord.length() > 0) {
            allAccessionsInRecord.append(",");
        }
        allAccessionsInRecord.append(globalPrefix).append(studyPrefix).append(newAccession);
    }
}