/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broad.igv.feature.tribble;
import org.apache.log4j.Logger;
import org.broad.igv.Globals;
import org.broad.igv.cli_plugin.Argument;
import org.broad.igv.cli_plugin.LineFeatureDecoder;
import org.broad.igv.cli_plugin.LineFeatureEncoder;
import org.broad.igv.feature.*;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.ui.color.ColorUtilities;
import org.broad.igv.util.StringUtils;
import org.broad.igv.util.collections.MultiMap;
import htsjdk.tribble.Feature;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * Codec that converts between lines of a BED file and {@link BasicFeature} objects.
 * <p/>
 * Decoding is deliberately lenient: only the first two columns (chromosome and start) are
 * required, and parsing of the optional columns stops at the first value that cannot be
 * interpreted, keeping whatever was decoded up to that point. When the codec is configured with
 * {@link FeatureType#SPLICE_JUNCTION} it produces {@link SpliceJunctionFeature} instances, and
 * with {@link FeatureType#GAPPED_PEAK} columns 13-15 are stored as attributes.
 * <p/>
 * Created by IntelliJ IDEA.
 * User: jrobinso
 * Date: Dec 20, 2009
 * Time: 10:15:49 PM
 */
public class IGVBEDCodec extends UCSCCodec<BasicFeature> implements LineFeatureEncoder<Feature>, LineFeatureDecoder<BasicFeature> {

    private static final Logger log = Logger.getLogger(IGVBEDCodec.class);

    /** Separator between attribute entries in a feature description. */
    static final Pattern BR_PATTERN = Pattern.compile("<br>");

    /** Separator between key and value inside one description entry. */
    static final Pattern EQ_PATTERN = Pattern.compile("=");

    /** Genome used to canonicalize chromosome names; may be null, in which case names are used as-is. */
    Genome genome;

    /**
     * Creates a BED codec that is not bound to a genome.
     */
    public IGVBEDCodec() {
        this(null);
    }

    /**
     * Creates a BED codec for plain {@link FeatureType#BED} features.
     *
     * @param genome genome used to canonicalize chromosome names, or null
     */
    public IGVBEDCodec(Genome genome) {
        this(genome, FeatureType.BED);
    }

    /**
     * Creates a codec for the given BED flavour.
     *
     * @param genome      genome used to canonicalize chromosome names, or null
     * @param featureType the kind of feature the decoded lines represent
     */
    public IGVBEDCodec(Genome genome, FeatureType featureType) {
        super(BasicFeature.class, featureType);
        this.genome = genome;
    }

    /**
     * Decodes one BED record that has already been split into columns.
     * <p/>
     * The BED specification requires three columns; this method relaxes that and requires only
     * two, defaulting the end to {@code start + 1}. Columns after the name are optional and are
     * not always to-spec in real files, so parsing stops (returning the feature built so far)
     * when the score or thick-start/thick-end columns are not numeric.
     *
     * @param tokens the columns of one line
     * @return the decoded feature, or null if fewer than two columns are present
     * @throws NumberFormatException if the start, end, or exon block columns are not numeric
     */
    // Intentionally not annotated with @Override (the annotation was commented out in the original).
    public BasicFeature decode(String[] tokens) {
        final int tokenCount = tokens.length;
        if (tokenCount < 2) {
            return null;
        }

        String c = tokens[0];
        String chr = genome == null ? c : genome.getCanonicalChrName(c);

        // BED format, and IGV, use zero-based starts.
        int start = Integer.parseInt(tokens[1]);
        int end = start + 1;
        if (tokenCount > 2) {
            end = Integer.parseInt(tokens[2]);
        }

        final boolean isSpliceJunction = featureType == FeatureType.SPLICE_JUNCTION;
        BasicFeature feature = isSpliceJunction ?
                new SpliceJunctionFeature(chr, start, end) :
                new BasicFeature(chr, start, end);

        // Name (or GFF-style attribute list)
        if (tokenCount > 3) {
            decodeNameColumn(tokens[3], feature);
        }

        // Score
        if (tokenCount > 4) {
            try {
                float score = Float.parseFloat(tokens[4]);
                feature.setScore(score);
                if (isSpliceJunction) {
                    ((SpliceJunctionFeature) feature).setJunctionDepth((int) score);
                }
            } catch (NumberFormatException numberFormatException) {
                // Unexpected, but does not invalidate the previous values.
                // Stop parsing the line here but keep the feature.
                // Don't log, it would just slow parsing down.
                return feature;
            }
        }

        // Strand
        if (tokenCount > 5) {
            feature.setStrand(toStrand(tokens[5]));
        }

        // Thick ends: only applied when they lie inside the feature bounds.
        if (tokenCount > 7) {
            try {
                int thickStart = Integer.parseInt(tokens[6]);
                int thickEnd = Integer.parseInt(tokens[7]);
                if (thickStart >= start && thickEnd <= end) {
                    feature.setThickStart(thickStart);
                    feature.setThickEnd(thickEnd);
                }
            } catch (NumberFormatException e) {
                return feature;
            }
        }

        // Color (gapped peak files do not carry a color in this column)
        if (tokenCount > 8 && featureType != FeatureType.GAPPED_PEAK) {
            String colorString = tokens[8];
            if (colorString.trim().length() > 0 && !colorString.equals(".")) {
                feature.setColor(ColorUtilities.stringToColor(colorString));
            }
        }

        // Exons
        if (tokenCount > 11) {
            createExons(start, tokens, feature, chr, feature.getStrand());

            //todo: some refactoring that allows this hack to be removed
            if (isSpliceJunction) {
                List<Exon> exons = feature.getExons();
                // createExons adds nothing when the size and start lists disagree, so make sure
                // both flanking blocks exist before dereferencing them.
                if (exons != null && exons.size() >= 2) {
                    SpliceJunctionFeature junctionFeature = (SpliceJunctionFeature) feature;
                    junctionFeature.setJunctionStart(start + exons.get(0).getLength());
                    junctionFeature.setJunctionEnd(end - exons.get(1).getLength());
                }
            }
        }

        if (tokenCount > 14 && featureType == FeatureType.GAPPED_PEAK) {
            MultiMap<String, String> attributes = new MultiMap<String, String>();
            attributes.put("Signal Value", tokens[12]);
            attributes.put("pValue (-log10)", tokens[13]);
            attributes.put("qValue (-log10)", tokens[14]);
            feature.setAttributes(attributes);
        } else if (tokenCount > 13 && isSpliceJunction) {
            try {
                // Parse both lists before touching the feature so a bad value leaves it unchanged.
                int[] startFlankingDepths = parseDepths(tokens[12]);
                int[] endFlankingDepths = parseDepths(tokens[13]);
                ((SpliceJunctionFeature) feature).setStartFlankingRegionDepthArray(startFlankingDepths);
                ((SpliceJunctionFeature) feature).setEndFlankingRegionDepthArray(endFlankingDepths);
            } catch (NumberFormatException e) {
                log.error("Error parsing flanking array", e);
            }
        }

        return feature;
    }

    /**
     * Applies the fourth BED column to the feature. With GFF tags enabled the column is parsed as
     * an attribute list and the feature is registered in {@link FeatureDB} under its ID, Alias and
     * Symbol values; otherwise the column is the plain feature name.
     *
     * @param nameToken raw content of the name column
     * @param feature   feature being populated
     */
    private void decodeNameColumn(String nameToken, BasicFeature feature) {
        if (!isGffTags()) {
            String name = nameToken.replace("\"", "");
            if (name.equals(".")) {
                name = ""; // Convention: "." means "no name"
            }
            feature.setName(name);
            feature.setIdentifier(name);
            return;
        }

        MultiMap<String, String> atts = new MultiMap<String, String>();
        tagHelper.parseAttributes(nameToken, atts);

        String name = tagHelper.getName(atts);
        feature.setName(name);

        String id = atts.get("ID");
        if (id != null) {
            FeatureDB.addFeature(id, feature, genome);
            feature.setIdentifier(id);
        } else {
            feature.setIdentifier(name);
        }

        String alias = atts.get("Alias");
        if (alias != null) {
            FeatureDB.addFeature(alias, feature, genome);
        }

        String geneSymbols = atts.get("Symbol");
        if (geneSymbols != null) {
            for (String sym : geneSymbols.split(",")) {
                FeatureDB.addFeature(sym.trim(), feature, genome);
            }
        }

        feature.setAttributes(atts);
    }

    /**
     * Maps the content of a BED strand column to a {@link Strand}.
     *
     * @param strandToken raw column content
     * @return NEGATIVE for a leading '-', POSITIVE for a leading '+', NONE for anything else
     */
    private static Strand toStrand(String strandToken) {
        String trimmed = strandToken.trim();
        char symbol = trimmed.length() == 0 ? ' ' : trimmed.charAt(0);
        switch (symbol) {
            case '-':
                return Strand.NEGATIVE;
            case '+':
                return Strand.POSITIVE;
            default:
                return Strand.NONE;
        }
    }

    /**
     * Parses a comma separated list of integers.
     *
     * @param commaSeparated text such as {@code "3,5,8"}
     * @return the parsed values, in order
     * @throws NumberFormatException if any entry is not an integer
     */
    private static int[] parseDepths(String commaSeparated) {
        String[] parts = commaSeparated.split(",");
        int[] depths = new int[parts.length];
        for (int i = 0; i < parts.length; i++) {
            depths[i] = Integer.parseInt(parts[i]);
        }
        return depths;
    }

    /**
     * Decodes one raw line of a BED file. The line is trimmed and split on whitespace, and the
     * original text is stored on the feature as its representation.
     *
     * @param nextLine a line from the file
     * @return the decoded feature, or null for blank lines, comment / "track" / "browser" lines,
     * and lines with fewer than two columns
     */
    @Override
    public BasicFeature decode(String nextLine) {
        String trimLine = nextLine.trim();
        if (trimLine.length() == 0) {
            return null;
        }
        // Test the trimmed text: it is what gets tokenized below, so leading whitespace must not
        // let a header line through.
        if (trimLine.startsWith("#") || trimLine.startsWith("track") || trimLine.startsWith("browser")) {
            return null;
        }

        String[] tokens = Globals.whitespacePattern.split(trimLine);
        BasicFeature feature = decode(tokens);
        if (feature != null) {
            // decode(String[]) returns null for lines with too few columns.
            feature.setRepresentation(nextLine);
        }
        return feature;
    }

    /**
     * This function returns true iff the File potentialInput can be parsed by this
     * codec.
     * <p/>
     * There is an assumption that there's never a situation where two different Codecs
     * return true for the same file. If this occurs, the recommendation would be to error out.
     * <p/>
     * Note this function must never throw an error. All errors should be trapped
     * and false returned.
     *
     * @param path the file to test for parsability with this codec
     * @return true if potentialInput can be parsed, false otherwise (including for a null path)
     */
    @Override
    public boolean canDecode(String path) {
        if (path == null) {
            return false;
        }
        String lowerPath = path.toLowerCase();
        return lowerPath.endsWith(".bed") || lowerPath.endsWith(".bed.gz");
    }

    /**
     * Builds the exons described by the block columns (10-12) of a BED record and adds them to
     * the feature. Exons are numbered in transcription order, i.e. descending for the negative
     * strand. Nothing is added when the block size and block start lists differ in length.
     *
     * @param start  feature start; block starts are relative to it
     * @param tokens all columns of the record (at least 12)
     * @param gene   feature that receives the exons
     * @param chr    chromosome name to assign to each exon
     * @param strand strand to assign to each exon
     * @throws NumberFormatException if a thick, count, size or start value is not an integer
     */
    private void createExons(int start, String[] tokens, BasicFeature gene, String chr,
                             Strand strand) throws NumberFormatException {

        int cdStart = Integer.parseInt(tokens[6]);
        int cdEnd = Integer.parseInt(tokens[7]);

        int exonCount = Integer.parseInt(tokens[9]);
        String[] exonSizes = Globals.commaPattern.split(tokens[10]);
        String[] startsBuffer = Globals.commaPattern.split(tokens[11]);

        if (startsBuffer.length != exonSizes.length) {
            return;
        }

        final boolean negative = strand == Strand.NEGATIVE;
        int exonNumber = negative ? exonCount : 1;
        final int step = negative ? -1 : 1;

        for (int i = 0; i < startsBuffer.length; i++) {
            int exonStart = start + Integer.parseInt(startsBuffer[i]);
            int exonEnd = exonStart + Integer.parseInt(exonSizes[i]);
            Exon exon = new Exon(chr, exonStart, exonEnd, strand);
            exon.setCodingStart(cdStart);
            exon.setCodingEnd(cdEnd);
            gene.addExon(exon);
            exon.setNumber(exonNumber);
            exonNumber += step;
        }
    }

    /**
     * Encode a feature as a BED string.
     * <p/>
     * If the feature is a {@link BasicFeature} that still carries its original text
     * representation, that text is returned unchanged. Otherwise as many columns are written as
     * are needed to represent the feature: features that are not {@code BasicFeature}s get only
     * chromosome, start and end; further columns are added only when a later column has
     * something to say. Missing name and strand values are written as "." so that no column is
     * empty or whitespace-only.
     *
     * @param feature - feature to encode
     * @return the encoded string
     */
    public String encode(Feature feature) {
        BasicFeature basicFeature = feature instanceof BasicFeature ? (BasicFeature) feature : null;
        if (basicFeature != null) {
            String rep = basicFeature.getRepresentation();
            if (rep != null) {
                return rep;
            }
        }

        StringBuilder buffer = new StringBuilder();
        buffer.append(feature.getChr());
        buffer.append('\t');
        final int featureStart = feature.getStart();
        buffer.append(featureStart);
        buffer.append('\t');
        buffer.append(feature.getEnd());

        if (basicFeature == null) {
            return buffer.toString();
        }

        // Name column. With GFF tags enabled the description (if any) is written as a quoted
        // attribute list instead of the plain name.
        String name = basicFeature.getName();
        String description = isGffTags() ? basicFeature.getDescription() : null;
        boolean hasName = (name != null && name.length() > 0) ||
                (description != null && description.length() > 0);
        if (hasName) {
            buffer.append('\t');
            if (description != null) {
                appendGffAttributes(buffer, description);
            } else {
                buffer.append(name);
            }
        }

        float score = basicFeature.getScore();
        Strand strand = basicFeature.getStrand();
        java.awt.Color color = basicFeature.getColor();
        boolean hasExons = basicFeature.getExonCount() > 0;

        if (Float.isNaN(score) && strand == Strand.NONE && color == null && !hasExons) {
            return buffer.toString();
        }

        // Must have a non-whitespace name column to proceed
        if (!hasName) {
            buffer.append("\t.");
        }
        buffer.append('\t');
        buffer.append(formatScore(score));

        boolean thickStartDiffers = basicFeature.getThickStart() != basicFeature.getStart();
        if (strand == Strand.NONE && color == null && !thickStartDiffers && !hasExons) {
            return buffer.toString();
        }

        // Strand. "." (not a blank) marks "no strand": a whitespace-only column would vanish when
        // the line is split on whitespace again and shift all following columns.
        buffer.append('\t');
        if (strand == Strand.POSITIVE) {
            buffer.append('+');
        } else if (strand == Strand.NEGATIVE) {
            buffer.append('-');
        } else {
            buffer.append('.');
        }

        // Must continue only if the feature has color or exons
        if (color == null && !hasExons) {
            return buffer.toString();
        }

        List<Exon> exons = basicFeature.getExons();
        if (color != null || exons != null) {
            boolean exonsPresent = exons != null && exons.size() > 0;

            int thickStart;
            int thickEnd;
            if (exonsPresent) {
                // Derive the thick region from the first and last coding exons. The defaults are
                // deliberately "inverted" (end / start) for the all-non-coding case.
                thickStart = basicFeature.getEnd(); // This is not a typo
                for (Exon ex : exons) {
                    if (!ex.isNonCoding()) {
                        thickStart = ex.getCdStart();
                        break;
                    }
                }
                thickEnd = basicFeature.getStart(); // Not a typo
                for (int i = exons.size() - 1; i >= 0; i--) {
                    Exon ex = exons.get(i);
                    if (!ex.isNonCoding()) {
                        thickEnd = ex.getCdEnd();
                        break;
                    }
                }
            } else {
                thickStart = basicFeature.getThickStart();
                thickEnd = basicFeature.getThickEnd();
            }

            buffer.append('\t').append(thickStart);
            buffer.append('\t').append(thickEnd);
            buffer.append('\t').append(color == null ? "." : ColorUtilities.colorToString(color));

            // Block count, sizes and starts; written (with their leading tab) only when present so
            // that the line never ends in a dangling separator.
            if (exonsPresent) {
                buffer.append('\t').append(exons.size());
                buffer.append('\t');
                for (Exon exon : exons) {
                    buffer.append(exon.getLength()).append(',');
                }
                buffer.append('\t');
                for (Exon exon : exons) {
                    buffer.append(exon.getStart() - featureStart).append(',');
                }
            }
        }

        if (basicFeature instanceof SpliceJunctionFeature) {
            SpliceJunctionFeature spliceJunctionFeature = (SpliceJunctionFeature) basicFeature;
            int[] startFlanking = spliceJunctionFeature.getStartFlankingRegionDepthArray();
            int[] endFlanking = spliceJunctionFeature.getEndFlankingRegionDepthArray();
            // Both arrays must be present; one without the other is not written.
            if (startFlanking != null && startFlanking.length > 0 && endFlanking != null && endFlanking.length > 0) {
                appendDepths(buffer, startFlanking);
                appendDepths(buffer, endFlanking);
            }
        }

        return buffer.toString();
    }

    /**
     * Writes a feature description of the form {@code key = value<br>key = value<br>} as a quoted
     * {@code key=value;} attribute list, URL-encoding each value. Entries without an '=' are
     * skipped.
     *
     * @param buffer      destination
     * @param description description text, entries separated by {@code <br>}
     */
    private static void appendGffAttributes(StringBuilder buffer, String description) {
        // mRNA<br>ID = LOC_Os01g01010.2<br>Name = LOC_Os01g01010.2<br>Parent = LOC_Os01g01010<br>
        //   becomes  "ID=LOC_Os01g01010.2;Name=LOC_Os01g01010.2;Parent=LOC_Os01g01010;"
        buffer.append('"');
        for (String att : BR_PATTERN.split(description)) {
            String[] kv = EQ_PATTERN.split(att, 2);
            if (kv.length > 1) {
                buffer.append(kv[0].trim());
                buffer.append('=');
                buffer.append(StringUtils.encodeURL(kv[1].trim()));
                buffer.append(';');
            }
        }
        buffer.append('"');
    }

    /**
     * Formats a score for the BED score column. A missing (NaN) score is written as "1000";
     * whole-number scores that fit in an int are written without a fractional part.
     *
     * @param score feature score
     * @return text for the score column
     */
    private static String formatScore(float score) {
        if (Float.isNaN(score)) {
            return "1000";
        }
        // Range check keeps the int cast exact (it also rejects infinities).
        boolean integral = Math.floor(score) == score
                && score >= Integer.MIN_VALUE && score < Integer.MAX_VALUE;
        // Two separate String.valueOf calls: mixing int and float in one conditional expression
        // would promote the int to float and print e.g. "5.0".
        return integral ? String.valueOf((int) score) : String.valueOf(score);
    }

    /**
     * Appends a tab followed by the comma separated values.
     *
     * @param buffer destination
     * @param depths non-empty array of values
     */
    private static void appendDepths(StringBuilder buffer, int[] depths) {
        buffer.append('\t').append(depths[0]);
        for (int i = 1; i < depths.length; i++) {
            buffer.append(',').append(depths[i]);
        }
    }

    /**
     * Counts the tab separated columns of a line.
     *
     * @param line an encoded line
     * @return number of tab separated columns (trailing empty columns are not counted)
     */
    @Override
    public int getNumCols(String line) {
        return line.split("\t").length;
    }

    /**
     * @return always null; no header is produced by this codec
     */
    @Override
    public String getHeader() {
        return null;
    }

    /**
     * No-op: this codec does not use any of the supplied inputs.
     */
    @Override
    public void setInputs(List<String> commands, Map<Argument, Object> argumentMap, Argument argument) {
        //pass
    }
}