/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.bio;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import fr.ens.biologie.genomique.eoulsan.util.StringUtils;
/**
 * This class defines a GFF entry: the nine columns of a GFF3 or GTF line
 * (sequence id, source, type, start, end, score, strand, phase and
 * attributes), an integer id and a set of multi-valued metadata entries.
 * <p>
 * Unset values are stored internally as an empty string for the textual
 * columns, <code>-1</code> for start, end and phase, <code>NaN</code> for the
 * score and <code>'.'</code> for the strand. They are all written as
 * <code>"."</code> by {@link #toGFF3()} and {@link #toGTF()}.
 * <p>
 * An instance is mutable and is designed to be reused for successive lines:
 * each call to {@link #parseGFF3(String)} or {@link #parseGTF(String)}
 * overwrites the column values and the attributes. The metadata and the id are
 * not touched by parsing nor by {@link #clear()}.
 * @since 1.0
 * @author Laurent Jourdren
 */
public class GFFEntry {

  private static final Pattern SEMI_COMA_SPLIT_PATTERN = Pattern.compile(";");
  private static final Pattern COMA_SPLIT_PATTERN = Pattern.compile(",");

  /** Shared result used when there is no attribute to parse. */
  private static final String[] NO_ATTRIBUTE_FIELDS = new String[0];

  private final Map<String, List<String>> metaData = new LinkedHashMap<>();
  private int id;
  private String seqId;
  private String source;
  private String type;
  private int start;
  private int end;
  private double score;
  private char strand;
  private int phase;

  // Lazily created by setAttributeValue(), so it may be null
  private Map<String, String> attributes;

  // Buffer reused between two calls of parseCommon()
  private String[] parsedFields;

  //
  // Getters
  //

  /**
   * Get the id.
   * @return the id
   */
  public final int getId() {

    return this.id;
  }

  /**
   * Get the seqId.
   * @return the seqId, or an empty string if it is not set
   */
  public final String getSeqId() {

    return this.seqId;
  }

  /**
   * Get the source.
   * @return the source, or an empty string if it is not set
   */
  public final String getSource() {

    return this.source;
  }

  /**
   * Get the type.
   * @return the type, or an empty string if it is not set
   */
  public final String getType() {

    return this.type;
  }

  /**
   * Get the start position.
   * @return the start position, or -1 if it is not set
   */
  public final int getStart() {

    return this.start;
  }

  /**
   * Get the end position.
   * @return the end position, or -1 if it is not set
   */
  public final int getEnd() {

    return this.end;
  }

  /**
   * Get the score.
   * @return the score, or NaN if it is not set
   */
  public final double getScore() {

    return this.score;
  }

  /**
   * Get the strand.
   * @return the strand
   */
  public final char getStrand() {

    return this.strand;
  }

  /**
   * Get the phase.
   * @return the phase, or -1 if it is not set
   */
  public final int getPhase() {

    return this.phase;
  }

  /**
   * Get metadata keys names.
   * @return an unmodifiable view of the metadata keys names
   */
  public final Set<String> getMetadataKeyNames() {

    return Collections.unmodifiableSet(this.metaData.keySet());
  }

  /**
   * Get attributes names.
   * @return an unmodifiable view of the attributes names, never null
   */
  public final Set<String> getAttributesNames() {

    if (this.attributes == null) {
      return Collections.emptySet();
    }

    return Collections.unmodifiableSet(this.attributes.keySet());
  }

  /**
   * Test if a metadata key exists.
   * @param key key name of the metadata
   * @return true if the entry in the metadata exists
   */
  public final boolean isMetaDataEntry(final String key) {

    return key != null && this.metaData.containsKey(key);
  }

  /**
   * Test if an attribute exists.
   * @param attributeName name of the attribute
   * @return true if the attribute exists
   */
  public final boolean isAttribute(final String attributeName) {

    return attributeName != null
        && this.attributes != null
        && this.attributes.containsKey(attributeName);
  }

  /**
   * Get the metadata values for a key.
   * @param key name of the metadata entry
   * @return an unmodifiable view of the values of the metadata entry or null
   *         if the metadata key does not exist
   */
  public final List<String> getMetadataEntryValues(final String key) {

    if (key == null) {
      return null;
    }

    final List<String> values = this.metaData.get(key);

    return values == null ? null : Collections.unmodifiableList(values);
  }

  /**
   * Get attribute value.
   * @param attributeName name of the attribute
   * @return the value of the attribute or null if the attribute name does not
   *         exist
   */
  public final String getAttributeValue(final String attributeName) {

    if (attributeName == null || this.attributes == null) {
      return null;
    }

    return this.attributes.get(attributeName);
  }

  //
  // Setters
  //

  /**
   * Set the id.
   * @param id the id of the entry
   */
  public final void setId(final int id) {

    this.id = id;
  }

  /**
   * Set the seqId. A null value or "." unsets the field, other values are
   * trimmed.
   * @param seqId the sequence id of the entry
   */
  public final void setSeqId(final String seqId) {

    this.seqId = normalizeTextField(seqId);
  }

  /**
   * Set the source of the entry. A null value or "." unsets the field, other
   * values are trimmed.
   * @param source the source of the entry
   */
  public final void setSource(final String source) {

    this.source = normalizeTextField(source);
  }

  /**
   * Set the type of the entry. A null value or "." unsets the field, other
   * values are trimmed.
   * @param type the type of the entry
   */
  public final void setType(final String type) {

    this.type = normalizeTextField(type);
  }

  /**
   * Set the start position of the entry. Any value lower than 1 unsets the
   * field.
   * @param start the start position
   */
  public final void setStart(final int start) {

    this.start = start < 1 ? -1 : start;
  }

  /**
   * Set the end position of the entry. Any value lower than 1 unsets the
   * field.
   * @param end the end position
   */
  public final void setEnd(final int end) {

    this.end = end < 1 ? -1 : end;
  }

  /**
   * Set the score of the entry.
   * @param score the score of the entry
   */
  public final void setScore(final double score) {

    this.score = score;
  }

  /**
   * Set the strand of the entry. Any character other than '.', '+', '-' or
   * '?' is replaced by '.'.
   * @param strand the strand of the entry
   */
  public final void setStrand(final char strand) {

    switch (strand) {

    case '.':
    case '+':
    case '-':
    case '?':
      this.strand = strand;
      break;

    default:
      this.strand = '.';
    }
  }

  /**
   * Set the phase of the entry. Any value outside the range 0 to 2 unsets the
   * field.
   * @param phase the phase
   */
  public final void setPhase(final int phase) {

    this.phase = phase < 0 || phase > 2 ? -1 : phase;
  }

  /**
   * Add metadata entry value. Several values can be stored for the same key.
   * @param key name of key of the metadata entry
   * @param value The value
   * @return true if the value is correctly added to the metadata, false if
   *         the key or the value is null
   */
  public final boolean addMetaDataEntry(final String key, final String value) {

    if (key == null || value == null) {
      return false;
    }

    List<String> values = this.metaData.get(key);

    if (values == null) {
      values = new ArrayList<>();
      this.metaData.put(key, values);
    }

    values.add(value);

    return true;
  }

  /**
   * Add metadata entries values. Stop at first entry that fails to be added:
   * values added before the failure are kept.
   * @param entries the entries to add
   * @return true if all the entries are correctly added to the metadata,
   *         false if the map is null or if a key, a list of values or a value
   *         is null
   */
  public final boolean addMetaDataEntries(
      final Map<String, List<String>> entries) {

    if (entries == null) {
      return false;
    }

    for (Map.Entry<String, List<String>> e : entries.entrySet()) {

      final List<String> values = e.getValue();

      // A null list cannot be added: report a failure instead of throwing a
      // NullPointerException
      if (values == null) {
        return false;
      }

      for (String v : values) {
        if (!addMetaDataEntry(e.getKey(), v)) {
          return false;
        }
      }
    }

    return true;
  }

  /**
   * Set attribute value. An existing value for the attribute is replaced.
   * @param attributeName name of the attribute
   * @param value The value
   * @return true if the value is correctly added to the attributes of the
   *         entry, false if the name or the value is null
   */
  public final boolean setAttributeValue(final String attributeName,
      final String value) {

    if (attributeName == null || value == null) {
      return false;
    }

    if (this.attributes == null) {
      this.attributes = new LinkedHashMap<>();
    }

    this.attributes.put(attributeName, value);

    return true;
  }

  /**
   * Remove a metadata entry with all its values.
   * @param key key of the metadata entry to remove
   * @return true if the entry is removed, false if the key does not exist
   */
  public final boolean removeMetaDataEntry(final String key) {

    // addMetaDataEntry() never stores a null list, so a non null result means
    // that the key was present
    return this.metaData.remove(key) != null;
  }

  /**
   * Remove an attribute.
   * @param attributeName attribute to remove
   * @return true if the attribute is removed, false if the attribute does not
   *         exist
   */
  public final boolean removeAttribute(final String attributeName) {

    // setAttributeValue() never stores a null value, so a non null result
    // means that the attribute was present
    return this.attributes != null
        && this.attributes.remove(attributeName) != null;
  }

  //
  // Other methods
  //

  /**
   * Clear the entry: unset the eight first columns and remove all the
   * attributes. The id and the metadata are left unchanged.
   */
  public final void clear() {

    this.seqId = "";
    this.source = "";
    this.type = "";
    this.start = -1;
    this.end = -1;
    this.score = Double.NaN;
    this.strand = '.';
    this.phase = -1;

    if (this.attributes != null) {
      this.attributes.clear();
    }
  }

  /**
   * Clear metadata of the entry.
   */
  public final void clearMetaData() {

    this.metaData.clear();
  }

  //
  // Test valid entries
  //

  /**
   * Test if the entry is valid. The phase is not checked by this method, see
   * {@link #isValidPhase()}.
   * @return true if the entry is valid
   */
  public final boolean isValidEntry() {

    return this.seqId != null
        && this.source != null && this.type != null && isValidStartAndEnd()
        && isValidStrand();
  }

  /**
   * Test if the start and end position values are valid: the start position
   * must be set and the end position must not be lower than the start
   * position.
   * @return true if the positions are valid
   */
  public final boolean isValidStartAndEnd() {

    // The setters replace any value lower than 1 (including the
    // Integer.MIN_VALUE marker used when parsing fails) by -1, so an unset
    // end is always lower than a valid start
    return this.start >= 1 && this.end >= this.start;
  }

  /**
   * Test if the strand is valid.
   * @return true if the strand is valid
   */
  public final boolean isValidStrand() {

    switch (this.strand) {

    case '+':
    case '-':
    case '.':
    case '?':
      return true;

    default:
      return false;
    }
  }

  /**
   * Test if the phase is valid: the phase must be set for a "CDS" entry and
   * must not be set for the other types.
   * @return true if the phase is valid
   */
  public final boolean isValidPhase() {

    final boolean phaseSet = this.phase != -1;

    return "CDS".equals(this.type) == phaseSet;
  }

  //
  // Parsing / Write methods
  //

  /**
   * Normalize the value of a textual column.
   * @param value the value to normalize
   * @return an empty string if the value is null or ".", the trimmed value
   *         otherwise
   */
  private static String normalizeTextField(final String value) {

    if (value == null || ".".equals(value)) {
      return "";
    }

    return value.trim();
  }

  /**
   * Parse an integer.
   * @param s the string to parse
   * @param defaultValue the value to return if the string cannot be parsed
   * @return the parsed value or the default value if the string is null or is
   *         not an integer
   */
  private static int parseInt(final String s, final int defaultValue) {

    if (s == null) {
      return defaultValue;
    }

    try {
      return Integer.parseInt(s.trim());
    } catch (NumberFormatException e) {
      // Not an error: "." is the regular way to write an unset value
      return defaultValue;
    }
  }

  /**
   * Parse a double.
   * @param s the string to parse
   * @param defaultValue the value to return if the string cannot be parsed
   * @return the parsed value or the default value if the string is null or is
   *         not a number
   */
  private static double parseDouble(final String s, final double defaultValue) {

    if (s == null) {
      return defaultValue;
    }

    try {
      return Double.parseDouble(s.trim());
    } catch (NumberFormatException e) {
      // Not an error: "." is the regular way to write an unset value
      return defaultValue;
    }
  }

  /**
   * Parse the eight first fields of a GFF/GTF string and set the matching
   * values of the entry.
   * @param s the string to parse
   * @return the ninth field (the attributes) that is not parsed by this
   *         method
   * @throws BadBioEntryException if an error occurs while parsing the string
   * @throws IllegalArgumentException if the string to parse is null
   */
  private String parseCommon(final String s) throws BadBioEntryException {

    if (s == null) {
      throw new IllegalArgumentException("String to parse is null");
    }

    // Reuse the same array for all the lines parsed with this object
    if (this.parsedFields == null) {
      this.parsedFields = new String[9];
    } else {
      Arrays.fill(this.parsedFields, null);
    }

    final String[] fields = this.parsedFields;

    // NOTE(review): fastSplit() presumably fills the array with the tab
    // separated fields of the line -- confirm against StringUtils
    try {
      StringUtils.fastSplit(s, fields);
    } catch (ArrayIndexOutOfBoundsException e) {
      throw new BadBioEntryException("Error in GFF parsing line ("
          + s.split("\t").length + " fields, 9 attempted)", s);
    }

    setSeqId(fields[0]);
    setSource(fields[1]);
    setType(fields[2]);

    // Integer.MIN_VALUE is converted to -1 (unset) by the setters
    setStart(parseInt(fields[3], Integer.MIN_VALUE));
    setEnd(parseInt(fields[4], Integer.MIN_VALUE));
    setScore(parseDouble(fields[5], Double.NaN));
    setStrand(fields[6] == null || fields[6].length() == 0
        ? '.' : fields[6].charAt(0));
    setPhase(parseInt(fields[7], -1));

    return fields[8];
  }

  /**
   * Remove the current attributes and split an attribute field around the
   * semicolons.
   * @param attributesField the attribute field
   * @return the raw elements of the field or an empty array if the field is
   *         null, empty or "."
   */
  private String[] resetAndSplitAttributes(final String attributesField) {

    if (this.attributes != null) {
      this.attributes.clear();
    }

    if (attributesField == null
        || "".equals(attributesField) || ".".equals(attributesField)) {
      return NO_ATTRIBUTE_FIELDS;
    }

    return SEMI_COMA_SPLIT_PATTERN.split(attributesField.trim());
  }

  /**
   * Parse the attribute field in GFF3 format (<code>key=value</code> elements
   * separated by semicolons). The elements without '=' are ignored.
   * @param attributesField the attribute field
   */
  private void parseGFF3Attributes(final String attributesField) {

    for (String f : resetAndSplitAttributes(attributesField)) {

      final int indexEquals = f.indexOf('=');

      if (indexEquals == -1) {
        continue;
      }

      final String key = f.substring(0, indexEquals).trim();
      final String value = f.substring(indexEquals + 1).trim();

      setAttributeValue(key, value);
    }
  }

  /**
   * Parse the attribute field in GTF format (<code>key "value"</code>
   * elements separated by semicolons). The elements without space are
   * ignored. When a key occurs several times, its values are joined with
   * commas.
   * @param attributesField the attribute field
   */
  private void parseGTFAttributes(final String attributesField) {

    for (String rawField : resetAndSplitAttributes(attributesField)) {

      final String f = rawField.trim();

      if (f.isEmpty()) {
        continue;
      }

      // The key is separated from its value by the first space
      final int indexSpace = f.indexOf(' ');

      if (indexSpace == -1) {
        continue;
      }

      final String key = f.substring(0, indexSpace).trim();
      final String value = StringUtils
          .unDoubleQuotes(f.substring(indexSpace + 1).trim()).trim();

      if (isAttribute(key)) {
        setAttributeValue(key, getAttributeValue(key) + ',' + value);
      } else {
        setAttributeValue(key, value);
      }
    }
  }

  /**
   * Parse a GFF entry. This method is deprecated, use {@code parseGFF3()}
   * instead.
   * @param s String to parse
   * @throws BadBioEntryException if an error occurs while parsing the string
   * @deprecated use {@link #parseGFF3(String)} instead
   */
  @Deprecated
  public void parse(final String s) throws BadBioEntryException {

    parseGFF3(s);
  }

  /**
   * Parse a GFF3 entry. The previous values of the columns and the previous
   * attributes of the entry are replaced.
   * @param s String to parse
   * @throws BadBioEntryException if an error occurs while parsing the string
   */
  public void parseGFF3(final String s) throws BadBioEntryException {

    parseGFF3Attributes(parseCommon(s));
  }

  /**
   * Parse a GTF entry. The previous values of the columns and the previous
   * attributes of the entry are replaced.
   * @param s String to parse
   * @throws BadBioEntryException if an error occurs while parsing the string
   */
  public void parseGTF(final String s) throws BadBioEntryException {

    parseGTFAttributes(parseCommon(s));
  }

  /**
   * Convert the attributes to a GFF3 string.
   * @return the attributes in the GFF3 format or "." if there is no attribute
   */
  private String attributesToGFF3String() {

    if (this.attributes == null || this.attributes.isEmpty()) {
      return ".";
    }

    final StringBuilder sb = new StringBuilder();

    for (Map.Entry<String, String> e : this.attributes.entrySet()) {

      if (sb.length() > 0) {
        sb.append(';');
      }

      sb.append(StringUtils.protectGFF(e.getKey()));
      sb.append('=');

      // Commas separate the values of a multi-valued attribute: any
      // backslash-escaped comma produced by protectGFF() is restored
      sb.append(StringUtils.protectGFF(e.getValue()).replace("\\,", ","));
    }

    return sb.toString();
  }

  /**
   * Convert the attributes to a GTF string. A value that contains commas is
   * written as several elements with the same key.
   * @return the attributes in the GTF format or "." if there is no attribute
   */
  private String attributesToGTFString() {

    if (this.attributes == null || this.attributes.isEmpty()) {
      return ".";
    }

    final StringBuilder sb = new StringBuilder();
    boolean first = true;

    for (Map.Entry<String, String> e : this.attributes.entrySet()) {

      final String key = e.getKey();

      for (String value : COMA_SPLIT_PATTERN.split(e.getValue())) {

        if (first) {
          first = false;
        } else {
          sb.append("; ");
        }

        sb.append(key);
        sb.append(" \"");
        sb.append(value);
        sb.append('\"');
      }
    }

    return sb.toString();
  }

  /**
   * Convert a textual column to its file representation.
   * @param value the value of the column
   * @return "." if the value is empty, the protected value otherwise
   */
  private static String textFieldToString(final String value) {

    return "".equals(value) ? "." : StringUtils.protectGFF(value);
  }

  /**
   * Convert an integer column to its file representation.
   * @param value the value of the column
   * @return "." if the value is -1, the decimal value otherwise
   */
  private static String intFieldToString(final int value) {

    return value == -1 ? "." : Integer.toString(value);
  }

  /**
   * Convert the eight first columns, that are common to the GFF3 and the GTF
   * formats, to a string.
   * @return the eight first columns, each one followed by a tabulation
   */
  private String commonFieldsToString() {

    final StringBuilder sb = new StringBuilder();

    sb.append(textFieldToString(getSeqId())).append('\t');
    sb.append(textFieldToString(getSource())).append('\t');
    sb.append(textFieldToString(getType())).append('\t');
    sb.append(intFieldToString(getStart())).append('\t');
    sb.append(intFieldToString(getEnd())).append('\t');
    sb.append(
        Double.isNaN(getScore()) ? "." : Double.toString(getScore()))
        .append('\t');
    sb.append(getStrand()).append('\t');
    sb.append(intFieldToString(getPhase())).append('\t');

    return sb.toString();
  }

  /**
   * Convert the entry to a GFF3 line (without end of line).
   * @return the GFF entry in GFF3 format
   */
  public String toGFF3() {

    return commonFieldsToString() + attributesToGFF3String();
  }

  /**
   * Convert the entry to a GTF line (without end of line).
   * @return the GFF entry in GTF format
   */
  public String toGTF() {

    return commonFieldsToString() + attributesToGTFString();
  }

  /**
   * Override toString().
   * @return the GFF entry in GFF3 format
   */
  @Override
  public String toString() {

    return toGFF3();
  }

  //
  // Constructor
  //

  /**
   * Public constructor. All the columns of the new entry are unset.
   */
  public GFFEntry() {

    clear();
  }
}