package org.genedb.crawl.elasticsearch.index.gff;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import uk.ac.sanger.artemis.io.GFFStreamFeature;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
public class GFFFeature {
// Log4j logger for this class; note it is an instance field, not a static one.
private Logger logger = Logger.getLogger(GFFFeature.class);
/** Column 1 of the tab-separated line. */
public String seqid;
/** Column 2 of the tab-separated line. */
public String source;
/** Column 3 of the tab-separated line. */
public String type;
/** Column 4 parsed as an integer, minus one (set by the constructor). */
public int start;
/** Column 5 parsed as an integer, stored unchanged. */
public int end;
/** Column 6, kept as the raw string (it is never parsed as a number here). */
public String score;
/** Column 7 mapped through {@link Strand#fromText(String)}; null if the symbol is not recognised. */
public Strand strand;
/** Column 8 mapped through {@link Phase#fromText(String)}; null if the symbol is not recognised. */
public Phase phase;
/** Attributes parsed from column 9; the map holds a back-reference to this feature so it can set {@link #id}. */
public GFFAttributeMap attributes = new GFFAttributeMap(this);
/** Value of the "id" attribute, assigned by {@link GFFAttributeMap#put(String, String)} when that key is stored. */
public String id;
/**
 * Strand column of a GFF line, each constant carrying the symbol used in the file.
 */
public enum Strand {
    POSITIVE ("+"),
    NEGATIVE ("-"),
    NOT_STRANDED ("."),
    UNKNOWN ("?");

    /** Symbol that represents this strand in the GFF text. */
    private final String text;

    Strand(String text) {
        this.text = text;
    }

    /**
     * @return the symbol for this strand ("+", "-", "." or "?")
     */
    public String getStrand() {
        return text;
    }

    /**
     * @return 1 for the positive strand, -1 for the negative strand, 0 for anything else
     */
    public int getStrandInt() {
        switch (this) {
            case POSITIVE:
                return 1;
            case NEGATIVE:
                return -1;
            default:
                return 0;
        }
    }

    /**
     * Looks up the constant whose symbol matches the supplied text.
     *
     * @param text the strand symbol, may be null
     * @return the matching constant, or null when the text is null or matches no symbol
     */
    public static Strand fromText(String text) {
        if (text == null) {
            return null;
        }
        for (Strand candidate : values()) {
            if (candidate.text.equalsIgnoreCase(text)) {
                return candidate;
            }
        }
        return null;
    }
}
/**
 * Phase column of a GFF line, each constant carrying the symbol used in the file.
 * {@link #NULL} stands for the "." placeholder, i.e. no phase given.
 */
public enum Phase {
    ZERO ("0"),
    ONE ("1"),
    TWO ("2"),
    NULL (".");

    /** Symbol that represents this phase in the GFF text. */
    private final String text;

    Phase(String text) {
        this.text = text;
    }

    /**
     * @return the symbol for this phase ("0", "1", "2" or ".")
     */
    public String getPhase() {
        return text;
    }

    /**
     * Returns the numeric value of this phase.
     *
     * @return 0, 1 or 2 for the numeric phases, or null for {@link #NULL}
     */
    public Integer getPhaseInt() {
        // Compare constants by identity: comparing the String symbol against
        // an enum constant with equals() can never be true.
        if (this == NULL) {
            return null;
        }
        return Integer.valueOf(text);
    }

    /**
     * Looks up the constant whose symbol matches the supplied text.
     *
     * @param text the phase symbol, may be null
     * @return the matching constant, or null when the text is null or matches no symbol
     */
    public static Phase fromText(String text) {
        if (text != null) {
            for (Phase b : Phase.values()) {
                if (text.equalsIgnoreCase(b.text)) {
                    return b;
                }
            }
        }
        return null;
    }
}
/**
 * Parses one tab-separated GFF line, including its attribute column.
 *
 * @param line a single GFF feature line
 * @throws NumberFormatException if column 4 or 5 is not an integer
 * @throws ArrayIndexOutOfBoundsException if the line has fewer than eight columns
 */
public GFFFeature(String line) {
    this(line, true);
}

/**
 * Parses one tab-separated GFF line.
 *
 * <p>Columns 1-8 are stored in {@code seqid}, {@code source}, {@code type},
 * {@code start}, {@code end}, {@code score}, {@code strand} and {@code phase}.
 * The start is stored as the parsed column value minus one; the end is stored
 * unchanged. Strand and phase are null when their symbol is not recognised.
 *
 * @param line a single GFF feature line
 * @param parseAttributes whether column 9 should be parsed into {@code attributes}
 * @throws NumberFormatException if column 4 or 5 is not an integer
 * @throws ArrayIndexOutOfBoundsException if the line has fewer than eight columns
 */
public GFFFeature(String line, boolean parseAttributes) {
    String[] columns = line.split("\t");

    seqid = columns[0];
    source = columns[1];
    type = columns[2];
    start = Integer.parseInt(columns[3]) - 1;
    end = Integer.parseInt(columns[4]);
    score = columns[5];
    strand = Strand.fromText(columns[6]);
    phase = Phase.fromText(columns[7]);

    // String.split drops trailing empty fields, so an empty attribute column
    // yields only eight entries; treat that as "no attributes" instead of
    // failing on columns[8].
    if (parseAttributes && columns.length > 8) {
        String attrs = columns[8];
        // Make sure the last key=value pair is terminated before parsing.
        if (!attrs.endsWith(";")) {
            attrs += ";";
        }
        this.attributes.parseAttributes(attrs);
    }
}
/**
 * Renders the feature as a tab-separated line: the eight fixed columns
 * followed by the attributes as {@code key=value} pairs joined with ';'.
 *
 * <p>Strand and phase are written using their GFF symbols; when either is
 * null (its input symbol was not recognised) the "." placeholder is written.
 * Attribute values are written via their own {@code toString()}.
 *
 * <p>NOTE(review): {@code start} is written exactly as stored, i.e. one less
 * than the value the constructor read, so the output is not byte-identical
 * to the parsed line. Confirm whether callers expect that.
 *
 * @return the tab-separated representation of this feature
 */
@Override
public String toString() {
    StringBuilder s = new StringBuilder();
    s.append(seqid).append('\t');
    s.append(source).append('\t');
    s.append(type).append('\t');
    s.append(start).append('\t');
    s.append(end).append('\t');
    s.append(score).append('\t');
    // "." is the GFF placeholder for an undefined column value.
    s.append(strand == null ? "." : strand.getStrand()).append('\t');
    s.append(phase == null ? "." : phase.getPhase()).append('\t');

    String[] attrs = new String[attributes.map.size()];
    int i = 0;
    for (Entry<String, Object> entry : attributes.map.entrySet()) {
        attrs[i++] = entry.getKey() + "=" + String.valueOf(entry.getValue());
    }
    s.append(StringUtils.join(attrs, ";"));
    return s.toString();
}
/**
 * Insertion-ordered map of attributes parsed from a ';'-separated list of
 * {@code key=value} pairs. Values are either plain strings or, for values
 * containing both ';' and ',', a nested {@link GFFAttributeMapList}.
 */
public class GFFAttributeMap {

    /** Parsed attributes, keyed by lower-cased attribute name, in insertion order. */
    public Map<String,Object> map = new LinkedHashMap<String, Object>();

    /** Whether each attribute is passed through {@code GFFStreamFeature.decode} before being split. */
    public boolean decode = true;

    /** Feature whose {@code id} is updated when an "id" attribute is stored; may be null. */
    public GFFFeature feature;

    /**
     * Creates a map that is not linked to any feature.
     */
    public GFFAttributeMap() {
        // deliberately empty
    }

    /**
     * Creates a map linked to a feature, so that storing an "id" attribute
     * also sets that feature's {@code id}.
     *
     * @param feature the owning feature
     */
    public GFFAttributeMap(GFFFeature feature) {
        this.feature = feature;
    }

    /**
     * Stores one attribute.
     *
     * <p>The key is lower-cased. Null or empty values are ignored. A value
     * containing both ';' and ',' is parsed into a nested
     * {@link GFFAttributeMapList}. Any other value has one pair of matching
     * surrounding single or double quotes stripped (and is ignored if nothing
     * remains) before being stored as a string.
     *
     * @param key the attribute name
     * @param obj the attribute value, may be null
     */
    public void put (String key, String obj) {
        // Use a fixed locale so that "ID" always lower-cases to "id",
        // independent of the JVM default locale.
        key = key.toLowerCase(java.util.Locale.ENGLISH);

        if (obj == null || obj.length() == 0) {
            return;
        }

        if (obj.contains(";") && obj.contains(",")) {
            GFFAttributeMapList list = new GFFAttributeMapList();
            list.parseAttributes(obj);
            map.put(key, list);
            return;
        }

        String quote = "\"";
        String quote2 = "'";
        boolean doubleQuoted = obj.startsWith(quote) && obj.endsWith(quote);
        boolean singleQuoted = obj.startsWith(quote2) && obj.endsWith(quote2);
        if (doubleQuoted || singleQuoted) {
            // Nothing but the quotes themselves: treat as an empty value.
            if (obj.length() <= 2) {
                return;
            }
            obj = obj.substring(1, obj.length() - 1);
        }

        map.put(key, obj);

        // attempt to store an ID for this feature.
        if (key.equals("id") && this.feature != null) {
            this.feature.id = obj;
        }
    }

    /**
     * Parses a ';'-separated list of {@code key=value} pairs and stores each
     * pair via {@link #put(String, String)}.
     *
     * <p>Empty entries and entries without '=' are skipped. A final entry
     * that is not followed by ';' is parsed as well, and a leading ';' does
     * not stop the remaining entries from being read.
     *
     * @param attrs the attribute text
     */
    public void parseAttributes(String attrs) {
        int start = 0;
        int end = attrs.indexOf(';');
        // indexOf returns -1 when there is no further separator; 0 is a valid
        // position (leading ';') and must not end the scan.
        while (end >= 0) {
            parseAttribute(attrs.substring(start, end));
            start = end + 1;
            end = attrs.indexOf(';', start);
        }
        // Whatever follows the last ';' is an unterminated final entry.
        if (start < attrs.length()) {
            parseAttribute(attrs.substring(start));
        }
    }

    /** Decodes (if enabled) and stores a single {@code key=value} entry; ignores blank or '='-less entries. */
    private void parseAttribute(String raw) {
        String attribute = raw.trim();
        if (attribute.length() == 0) {
            return;
        }
        if (decode) {
            attribute = GFFStreamFeature.decode(attribute);
        }
        int equals = attribute.indexOf('=');
        if (equals == -1) {
            return;
        }
        put(attribute.substring(0, equals), attribute.substring(equals + 1));
    }

    /**
     * Concatenates every entry as {@code key=value} with no separator between
     * entries; when {@link #decode} is set the whole result is passed through
     * {@code GFFStreamFeature.encode}.
     *
     * <p>NOTE(review): there is no delimiter between consecutive entries, so
     * the output is not re-parseable by {@link #parseAttributes(String)} --
     * confirm whether this is only meant for diagnostics.
     *
     * @return the concatenated entries
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (Entry<String,Object> entry : map.entrySet()) {
            sb.append(entry.getKey());
            sb.append("=");
            sb.append(entry.getValue());
        }
        if (decode) {
            return GFFStreamFeature.encode(sb.toString());
        }
        return sb.toString();
    }
}
/**
 * List of attribute maps parsed from a single attribute value that holds
 * several ','-separated groups of ';'-separated {@code key=value} pairs.
 */
public class GFFAttributeMapList {

    /** One attribute map per ','-separated group, in input order. */
    public List<GFFAttributeMap> list = new ArrayList<GFFAttributeMap>();

    /**
     * Splits the text on ',' and parses each group into its own
     * {@link GFFAttributeMap} (with decoding switched off), appending the
     * maps to {@link #list}.
     *
     * @param attrs the ','-separated groups of attributes
     */
    public void parseAttributes(String attrs) {
        for (String segment : attrs.split(",")) {
            GFFAttributeMap map = new GFFAttributeMap();
            map.decode = false;
            // Terminate the group with ';' -- the same normalisation the
            // feature constructor applies to the attribute column -- so the
            // group's final key=value pair is not left unterminated.
            String terminated = segment.endsWith(";") ? segment : segment + ";";
            map.parseAttributes(terminated);
            list.add(map);
        }
    }

    /**
     * Concatenates the string form of every contained map, with no separator
     * between them.
     *
     * @return the concatenated map strings
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (GFFAttributeMap map : list) {
            sb.append(map.toString());
        }
        return sb.toString();
    }
}
}