// Feature.java example
//
// Explorer
// biojava-master
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.genome.parsers.gff;

import java.util.HashMap;


/**
 * A Feature corresponds to a single row in a GFF file.
 * <p>
 * A feature stores the raw column values of the row plus two maps: a parsed
 * view of the attribute column (see {@link #getAttribute(String)}) and a
 * free-form user-data map (see {@link #userData()}).
 *
 * @author Hanno Hinsch
 */
public class Feature implements FeatureI {

	private Location mLocation;
	private String mSeqname;
	private String mSource;
	private String mType;
	private double mScore;
	private int mFrame;				// 0, 1, 2; toString() renders -1 as "."
	private String mAttributes;		// raw attribute text (any trailing stuff)
	private HashMap<String, String> mUserMap;

	// Parsed key/value view of mAttributes, filled by initAttributeHashMap().
	HashMap<String,String> attributeHashMap = new HashMap<String,String>();

	/**
	 * Get the sequence name. (GFF field 1). Note that feature objects have
	 * no link or reference to the actual sequence object to which
	 * they refer; they are completely uncoupled.
	 *
	 * @return Sequence name.
	 */
	@Override
	public String seqname() {
		return mSeqname;
	}

	/**
	 * Get source (aka method). (GFF field 2). This is often the name of
	 * the program or procedure that created the features.
	 *
	 * @return Source field.
	 */
	public String source() {
		return mSource;
	}

	/**
	 * Get feature type, such as "exon" or "CDS". (GFF field 3).
	 *
	 * @return Feature type.
	 */
	@Override
	public String type() {
		return mType;
	}

	/**
	 * Get location of feature. Note that feature objects have
	 * no link or reference to the actual sequence object to which
	 * they refer; they are completely uncoupled.
	 *
	 * @return Location of feature.
	 */
	@Override
	public Location location() {
		return mLocation;
	}

	/**
	 * Get score. (GFF field 7). The meaning of the score varies from file to file.
	 *
	 * @return Score value.
	 */
	public double score() {
		return mScore;
	}

	/**
	 * Get frame (aka phase). (GFF field 8). Specifies the offset of the
	 * first nucleotide of the first in-frame codon, assuming this feature
	 * is a dna/rna sequence that codes for a protein. If you intend to use
	 * this field, you probably want to look it up on the web first.
	 *
	 * @return The frame (0, 1, 2).
	 */
	public int frame() {
		return mFrame;
	}

	/**
	 * Get the string of key/value attributes. (GFF field 9). The format and
	 * meaning of this field varies from flavor to flavor of GFF/GTF. This method
	 * simply returns the whole string. Other methods in this class make assumptions
	 * about its format and provide additional utility.
	 *
	 * @return The attribute string.
	 */
	public String attributes() {
		return mAttributes;
	}

	/**
	 * Unavailable: a Feature must be built from field data or from another Feature.
	 */
	@SuppressWarnings("unused")
	private Feature() {
	}

	/**
	 * Make a copy of the specified feature. The attribute string is parsed again
	 * for the copy, and the mappings in the userData() HashMap are copied, so each
	 * feature has independent maps. The Location object is shared with the original.
	 *
	 * @param feature Feature to clone.
	 */
	public Feature(Feature feature) {

		mSeqname = feature.mSeqname;
		mSource = feature.mSource;
		mType = feature.mType;
		mLocation = feature.mLocation;
		mScore = feature.mScore;
		mFrame = feature.mFrame;
		mAttributes = feature.mAttributes;
		initAttributeHashMap();
		mUserMap = new HashMap<String, String>(feature.mUserMap);
	}

	/**
	 * Construct a new Feature from raw data (usually a GFF row).
	 *
	 * @param seqname The sequence name field (field 1).
	 * @param source The source or method field (field 2).
	 * @param type The type of feature field (field 3).
	 * @param location The location of the feature. (calculated from GFF start, end and strand fields).
	 * @param score The score field (field 7). Must not be null; it is unboxed to a primitive double.
	 * @param frame The frame or phase field (field 8).
	 * @param attributes A string of key/value pairs separated by semicolons (field 9).
	 *                   A null value is stored as the empty string.
	 */
	public Feature(String seqname, String source, String type, Location location, Double score, int frame, String attributes) {

		mSeqname = seqname;
		mSource = source;
		mType = type;
		mLocation = location;
		mScore = score;
		mFrame = frame;
		// Normalise null so the attribute helpers (group(), getAttributeOld(), parsing) never dereference null.
		mAttributes = (attributes == null) ? "" : attributes;
		initAttributeHashMap();
		mUserMap = new HashMap<String, String>();
	}

	/**
	 * Get HashMap of user data. Each Feature object has a Java HashMap object
	 * which can be used to annotate the Feature with String keys and String values.
	 * This class does not use or interpret the keys or values.
	 *<br><br>
	 * If a Feature is constructed from data fields, the initial HashMap has no mappings (is empty).
	 * If a Feature is constructed from another Feature, a copy of the mappings is made, so
	 * removing or adding a mapping to one Feature will not affect the other.
	 * The returned map is the feature's own map, not a copy.
	 *
	 * @return The user HashMap.
	 */
	@Override
	public HashMap<String, String> userData() {
		return mUserMap;
	}

	/**
	 * Parse the raw attribute string into attributeHashMap.
	 * <p>
	 * The string is cut at every semicolon. Each chunk is split into key and value
	 * at its FIRST '=' (GFF3 style) or, when it contains no '=', at its first space
	 * (GTF style), so values may themselves contain further separators. Double quotes
	 * are stripped from the value. A chunk without any separator is stored with an
	 * empty value. Chunks whose key is empty (blank chunks, or chunks starting with
	 * the separator) are ignored. When a key occurs more than once the last value wins.
	 */
	private void initAttributeHashMap() {
		if (mAttributes == null) {
			return;
		}
		for (String chunk : mAttributes.split(";")) {
			String attribute = chunk.trim();

			// gtf uses space and gff3 uses =
			int separator = attribute.indexOf('=');
			if (separator == -1) {
				separator = attribute.indexOf(' ');
			}

			String key;
			String value;
			if (separator == -1) {
				// an attribute field could be empty
				key = attribute;
				value = "";
			} else {
				key = attribute.substring(0, separator).trim();
				value = attribute.substring(separator + 1).replace("\"", "").trim();
			}

			if (key.isEmpty()) {
				continue;	// nothing usable, e.g. whitespace after a trailing semicolon
			}
			attributeHashMap.put(key, value);
		}
	}

	/**
	 * Get value of specified attribute key. Returns null if the attribute key has no value (does not exist).
	 * Keys are case-sensitive. Assumes attributes are correctly formatted in GFF style.
	 * Known bug: a semicolon within a quoted value will cause parse failure. Likewise, a
	 * chunk that contains '=' anywhere is always split at its first '=', even if that
	 * '=' is inside a quoted GTF value.
	 *
	 * @param key The key.
	 * @return The corresponding value. Null if the key has no value defined.
	 */
	@Override
	public String getAttribute(String key) {
		return attributeHashMap.get(key);
	}

	/**
	 * Older attribute lookup that scans the raw attribute string instead of the parsed map.
	 * It only recognises space-separated "key value" chunks, each terminated by a semicolon
	 * and followed by a single space. Surrounding double quotes are removed from the value.
	 * Prefer {@link #getAttribute(String)}.
	 *
	 * @param key The key.
	 * @return The corresponding value, or null if no chunk starts with the key.
	 */
	public String getAttributeOld(String key) {
		int start = 0;

		int end = mAttributes.indexOf(';');
		while (0 < end) {
			//find the first word (up to space) in chunk,
			// see if it is this key
			int i = mAttributes.indexOf(' ', start);
			if (0 < i && i < end && mAttributes.substring(start, i).equals(key)) {
				//remove quotes, if needed; the opening and closing quote must be
				//two different characters, otherwise the substring bounds would cross
				boolean quoted = i + 1 < end - 1
						&& mAttributes.charAt(i + 1) == '\"'
						&& mAttributes.charAt(end - 1) == '\"';
				return quoted
						? mAttributes.substring(i + 2, end - 1)
						: mAttributes.substring(i + 1, end);
			}
			start = end + 2;	//skip required semicolon and single space
			end = mAttributes.indexOf(';', start);
		}

		return null;
	}

	/**
	 * Check whether the parsed attributes contain the specified key.
	 *
	 * @param key The key (case-sensitive).
	 * @return True if the key is present.
	 */
	@Override
	public boolean hasAttribute(String key) {
		return attributeHashMap.containsKey(key);
	}

	/**
	 * Check whether the specified attribute key is present with exactly the specified value.
	 *
	 * @param key The key (case-sensitive).
	 * @param value The value to compare against.
	 * @return True if the key is present and its value equals the given value.
	 */
	@Override
	public boolean hasAttribute(String key, String value) {
		String data = getAttribute(key);
		return data != null && data.equals(value);
	}

	/**
	 * Get the first item (everything before first semicolon, if it has one)
	 * in the attribute field, which is assumed to
	 * be a group identifier. This is appropriate for GFF1 files and variants. It is not
	 * appropriate for GTF and GFF2 files, although they may use a named attribute key,
	 * such as "gene_id" or "transcript_id", for grouping.
	 *
	 * @return The group id. Everything before the first semicolon in the attributes string
	 * (minus leading and trailing whitespace).
	 */
	@Override
	public String group() {
		int i = mAttributes.indexOf(';');
		return (i < 0) ? mAttributes.trim() : mAttributes.substring(0, i).trim();
	}

	/**
	 * Render the feature as one tab-separated line: seqname, source, type, start, end,
	 * score, frame and the raw attribute string. A frame of -1 is written as ".".
	 *
	 * @return The tab-separated representation.
	 */
	@Override
	public String toString() {
		String frame = (mFrame == -1) ? "." : Integer.toString(mFrame);

		return mSeqname + '\t'
				+ mSource + '\t'
				+ mType + '\t'
				+ mLocation.start() + '\t'
				+ mLocation.end() + '\t'
				+ Double.toString(mScore) + '\t'
				+ frame + '\t'
				+ mAttributes;
	}

	/**
	 * Former ad-hoc test entry point. It does nothing.
	 *
	 * @param args Ignored.
	 * @throws Exception Never thrown by the current body; kept in the signature for compatibility.
	 * @deprecated This method has no effect and is retained only so existing references still compile.
	 */
	@Deprecated
	public static void main(String[] args)
			throws Exception {
		// intentionally empty
	}

	/**
	 * Get the parsed attribute map. The returned map is the feature's own map, not a copy.
	 *
	 * @return Map from attribute key to attribute value.
	 */
	@Override
	public HashMap<String, String> getAttributes() {
		return attributeHashMap;
	}
}