AnnotationSet.java example

Explorer
BamQC-master
/**
 * Copyright Copyright 2014 Simon Andrews
 *
 *    This file is part of BamQC.
 *
 *    BamQC is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    BamQC is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with BamQC; if not, write to the Free Software
 *    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
/*
 * Changelog: 
 * - Piero Dalle Pezze: Optimised data structures (removed unneeded concurrency), optimised algorithm. 
 * Merged with SeqMonk:AnnotationSet, use of ShortRead for caching.
 * - Simon Andrews: Class creation.
 */
package uk.ac.babraham.BamQC.DataTypes.Genome;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;

import uk.ac.babraham.BamQC.Modules.ModuleConfig;
import net.sf.samtools.SAMRecord;

/**
 * 
 * @author Simon Andrews
 * @author Piero Dalle Pezze
 *
 */
public class AnnotationSet {

	/** The reference file for this annotation set */
	private File file = null;
	
	private ChromosomeFactory factory = new ChromosomeFactory();
	
	private HashMap<String, FeatureClass> features = new HashMap<String, FeatureClass>();
	
	private HashSet<Feature> allFeatures = new HashSet<Feature>();
	
	private final int cacheCapacity = ModuleConfig.getParam("AnnotationSet_annotation_cache_capacity", "ignore").intValue();
	private List<ShortRead> readCache = new ArrayList<ShortRead>(cacheCapacity);

	
	public AnnotationSet() { }
	
	
	public File getFile() {
		return file;
	}

	public void setFile(File file) {
		this.file = file;
	}

	public ChromosomeFactory chromosomeFactory () {
		return factory;
	}
	
	public void addFeature (Feature f) {

		if (!features.containsKey(f.type())) {
			features.put(f.type(), new FeatureClass(this));
			allFeatures.add(f);
		}	
		features.get(f.type()).addFeature(f);
	}
	
	
	public Feature[] getAllFeatures() {
		return allFeatures.toArray(new Feature[0]);
	}
	
	
	
	public boolean hasFeatures () {
		return !features.isEmpty();
	}
	
	
	public String [] listFeatureTypes () {
		return features.keySet().toArray(new String [0]);
	}
	
	
	public FeatureClass getFeatureClassForType (String type) {
		return features.get(type);
	}
	
	
	public void processSequenceNoCache(SAMRecord r) {
		// implementation using ShortRead
		processCachedSequence(new ShortRead(r.getReferenceName(), r.getAlignmentStart(), r.getAlignmentEnd()));
	}
	
	
	public void processSequence (SAMRecord r) {
		// implementation using ShortRead
	    if(readCache.size() < cacheCapacity) {
	    	readCache.add(new ShortRead(r.getReferenceName(), r.getAlignmentStart(), r.getAlignmentEnd()));
	    } else {
	    	flushCache();
	    }       
	}
	

	public void flushCache() {
    	// sort the cache
    	Collections.sort(readCache);
    	// now parse the sorted cache
    	for(int i=0; i < readCache.size(); i++) {
    		processCachedSequence(readCache.get(i));
    	}
    	// let's clear and reuse the array for now, instead of reallocating a new one every time.
    	// Tricky to say what's the best is.. an O(n)remove vs allocation+GC ... 
    	readCache.clear();
	}	

	
	private void processCachedSequence(ShortRead r) {	
		if (!r.getReferenceName().equals("*")) {
			Chromosome c = factory.getChromosome(r.getReferenceName());
			c.processSequence(r);
		}
		for(FeatureClass value : features.values()) {
			value.processSequence(r);
		}
	}


	

		
}