// $Id: GoUtils.java,v 1.22 2010/06/23 22:22:10 cmzmasek Exp $
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: cmzmasek@yahoo.com
// WWW: www.phylosoft.org/forester
package org.forester.go;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.forester.util.ForesterUtil;
/**
 * Static helper methods for working with GO ids and GO terms: counting
 * category (super term) hits, building id-to-term lookup maps, collecting
 * super terms, and parsing lists of GO ids from text input.
 */
public final class GoUtils {

    private GoUtils() {
        // Static utility class; not meant to be instantiated.
    }

    /**
     * Counts how many times each GO term in 'categories' is equal to, or a
     * (direct or indirect) super term of, the GO terms in 'experiment_set'.
     * <p>
     * Every category is present in the result, with a count of 0 if it was
     * never hit (this includes the case of an empty 'experiment_set').
     * Membership is tested with the sorted set returned by
     * {@link #getAllSuperGoTerms(GoId, Map)}, i.e. via the ordering of GoTerm.
     *
     * @param categories the set of super terms to be counted
     * @param experiment_set the list of GO terms to be analyzed
     * @param all_go_terms all terms in the ontology, keyed by GO id
     * @return a map from the GO id of each category to its count, in the
     *         iteration order of 'categories'
     * @throws IllegalArgumentException if the id of an experiment term, or of
     *         one of its super terms, is not found in 'all_go_terms'
     */
    public static LinkedHashMap<GoId, Integer> countCategories( final List<GoTerm> categories,
                                                                final List<GoTerm> experiment_set,
                                                                final Map<GoId, GoTerm> all_go_terms ) {
        final LinkedHashMap<GoId, Integer> counts = new LinkedHashMap<GoId, Integer>();
        // Zero-initialize up front so that the result lists every category
        // even when 'experiment_set' is empty.
        for( final GoTerm cat : categories ) {
            counts.put( cat.getGoId(), 0 );
        }
        for( final GoTerm experiment_term : experiment_set ) {
            final Set<GoTerm> super_terms = getAllSuperGoTerms( experiment_term.getGoId(), all_go_terms );
            // A term also counts as a hit for itself.
            super_terms.add( experiment_term );
            for( final GoTerm cat : categories ) {
                if ( super_terms.contains( cat ) ) {
                    final GoId cat_id = cat.getGoId();
                    counts.put( cat_id, counts.get( cat_id ) + 1 );
                }
            }
        }
        return counts;
    }

    /**
     * Same as {@link #countCategories(List, List, Map)}, but operating on GO
     * ids instead of GO terms; membership is tested with a hash set of GO ids.
     * <p>
     * Every category is present in the result, with a count of 0 if it was
     * never hit (this includes the case of an empty 'experiment_set').
     *
     * @param categories the ids of the super terms to be counted
     * @param experiment_set the list of GO ids to be analyzed
     * @param all_go_terms all terms in the ontology, keyed by GO id
     * @return a map from each category id to its count, in the iteration
     *         order of 'categories'
     * @throws IllegalArgumentException if an experiment id, or the id of one
     *         of its super terms, is not found in 'all_go_terms'
     */
    public static LinkedHashMap<GoId, Integer> countCategoriesId( final List<GoId> categories,
                                                                  final List<GoId> experiment_set,
                                                                  final Map<GoId, GoTerm> all_go_terms ) {
        final LinkedHashMap<GoId, Integer> counts = new LinkedHashMap<GoId, Integer>();
        // Zero-initialize up front so that the result lists every category
        // even when 'experiment_set' is empty.
        for( final GoId cat : categories ) {
            counts.put( cat, 0 );
        }
        for( final GoId experiment_id : experiment_set ) {
            final Set<GoId> super_ids = new HashSet<GoId>();
            for( final GoTerm term : getAllSuperGoTerms( experiment_id, all_go_terms ) ) {
                super_ids.add( term.getGoId() );
            }
            // An id also counts as a hit for itself.
            super_ids.add( experiment_id );
            for( final GoId cat : categories ) {
                if ( super_ids.contains( cat ) ) {
                    counts.put( cat, counts.get( cat ) + 1 );
                }
            }
        }
        return counts;
    }

    /**
     * Builds a lookup map from GO id to GO term. Each term is registered
     * under its own id and under each of its alternative ids; if an id occurs
     * more than once, the term registered last wins.
     *
     * @param go_terms the terms to index
     * @return a new map from (primary and alternative) GO ids to terms
     */
    public static Map<GoId, GoTerm> createGoIdToGoTermMap( final List<GoTerm> go_terms ) {
        final Map<GoId, GoTerm> go_id_to_term_map = new HashMap<GoId, GoTerm>();
        for( final GoTerm go_term : go_terms ) {
            go_id_to_term_map.put( go_term.getGoId(), go_term );
            for( final GoId alt_id : go_term.getAltIds() ) {
                go_id_to_term_map.put( alt_id, go_term );
            }
        }
        return go_id_to_term_map;
    }

    /**
     * Returns the ids of all (direct and indirect) super terms of the term
     * with the given id.
     *
     * @param go_id the id of the term whose super terms are wanted
     * @param goid_to_term_map all terms in the ontology, keyed by GO id
     * @return a new sorted set of the ids of all super terms
     * @throws IllegalArgumentException if 'go_id', or the id of one of its
     *         super terms, is not found in 'goid_to_term_map'
     */
    public static SortedSet<GoId> getAllSuperGoIds( final GoId go_id, final Map<GoId, GoTerm> goid_to_term_map ) {
        final SortedSet<GoId> ids = new TreeSet<GoId>();
        for( final GoTerm term : GoUtils.getAllSuperGoTerms( go_id, goid_to_term_map ) ) {
            ids.add( term.getGoId() );
        }
        return ids;
    }

    /**
     * Convenience overload of {@link #getAllSuperGoTerms(GoId, Map)} which
     * first builds the id-to-term map from 'go_terms' (on every call).
     *
     * @param go_id the id of the term whose super terms are wanted
     * @param go_terms all terms in the ontology
     * @return a new sorted set of all super terms
     * @throws IllegalArgumentException if 'go_id', or the id of one of its
     *         super terms, does not belong to any term in 'go_terms'
     */
    public static SortedSet<GoTerm> getAllSuperGoTerms( final GoId go_id, final List<GoTerm> go_terms ) {
        final Map<GoId, GoTerm> goid_to_term_map = GoUtils.createGoIdToGoTermMap( go_terms );
        return getAllSuperGoTerms( go_id, goid_to_term_map );
    }

    /**
     * Returns all (direct and indirect) super terms of the term with the
     * given id.
     *
     * @param go_id the id of the term whose super terms are wanted
     * @param goid_to_term_map all terms in the ontology, keyed by GO id
     * @return a new sorted set of all super terms
     * @throws IllegalArgumentException if 'go_id', or the id of one of its
     *         super terms, is not found in 'goid_to_term_map'
     */
    public static SortedSet<GoTerm> getAllSuperGoTerms( final GoId go_id, final Map<GoId, GoTerm> goid_to_term_map ) {
        if ( !goid_to_term_map.containsKey( go_id ) ) {
            throw new IllegalArgumentException( "GO id [" + go_id + "] not found in GO id to term map" );
        }
        final GoTerm go_term = goid_to_term_map.get( go_id );
        return getAllSuperGoTerms( go_term, goid_to_term_map );
    }

    /**
     * Returns all (direct and indirect) super terms of 'go_term'.
     *
     * @param go_term the term whose super terms are wanted
     * @param goid_to_term_map all terms in the ontology, keyed by GO id
     * @return a new sorted set of all super terms
     * @throws IllegalArgumentException if the id of a super term is not
     *         found in 'goid_to_term_map'
     */
    public static SortedSet<GoTerm> getAllSuperGoTerms( final GoTerm go_term, final Map<GoId, GoTerm> goid_to_term_map ) {
        final SortedSet<GoTerm> supers = new TreeSet<GoTerm>();
        collectSuperGoTerms( go_term, goid_to_term_map, supers, new HashSet<GoId>() );
        return supers;
    }

    /**
     * Recursively adds all super terms of 'go_term' to 'supers'.
     * <p>
     * 'expanded_ids' holds the ids of the terms whose super terms have
     * already been (or are currently being) collected. A term reachable via
     * several paths is therefore expanded only once, and the recursion
     * terminates even if the super term relation contains a cycle.
     */
    private static void collectSuperGoTerms( final GoTerm go_term,
                                             final Map<GoId, GoTerm> goid_to_term_map,
                                             final Set<GoTerm> supers,
                                             final Set<GoId> expanded_ids ) {
        final List<GoId> super_go_ids = go_term.getSuperGoIds();
        if ( ( super_go_ids == null ) || super_go_ids.isEmpty() ) {
            return;
        }
        for( final GoId super_go_id : super_go_ids ) {
            if ( !goid_to_term_map.containsKey( super_go_id ) ) {
                throw new IllegalArgumentException( "GO id [" + super_go_id + "] not found in GO id to term map" );
            }
            final GoTerm super_go_term = goid_to_term_map.get( super_go_id );
            supers.add( super_go_term );
            // Keyed on the id of the resolved term, so that a term reached
            // through an alternative id is not expanded a second time.
            if ( expanded_ids.add( super_go_term.getGoId() ) ) {
                collectSuperGoTerms( super_go_term, goid_to_term_map, supers, expanded_ids );
            }
        }
    }

    /**
     * Walks up from 'go_term', at each step following only the first super
     * id of the current term, until a term without super ids is reached, and
     * returns the term visited immediately before that last term. If
     * 'go_term' itself has no super ids, 'go_term' is returned.
     * <p>
     * NOTE: there is no cycle detection; the walk does not terminate if the
     * chain of first super ids loops back on itself.
     *
     * @param go_term the term to start from
     * @param map all terms in the ontology, keyed by GO id
     * @return the second to last term on the walk (or 'go_term')
     * @throws IllegalArgumentException if a super id is not found in 'map'
     */
    public static GoTerm getPenultimateGoTerm( final GoTerm go_term, final Map<GoId, GoTerm> map ) {
        GoTerm current = go_term;
        GoTerm penultimate = go_term;
        while ( hasSuperGoIds( current ) ) {
            penultimate = current;
            current = getFirstSuperGoTerm( current, map );
        }
        return penultimate;
    }

    /**
     * Walks up from 'go_term', at each step following only the first super
     * id of the current term, and returns the first term encountered which
     * has no super ids. If 'go_term' itself has no super ids, 'go_term' is
     * returned.
     * <p>
     * NOTE: there is no cycle detection; the walk does not terminate if the
     * chain of first super ids loops back on itself.
     *
     * @param go_term the term to start from
     * @param map all terms in the ontology, keyed by GO id
     * @return the last term on the walk (or 'go_term')
     * @throws IllegalArgumentException if a super id is not found in 'map'
     */
    public static GoTerm getUltimateGoTerm( final GoTerm go_term, final Map<GoId, GoTerm> map ) {
        GoTerm current = go_term;
        while ( hasSuperGoIds( current ) ) {
            current = getFirstSuperGoTerm( current, map );
        }
        return current;
    }

    /** Returns true if 'go_term' lists at least one super id. */
    private static boolean hasSuperGoIds( final GoTerm go_term ) {
        final List<GoId> super_go_ids = go_term.getSuperGoIds();
        return ( super_go_ids != null ) && !super_go_ids.isEmpty();
    }

    /** Looks up the term for the first super id of 'go_term'; the caller ensures there is one. */
    private static GoTerm getFirstSuperGoTerm( final GoTerm go_term, final Map<GoId, GoTerm> map ) {
        final GoId first_super_id = go_term.getSuperGoIds().get( 0 );
        if ( !map.containsKey( first_super_id ) ) {
            throw new IllegalArgumentException( "GO-id [" + first_super_id + "] not found in map" );
        }
        return map.get( first_super_id );
    }

    /**
     * Parses GO ids, optionally grouped under labels, from line based text.
     * <p>
     * Each line is trimmed. Empty lines, and lines starting with
     * 'start_of_comment_line', are skipped. A line starting with
     * 'start_of_label_line' and followed by label text starts a new group;
     * a label line without any label text is ignored. On any other line the
     * first whitespace separated token is turned into a GO id and added to
     * the current group. Both prefixes are matched literally.
     * <p>
     * NOTE: ids appearing before the first label are not stored separately
     * but end up in the group of the first label. If no label is ever seen,
     * all ids are stored under the empty string. If a label occurs more than
     * once, a later group replaces an earlier one.
     *
     * @param source the input, as accepted by ForesterUtil.obtainReader
     * @param start_of_comment_line prefix marking comment lines; null or
     *        empty to disable comment handling
     * @param start_of_label_line prefix marking label lines; null or empty
     *        to disable label handling
     * @return a new map from label to the list of GO ids of that group,
     *         sorted by label
     * @throws IOException if reading from (or closing) the source fails
     */
    public static SortedMap<String, List<GoId>> parseGoIds( final Object source,
                                                            final String start_of_comment_line,
                                                            final String start_of_label_line ) throws IOException {
        final boolean use_label = !ForesterUtil.isEmpty( start_of_label_line );
        final boolean use_comment = !ForesterUtil.isEmpty( start_of_comment_line );
        // The prefix is quoted because it is a literal (it is also used with
        // startsWith below), not a regular expression.
        final Pattern label_pattern = use_label ? Pattern.compile( Pattern.quote( start_of_label_line )
                + "\\s*(.+?)" ) : null;
        final SortedMap<String, List<GoId>> results = new TreeMap<String, List<GoId>>();
        final BufferedReader reader = ForesterUtil.obtainReader( source );
        try {
            String label = "";
            List<GoId> current_list = new ArrayList<GoId>();
            String line;
            while ( ( line = reader.readLine() ) != null ) {
                line = line.trim();
                if ( ForesterUtil.isEmpty( line ) || ( use_comment && line.startsWith( start_of_comment_line ) ) ) {
                    continue;
                }
                if ( use_label && line.startsWith( start_of_label_line ) ) {
                    final Matcher matcher = label_pattern.matcher( line );
                    if ( matcher.matches() ) {
                        // Close the group of the previous label, if any.
                        if ( !ForesterUtil.isEmpty( label ) ) {
                            results.put( label, current_list );
                            current_list = new ArrayList<GoId>();
                        }
                        label = matcher.group( 1 );
                    }
                }
                else {
                    // Only the first token of the line is used as GO id.
                    final String[] tokens = line.split( "\\s+" );
                    current_list.add( new GoId( tokens[ 0 ] ) );
                }
            }
            // Store the last (or only) group.
            results.put( label, current_list );
            return results;
        }
        finally {
            // Close the reader even if reading or GO id parsing failed.
            reader.close();
        }
    }
}