/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1147 $ by $Author: glycoslave $ on $Date:: 2009-06-04 #$
*/
package org.eurocarbdb.util.mesh;
// 3rd party imports
import org.apache.log4j.Logger;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import org.eurocarbdb.dataaccess.core.Reference;
import org.eurocarbdb.dataaccess.BasicEurocarbObject;
/**
* Abstract base class for data objects based on
* <a href="http://www.nlm.nih.gov/mesh/">MeSH</a>.
*
* <p>Concrete subclasses supply the MeSH identifier and the full description
* text; this class provides helpers that derive a brief description from it
* and that turn embedded MeSH references in free text into HTML links.</p>
*
* @author mjh
*/
public abstract class MeshReference extends BasicEurocarbObject
{
    /* enum Category *//*******************************************
    *
    * Enumeration of known MeSH categories. Each constant carries the
    * single-character category id and a human-readable name.
    *
    */
    public enum Category
    {
        Anatomy( 'A', "Anatomy" ),
        Organisms( 'B', "Organisms" ),
        Diseases( 'C', "Diseases" ),
        Chemicals_Drugs( 'D', "Chemicals and Drugs" )
        /* there are others but they're just not listed here */
        ;

        /** Single-character identifier of this category. */
        public final char id;

        /**
        * Human-readable category name. Note this public field is distinct
        * from the inherited {@link Enum#name()} method, which returns the
        * constant's identifier.
        */
        public final String name;

        Category( char id, String name ) { this.id = id; this.name = name; }
    }

    /** Logging handle. */
    static final Logger log = Logger.getLogger( MeshReference.class );

    //~~~ STATIC FIELDS ~~~//

    /** Pattern to match an embedded MeSH reference. This is typically
    * just a sequence of one or more upper-case words LIKE THIS. These
    * are references to other MeSH entries, and are typically only found
    * in the DESCRIPTION field of an entry.
    * <p>
    * As written, the first word must be at least 4 upper-case letters
    * long and each following word at least 3, with consecutive words
    * separated by at most one whitespace character or hyphen.
    * </p>
    */
    public static final Pattern Regexp_Mesh_Reference
        = Pattern.compile("\\b([A-Z]{4,}(?:[\\s-]?[A-Z]{3,})*)\\b");

    //~~~ FIELDS ~~~//

    //private String uniqueId;

    /** MeSH category of this reference; not populated by this class. */
    protected Category category = null; // TODO

    //~~~ STATIC METHODS ~~~//

    /* markupMeshReferencesAsHTML *//******************************
    *
    * Returns a HTML marked-up version of the embedded MeSH references
    * found in a passed string. Every match of
    * {@link #Regexp_Mesh_Reference} is lower-cased and replaced by a
    * hyperlink whose query term is the URL-encoded, lower-cased match;
    * all other text is copied through unchanged.
    *
    * @param string
    *   the text to mark up; must not be null (checked by an assertion).
    * @return
    *   the marked-up text, or the passed string itself if it is empty.
    * @see #Regexp_Mesh_Reference
    */
    public static final String markupMeshReferencesAsHTML( String string )
    {
        assert string != null;
        if ( string.length() == 0 ) return string;

        Matcher m = Regexp_Mesh_Reference.matcher( string );

        // Matcher.appendReplacement takes a StringBuffer; the StringBuilder
        // overload only exists from Java 9 onwards.
        StringBuffer sb = new StringBuffer();

        while ( m.find() )
        {
            // Use a fixed locale: with the default locale, 'I' would be
            // lower-cased to a dotless 'ı' on e.g. Turkish systems, which
            // would corrupt both the query term and the link text.
            String term = m.group().toLowerCase( java.util.Locale.ENGLISH );

            try
            {
                String link
                    = "<a href=\"http://www.nlm.nih.gov/cgi/mesh/2006/MB_cgi?term="
                    + java.net.URLEncoder.encode( term, "UTF-8" )
                    + "\" title=\"MeSH reference for '"
                    + term
                    + "'\" >"
                    + term
                    + "</a>";

                // Quote the replacement so that it is always inserted
                // literally, never interpreted as containing '$n' group
                // references or backslash escapes.
                m.appendReplacement( sb, Matcher.quoteReplacement( link ) );
            }
            catch ( java.io.UnsupportedEncodingException e )
            {
                // Not expected for UTF-8. Stay best-effort: the match is
                // left as plain text (it is copied through by the next
                // append call), but the failure is no longer silent.
                log.error( "Could not URL-encode MeSH reference '" + term + "'", e );
            }
        }

        m.appendTail( sb );
        return sb.toString();
    }

    //~~~ CONSTRUCTORS ~~~//

    //~~~ METHODS ~~~//

    /* getBriefDescription *//*************************************
    *
    * Returns a brief description of this MeSH reference: the text of
    * {@link #getDescription()} up to, but not including, its first
    * full stop ('.').
    *
    * @return
    *   the empty string if the description is null or empty; the whole
    *   description if it contains no full stop; otherwise the text
    *   preceding the first full stop.
    * @see #getDescription()
    */
    public String getBriefDescription()
    {
        String desc = this.getDescription();
        if ( desc == null || desc.length() == 0 ) return "";

        int fullstop = desc.indexOf('.');
        return ( fullstop == -1 ) ? desc : desc.substring( 0, fullstop );
    }

    /** Returns the full description text of this MeSH reference. */
    public abstract String getDescription();

    /* @see BiologicalContextAssociation#getExternalReference */
    /**
    * Returns the external reference for this MeSH entry.
    *
    * @return currently always null (not yet implemented).
    */
    public Reference getExternalReference()
    {
        // TODO - return a MeshReference object here
        return null;
    }

    /** Returns the MeSH identifier of this reference. */
    public abstract String getMeshId();

} // end class