/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1472 $ by $Author: hirenj $ on $Date:: 2009-07-10 #$
*/
package org.eurocarbdb.dataaccess.core;
// stdlib imports
import java.io.Serializable;
import java.util.List;
import java.util.ArrayList;
import java.util.Collections;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.net.URLEncoder;
// stdlib imports
import java.util.*;
import java.io.*;
import java.net.*;
// 3rd party imports
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
// eurocarb imports
import org.eurocarbdb.dataaccess.Eurocarb;
import org.eurocarbdb.dataaccess.EntityManager;
import org.eurocarbdb.dataaccess.core.Reference;
import org.eurocarbdb.application.glycanbuilder.XMLUtils;
// static imports
import static org.eurocarbdb.util.JavaUtils.*;
import static org.eurocarbdb.dataaccess.Eurocarb.getEntityManager;
/**
* Represents a standard journal article or periodical reference.
* Since JournalReferences are unique in the sense that only one
* instance is ever used to implement a specific journal/article
* reference, it is always best to use the {@link #createOrLookup}
* method to find or create JournalReference instances.
*
* @author mjh
*/
public class JournalReference extends Reference implements Serializable
{
    /** Default query base URL for constructing Pubmed links */
    public static final String Default_Pubmed_Query_Url
        = "http://www.ncbi.nlm.nih.gov/sites/entrez"
        + "?EntrezSystem2.PEntrez.Pubmed.SearchBar.Term=";

    private static final Logger log = Logger.getLogger( JournalReference.class );

    /**
    * Used to extract publication year from pubmed HTTP query. Because the
    * leading <tt>.*</tt> is greedy, the <em>last</em> run of 4 digits in
    * the input is the one captured.
    */
    private static final Pattern year_pattern = Pattern.compile(".*([0-9]{4}).*");

    /** Matches a page specification of the form "123" or "123-456". */
    private static final Pattern pages_pattern = Pattern.compile("^([0-9]+)(?:\\-([0-9]+))?$");

    /** Common prefix of the named queries used by this class. */
    private static final String Q = "org.eurocarbdb.dataaccess.core.JournalReference.";

    /** Connect/read timeout, in milliseconds, applied to Pubmed HTTP requests. */
    private static final int Http_Timeout_Millis = 30000;


    //~~~~~~~~~~~~~~~~~~~~~~~~~ FIELDS ~~~~~~~~~~~~~~~~~~~~~~~~~~//

    private int journalReferenceId;

    private Journal journal;

    private Integer pubmedId;

    private String authors;

    private String title;

    private Integer publicationYear;

    private Integer journalVolume;

    private Integer journalStartPage;

    private Integer journalEndPage;

    /** Lazily instantiated from String {@link #authors}*/
    private List<Author> authorlist = null;


    //~~~~~~~~~~~~~~~~~~~~~~ CONSTRUCTORS ~~~~~~~~~~~~~~~~~~~~~~~//

    /** default constructor */
    public JournalReference()
    {
        // default journal reference provider is pubmed.
        setExternalReferenceName("Pubmed");

        // objects of this class are the Journal type by definition.
        setReferenceType( Reference.Type.Journal.toString() );
    }


    //~~~~~~~~~~~~~~~~~~~~~~ STATIC METHODS ~~~~~~~~~~~~~~~~~~~~~//

    /** Returns a count of all {@link JournalReference}s. */
    public static long countJournalReferences()
    {
        // Cast to Number and widen with longValue(): the previous
        // intValue() call silently truncated counts that do not fit an int.
        Number count = (Number) Eurocarb.getEntityManager()
                                        .getQuery( Q + "COUNT_ALL" )
                                        .uniqueResult();

        return (count == null) ? 0L : count.longValue();
    }


    /**
    * Returns the already-stored reference equivalent to this one if there
    * is one, otherwise stores this reference and returns it.
    * <p>
    * An existing reference is searched for first by Pubmed id (only when
    * this reference has a positive Pubmed id), then by citation (only when
    * journal title, year, volume and start page are all present). A lookup
    * for which the key data is missing is skipped rather than attempted.
    * </p>
    *
    * @return the pre-existing reference, or <tt>this</tt> after storing it.
    * @throws Exception if a stored reference was found that agrees with
    *   this one on the lookup key but not on all the fields compared by
    *   {@link #matches}.
    */
    public Reference storeOrLookup() throws Exception
    {
        log.debug("storeOrLookup");

        // search for existing references: by pubmed id...
        if ( this.pubmedId != null && this.pubmedId.intValue() > 0 )
        {
            JournalReference by_id = lookupByPubmedId( this.pubmedId );
            if ( by_id != null )
                return requireFullMatch( by_id );
        }

        // ...then by citation
        String journal_title = (this.journal != null)
                             ? this.journal.getJournalTitle()
                             : null;

        boolean have_citation =  journal_title != null
                              && journal_title.length() > 0
                              && this.publicationYear != null
                              && this.journalVolume != null
                              && this.journalStartPage != null;

        if ( have_citation )
        {
            JournalReference by_citation = lookupByCitation( journal_title,
                                                             this.publicationYear,
                                                             this.journalVolume,
                                                             this.journalStartPage );
            if ( by_citation != null )
                return requireFullMatch( by_citation );
        }

        // create a new reference
        if ( this.journal != null )
            this.journal = this.journal.storeOrLookup();

        EntityManager em = getEntityManager();
        em.store( this );

        return this;
    }


    /**
    * Returns the passed reference if it {@link #matches} this one,
    * otherwise throws.
    */
    private JournalReference requireFullMatch( JournalReference existing ) throws Exception
    {
        if ( ! this.matches( existing ) )
            throw new Exception("The database contains an existing reference that matches only partially");

        return existing;
    }


    /**
    * Returns an existing {@link JournalReference} if the given
    * pubmed id argument matches a JournalReference already in
    * the data store, otherwise returns a new (unsaved)
    * JournalReference object.
    *
    * @throws IllegalArgumentException if the id is null or not positive
    *   (raised by {@link #lookupByPubmedId}).
    */
    public static JournalReference createOrLookup( Integer pubmed_id )
    {
        JournalReference found = lookupByPubmedId( pubmed_id );
        if ( found != null )
            return found;

        if ( log.isDebugEnabled() )
        {
            log.debug( "No journal references with pubmed_id="
                     + pubmed_id
                     + " so creating new JournalReference" );
        }

        JournalReference created = new JournalReference();
        created.setPubmedId( pubmed_id );

        return created;
    }


    /**
    * Returns the list of {@link JournalReference}s that contain
    * the given {@link Author}. The author's citation string is matched
    * as a substring ("%name%") by the BY_AUTHOR named query.
    */
    @SuppressWarnings("unchecked") // cause hibernate is non-generic
    public static List<JournalReference> lookupByAuthor( Author a )
    {
        checkNotNull( a );

        String name = a.toCitationString();

        if ( log.isDebugEnabled() )
        {
            log.debug( "looking up JournalReferences with author='"
                     + name
                     + "'"
                     );
        }

        List<JournalReference> jr_list = (List<JournalReference>)
            Eurocarb.getEntityManager()
                    .getQuery( Q + "BY_AUTHOR" )
                    .setParameter("name", "%" + name + "%" )
                    .list();

        if ( jr_list == null )
        {
            if ( log.isDebugEnabled() )
            {
                log.debug( "No journal references with author="
                         + name
                         );
            }
            return Collections.emptyList();
        }

        return jr_list;
    }


    /**
    * Retrieves a JournalReference by citation, returning null if
    * no reference matches the given parameters.
    *
    * @throws IllegalArgumentException if the year is null or not after
    *   1800, or if volume or page is null or negative. The journal title
    *   is additionally validated by <tt>checkNotNull</tt> and
    *   <tt>checkNotEmpty</tt>.
    */
    public static JournalReference lookupByCitation(
        String journal_title,
        Integer pub_year,
        Integer volume,
        Integer page )
    {
        checkNotNull( journal_title );
        checkNotEmpty( journal_title );

        // explicit null tests: unboxing a null Integer would otherwise
        // surface as an uninformative NullPointerException.
        if ( pub_year == null || pub_year.intValue() <= 1800 )
            throw new IllegalArgumentException(
                "Expecting publication year > 1800");

        if ( volume == null || volume.intValue() < 0 )
            throw new IllegalArgumentException(
                "Volume argument must be >= 0");

        if ( page == null || page.intValue() < 0 )
            throw new IllegalArgumentException(
                "Page argument must be >= 0");

        return (JournalReference)
            Eurocarb.getEntityManager()
                    .getQuery( Q + "BY_JOURNAL_YEAR_VOLUME_PAGE" )
                    .setParameter("title", journal_title )
                    .setParameter("year", pub_year )
                    .setParameter("volume", volume )
                    .setParameter("page", page )
                    .uniqueResult();
    }


    /**
    * Retrieves a JournalReference by Pubmed id.
    *
    * @return the result of the BY_PUBMED_ID named query.
    * @throws IllegalArgumentException if the id is null or not positive.
    */
    public static JournalReference lookupByPubmedId( Integer pubmed_id )
    {
        if ( pubmed_id == null || pubmed_id.intValue() <= 0 )
            throw new IllegalArgumentException(
                "Pubmed id argument must be a positive integer, got " + pubmed_id );

        if ( log.isDebugEnabled() )
            log.debug("looking up JournalReference with pubmed_id=" + pubmed_id );

        return (JournalReference)
            Eurocarb.getEntityManager()
                    .getQuery( Q + "BY_PUBMED_ID" )
                    .setParameter("pubmed_id", pubmed_id )
                    .uniqueResult();
    }


    /**
    * Create an instance of a JournalReference from a given pubmedId.
    * A reference already in the data store is returned as is; otherwise
    * the Pubmed summary record is fetched over HTTP and parsed.
    *
    * @return the reference, or null if the record could not be retrieved
    *   or parsed, or if the id in the retrieved record differs from the
    *   requested one.
    * @throws IllegalArgumentException if the id is not positive.
    */
    public static JournalReference createFromPubmedId(int pubmedId)
    {
        JournalReference result = lookupByPubmedId(pubmedId);
        if (result != null) {
            return result;
        }

        String xml_answer;
        try {
            xml_answer = makeHttpRequest(new URL("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=xml&id=" + pubmedId));
        } catch (Exception e) {
            // best effort: a failed fetch is reported to the caller as null.
            log.warn("Error retrieving Pubmed result",e);
            return null;
        }

        result = createFromPubmed(xml_answer);

        // createFromPubmed returns null for unreadable XML.
        if (result == null || result.getPubmedId().intValue() != pubmedId) {
            return null;
        }

        return result;
    }


    /**
    * Create an instance of JournalReference from an XML representation
    * of a Pubmed record.
    *
    * @return the new (unsaved) reference, or null if the XML could not
    *   be read into a document.
    */
    private static JournalReference createFromPubmed(String data)
    {
        Document pmrec = XMLUtils.read(data);
        if( pmrec==null )
            return null;

        JournalReference ret = new JournalReference();

        // parse journal
        ret.journal = Journal.createOrLookup(getPubmedItem(pmrec,"FullJournalName"));
        if (ret.journal != null && ret.journal.getJournalId() == 0) {
            ret.journal.setJournalAbbrev(getPubmedItem(pmrec,"Source"));
        }

        // parse fields
        ret.pubmedId = getPubmedItemAsInt(pmrec,"Id",0);
        ret.authors = getPubmedItem(pmrec,"Author");
        ret.title = getPubmedItem(pmrec,"Title");
        ret.journalVolume = getPubmedItemAsInt(pmrec,"Volume",0);

        // parse year
        Matcher ym = year_pattern.matcher(getPubmedItem(pmrec,"PubDate"));
        if( ym.matches() )
            ret.publicationYear = Integer.valueOf(ym.group(1));

        // parse pages; both default to 0 when the field is not "N" or "N-M"
        int start = 0;
        int end = 0;
        Matcher pm = pages_pattern.matcher(getPubmedItem(pmrec,"Pages"));
        if( pm.matches() ) {
            String start_text = pm.group(1);
            String end_text = pm.group(2);
            try {
                start = Integer.parseInt(start_text);
                end = ( end_text != null && end_text.length() > 0 )
                    ? expandEndPage(start, start_text, end_text)
                    : start;
            }
            catch ( NumberFormatException e ) {
                // digit run too long for an int: treat as "no page info"
                log.warn("Unparseable page range '" + pm.group(0) + "'", e);
                start = 0;
                end = 0;
            }
        }
        ret.journalStartPage = start;
        ret.journalEndPage = end;

        return ret;
    }


    /**
    * Resolves the end page of a page range whose end may have been
    * abbreviated by dropping the leading digits it shares with the start
    * page, eg: "749-60" means 749-760, "1234-8" means 1234-1238 and
    * "199-03" means 199-203. An end page that is already at least as
    * large as the start page, or has at least as many digits, is
    * returned unchanged.
    *
    * @throws NumberFormatException if a value does not fit an int.
    */
    private static int expandEndPage( int start, String start_text, String end_text )
    {
        int end = Integer.parseInt( end_text );
        if ( end >= start || end_text.length() >= start_text.length() )
            return end;

        // borrow the missing leading digits from the start page
        int borrowed = start_text.length() - end_text.length();
        int expanded = Integer.parseInt( start_text.substring( 0, borrowed ) + end_text );

        if ( expanded < start )
        {
            // the abbreviated digits rolled over (eg: 199-03): carry one
            // into the first borrowed digit.
            int carry = 1;
            for ( int i = 0; i < end_text.length(); i++ )
                carry *= 10;
            expanded += carry;
        }

        return expanded;
    }


    /**
    * Returns the named Pubmed item as an Integer, or the passed default
    * if the item text is not a valid integer (which includes the case
    * where the item is absent and its text is therefore empty).
    */
    private static Integer getPubmedItemAsInt(Document pmrec, String name, Integer _default)
    {
        try
        {
            return Integer.valueOf( getPubmedItem(pmrec,name).trim() );
        }
        catch(NumberFormatException e)
        {
            return _default;
        }
    }


    /**
    * Returns the text of the named item in the passed Pubmed record.
    * Elements whose tag name equals the given name are used if there are
    * any; otherwise <tt>Item</tt> elements whose <tt>Name</tt> attribute
    * equals the given name are used. The texts of multiple hits are
    * joined with "; ".
    *
    * @return the joined text, or the empty string if nothing matched.
    */
    private static String getPubmedItem( Document pmrec, String name )
    {
        StringBuilder ret = new StringBuilder();

        NodeList by_tag = pmrec.getElementsByTagName(name);
        if( by_tag.getLength()>0 )
        {
            // get by tag name
            for( int i=0; i<by_tag.getLength(); i++ )
                appendItemText( ret, by_tag.item(i) );

            return ret.toString();
        }

        // get by attribute
        NodeList items = pmrec.getElementsByTagName("Item");
        for( int i=0; i<items.getLength(); i++ )
        {
            Node item = items.item(i);
            String item_name = XMLUtils.getAttribute(item,"Name");
            if( item_name!=null && item_name.equals(name) )
                appendItemText( ret, item );
        }

        return ret.toString();
    }


    /** Appends the text of the passed node, preceded by "; " if needed. */
    private static void appendItemText( StringBuilder sb, Node item )
    {
        if( sb.length()>0 )
            sb.append("; ");
        sb.append(XMLUtils.getText(item));
    }


    /**
    * Fetches the passed URL and returns the response body as a String.
    * The body is decoded as UTF-8, caches are bypassed, and connecting
    * and reading are each bounded by {@link #Http_Timeout_Millis}.
    *
    * @throws Exception on any I/O failure, including a timeout.
    */
    private static String makeHttpRequest(URL url) throws Exception
    {
        URLConnection urlc = url.openConnection();
        urlc.setUseCaches(false); // Don't look at possibly cached data
        urlc.setConnectTimeout( Http_Timeout_Millis );
        urlc.setReadTimeout( Http_Timeout_Millis );

        InputStream in = urlc.getInputStream();
        try
        {
            Reader reader = new BufferedReader( new InputStreamReader( in, "UTF-8" ) );
            StringBuilder ret = new StringBuilder();
            char[] buffer = new char[4096];
            int count;
            while( (count = reader.read( buffer )) != -1 ) {
                ret.append( buffer, 0, count );
            }
            return ret.toString();
        }
        finally
        {
            // always release the connection's stream, also on failure
            in.close();
        }
    }


    //~~~~~~~~~~~~~~~~~~~~~~~~~~ METHODS ~~~~~~~~~~~~~~~~~~~~~~~~//

    /**
    * Returns true if the passed reference agrees with this one on
    * journal title, pubmed id, authors, title, publication year, volume,
    * start page and end page. Two values agree if they are both null, or
    * both non-null and equal; the first disagreement found is logged at
    * debug level.
    */
    public boolean matches( JournalReference other )
    {
        if( other==null ) {
            log.debug("other is null");
            return false;
        }

        if( !sameJournalTitle(this.journal,other.journal) )
        {
            log.debug("incompatible journals");
            return false;
        }

        if( !nullSafeEquals(this.pubmedId,other.pubmedId) )
        {
            log.debug("incompatible pubmed id");
            return false;
        }

        if( !nullSafeEquals(this.authors,other.authors) )
        {
            log.debug("incompatible authors");
            return false;
        }

        if( !nullSafeEquals(this.title,other.title) )
        {
            log.debug("incompatible title");
            return false;
        }

        if( !nullSafeEquals(this.publicationYear,other.publicationYear) )
        {
            log.debug("incompatible pub year " + this.publicationYear + "<>" + other.publicationYear);
            return false;
        }

        if( !nullSafeEquals(this.journalVolume,other.journalVolume) )
        {
            log.debug("incompatible volume");
            return false;
        }

        if( !nullSafeEquals(this.journalStartPage,other.journalStartPage) )
        {
            log.debug("incompatible start page");
            return false;
        }

        if( !nullSafeEquals(this.journalEndPage,other.journalEndPage) )
        {
            log.debug("incompatible end page");
            return false;
        }

        return true;
    }


    /** True if both arguments are null, or both non-null and equal. */
    private static boolean nullSafeEquals( Object a, Object b )
    {
        return (a == null) ? (b == null) : a.equals( b );
    }


    /** True if both journals are null, or both non-null with agreeing titles. */
    private static boolean sameJournalTitle( Journal a, Journal b )
    {
        if( a==null || b==null )
            return a==b;

        return nullSafeEquals( a.getJournalTitle(), b.getJournalTitle() );
    }


    public int getJournalReferenceId()
    {
        return this.journalReferenceId;
    }


    /**
    * Journal references return their Pubmed ID as their external
    * reference id. @see #getPubmedId
    */
    public String getExternalReferenceId()
    {
        return String.valueOf( getPubmedId() );
    }


    /**
    * Equivalent to {@link #setPubmedId}: the trimmed argument is
    * recorded as the external reference id and, if it differs from the
    * current Pubmed id, parsed and set as the new Pubmed id. An argument
    * that is not a valid integer is logged as a warning and leaves the
    * Pubmed id unchanged.
    */
    public void setExternalReferenceId( String pubmed_id )
    {
        checkNotNull( pubmed_id );

        String id = pubmed_id.trim();
        checkNotEmpty( id );

        super.setExternalReferenceId( id );

        // already in sync; this also ends the mutual recursion with
        // setPubmedId, which calls back into this method.
        if ( id.equals( String.valueOf( getPubmedId() ) ) )
            return;

        try
        {
            setPubmedId( Integer.valueOf( id ) );
        }
        catch ( NumberFormatException e )
        {
            log.warn( "Couldn't get a number from String '"
                    + id
                    + "'"
                    , e
                    );
        }
    }


    /**
    * Returns the {@link Journal} of this {@link JournalReference}.
    */
    public Journal getJournal()
    {
        return this.journal;
    }


    public void setJournal( Journal j )
    {
        this.journal = j;
    }


    /**
    * Returns a typical citation string for a journal reference, eg:
    * <tt>Am J Hum Genet (1985) 37; 749-760</tt>, or null if no journal
    * has been set.
    */
    public String getJournalAsCitationString()
    {
        Journal j = getJournal();
        if ( j == null )
            return null;

        return j.getJournalTitle()
            + " ("
            + publicationYear
            + ") "
            + journalVolume
            + "; "
            + journalStartPage
            + "-"
            + journalEndPage
            ;
    }


    /**
    * Return the PubmedID of this Journal Reference. If there is no pubmed
    * ID return 0. Note that, as a side effect, a null Pubmed id is
    * replaced by 0 in this object.
    */
    public Integer getPubmedId()
    {
        if (this.pubmedId == null) {
            this.pubmedId = Integer.valueOf(0);
        }
        return this.pubmedId;
    }


    /**
    * Sets the Pubmed id and, for a non-null id, records its decimal form
    * as the external reference id. A null id only clears the Pubmed id.
    */
    public void setPubmedId( Integer pubmedId )
    {
        this.pubmedId = pubmedId;

        // nothing sensible to push as an external id for null; passing
        // the text "null" on would only provoke a parse failure.
        if ( pubmedId == null )
            return;

        this.setExternalReferenceId( pubmedId.toString() );
    }


    public String getAuthors()
    {
        return this.authors;
    }


    /** Sets the authors string and discards the cached parsed author list. */
    public void setAuthors( String authors )
    {
        this.authors = authors;
        this.authorlist = null;
    }


    /**
    * Returns the authors of this reference as an unmodifiable list,
    * parsed on first use from the authors string and cached thereafter.
    * Returns an empty list if no authors string has been set.
    */
    public List<Author> getAuthorList()
    {
        if ( this.authorlist != null )
            return Collections.unmodifiableList( authorlist );

        if ( authors == null || authors.length() == 0 )
        {
            log.warn("No authors currently set, returning empty authorlist");
            return Collections.emptyList();
        }

        authorlist = Author.parseAuthorList( authors );

        return Collections.unmodifiableList( authorlist );
    }


    /** Returns the citation strings of all authors, joined with ", ". */
    public String getAuthorListAsCitationString()
    {
        StringBuilder sb = new StringBuilder();
        for( Author a : getAuthorList() )
        {
            if( sb.length() > 0 )
                sb.append(", ");
            sb.append(a.toCitationString());
        }
        return sb.toString();
    }


    /**
    * Returns a short citation String of form "Harrison et al., 2008"
    * (for 3+ authors), "Harrison and Ceroni, 2008" (for 2 authors),
    * or "Harrison, 2008" (for exactly 1 author). Returns the empty
    * string if there are no authors.
    */
    public String getCitationString()
    {
        List<Author> all = getAuthorList();

        if ( all.isEmpty() )
            return "";

        String first = all.get(0).getLastname();

        if ( all.size() == 1 )
            return first + ", " + getPublicationYear();

        if ( all.size() == 2 )
            return first
                + " and "
                + all.get(1).getLastname()
                + ", "
                + getPublicationYear();

        return first + " et al., " + getPublicationYear();
    }


    /** Returns the first author, or null if there are no authors. */
    public Author getFirstAuthor()
    {
        List<Author> all = getAuthorList();
        return all.isEmpty() ? null : all.get( 0 );
    }


    public String getTitle()
    {
        return this.title;
    }


    public void setTitle( String title )
    {
        this.title = title;
    }


    public Integer getPublicationYear()
    {
        return this.publicationYear;
    }


    public void setPublicationYear( Integer year )
    {
        this.publicationYear = year;
    }


    public Integer getJournalVolume()
    {
        return this.journalVolume;
    }


    public void setJournalVolume( Integer journalVolume )
    {
        this.journalVolume = journalVolume;
    }


    public Integer getFirstPage()
    {
        return this.journalStartPage;
    }


    public void setFirstPage( Integer page_number )
    {
        this.journalStartPage = page_number;
    }


    public Integer getLastPage()
    {
        return this.journalEndPage;
    }


    public void setLastPage( Integer page_number )
    {
        this.journalEndPage = page_number;
    }


    /**
    * Returns the URL of this reference. An URL already held by the
    * superclass is returned as is. Otherwise a Pubmed query URL is built
    * from the Pubmed id if it is positive, else from the journal citation
    * string, and is remembered through <tt>setUrl</tt>. The query base
    * is the 'pubmed.query.url' property, falling back to
    * {@link #Default_Pubmed_Query_Url}.
    *
    * @return the URL, or null if there is neither a Pubmed id nor a
    *   citation to build one from.
    */
    public String getUrl()
    {
        String existing = super.getUrl();
        if ( existing != null )
            return existing;

        Integer id = this.getPubmedId();
        int pubmed_id = (id != null) ? id.intValue() : 0;
        String citation = this.toCitationString();

        if ( pubmed_id <= 0 && (citation == null || citation.length() == 0) )
        {
            log.warn("cannot return an url for JournalReference: insufficient data");
            return null;
        }

        String base_url = Eurocarb.getProperty("pubmed.query.url");
        if ( base_url == null || base_url.length() == 0 )
        {
            log.debug("Property 'pubmed.query.url' not defined, using default value");
            base_url = Default_Pubmed_Query_Url;
        }

        try
        {
            String query = ( pubmed_id > 0 )
                         ? pubmed_id + " [uid]"
                         : citation;

            String url = base_url + URLEncoder.encode( query, "UTF-8" );

            if ( log.isTraceEnabled() )
                log.trace("pubmed url is " + url );

            this.setUrl( url );

            return url;
        }
        catch ( java.io.UnsupportedEncodingException ignored )
        {
            log.warn( ignored );
            return null;
        }
    }


    /**
    * Hash code over the same fields as {@link #equals}: pubmed id, year,
    * volume, start page, end page and journal.
    */
    public int hashCode()
    {
        StringBuilder unique = new StringBuilder();
        unique.append( pubmedId )
              .append( publicationYear )
              .append( journalVolume )
              .append( journalStartPage )
              .append( journalEndPage );

        if ( journal != null )
            unique.append( journal.hashCode() );

        return unique.toString().hashCode();
    }


    /**
    * Two JournalReferences are equal if their pubmed id, publication
    * year, volume, start page, end page and journal are each either
    * both null or equal.
    */
    public boolean equals( Object x )
    {
        if ( this == x ) {
            return true;
        }
        if ( ! (x instanceof JournalReference) ) {
            return false;
        }

        // objects are the same class
        final JournalReference r = (JournalReference) x;

        // each field is compared on its own: chaining unparenthesised
        // "a ? b : c && ..." expressions nests the conditionals, because
        // ?: binds more loosely than &&.
        return nullSafeEquals( r.pubmedId, this.pubmedId )
            && nullSafeEquals( r.publicationYear, this.publicationYear )
            && nullSafeEquals( r.journalVolume, this.journalVolume )
            && nullSafeEquals( r.journalStartPage, this.journalStartPage )
            && nullSafeEquals( r.journalEndPage, this.journalEndPage )
            && nullSafeEquals( r.journal, this.journal )
            ;
    }


    /**
    * Returns this journal reference as a formatted text journal
    * reference citation, led by the first author's last name, followed
    * by "et al." if there is more than one author.
    * eg: <code>Harrison et al., (1993) Adv Microb Physiol 35: 135-246</code>
    *
    * @return the citation, without author part if there are no authors,
    *   or null if no journal has been set.
    */
    public String toCitationString2()
    {
        Journal j = getJournal();
        if ( j == null )
            return null;

        String auth_str = "";
        Author first = getFirstAuthor();
        if( first != null )
        {
            auth_str = ( getAuthorList().size() > 1 )
                     ? first.getLastname() + " et al., "
                     : first.getLastname() + ", ";
        }

        return auth_str
            + "("
            + getPublicationYear()
            + ") "
            + j.getJournalTitle()
            + " "
            + getJournalVolume()
            + ": "
            + getFirstPage()
            + "-"
            + getLastPage()
            ;
    }


    /**
    * Returns this journal reference as a formatted text journal
    * reference citation, <strong>without</strong> author names,
    * ie: just the journal reference.
    * eg: <code>Adv Microb Physiol (1993) 35: 135-246</code>
    *
    * @return the citation, or null if no journal has been set.
    */
    public String toCitationString()
    {
        Journal j = this.getJournal();
        if ( j == null )
            return null;

        return j.getJournalTitle()
            + " ("
            + getPublicationYear()
            + ") "
            + getJournalVolume()
            + ": "
            + getFirstPage()
            + "-"
            + getLastPage()
            ;
    }


    //~~~~~~~~~~~~~~~~~~~~~~ PRIVATE METHODS ~~~~~~~~~~~~~~~~~~~~~~~~

    /** Internal use only! */
    void setJournalReferenceId( int journalReferenceId )
    {
        this.journalReferenceId = journalReferenceId;
    }

} // end class