// PathVisio,
// a tool for data visualization and analysis using Biological Pathways
// Copyright 2006-2009 BiGCaT Bioinformatics
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package org.bridgedb.util.hmdb;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bridgedb.DataSource;
import org.bridgedb.Xref;
import org.bridgedb.bio.BioDataSource;
/**
* Parse Metabocards from Hmdb
*/
public class ParseHmdb
{
/**
* represents some information from
* a single HMDB metabocard
*/
public static class Compound
{
private static final Xref[] EMPTY = new Xref[0];
private Xref[] makeXrefs (DataSource ds, String value)
{
if (value == null) return EMPTY;
String[] fields = value.split("; ");
Xref[] result = new Xref[fields.length];
for (int i = 0; i < fields.length; ++i)
{
if (ds == BioDataSource.CHEBI)
result[i] = new Xref("CHEBI:" + fields[i], ds);
else
result[i] = new Xref(fields[i], ds);
}
return result;
}
/** store a key, value field */
void storeField (String key, String value)
{
// check for "Not Available" lines
if (value.equals ("Not Available"))
{
value = null;
}
if (key.equals("name"))
{
symbol = value;
}
else if (key.equals("chemical_formula"))
{
formula = value;
}
else if (key.equals("kegg_compound_id"))
{
idKegg = makeXrefs (BioDataSource.KEGG_COMPOUND, value);
}
else if (key.equals("biocyc_id"))
{
idBioc = makeXrefs (BioDataSource.BIOCYC, value);
}
else if (key.equals("pubchem_compound_id"))
{
idPubchem = makeXrefs (BioDataSource.PUBCHEM_COMPOUND, value);
}
else if (key.equals("chebi_id"))
{
idChebi = makeXrefs (BioDataSource.CHEBI, value);
}
else if (key.equals("cas_number"))
{
idCas = makeXrefs (BioDataSource.CAS, value);
}
else if (key.equals("synonyms"))
{
synonyms = value == null ? null : value.split("; ");
}
else if (key.equals("wikipedia_link"))
{
idWikipedia = makeXrefs (BioDataSource.WIKIPEDIA, value);
}
else if (key.equals("smiles_canonical"))
{
smiles = value;
}
else if (key.equals("inchi_identifier"))
{
inchi = value;
}
}
public Xref idHmdb = null;
public String symbol = null;
public String formula = null;
public Xref[] idKegg = null;
public Xref[] idPubchem = null;
public Xref[] idChebi = null;
public Xref[] idCas = null;
public Xref[] idBioc = null;
public Xref[] idWikipedia = null;
public String smiles = null;
public String[] synonyms = null;
public String inchi = null;
}
/**
* Signals error while parsing a Metabocards-formatted file.
* This exception means that either the file is corrupt,
* not a valid metabocards file, or (possibly)
* that the metabocards format has changed.
*/
public static class ParseException extends Exception
{
ParseException (String message)
{
super(message);
}
ParseException (String message, int lineNo, String line)
{
super("Parse error: " + message + " at " + lineNo + "\n" + line);
}
}
/**
* Reads a single metabocard from a text file.
* returns null if there are no more records to read.
*/
public Compound readNext (LineNumberReader reader) throws IOException, ParseException
{
Compound result = new Compound();;
int state = 0;
String line;
String key = null;
String value = null;
Pattern p1 = Pattern.compile ("#BEGIN_METABOCARD (HMDB\\d+)");
Pattern p2 = Pattern.compile ("#END_METABOCARD (HMDB\\d+)");
Pattern p3 = Pattern.compile ("# ([a-zA-Z0-9_]+):");
while ((line = reader.readLine()) != null)
{
int newState = state;
switch (state)
{
// state 0: expect begin
case 0:
Matcher m1 = p1.matcher(line);
if (m1.matches())
{
newState = 1;
result.idHmdb = new Xref (m1.group(1), BioDataSource.HMDB);
}
else if (line.equals (""))
{
// ignore
}
else
{
throw new ParseException ("begin expected", reader.getLineNumber(), line);
}
break;
// state 1: expect key, end or empty
case 1:
Matcher m2 = p2.matcher (line);
Matcher m3 = p3.matcher (line);
if (m2.matches())
{
// end
return result;
}
else if (m3.matches())
{
// store unless this is the first key
if (key != null) result.storeField (key, value);
key = m3.group(1);
value = null;
newState = 1;
}
else if (line.equals (""))
{
// ignore
}
else
{
if (value == null)
{
value = line;
}
else
{
value += "\n" + line;
}
}
break;
}
state = newState;
}
if (state != 0)
{
// The record was not properly terminated.
throw new ParseException ("Parse error: unexpected end of file");
}
else
{
// end of file reached, no more records
return null;
}
}
}