/*****************************************************************************
* Copyright (c) 2008 Bioclipse Project
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
*****************************************************************************/
package spok.parser;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Properties;
import nu.xom.Attribute;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.Nodes;
import nu.xom.ParsingException;
import nu.xom.ValidityException;
import org.eclipse.core.runtime.FileLocator;
import org.eclipse.core.runtime.Platform;
import org.jcamp.parser.JCAMPBlock;
import org.jcamp.parser.JCAMPException;
import org.jcamp.parser.JCAMPReader;
import org.jcamp.parser.Utils;
import org.jcamp.spectrum.IRSpectrum;
import org.jcamp.spectrum.MassSpectrum;
import org.jcamp.spectrum.NMRSpectrum;
import org.jcamp.spectrum.Peak1D;
import org.jcamp.spectrum.Spectrum;
import org.jcamp.spectrum.Spectrum1D;
import org.jcamp.spectrum.notes.Note;
import org.jcamp.spectrum.notes.NoteDescriptor;
import org.jcamp.units.CommonUnit;
import org.xmlcml.cml.element.CMLArray;
import org.xmlcml.cml.element.CMLConditionList;
import org.xmlcml.cml.element.CMLMetadata;
import org.xmlcml.cml.element.CMLMetadataList;
import org.xmlcml.cml.element.CMLPeak;
import org.xmlcml.cml.element.CMLPeakList;
import org.xmlcml.cml.element.CMLScalar;
import org.xmlcml.cml.element.CMLSpectrum;
import org.xmlcml.cml.element.CMLSpectrumData;
import org.xmlcml.cml.element.CMLSubstance;
import org.xmlcml.cml.element.CMLSubstanceList;
import org.xmlcml.cml.element.CMLXaxis;
import org.xmlcml.cml.element.CMLYaxis;
import spok.GenerateId;
/**
* Maps a JCamp spectrum to a CMLSpectrum
*
* @author Tobias Helmus
* @created 19 December 2005
*
*/
public class JcampToCMLSpectrumMapper {
/**
 * Root elements of every mapping file found in the plug-in's
 * "/mappingFiles/" directory. Filled once by the static initializer below
 * and read by {@link #searchInMappingFiles}. Never null; it is empty when
 * the directory cannot be resolved or contains no parsable file.
 */
static ArrayList<Element> mappingListArray;

static {
    mappingListArray = new ArrayList<Element>();

    // NOTE(review): Platform.getBundle(...) is still dereferenced directly;
    // presumably the bundle is always present when this class is loaded
    // inside the plug-in - confirm.
    URL varPluginUrl = Platform.getBundle(
            "net.bioclipse.spectrum").getEntry("/mappingFiles/");

    // Resolve the directory listing defensively: an exception escaping a
    // static initializer makes the whole class unusable, so every failure
    // here degrades to "no mapping files" instead.
    File[] files = null;
    if (varPluginUrl != null) {
        try {
            String varInstallPath = FileLocator.toFileURL(varPluginUrl).getFile();
            files = new File(varInstallPath).listFiles();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // listFiles() yields null when the path is not a readable directory.
    if (files != null) {
        Builder builder = new Builder();
        for (File file : files) {
            // Skip hidden entries (names starting with ".") and anything
            // that is not a regular file.
            if (file.getName().startsWith(".") || !file.isFile()) {
                continue;
            }
            try {
                // Only a successfully parsed document contributes a root
                // element; a broken file is reported and skipped so it can
                // neither cause a NullPointerException nor re-add the root
                // of the previously parsed file.
                Document metadataMapping = builder.build(file);
                mappingListArray.add(metadataMapping.getRootElement());
            } catch (ParsingException e) {
                // Also covers ValidityException, a ParsingException subclass.
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Main method, responsible for calling the mapping methods and setting
 * general settings.
 * <p>
 * The CML type is set to "NMR", "massSpectrum" or "infrared" for the
 * corresponding JCAMP spectrum classes and left unset otherwise. Title,
 * peak table, continuous data and notes are copied from the spectrum. If
 * the {@link JCAMPReader} singleton holds a root block, the first block
 * whose ID differs from the reader's first-spectrum ID is mapped as well.
 * The spectrum ID is taken from the "SPECTRUMID" note if present,
 * otherwise it is generated.
 * <p>
 * NOTE(review): the linked-block lookup relies on the JCAMPReader singleton
 * still holding the state of the file this spectrum was read from - confirm
 * with callers.
 *
 * @param spectrum
 *            the JCamp spectrum; must be a {@link Spectrum1D}
 * @return the CMLSpectrum element
 * @throws JCAMPException
 *             if creating the spectrum of a linked block fails
 * @throws IllegalArgumentException
 *             if spectrum is null or not one-dimensional
 */
public static CMLSpectrum mapJcampToCMLSpectrum(Spectrum spectrum) throws JCAMPException {
    // Fail with a descriptive message instead of a bare ClassCastException
    // (or a late NullPointerException for null input).
    if (!(spectrum instanceof Spectrum1D)) {
        throw new IllegalArgumentException(
                "Only one-dimensional spectra can be mapped to CML, got: "
                        + (spectrum == null ? "null" : spectrum.getClass().getName()));
    }
    Spectrum1D spectrum1d = (Spectrum1D) spectrum;

    CMLSpectrum cmlSpectrum = new CMLSpectrum();
    if (spectrum instanceof NMRSpectrum) {
        cmlSpectrum.setType("NMR");
    } else if (spectrum instanceof MassSpectrum) {
        cmlSpectrum.setType("massSpectrum");
    } else if (spectrum instanceof IRSpectrum) {
        cmlSpectrum.setType("infrared");
    }
    cmlSpectrum.setTitle(spectrum.getTitle());

    if (spectrum1d.hasPeakTable()) {
        cmlSpectrum.addPeakList(mapPeaks(spectrum1d));
    }
    if (spectrum1d.isFullSpectrum()) {
        cmlSpectrum.addSpectrumData(mapContData(spectrum1d));
    }

    // Map the first additional block of a compound JCAMP file, if any.
    JCAMPReader reader = JCAMPReader.getInstance();
    if (reader.getRootblock() != null) {
        Enumeration<?> blocks = reader.getRootblock().getBlocks();
        while (blocks.hasMoreElements()) {
            JCAMPBlock b = (JCAMPBlock) blocks.nextElement();
            if (b.getID() != reader.getIdoffirstspectrum()) {
                Spectrum spectrum2 = reader.createSpectrum(reader.getRootblock(), b.getID());
                // Only one-dimensional data can be mapped; anything else is
                // skipped rather than aborting with a ClassCastException.
                if (spectrum2 instanceof Spectrum1D) {
                    if (spectrum2.isFullSpectrum()) {
                        cmlSpectrum.addSpectrumData(mapContData((Spectrum1D) spectrum2));
                    } else {
                        cmlSpectrum.addPeakList(mapPeaks((Spectrum1D) spectrum2));
                    }
                }
                // Only the first block other than the main spectrum is considered.
                break;
            }
        }
    }

    if (spectrum.getNotes().size() != 0) {
        mapNotes(spectrum, cmlSpectrum);
    }

    // Prefer the ID stored in the spectrum's notes; otherwise generate one.
    NoteDescriptor notedescriptor = new NoteDescriptor("SPECTRUMID");
    String id = null;
    if (spectrum.getNotes(notedescriptor) != null && spectrum.getNotes(notedescriptor).size() > 0) {
        Note note = (Note) spectrum.getNotes(notedescriptor).get(0);
        id = note.getValue().toString();
    }
    if (id == null) {
        id = GenerateId.generateId();
    }
    cmlSpectrum.setId(id);
    return cmlSpectrum;
}
/**
 * Builds a CML peak list from the peak table of a JCAMP spectrum.
 * <p>
 * Each peak contributes its first position coordinate as x value and its
 * height as y value; the units are the spectrum's axis labels prefixed
 * with "jcampdx:".
 *
 * @param spectrum1d
 *            the JCamp spectrum1d whose peak table is read
 * @return a CMLPeakList element with one CMLPeak per table entry
 */
private static CMLPeakList mapPeaks(Spectrum1D spectrum1d) {
    CMLPeakList peakList = new CMLPeakList();
    for (Peak1D jcampPeak : spectrum1d.getPeakTable()) {
        CMLPeak cmlPeak = new CMLPeak();
        cmlPeak.setXValue(jcampPeak.getPosition()[0]);
        cmlPeak.setYValue(jcampPeak.getHeight());
        cmlPeak.setXUnits("jcampdx:" + spectrum1d.getXAxisLabel());
        cmlPeak.setYUnits("jcampdx:" + spectrum1d.getYAxisLabel());
        peakList.addPeak(cmlPeak);
    }
    return peakList;
}
/**
 * Builds a CML spectrum-data element from the continuous x/y data of a
 * JCAMP spectrum.
 * <p>
 * Each axis carries one array with the data values and a title of the form
 * "jcampdx:" + label, where the label is the data array's label or, if
 * that is null or empty, the name of its unit.
 *
 * @param spectrum1d
 *            the JCamp spectrum1d
 * @return a CMLSpectrumData element holding one x axis and one y axis
 */
private static CMLSpectrumData mapContData(Spectrum1D spectrum1d) {
    // Copy the raw values first, x before y.
    double[] abscissaValues = spectrum1d.getXData().toArray();
    double[] ordinateValues = spectrum1d.getYData().toArray();

    CMLXaxis abscissa = new CMLXaxis();
    CMLYaxis ordinate = new CMLYaxis();
    abscissa.addArray(new CMLArray(abscissaValues));
    ordinate.addArray(new CMLArray(ordinateValues));

    CMLSpectrumData spectrumData = new CMLSpectrumData();
    spectrumData.addXaxis(abscissa);
    spectrumData.addYaxis(ordinate);

    // Axis titles: use the label, falling back to the unit name.
    String abscissaLabel = spectrum1d.getXData().getLabel();
    if (abscissaLabel == null || abscissaLabel.length() == 0) {
        abscissaLabel = spectrum1d.getXData().getUnit().getName();
    }
    String ordinateLabel = spectrum1d.getYData().getLabel();
    if (ordinateLabel == null || ordinateLabel.length() == 0) {
        ordinateLabel = spectrum1d.getYData().getUnit().getName();
    }
    abscissa.setTitle("jcampdx:" + abscissaLabel);
    ordinate.setTitle("jcampdx:" + ordinateLabel);

    return spectrumData;
}
/**
 * Maps the notes/metadata of a JCAMP spectrum onto the given CML spectrum.
 * <p>
 * Every note whose value is neither null nor a single blank is passed to
 * {@link #searchInMappingFiles}, which sorts it into a metadata, condition
 * or substance list; non-empty lists are then attached to the CML
 * spectrum. The note key is first translated through the
 * "&lt;key&gt;.jcamp" entries of the "/notes.properties" resource when such
 * an entry exists. If that resource is missing, nothing is mapped.
 * <p>
 * If the CML spectrum is of type "NMR", a positive ".observefrequency"
 * note was found and the x axis label is "HZ", the x values of all
 * spectrum data elements are divided by that frequency and the axis is
 * retitled as recalculated ppm.
 *
 * @param spectrum
 *            the JCamp spectrum providing the notes
 * @param cmlSpectrum
 *            the CML spectrum that receives the mapped lists
 */
private static void mapNotes(Spectrum spectrum, CMLSpectrum cmlSpectrum) {
    Properties notesProps = new Properties();
    java.io.InputStream is = NoteDescriptor.class.getResourceAsStream("/notes.properties");
    if (is == null) {
        return;
    }
    try {
        notesProps.load(is);
    } catch (java.io.IOException e) {
        // Best effort: continue with whatever could be read.
        e.printStackTrace();
    } finally {
        // The stream was previously leaked.
        try {
            is.close();
        } catch (java.io.IOException e) {
            e.printStackTrace();
        }
    }

    CMLMetadataList metadataList = new CMLMetadataList();
    CMLConditionList conditionList = new CMLConditionList();
    CMLSubstanceList substanceList = new CMLSubstanceList();

    double frequency = 0;
    Iterator<?> notesIterator = spectrum.getNotes().iterator();
    while (notesIterator.hasNext()) {
        Note note = (Note) notesIterator.next();
        Object rawValue = note.getValue();
        // Compare by content: a reference comparison against the literal
        // " " never filtered anything. Null values are skipped as well.
        if (rawValue == null || " ".equals(rawValue)) {
            continue;
        }
        String value = rawValue.toString();
        String key = (String) note.getDescriptor().getKey();
        String jcamp = notesProps.getProperty(key + ".jcamp");
        if (jcamp != null) {
            key = jcamp;
        }
        searchInMappingFiles(key, note.getDescriptor().getName(), value,
                metadataList, conditionList, substanceList);
        if (key.equals(".observefrequency")) {
            try {
                frequency = Double.parseDouble(value);
            } catch (NumberFormatException e) {
                // An unparsable frequency only disables the Hz -> ppm
                // conversion below; the note itself is already mapped.
                e.printStackTrace();
            }
        }
    }

    // Convert Hz to ppm if a measurement frequency is given and it is an
    // NMR spectrum. Constant-first equals() keeps this null-safe: the type
    // is not set for spectra that are neither NMR, MS nor IR.
    if ("NMR".equals(cmlSpectrum.getType()) && frequency > 0
            && "HZ".equals(spectrum.getXAxisLabel())) {
        for (int i = 0; i < cmlSpectrum.getSpectrumDataElements().size(); i++) {
            CMLXaxis xAxis = cmlSpectrum.getSpectrumDataElements().get(i)
                    .getXaxisElements().get(0);
            CMLArray xArray = xAxis.getArrayElements().get(0);
            double[] values = xArray.getDoubles();
            double[] newvalues = new double[values.length];
            for (int k = 0; k < values.length; k++) {
                newvalues[k] = values[k] / frequency;
            }
            xArray.setArray(newvalues);
            xAxis.setTitle("cml:recalculated-" + CommonUnit.ppm.getSymbol());
        }
    }

    if (metadataList.getChildCount() > 0) {
        cmlSpectrum.addMetadataList(metadataList);
    }
    if (conditionList.getChildCount() > 0) {
        cmlSpectrum.addConditionList(conditionList);
    }
    if (substanceList.getChildCount() > 0) {
        cmlSpectrum.appendChild(substanceList);
    }
}
/**
 * Looks a note key up in the loaded mapping files and appends the note to
 * the list named by the first match.
 * <p>
 * For each mapping file (in load order) an {@code entry} element whose
 * {@code id} equals the key is searched; for mapping files with prefix
 * "jcampdx" the key is normalized first and a leading "." is replaced by
 * "dot". If nothing matches, the lookup is repeated with "dot" prepended.
 * The {@code name} attribute of the matching entry's parent selects the
 * target: "conditionList", "substanceList", or the metadata list for
 * anything else. If no mapping file matches, the note is appended to the
 * metadata list under the "jcampdx" prefix with the original key as id.
 *
 * @param key
 *            the note key to look up
 * @param name
 *            the note's display name, may be null
 * @param value
 *            the note value
 * @param metadataList
 *            receives notes mapped as metadata and all unmapped notes
 * @param conditionList
 *            receives notes mapped as conditions
 * @param substanceList
 *            receives notes mapped as substances
 */
public static void searchInMappingFiles(String key, String name, String value, CMLMetadataList metadataList, CMLConditionList conditionList, CMLSubstanceList substanceList){
    boolean foundInAMappingFile = false;
    for (Element rootElem : mappingListArray) {
        Attribute prefixAttr = rootElem.getAttribute("prefix");
        if (prefixAttr == null) {
            // A mapping file without a prefix cannot be used; skip it
            // instead of failing with a NullPointerException.
            continue;
        }
        String prefix = prefixAttr.getValue();

        // Every mapping file starts from the caller's key again.
        String candidate = key;
        if ("jcampdx".equals(prefix)) {
            candidate = Utils.normalizeLabel(candidate);
            if (candidate.startsWith(".")) {
                candidate = "dot" + candidate.substring(1);
            }
        }

        String entryQuery = "//entry[@id=" + toXPathLiteral(candidate) + "]";
        Nodes label = rootElem.query(entryQuery);
        Nodes result = rootElem.query(entryQuery + "/parent::*");
        if (result.size() == 0) {
            candidate = "dot" + candidate;
            entryQuery = "//entry[@id=" + toXPathLiteral(candidate) + "]";
            label = rootElem.query(entryQuery);
            result = rootElem.query(entryQuery + "/parent::*");
        }
        if (result.size() == 0) {
            continue;
        }

        foundInAMappingFile = true;
        // Title: the entry's label when the match is unique and has one,
        // otherwise the sanitized note name (or empty).
        String title = null;
        if (label.size() == 1) {
            title = ((Element) label.get(0)).getAttributeValue("label");
        }
        if (title == null) {
            title = (name != null) ? sanitize(name) : "";
        }

        // Constant-first equals(): a parent without a "name" attribute
        // falls through to the metadata list.
        String listName = ((Element) result.get(0)).getAttributeValue("name");
        if ("conditionList".equals(listName)) {
            CMLScalar condition = new CMLScalar();
            condition.setId(candidate);
            condition.setTitle(title);
            condition.setValue(value);
            conditionList.appendChild(condition);
        } else if ("substanceList".equals(listName)) {
            CMLSubstance substance = new CMLSubstance();
            substance.setTitle(title);
            substance.setId(candidate);
            nu.xom.Text textNode = new nu.xom.Text(value);
            substance.appendChild(textNode);
            substanceList.appendChild(substance);
        } else {
            CMLMetadata metadata = new CMLMetadata();
            metadata.setName(prefix + ":" + title);
            metadata.setTitle(title);
            metadata.setId(candidate);
            metadata.setContent(value);
            metadataList.appendChild(metadata);
        }
        break;
    }
    if (!foundInAMappingFile) {
        CMLMetadata metadata = new CMLMetadata();
        if (name != null) {
            metadata.setName("jcampdx:" + sanitize(name));
            // NOTE(review): sanitizing the prefixed name strips the ':' and
            // yields "jcampdx<name>"; kept as is - confirm whether the plain
            // sanitized name was intended.
            metadata.setTitle(sanitize("jcampdx:" + sanitize(name)));
        }
        metadata.setId(key);
        metadata.setContent(value);
        metadataList.appendChild(metadata);
    }
}

/**
 * Renders text as an XPath 1.0 string literal. XPath 1.0 has no escape
 * sequences, so text containing both quote characters is assembled with
 * concat().
 */
private static String toXPathLiteral(String text) {
    String s = String.valueOf(text);
    if (s.indexOf('\'') < 0) {
        return "'" + s + "'";
    }
    if (s.indexOf('"') < 0) {
        return "\"" + s + "\"";
    }
    StringBuilder literal = new StringBuilder("concat(");
    String[] parts = s.split("'", -1);
    for (int i = 0; i < parts.length; i++) {
        if (i > 0) {
            literal.append(",\"'\",");
        }
        literal.append('\'').append(parts[i]).append('\'');
    }
    return literal.append(')').toString();
}
/**
 * Ensures the output has the pattern "[A-Za-z][A-Za-z0-9_\.\-]*".
 * <p>
 * All characters before the first letter are dropped, as is every later
 * character that is not a letter, a digit, '_', '.' or '-'. Letters and
 * digits are recognized with {@link Character#isLetter(char)} and
 * {@link Character#isDigit(char)}, so for ASCII input the result matches
 * the pattern above. The result is empty when the input contains no
 * letter.
 *
 * @param key
 *            the text to sanitize; must not be null
 * @return the sanitized text, possibly empty
 */
public static String sanitize(String key) {
    int length = key.length();

    // Skip everything up to the first letter so the result can never start
    // with a digit or punctuation character.
    int start = 0;
    while (start < length && !Character.isLetter(key.charAt(start))) {
        start++;
    }

    StringBuilder sanitized = new StringBuilder(length - start);
    for (int i = start; i < length; i++) {
        char character = key.charAt(i);
        if (Character.isLetter(character) || Character.isDigit(character)
                || character == '_' || character == '.' || character == '-') {
            sanitized.append(character);
        } // else: bad char, skip
    }
    return sanitized.toString();
}
}