/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package JochemBuilder.chemIDplus;
import org.erasmusmc.ontology.OntologyFileLoader;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.ontology.ontologyutilities.OntologyCurator;
import org.erasmusmc.utilities.StringUtilities;
import JochemBuilder.SharedCurationScripts.CasperForJochem;
import JochemBuilder.SharedCurationScripts.CurateUsingManualCurationFile;
import JochemBuilder.SharedCurationScripts.RemoveDictAndCompanyNamesAtEndOfTerm;
import JochemBuilder.SharedCurationScripts.RewriteFurther;
import JochemBuilder.SharedCurationScripts.SaveOnlyCASandInchiEntries;
/**
 * Command-line driver that turns a ChemIDplus XML export into a curated ontology file.
 *
 * <p>{@link #main(String[])} runs a fixed sequence of curation steps, each of which takes the
 * current {@link OntologyStore} and returns the store to use for the next step. All input and
 * output locations are taken from the static fields of this class; edit them (or assign to them
 * before calling {@code main}) to point at your own directory layout.
 */
public class ChemIDplusImport {

    // NOTE: the fields below are deliberately left public and non-final so that existing code
    // that reads or reassigns them keeps compiling.

    /** Stamp embedded in the names of the log and ontology files written by this class. */
    public static String date = "110809";

    /** Base directory under which the ChemIDplus input and output files are located. */
    public static String home = "/home/khettne/Projects/Jochem";

    /** ChemIDplus XML file that is parsed into the initial, unprocessed ontology. */
    public static String chemiIDplusImportFile = home + "/ChemIDplus/chemid.xml.20081028";

    /** File passed to {@code ChemicalsFromChemIDplus.run} together with the import file. */
    public static String mergedSemanticTypeslog = home + "/ChemIDplus/mergedSemanticTypes.txt";

    /** Log file passed to {@code RemoveDictAndCompanyNamesAtEndOfTerm.run}. */
    public static String chemIDplusDictionariesLog =
            home + "/ChemIDplus/ChemIDplus_dictionaries_" + date + ".log";

    /** Log file passed to {@code CasperForJochem.run}. */
    public static String chemIDplusRewriteLog =
            home + "/ChemIDplus/ChemIDplusCAS_casperFiltered_" + date + ".log";

    /** Log file passed to {@code RewriteFurther.run}. */
    public static String chemIdplusLowerCaseLog =
            home + "/ChemIDplus/ChemIDplusCAS_lowerCase_" + date + ".log";

    /**
     * Name of the manual curation file handed to {@code CurateUsingManualCurationFile.run}.
     * Unlike the other paths it is not prefixed with {@link #home}; how it is resolved is
     * decided by that class and cannot be told from here.
     */
    public static String termsToRemove = "chemIDplusTermsToRemove.txt";

    /** Destination of the final curated ontology. */
    public static String chemIdplusCuratedOntologyPath =
            home + "/ChemIDplus/ChemIDplusCAS_curated_" + date + ".ontology";

    /** Log file passed to {@code CurateUsingManualCurationFile.run}. */
    public static String chemIdplusCuratedLog =
            home + "/ChemIDplus/ChemIDplusCAS_curated_" + date + ".log";

    /**
     * Builds the curated ChemIDplus ontology and writes it to
     * {@link #chemIdplusCuratedOntologyPath}.
     *
     * @param args ignored; all configuration comes from the static fields of this class
     */
    public static void main(String[] args) {
        // Make unprocessed thesaurus
        ChemicalsFromChemIDplus chemIDplusChemicals = new ChemicalsFromChemIDplus();
        OntologyStore ontology =
                chemIDplusChemicals.run(chemiIDplusImportFile, mergedSemanticTypeslog);

        /*
         * Disabled legacy step, probably not needed for newer updates.
         *
         * The commented code below was used to map ChemIDplus IDs to CAS numbers. This was done
         * since many CAS numbers were missing in the original ChemIDplus file. ChemIDplus IDs
         * used to be based on CAS numbers and I used this property to map them to CAS numbers
         * that I had parsed from the PubChem database. This procedure is unfortunately not
         * completely reliable and, according to the ChemIDplus people, not necessary anymore,
         * since the errors in the CAS field have been solved. Apart from that, the structure of
         * the ChemIDplus IDs has changed. The code is thus not likely to work with releases of
         * ChemIDplus after the year 2008, but probably also not needed.
         *
        //This is file output, change the path to your own location!
        String chemIDplusIDs = home+"/ChemIDplus/chids.txt";
        GetIDsfromChemIDplus getIDs = new GetIDsfromChemIDplus();
        getIDs.run(ontology, chemIDplusIDs);
        // This is file input, copy the content from the file "casnumbers.txt" in JochemBuilder.chemIDplus to your own location!
        String casFromPubChem = home+"/ChemIDplus/casnumbers.txt";
        MappChemIDplusIDsToCASfromPubChem mappedChemIDplus = new MappChemIDplusIDsToCASfromPubChem();
        ontology = mappedChemIDplus.run(ontology, chemIDplusIDs, casFromPubChem);
         */

        // Remove dictionary and company names at the end of terms (per the step's class name)
        RemoveDictAndCompanyNamesAtEndOfTerm remove = new RemoveDictAndCompanyNamesAtEndOfTerm();
        ontology = remove.run(ontology, chemIDplusDictionariesLog);

        // CAS and InChI
        SaveOnlyCASandInchiEntries make = new SaveOnlyCASandInchiEntries();
        ontology = make.run(ontology);

        // Rewrite
        CasperForJochem casper = new CasperForJochem();
        ontology = casper.run(ontology, chemIDplusRewriteLog);

        // Make some entries lower case and filter further
        RewriteFurther rewrite = new RewriteFurther();
        ontology = rewrite.run(ontology, chemIdplusLowerCaseLog);

        // Remove terms based on medline frequency
        CurateUsingManualCurationFile curate = new CurateUsingManualCurationFile();
        ontology = curate.run(ontology, chemIdplusCuratedLog, termsToRemove);

        // Set default flags and save ontology
        OntologyCurator curator = new OntologyCurator();
        curator.curateAndPrepare(ontology);
        // The loader is only needed for this final write, so it is created here.
        OntologyFileLoader loader = new OntologyFileLoader();
        loader.save(ontology, chemIdplusCuratedOntologyPath);

        System.out.println("Done! " + StringUtilities.now());
    }
}