/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.dataimport;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.DefaultTypes;
import org.erasmusmc.ontology.OntologyManager;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.ontology.Relation;
import org.erasmusmc.ontology.TermStore;
import org.erasmusmc.utilities.StringUtilities;
import org.erasmusmc.utilities.TextFileUtilities;
/**
 * Command-line importer that reads the four WordNet 3.0 synset data files
 * (adjectives, adverbs, nouns, verbs), turns every synset into a {@link Concept}
 * with terms, a definition and hypernym ("parent") relations, and hands the
 * resulting {@link OntologyStore} to {@link OntologyManager#dumpStoreInDatabase}.
 *
 * <p>Data lines are expected in the WordNet database format:
 * <pre>
 * synset_offset lex_filenum ss_type w_cnt word lex_id [word lex_id...] p_cnt [ptr...] [frames...] | gloss
 * </pre>
 * where {@code w_cnt} is hexadecimal, {@code p_cnt} is decimal and every pointer
 * consists of four fields: {@code pointer_symbol synset_offset pos source/target}.
 */
public class WordNet {

  /** Amount added to the ID offset for each successive data file, so IDs of different files do not collide. */
  private static final int ID_OFFSET_PER_FILE = 100000000;

  /** Index of the (hexadecimal) word-count field in a space-separated data line. */
  private static final int WORD_COUNT_FIELD = 3;

  /** Index of the first word field; words and their lex_ids alternate from here on. */
  private static final int FIRST_WORD_FIELD = 4;

  /** Number of space-separated fields that make up one pointer. */
  private static final int FIELDS_PER_POINTER = 4;

  /**
   * Builds the WordNet ontology from the hard-coded data file locations and stores it.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    System.out.println("Processing data files");
    List<String> dataFiles = new ArrayList<String>();
    dataFiles.add("/home/Data/Wordnet3.0/dict/data.adj");
    dataFiles.add("/home/Data/Wordnet3.0/dict/data.adv");
    dataFiles.add("/home/Data/Wordnet3.0/dict/data.noun");
    dataFiles.add("/home/Data/Wordnet3.0/dict/data.verb");

    OntologyStore ontology = new OntologyStore();
    ontology.setName("Wordnet3_0");

    // Synset offsets are only unique within one file, so each file gets its own ID range.
    int offset = 0;
    for (String dataFile : dataFiles) {
      addToOntology(dataFile, ontology, offset);
      offset += ID_OFFSET_PER_FILE;
    }

    System.out.println("Setting flags");
    setFlags(ontology);

    System.out.println("Storing ontology");
    OntologyManager ontologyManager = new OntologyManager();
    //ontologyManager.deleteOntology(ontology.getName());
    ontologyManager.dumpStoreInDatabase(ontology);
    //OntologyPSFLoader loader = new OntologyPSFLoader();
    //loader.ontology = ontology;
    //loader.SaveToPSF("/home/temp/wordnet3.0.psf");
  }

  /**
   * Sets the matching flags of every term in the ontology. Terms whose text is
   * entirely lower case are marked case-insensitive and normalised; all other
   * terms are marked case-sensitive and not normalised. Every term is marked
   * order-sensitive.
   *
   * @param ontology the ontology whose terms are updated in place
   */
  private static void setFlags(OntologyStore ontology) {
    Iterator<Concept> conceptIterator = ontology.getConceptIterator();
    while (conceptIterator.hasNext()) {
      for (TermStore term : conceptIterator.next().getTerms()) {
        // Compare by content (not by reference) and independently of the platform locale.
        boolean allLowerCase = term.text.toLowerCase(Locale.ROOT).equals(term.text);
        term.caseSensitive = !allLowerCase;
        term.normalised = allLowerCase;
        term.orderSensitive = true;
      }
    }
  }

  /**
   * Loads one WordNet data file and adds every synset line in it to the ontology.
   * Lines that are empty or do not start with a digit (such as the licence header
   * of the data files) are skipped.
   *
   * @param datafile path of the WordNet data file
   * @param ontology ontology that receives the concepts and relations
   * @param offset   value added to every synset offset of this file to form the concept ID
   */
  private static void addToOntology(String datafile, OntologyStore ontology, int offset) {
    for (String line : TextFileUtilities.loadFromFile(datafile)) {
      if (line.length() > 0 && Character.isDigit(line.charAt(0))) {
        processLine(line, ontology, offset);
      }
    }
  }

  /**
   * Parses a single synset line and adds the resulting concept, plus one
   * {@code isParentOf} relation per hypernym pointer, to the ontology.
   *
   * @param line     a synset line in WordNet data file format
   * @param ontology ontology that receives the concept and its relations
   * @param offset   value added to the synset offsets found on this line
   * @throws IllegalArgumentException if the line has too few fields or a numeric field cannot be parsed
   */
  private static void processLine(String line, OntologyStore ontology, int offset) {
    // Everything after the first '|' is the free-text gloss; it must not be parsed as fields.
    int glossSeparator = line.indexOf('|');
    String structuredPart = glossSeparator < 0 ? line : line.substring(0, glossSeparator);
    String[] fields = structuredPart.trim().split(" +");
    if (fields.length <= WORD_COUNT_FIELD) {
      throw new IllegalArgumentException("Malformed WordNet data line: " + line);
    }

    //Fetch concept ID:
    Concept concept = new Concept(Integer.parseInt(fields[0]) + offset);

    //Fetch terms: only the w_cnt word fields are inspected, so pointer symbols
    //and hexadecimal fields further down the line can never end up as terms.
    int wordCount = Integer.parseInt(fields[WORD_COUNT_FIELD], 16);
    int pointerCountField = FIRST_WORD_FIELD + 2 * wordCount;
    List<TermStore> terms = new ArrayList<TermStore>();
    for (int i = FIRST_WORD_FIELD; i < pointerCountField && i < fields.length; i += 2) {
      String word = fields[i];
      // Single-character words, words without any letter and words with other
      // characters than letters, digits, '_', '-', '(', ')' and ''' are not used as terms.
      if (word.length() > 1 && StringUtilities.containsLetter(word) && noIllegalChars(word)) {
        // Drop a trailing syntactic marker such as "(a)", "(p)" or "(ip)".
        int markerStart = word.indexOf('(');
        if (markerStart >= 0) {
          word = word.substring(0, markerStart);
        }
        terms.add(new TermStore(word.replace('_', ' ')));
      }
    }
    if (terms.size() != 0) {
      concept.setTerms(terms);
    }

    //Fetch definition:
    if (glossSeparator >= 0) {
      concept.setDefinition(line.substring(glossSeparator + 1).trim());
    }

    //Fetch parents: walk the p_cnt pointers and keep the hypernym ones ("@" and "@i").
    if (pointerCountField < fields.length) {
      int pointerCount = Integer.parseInt(fields[pointerCountField]);
      int field = pointerCountField + 1;
      for (int p = 0; p < pointerCount && field + 1 < fields.length; p++) {
        if (fields[field].startsWith("@")) {
          // NOTE: the parent ID uses this file's offset, i.e. the hypernym is
          // taken to be a synset of the same data file.
          int parentID = Integer.parseInt(fields[field + 1]) + offset;
          Relation relation = new Relation(parentID, DefaultTypes.isParentOf, concept.getID());
          ontology.setRelation(relation);
        }
        field += FIELDS_PER_POINTER;
      }
    }

    //Add to ontology:
    ontology.setConcept(concept);
  }

  /**
   * Tells whether a word consists solely of characters accepted in a term.
   *
   * @param word the raw word as it appears in the data file
   * @return {@code true} if every character is a letter, a digit or one of
   *         {@code _ - ( ) '}; {@code false} otherwise
   */
  private static boolean noIllegalChars(String word) {
    for (int i = 0; i < word.length(); i++) {
      char ch = word.charAt(i);
      boolean legal = Character.isLetterOrDigit(ch)
          || ch == '_' || ch == '-' || ch == '(' || ch == ')' || ch == '\'';
      if (!legal) {
        return false;
      }
    }
    return true;
  }
}