/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.ontology.ontologyutilities;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.Ontology;
import org.erasmusmc.ontology.TermStore;
import org.erasmusmc.utilities.StringUtilities;
/**
 * Generates spelling variants of gene and protein names using a small set of
 * rewrite rules (Arabic/Roman numeral exchange, insertion or removal of a
 * delimiter before a trailing number, and space/hyphen exchange before a
 * trailing number). New variants are appended to the term list of the concept
 * they were derived from.
 */
public class GeneTermVariantGenerator {

    /** Roman numerals for the values 1..9; the numeral for value v is at index v-1. */
    private static final String[] ROMAN_NUMERALS =
            {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX"};

    /**
     * Generates spelling variants of gene and protein names using a set of rules.
     * Only concepts for which {@code OntologyUtilities.hasGeneVoc} returns true
     * are processed.
     *
     * @param ontology The ontology in which the spelling variants will be inserted.
     */
    public static void generateVariants(Ontology ontology) {
        Iterator<Concept> conceptIterator = ontology.getConceptIterator();
        while (conceptIterator.hasNext()) {
            Concept concept = conceptIterator.next();
            if (OntologyUtilities.hasGeneVoc(concept, ontology)) {
                ProcessConcept(concept);
            }
        }
    }

    /**
     * Applies every rewrite rule to each term the concept had on entry and
     * appends the resulting variants to the concept's term list.
     *
     * @param concept the concept whose term list is extended in place
     */
    private static void ProcessConcept(Concept concept) {
        List<TermStore> terms = concept.getTerms();
        // Freeze the count up front: variants are appended to the same list and
        // must not themselves be expanded again.
        int originalCount = terms.size();
        for (int i = 0; i < originalCount; i++) {
            TermStore term = terms.get(i);
            addIfPresent(terms, DoRomanNumerals(term));
            addIfPresent(terms, DoDelimiter(term));
            addIfPresent(terms, DoHyphen(term));
            // DoOpticalGapExpander(term, concept) is an alternative to DoDelimiter
            // and is intentionally not invoked here (it generates many variants).
        }
    }

    /** Appends {@code variant} to {@code terms} unless the rule produced nothing (null). */
    private static void addIfPresent(List<TermStore> terms, TermStore variant) {
        if (variant != null) {
            terms.add(variant);
        }
    }

    /** Character classes distinguished by the optical gap expander. */
    enum State {lowercase, uppercase, number, delimiter}

    /**
     * Optical Gap expander.
     * Splits terms based on transitions from lowercase to uppercase, letters to
     * numbers and numbers to letters. For every such transition, variants both
     * with and without a hyphen at that position are produced, so the number of
     * variants doubles per transition. Terms longer than 15 characters are skipped.
     * The original text and every generated variant are also printed to standard
     * output, tab separated, one line per term.
     *
     * Warning: generates a lot of spelling variations
     *
     * Use instead of DoDelimiter
     *
     * @param term    the term to expand
     * @param concept the concept whose term list receives the new variants
     */
    public static void DoOpticalGapExpander(TermStore term, Concept concept) {
        String text = term.text;
        if (text.length() > 15) {
            return;
        }
        List<String> variations = new ArrayList<String>();
        variations.add("");
        State previous = State.delimiter;
        for (int i = 0; i < text.length(); i++) {
            char ch = text.charAt(i);
            State current = classify(ch);
            if (isOpticalGap(previous, current)) {
                // Fork every variation built so far into a copy with a hyphen here.
                int existing = variations.size();
                for (int j = 0; j < existing; j++) {
                    variations.add(variations.get(j) + "-");
                }
            }
            // Add the character to all variations.
            for (int j = 0; j < variations.size(); j++) {
                variations.set(j, variations.get(j) + ch);
            }
            previous = current;
        }
        System.out.print(term.text + "\t");
        // Index 0 never received a hyphen and therefore equals the original text.
        for (int i = 1; i < variations.size(); i++) {
            TermStore newTerm = term.copy();
            newTerm.text = variations.get(i);
            System.out.print(newTerm.text + "\t");
            concept.getTerms().add(newTerm);
        }
        System.out.println("");
    }

    /** Maps a character to its class; anything that is not a cased letter or digit is a delimiter. */
    private static State classify(char ch) {
        if (Character.isLowerCase(ch)) {
            return State.lowercase;
        }
        if (Character.isUpperCase(ch)) {
            return State.uppercase;
        }
        if (Character.isDigit(ch)) {
            return State.number;
        }
        return State.delimiter;
    }

    /**
     * Tells whether a hyphen may be inserted between two adjacent characters:
     * their classes differ, neither is a delimiter, and the transition is not
     * uppercase followed by lowercase.
     */
    private static boolean isOpticalGap(State from, State to) {
        if (from == to || from == State.delimiter || to == State.delimiter) {
            return false;
        }
        return !(from == State.uppercase && to == State.lowercase);
    }

    /** True for the ASCII digits '0'..'9' only. */
    private static boolean isAsciiDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }

    /**
     * Inserts or removes a delimiter in front of a trailing run of ASCII digits.
     * If the character before the run is a letter, a hyphen is inserted between
     * them. Otherwise, if the character before that delimiter is not a digit,
     * the delimiter is removed.
     * The character at index 0 is never examined, so a single letter followed
     * only by digits yields no variant.
     *
     * @param term the term to rewrite
     * @return a new term holding the variant, or null when the rule does not apply
     */
    private static TermStore DoDelimiter(TermStore term) {
        String text = term.text;
        boolean seenDigit = false;
        for (int i = text.length() - 1; i > 0; i--) {
            char ch = text.charAt(i);
            if (isAsciiDigit(ch)) {
                seenDigit = true;
                continue;
            }
            // First non-digit from the right: decide once, then stop scanning.
            if (seenDigit) {
                if (Character.isLetter(ch)) {
                    // No delimiter before number: add hyphen.
                    return new TermStore(text.substring(0, i + 1) + "-" + text.substring(i + 1));
                }
                if (!Character.isDigit(text.charAt(i - 1))) {
                    // Delimiter before number, and no number before the delimiter: drop it.
                    return new TermStore(text.substring(0, i) + text.substring(i + 1));
                }
            }
            break;
        }
        return null;
    }

    /**
     * Exchanges the final delimiter of a term when it is followed by a number or
     * Roman numeral (as judged by {@code StringUtilities}): a final space becomes
     * a hyphen, or else a final hyphen becomes a space.
     *
     * @param term the term to rewrite
     * @return a new term holding the variant, or null when the rule does not apply
     */
    private static TermStore DoHyphen(TermStore term) {
        String text = term.text;
        String variant = swapFinalDelimiter(text, ' ', '-');
        if (variant == null) {
            variant = swapFinalDelimiter(text, '-', ' ');
        }
        return variant == null ? null : new TermStore(variant);
    }

    /**
     * Replaces the last occurrence of {@code from} with {@code to} when the text
     * after it is a number or Roman numeral. Returns null when {@code from} does
     * not occur, when it is the final character (there is then no trailing token
     * to test, and a split-based approach would misalign prefix and token), or
     * when the trailing token is neither a number nor a Roman numeral.
     */
    private static String swapFinalDelimiter(String text, char from, char to) {
        int pos = text.lastIndexOf(from);
        if (pos < 0 || pos == text.length() - 1) {
            return null;
        }
        String lastToken = text.substring(pos + 1);
        if (StringUtilities.isNumber(lastToken) || StringUtilities.isRomanNumeral(lastToken)) {
            return text.substring(0, pos) + to + lastToken;
        }
        return null;
    }

    /**
     * Exchanges a trailing single digit 1..9 for its Roman numeral, or a trailing
     * Roman numeral I..IX for its digit.
     * Digit to numeral applies when the last character is '1'..'9' and the
     * character before it is not an ASCII digit; a hyphen is inserted when that
     * preceding character is a letter. Numeral to digit applies when the text
     * after the last non-alphanumeric character (index 0 excluded) is exactly one
     * of I..IX.
     *
     * @param term the term to rewrite
     * @return a new term holding the variant, or null when the rule does not apply
     */
    private static TermStore DoRomanNumerals(TermStore term) {
        String text = term.text;
        if (text.length() <= 1) {
            return null;
        }

        // Replace number with roman numeral.
        char last = text.charAt(text.length() - 1);
        if (last >= '1' && last <= '9') {
            char beforeLast = text.charAt(text.length() - 2);
            if (!isAsciiDigit(beforeLast)) {
                String stem = text.substring(0, text.length() - 1);
                String separator = Character.isLetter(beforeLast) ? "-" : "";
                return new TermStore(stem + separator + ROMAN_NUMERALS[last - '1']);
            }
        }

        // Replace roman numeral with number.
        for (int i = text.length() - 2; i > 0; i--) {
            if (Character.isLetterOrDigit(text.charAt(i))) {
                continue;
            }
            String lastPart = text.substring(i + 1);
            for (int value = 1; value <= ROMAN_NUMERALS.length; value++) {
                if (ROMAN_NUMERALS[value - 1].equals(lastPart)) {
                    return new TermStore(text.substring(0, i + 1) + value);
                }
            }
            // Only the last delimiter-separated part is ever considered.
            break;
        }
        return null;
    }
}