package uk.ac.shef.dcs.jate.nlp;
import dragon.nlp.tool.lemmatiser.EngLemmatiser;
import java.util.HashMap;
import java.util.Map;
/**
 * Lemmatises single words or the right-most word of a phrase by delegating to
 * an {@link EngLemmatiser}.
 *
 * <p>Part-of-speech tag strings are mapped to the integer codes that are passed
 * as the second argument of {@code EngLemmatiser.lemmatize}. The mapping is
 * built once at class initialisation and is never modified afterwards.
 */
public class Lemmatiser {
    // Integer codes handed to the wrapped lemmatiser. The names follow the tag
    // families grouped under each code (NN* = noun, VB* = verb, JJ* = adjective,
    // RB* = adverb); NOTE(review): confirm against the EngLemmatiser POS constants.
    private static final int NOUN = 1;
    private static final int VERB = 2;
    private static final int ADJECTIVE = 3;
    private static final int ADVERB = 4;

    /** POS tag string to lemmatiser code; read-only after construction. */
    private static final Map<String, Integer> TAG_LOOK_UP = buildTagLookUp();

    private final EngLemmatiser lemmatiser;

    /**
     * @param lemmatiser the lemmatiser every call to {@link #normalize} delegates to
     */
    public Lemmatiser(EngLemmatiser lemmatiser) {
        this.lemmatiser = lemmatiser;
    }

    /** Builds the tag-to-code table used by {@link #normalize}. */
    private static Map<String, Integer> buildTagLookUp() {
        Map<String, Integer> lookUp = new HashMap<>();
        lookUp.put("NN", NOUN);
        lookUp.put("NNS", NOUN);
        lookUp.put("NNP", NOUN);
        lookUp.put("NNPS", NOUN);
        lookUp.put("VB", VERB);
        lookUp.put("VBG", VERB);
        lookUp.put("VBD", VERB);
        lookUp.put("VBN", VERB);
        lookUp.put("VBP", VERB);
        lookUp.put("VBZ", VERB);
        lookUp.put("JJ", ADJECTIVE);
        lookUp.put("JJR", ADJECTIVE);
        lookUp.put("JJS", ADJECTIVE);
        lookUp.put("RB", ADVERB);
        lookUp.put("RBR", ADVERB);
        lookUp.put("RBS", ADVERB);
        return lookUp;
    }

    /**
     * Lemmatise a phrase or word. If a phrase, only the right-most word is
     * lemmatised and the preceding words are kept as they are.
     *
     * <p>Surrounding whitespace is removed before the value is split, so a
     * trailing space no longer causes the last word to be skipped.
     *
     * @param value phrase/word string; must not be {@code null}
     * @param pos   part-of-speech tag of the value (e.g. {@code "NNS"}); a tag
     *              that is {@code null} or not in the lookup table is treated
     *              like a noun tag
     * @return the normalised phrase with surrounding whitespace removed, or an
     *         empty string when {@code value} contains only whitespace
     */
    public String normalize(String value, String pos) {
        Integer tag = TAG_LOOK_UP.get(pos);
        if (tag == null) {
            tag = NOUN; // unknown or missing tags fall back to the noun code
        }

        // Trim first: otherwise a trailing space becomes the split point and
        // an empty string, not the last word, is sent to the lemmatiser.
        String phrase = value.trim();
        if (phrase.isEmpty()) {
            return ""; // nothing to lemmatise
        }

        int space = phrase.lastIndexOf(' ');
        // A single word, or a value ending in "'s", is handed to the
        // lemmatiser whole rather than being split at the last space.
        if (space == -1 || phrase.endsWith("'s")) {
            return lemmatiser.lemmatize(phrase, tag).trim();
        }

        String leadingWords = phrase.substring(0, space);
        String lastWord = lemmatiser.lemmatize(phrase.substring(space + 1), tag);
        return (leadingWords + " " + lastWord).trim();
    }
}