package examples;
import is2.data.SentenceData09;
import is2.lemmatizer.Lemmatizer;
import is2.parser.Parser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;
/**
* @author Bernd Bohnet, 13.09.2010
*
* Illustrates the application of a single component, the lemmatizer: a
* sentence is built, lemmatized, and its forms and predicted lemmas are printed
*/
public class Lemmatize {

    /**
     * Lemmatizes one sentence and prints its word forms followed by the
     * predicted lemmas.
     *
     * <p>When exactly one command-line argument is given, it is split on
     * whitespace and used as the sentence (for example
     * {@code "This is another test ."}). For any other argument count a
     * built-in German sample sentence is used. In both cases the artificial
     * {@code <root>} token is placed in front of the words.
     *
     * @param args optionally a single argument holding the sentence to lemmatize
     * @throws IOException declared because the lemmatizer set-up and application
     *         may throw it
     */
    public static void main(String[] args) throws IOException {

        // Container that carries the sentence through the lemmatizer.
        SentenceData09 sentence = new SentenceData09();

        if (args.length != 1) {
            // No single sentence argument supplied: fall back to the default sentence.
            sentence.init(new String[]{"<root>", "Häuser", "hat", "ein", "Umlaut", "."});
        } else {
            // Whitespace-separated tokens of the argument, preceded by the root token.
            ArrayList<String> tokens = new ArrayList<>();
            tokens.add("<root>");
            for (StringTokenizer tokenizer = new StringTokenizer(args[0]); tokenizer.hasMoreTokens(); ) {
                tokens.add(tokenizer.nextToken());
            }
            String[] tokenArray = tokens.toArray(new String[0]);
            sentence.init(tokenArray);
        }

        // Echo the word forms of the sentence.
        for (String form : sentence.forms) {
            Parser.out.println("forms : " + form);
        }

        // Point the lemmatizer at its model file.
        String[] lemmatizerArgs = new String[]{"-model", "models/lemma-ger.model"};
        is2.lemmatizer.Options lemmatizerOptions = new is2.lemmatizer.Options(lemmatizerArgs);

        // Build the lemmatizer and run it; the lemmas are read back from the
        // same sentence container afterwards.
        Lemmatizer lemmatizer = new Lemmatizer(lemmatizerOptions);
        lemmatizer.apply(sentence);

        // Print the predicted lemmas.
        for (String lemma : sentence.plemmas) {
            Parser.out.println("lemma : " + lemma);
        }
    }
}