package com.s24.wiki;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import edu.jhu.nlp.wikipedia.WikiPage;
/**
 * Page parser that extracts inflected word forms from the first template block
 * of a wiki page and reports them, together with the page title, to the
 * configured callback.
 *
 * <p>NOTE(review): the template is presumably the German Wiktionary
 * "... Übersicht" declension table (see {@link #getName()}); how the name is
 * used is decided by the superclass and is not visible here.
 */
public class GermanGrammarPageParser extends GermanPageParser {

    /**
     * Matches a line starting with <code>{{</code> followed by exactly eight
     * lines that each start with <code>|</code>. Compiled once because
     * {@link #parse(WikiPage)} runs for every page.
     */
    private static final Pattern TABLE_PATTERN =
            Pattern.compile("\\{\\{.*\\n(\\|.*\\n){8}");

    /**
     * Matches a template parameter whose key starts with N, G, D or A
     * (presumably Nominativ/Genitiv/Dativ/Akkusativ) and captures the first
     * whitespace-preceded word in its value that starts with an uppercase
     * letter and has at least one further letter.
     *
     * <p>{@code \p{Lu}} is used rather than {@code \p{Upper}}: the latter is
     * ASCII-only by default and would skip words starting with Ä, Ö or Ü.
     */
    private static final Pattern WORD_PATTERN =
            Pattern.compile("\\|[NGDA][a-zA-Z\\s]+=.*?\\s(\\p{Lu}[\\p{L}]+)");

    /**
     * Creates a parser that reports its results to the given callback.
     *
     * @param cb callback handed to the superclass constructor
     */
    public GermanGrammarPageParser(PageParserCallback cb) {
        super(cb);
    }

    /**
     * Extracts the word forms of a valid page and passes them to the callback.
     *
     * <p>The callback receives two lists: the first holds the lower-cased page
     * title followed by every distinct lower-cased word form found in the
     * first template block; the second holds only the lower-cased page title.
     * The callback is invoked for every valid page, even if no template block
     * is found. Pages rejected by {@code isValidPage} are ignored.
     *
     * @param page the wiki page to inspect
     */
    @Override
    public void parse(WikiPage page) {
        if (!isValidPage(page)) {
            return;
        }

        // Locale.ROOT keeps the lower-casing independent of the JVM default
        // locale (e.g. Turkish would map "I" to a dotless "ı").
        String title = page.getTitle().trim().toLowerCase(Locale.ROOT);

        List<String> left = new ArrayList<String>();
        List<String> right = new ArrayList<String>();
        left.add(title);
        right.add(title);

        // Only the first template block on the page is considered.
        Matcher tableMatcher = TABLE_PATTERN.matcher(page.getWikiText());
        if (tableMatcher.find()) {
            Matcher wordMatcher = WORD_PATTERN.matcher(tableMatcher.group());
            // find() resumes after the previous match on its own.
            while (wordMatcher.find()) {
                String item = wordMatcher.group(1).toLowerCase(Locale.ROOT);
                if (!left.contains(item)) {
                    left.add(item);
                }
            }
        }

        callback.callback(left, right);
    }

    /**
     * Returns the name associated with this parser.
     *
     * @return the literal string "Übersicht"
     */
    @Override
    protected String getName() {
        return "Übersicht";
    }
}