// Copyright 2014 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.
package marmot.tokenize.rules;
import java.util.Collection;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Base class for language-specific providers of tokenization rules.
 *
 * <p>A concrete provider supplies two rule collections, one via
 * {@link #getTokRules()} and one via {@link #getUnTokRules()}. This class
 * adds a factory keyed by language code, a helper for registering simple
 * word-boundary rules, and convenience methods that wrap each rule
 * collection in a {@link RulebasedTransformator}.
 */
public abstract class RuleProvider {

    /**
     * Flags used for every pattern built by {@link #addSimpleRule}.
     *
     * <p>{@code CASE_INSENSITIVE} on its own only folds US-ASCII letters;
     * {@code UNICODE_CASE} extends the folding to non-ASCII letters such as
     * the accented characters of Czech, German and Spanish.
     */
    private static final int RULE_FLAGS =
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    /**
     * Boundary pairs (prefix, suffix) placed around the captured expression
     * by {@link #addSimpleRule}. {@code \P{L}} matches any single character
     * that is not a letter. The order is significant: rules are appended to
     * the target list in exactly this order.
     */
    private static final String[][] BOUNDARIES = {
        {"\\P{L}", "\\P{L}"}, // non-letter on both sides
        {"^", "\\P{L}"},      // start of input, then a non-letter
        {"\\P{L}", "$"},      // non-letter, then end of input
        {"^", "$"},           // the expression is the whole input
    };

    /**
     * Returns the rules for the "tok" direction.
     *
     * @return the rule collection; may be {@code null}, in which case
     *         {@link #getTokTransformator()} also returns {@code null}
     */
    public abstract Collection<Rule> getTokRules();

    /**
     * Returns the rules for the "untok" direction.
     *
     * @return the rule collection; may be {@code null}, in which case
     *         {@link #getUnTokTransformator()} also returns {@code null}
     */
    public abstract Collection<Rule> getUnTokRules();

    /**
     * Creates the rule provider for a language code.
     *
     * <p>Supported codes, compared case-insensitively: {@code "cs"},
     * {@code "de"}, {@code "en"} and {@code "es"}.
     *
     * @param lang language code; may be {@code null}
     * @return a new provider for the language, or {@code null} when the
     *         code is {@code null} or not supported
     */
    public static RuleProvider createRuleProvider(String lang) {
        // Comparing from the literal keeps a null code on the same
        // "unsupported" path as any other unknown code.
        if ("cs".equalsIgnoreCase(lang)) {
            return new CzechRuleProvider();
        }
        if ("de".equalsIgnoreCase(lang)) {
            return new GermanRuleProvider();
        }
        if ("en".equalsIgnoreCase(lang)) {
            return new EnglishRuleProvider();
        }
        if ("es".equalsIgnoreCase(lang)) {
            return new SpanishRuleProvider();
        }
        // NOTE: a Hungarian ("hu") provider is currently disabled.
        return null;
    }

    /**
     * Appends four rules to {@code rules} that match the expression
     * {@code a} when it is delimited by non-letter characters and/or the
     * start or end of the input.
     *
     * <p>The expression is wrapped in a capturing group, so it is group 1
     * of every generated pattern. Matching ignores case, including for
     * non-ASCII letters.
     *
     * @param a     regular-expression fragment to match; it is inserted
     *              into the pattern verbatim (not quoted), so regex
     *              metacharacters keep their meaning
     * @param b     second constructor argument of every created
     *              {@link Rule} (presumably the replacement text - confirm
     *              against {@code Rule})
     * @param rules list that receives the four new rules
     */
    public void addSimpleRule(String a, String b, List<Rule> rules) {
        String group = "(" + a + ")";
        for (String[] boundary : BOUNDARIES) {
            String regex = boundary[0] + group + boundary[1];
            rules.add(new Rule(Pattern.compile(regex, RULE_FLAGS), b));
        }
    }

    /**
     * Wraps the rules of {@link #getTokRules()} in a transformator.
     *
     * @return a new transformator, or {@code null} when the provider has no
     *         "tok" rules
     */
    public RulebasedTransformator getTokTransformator() {
        return createTransformator(getTokRules());
    }

    /**
     * Wraps the rules of {@link #getUnTokRules()} in a transformator.
     *
     * @return a new transformator, or {@code null} when the provider has no
     *         "untok" rules
     */
    public RulebasedTransformator getUnTokTransformator() {
        return createTransformator(getUnTokRules());
    }

    /**
     * Builds a transformator from an already fetched rule collection.
     * Taking the collection as a parameter ensures the provider's getter is
     * evaluated only once per call.
     */
    private static RulebasedTransformator createTransformator(Collection<Rule> rules) {
        if (rules == null) {
            return null;
        }
        return new RulebasedTransformator(rules);
    }
}