// Copyright 2014 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.
package marmot.tokenize.rules;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;
public class SpanishRuleProvider extends RuleProvider {
@Override
public Collection<Rule> getTokRules() {
List<Rule> rules = new LinkedList<Rule>();
rules.add(new Rule(Pattern.compile("\\S( Fz)") ,""));
rules.add(new Rule(Pattern.compile("\\S(_)\\S") ," "));
rules.add(new Rule(Pattern.compile("( ̃)") , ""));
return rules;
}
@Override
public Collection<Rule> getUnTokRules() {
List<Rule> rules = new LinkedList<Rule>();
addSimpleRule("del", "de el", rules);
addSimpleRule("al", "a el", rules); // Al-razir?
//rules.add(new Rule(Pattern.compile("\\w+(dole)\\W") , "(do le)"));
//rules.add(new Rule(Pattern.compile("\\w+(dose)\\W") , "(do se)"));
// TODO: llamarse, ponerse, pongome, ... how many? Maybe make a list?
// Regex might lead to too many errors
return rules;
}
}