// Copyright 2015 Thomas Müller // This file is part of MarMoT, which is licensed under GPLv3. package marmot.tokenize; import java.util.List; import marmot.tokenize.Tokenizer; import marmot.tokenize.rules.RuleProvider; import marmot.tokenize.rules.RulebasedTransformator; public class RuleBasedTokenizer extends AbstractTokenizer { private static final long serialVersionUID = 1214140578027691025L; private Tokenizer tokenizer_; private RulebasedTransformator untok_transformator_; public RuleBasedTokenizer(Tokenizer tokenizer, RuleProvider provider){ tokenizer_ = tokenizer; untok_transformator_ = null; if(provider != null){ untok_transformator_ = provider.getUnTokTransformator(); } } @Override public List<String> tokenize(String untokenized) { if (untok_transformator_ != null) { untokenized = untok_transformator_.applyRules(untokenized); } return tokenizer_.tokenize(untokenized); } }