/**
* Copyright (C) 2012 cogroo <cogroo@cogroo.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cogroo.uima.interpreters;
import static org.junit.Assert.*;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.junit.Test;
import br.usp.pcs.lta.cogroo.entity.impl.runtime.MorphologicalTag;
import br.usp.pcs.lta.cogroo.tag.TagInterpreterI;
import br.usp.pcs.lta.cogroo.tools.checker.rules.model.TagMask.Class;
import br.usp.pcs.lta.cogroo.tools.checker.rules.model.TagMask.Finiteness;
import br.usp.pcs.lta.cogroo.tools.checker.rules.model.TagMask.Punctuation;
/**
 * Tests for {@link FlorestaTagInterpreter}, exercised through the
 * {@link TagInterpreterI} interface.
 *
 * <p>The tests check that tag strings are parsed into the expected
 * morphological class, that serializing a class yields a tag string that maps
 * back to the same class, and that punctuation and verb tags also carry the
 * expected punctuation kind / finiteness.
 *
 * <p>Note: {@code Class} in this file is the imported {@code TagMask.Class}
 * enum, not {@code java.lang.Class}.
 */
public class FlorestaTagInterpreterTest {

  /** Expected morphological class for every tag string covered by the tests. */
  private static final Map<String, Class> TAG_TO_CLASS = new HashMap<String, Class>();

  /** Interpreter under test; JUnit creates a fresh test instance per test method. */
  private final TagInterpreterI ti = new FlorestaTagInterpreter();

  static {
    // Punctuation marks.
    TAG_TO_CLASS.put("-", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put("--", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put(",", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put(";", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put(":", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put("!", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put("?", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put(".", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put("...", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put("'", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put("«", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put("»", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put("(", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put(")", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put("[", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put("]", Class.PUNCTUATION_MARK);
    TAG_TO_CLASS.put("/", Class.PUNCTUATION_MARK);

    // Word classes. Several tags intentionally share one class.
    TAG_TO_CLASS.put("adj", Class.ADJECTIVE);
    TAG_TO_CLASS.put("adv", Class.ADVERB);
    TAG_TO_CLASS.put("art", Class.DETERMINER);
    TAG_TO_CLASS.put("conj-c", Class.COORDINATING_CONJUNCTION);
    TAG_TO_CLASS.put("conj-s", Class.SUBORDINATING_CONJUNCTION);
    TAG_TO_CLASS.put("ec", Class.HYPHEN_SEPARATED_PREFIX);
    TAG_TO_CLASS.put("intj", Class.INTERJECTION);
    TAG_TO_CLASS.put("n", Class.NOUN);
    TAG_TO_CLASS.put("n-adj", Class.ADJECTIVE);
    TAG_TO_CLASS.put("n:", Class.NOUN);
    TAG_TO_CLASS.put("np", Class.NOUN);
    TAG_TO_CLASS.put("num", Class.NUMERAL);
    TAG_TO_CLASS.put("pp", Class.PREPOSITION);
    // The bare "pron" tag (Class.PRONOUN) is deliberately not covered: per the
    // original author's note it does not occur in practice and exists only
    // for compatibility.
    TAG_TO_CLASS.put("pron-det", Class.DETERMINER);
    TAG_TO_CLASS.put("pron-indp", Class.SPECIFIER);
    TAG_TO_CLASS.put("pron-pers", Class.PERSONAL_PRONOUN);
    TAG_TO_CLASS.put("prop", Class.PROPER_NOUN);
    TAG_TO_CLASS.put("prp", Class.PREPOSITION);
    TAG_TO_CLASS.put("v-fin", Class.VERB);
    TAG_TO_CLASS.put("v-ger", Class.VERB);
    TAG_TO_CLASS.put("v-inf", Class.VERB);
    TAG_TO_CLASS.put("v-pcp", Class.VERB);
    TAG_TO_CLASS.put("vp", Class.VERB);
  }

  /** Every tag in the table must parse to its expected class. */
  @Test
  public void testParseMorphologicalTag() {
    for (Map.Entry<String, Class> entry : TAG_TO_CLASS.entrySet()) {
      String tag = entry.getKey();
      assertEquals("Failed to parse class tag: " + tag, entry.getValue(),
          ti.parseMorphologicalTag(tag).getClazzE());
    }
  }

  /**
   * Serializing a class must yield a tag string that the table maps back to
   * the same class.
   */
  @Test
  public void testSerializeTag() {
    Set<Class> classes = new HashSet<Class>(TAG_TO_CLASS.values());
    for (Class classTag : classes) {
      // PUNCTUATION_MARK and VERB are skipped, as in the original test.
      // NOTE(review): presumably because their serialized form depends on more
      // than the class alone (the specific mark / the finiteness) - confirm.
      if (classTag.equals(Class.PUNCTUATION_MARK) || classTag.equals(Class.VERB)) {
        continue;
      }
      String value = ti.serialize(classTag);
      assertEquals("Failed to serialize class tag: " + classTag
          + " (serialized as: " + value + ")", classTag, TAG_TO_CLASS.get(value));
    }
  }

  /** Punctuation tags must parse to PUNCTUATION_MARK with the expected kind. */
  @Test
  public void testPunctuation() {
    assertPunctuation(".", Punctuation.ABS);
    assertPunctuation("!", Punctuation.ABS);
    assertPunctuation("?", Punctuation.ABS);
    assertPunctuation(",", Punctuation.NSEP);
    assertPunctuation(";", Punctuation.REL);
    assertPunctuation("(", Punctuation.BIN);
    assertPunctuation("--", Punctuation.BIN);
    assertPunctuation("...", Punctuation.REL);
    assertPunctuation("«", Punctuation.NSEP);
  }

  /** Verb tags must parse to VERB with the finiteness encoded in the tag. */
  @Test
  public void testVerbFiniteness() {
    assertFiniteness("v-pcp", Finiteness.PARTICIPLE);
    assertFiniteness("v-inf", Finiteness.INFINITIVE);
    assertFiniteness("v-ger", Finiteness.GERUND);
    assertFiniteness("v-fin", Finiteness.FINITE);
  }

  /** Spot checks for individual non-punctuation, non-verb tags. */
  @Test
  public void testOtherClasses() {
    MorphologicalTag mt = ti.parseMorphologicalTag("n-adj");
    assertEquals("Wrong class for tag: n-adj", Class.ADJECTIVE, mt.getClazzE());

    mt = ti.parseMorphologicalTag("intj");
    assertEquals("Wrong class for tag: intj", Class.INTERJECTION, mt.getClazzE());
  }

  /** Asserts that {@code tag} parses to a punctuation mark of kind {@code expected}. */
  private void assertPunctuation(String tag, Punctuation expected) {
    MorphologicalTag mt = ti.parseMorphologicalTag(tag);
    assertEquals("Wrong class for tag: " + tag, Class.PUNCTUATION_MARK,
        mt.getClazzE());
    assertEquals("Wrong punctuation kind for tag: " + tag, expected,
        mt.getPunctuation());
  }

  /** Asserts that {@code tag} parses to a verb with finiteness {@code expected}. */
  private void assertFiniteness(String tag, Finiteness expected) {
    MorphologicalTag mt = ti.parseMorphologicalTag(tag);
    assertEquals("Wrong class for tag: " + tag, Class.VERB, mt.getClazzE());
    assertEquals("Wrong finiteness for tag: " + tag, expected,
        mt.getFinitenessE());
  }
}