// Copyright 2014 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.

package marmot.test.morph;

import static org.junit.Assert.assertEquals;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;

import marmot.core.Sequence;
import marmot.morph.MorphModel;
import marmot.morph.MorphOptions;
import marmot.morph.Word;
import marmot.util.StringUtils.Mode;

import org.junit.Test;

/**
 * Checks what {@code MorphModel.addIndexes} records for a single word: the
 * (possibly normalized) form and its characters in the model's tables, and
 * the word signature stored on the {@code Word} itself.
 */
public class MorphModelTest {

    // Signature values expected by the assertions below. The names describe
    // the kind of input that yields each value in these tests.
    private static final int NONE = 0;
    private static final int LOWER = 1;
    private static final int UPPER = 2;
    private static final int HYPHEN = 4;
    private static final int DIGIT = 8;
    private static final int SPECIAL = 16;

    /**
     * Creates a model and initializes it with the given options and an empty
     * list of sequences.
     */
    private static MorphModel newModel(MorphOptions options) {
        MorphModel morphModel = new MorphModel();
        morphModel.init(options, new LinkedList<Sequence>());
        return morphModel;
    }

    /**
     * Indexes {@code inform} in a fresh model and asserts that the word table
     * holds exactly one entry, {@code expected_form} at index 0, and that the
     * key set of the char table equals {@code expected_chars}.
     *
     * @param inform         the raw word form handed to the model
     * @param expected_form  the form expected in the word table
     * @param expected_chars the characters expected as char table keys
     * @param options        the options the model is initialized with
     */
    public void testFormExtraction(String inform, String expected_form,
            Collection<Character> expected_chars, MorphOptions options) {
        MorphModel morphModel = newModel(options);
        morphModel.addIndexes(new Word(inform), true);

        // Dump both tables to stderr to ease diagnosing a failed assertion.
        System.err.println(morphModel.getWordTable());
        System.err.println(morphModel.getCharTable());

        assertEquals(0, morphModel.getWordTable().toIndex(expected_form));
        assertEquals(1, morphModel.getWordTable().size());

        Set<Character> wantedChars = new HashSet<>(expected_chars);
        assertEquals(wantedChars, morphModel.getCharTable().keySet());
    }

    /**
     * Exercises form extraction with default options, with {@code lower}
     * normalization and with {@code umlaut} normalization.
     */
    @Test
    public void testFormExtraction() {
        MorphOptions defaults = new MorphOptions();
        testFormExtraction("Abc", "Abc", Arrays.asList('A', 'b', 'c'), defaults);

        MorphOptions lowercasing = new MorphOptions();
        lowercasing.setProperty(MorphOptions.FORM_NORMALIZATION, Mode.lower.toString());
        testFormExtraction("Abc", "abc", Arrays.asList('a', 'b', 'c'), lowercasing);
        testFormExtraction("-LRB-", "(", Arrays.asList('('), lowercasing);

        MorphOptions umlautRewriting = new MorphOptions();
        umlautRewriting.setProperty(MorphOptions.FORM_NORMALIZATION, Mode.umlaut.toString());
        testFormExtraction("Öl", "oel", Arrays.asList('o', 'e', 'l'), umlautRewriting);
    }

    /**
     * Indexes {@code inform} in a fresh model and asserts that the resulting
     * word signature equals {@code expected}.
     *
     * @param inform   the raw word form handed to the model
     * @param expected the expected value of {@code Word.getWordSignature()}
     * @param options  the options the model is initialized with
     */
    public void testShapeExtraction(String inform, int expected, MorphOptions options) {
        MorphModel morphModel = newModel(options);
        Word token = new Word(inform);
        morphModel.addIndexes(token, true);
        assertEquals(expected, token.getWordSignature());
    }

    /**
     * Runs the signature expectations shared by every configuration that has
     * {@code SPECIAL_SIGNATURE} switched on.
     */
    private void assertSpecialSignatures(MorphOptions options) {
        testShapeExtraction("abc", LOWER, options);
        testShapeExtraction("DEF", UPPER, options);
        testShapeExtraction("-", HYPHEN + SPECIAL, options);
        testShapeExtraction("1234", DIGIT, options);
        testShapeExtraction("?", SPECIAL, options);
        testShapeExtraction("1234abc", DIGIT + LOWER, options);
        testShapeExtraction("1234#abc", DIGIT + LOWER + SPECIAL, options);
        testShapeExtraction("1234-abc", DIGIT + LOWER + HYPHEN + SPECIAL, options);
        testShapeExtraction("1234-DEF", DIGIT + UPPER + HYPHEN + SPECIAL, options);
        testShapeExtraction("1234-DEF-abc", DIGIT + LOWER + UPPER + HYPHEN + SPECIAL, options);
    }

    /**
     * Exercises signature extraction with the special signature disabled,
     * enabled, and enabled together with {@code lower} form normalization.
     */
    @Test
    public void testShapeExtraction() {
        MorphOptions withoutSpecial = new MorphOptions();
        withoutSpecial.setProperty(MorphOptions.SPECIAL_SIGNATURE, "false");
        testShapeExtraction("?", NONE, withoutSpecial);
        testShapeExtraction("abc", LOWER, withoutSpecial);
        testShapeExtraction("DEF", UPPER, withoutSpecial);
        testShapeExtraction("-", HYPHEN, withoutSpecial);
        testShapeExtraction("1234", DIGIT, withoutSpecial);
        testShapeExtraction("1234abc", DIGIT + LOWER, withoutSpecial);
        testShapeExtraction("1234-abc", DIGIT + LOWER + HYPHEN, withoutSpecial);
        testShapeExtraction("1234-DEF", DIGIT + UPPER + HYPHEN, withoutSpecial);
        testShapeExtraction("1234-DEF-abc", DIGIT + LOWER + UPPER + HYPHEN, withoutSpecial);

        MorphOptions withSpecial = new MorphOptions();
        withSpecial.setProperty(MorphOptions.SPECIAL_SIGNATURE, "true");
        assertSpecialSignatures(withSpecial);

        // Make sure that changing the form doesn't affect signatures.
        MorphOptions withSpecialAndLowercasing = new MorphOptions();
        withSpecialAndLowercasing.setProperty(MorphOptions.SPECIAL_SIGNATURE, "true");
        withSpecialAndLowercasing.setProperty(MorphOptions.FORM_NORMALIZATION, Mode.lower.toString());
        assertSpecialSignatures(withSpecialAndLowercasing);
    }
}