/*
 * Copyright 2010
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 **/

package de.tudarmstadt.ukp.dkpro.core.decompounding.splitter;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.List;

import org.junit.Assert;
import org.junit.Test;

import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils;
import de.tudarmstadt.ukp.dkpro.core.decompounding.dictionary.Dictionary;
import de.tudarmstadt.ukp.dkpro.core.decompounding.dictionary.LinkingMorphemes;
import de.tudarmstadt.ukp.dkpro.core.decompounding.dictionary.SimpleDictionary;

/**
 * Tests for {@link LeftToRightSplitterAlgorithm}, using small in-memory dictionaries as well as
 * the dictionary and linking-morpheme files loaded from the classpath.
 */
public class LeftToRightSplitAlgorithmTest
{

    /**
     * Splits "Aktionsplan" with the linking morpheme "s" and verifies every returned split,
     * including its position in the result list. The expected strings are lower case although
     * the input word and some dictionary entries are capitalized.
     */
    @Test
    public void testSplit1()
    {
        Dictionary dict = new SimpleDictionary("Akt", "ion", "plan", "Aktion", "Aktionsplan");
        LinkingMorphemes morphemes = new LinkingMorphemes("s");

        List<DecompoundedWord> result = allSplits(dict, morphemes, "Aktionsplan");

        Assert.assertEquals(6, result.size());
        Assert.assertEquals("aktionsplan", result.get(0).toString());
        Assert.assertEquals("akt+ionsplan", result.get(1).toString());
        Assert.assertEquals("akt+ion+splan", result.get(2).toString());
        Assert.assertEquals("akt+ion(s)+plan", result.get(3).toString());
        Assert.assertEquals("aktion+splan", result.get(4).toString());
        Assert.assertEquals("aktion(s)+plan", result.get(5).toString());
    }

    /**
     * Splits "Donaudampfschifffahrt" and verifies only the number of returned splits.
     */
    @Test
    public void testSplit2()
    {
        Dictionary dict = new SimpleDictionary("Donau", "dampf", "schiff", "fahrt",
                "dampfschiff", "schifffahrt");
        LinkingMorphemes morphemes = new LinkingMorphemes("s");

        List<DecompoundedWord> result = allSplits(dict, morphemes, "Donaudampfschifffahrt");

        Assert.assertEquals(6, result.size());
    }

    /**
     * Splits "Supermannanzug" and verifies only the number of returned splits.
     */
    @Test
    public void testSplit3()
    {
        // NOTE(review): "anzug" is listed twice; presumably redundant - confirm against
        // SimpleDictionary before removing.
        Dictionary dict = new SimpleDictionary("Super", "mann", "anzug", "Supermann", "anzug");
        LinkingMorphemes morphemes = new LinkingMorphemes("s");

        List<DecompoundedWord> result = allSplits(dict, morphemes, "Supermannanzug");

        // Super+mann+anzug and Supermann+anzug are two of the splits the original author
        // had in mind; the remaining two are not enumerated by this test.
        Assert.assertEquals(4, result.size());
    }

    /**
     * Splits "alarmreaktionen" with the linking morpheme "en" and verifies all three returned
     * splits in order.
     */
    @Test
    public void testMorphemes1()
    {
        Dictionary dict = new SimpleDictionary("alarm", "reaktion");
        LinkingMorphemes morphemes = new LinkingMorphemes("en");

        List<DecompoundedWord> result = allSplits(dict, morphemes, "alarmreaktionen");

        // Unsplit word, plain two-part split, and the split with "en" marked as morpheme.
        Assert.assertEquals(3, result.size());
        Assert.assertEquals("alarmreaktionen", result.get(0).toString());
        Assert.assertEquals("alarm+reaktionen", result.get(1).toString());
        Assert.assertEquals("alarm+reaktion(en)", result.get(2).toString());
    }

    /**
     * Splits "geräteelektronik" using the dictionary and linking-morpheme files from the
     * classpath and verifies that exactly one split is returned.
     *
     * @throws IOException
     *             if one of the resource files cannot be accessed
     */
    @Test
    public void testSplit4()
        throws IOException
    {
        final File dictFile = resourceFile(
                "/de/tudarmstadt/ukp/dkpro/core/decompounding/lib/spelling-de-igerman98.dic");
        final File morphemesFile = resourceFile(
                "/de/tudarmstadt/ukp/dkpro/core/decompounding/lib/spelling-de-linking.linking");

        Dictionary dict = new SimpleDictionary(dictFile);
        LinkingMorphemes morphemes = new LinkingMorphemes(morphemesFile);

        List<DecompoundedWord> result = allSplits(dict, morphemes, "geräteelektronik");

        assertThat(result.size(), is(1));
    }

    /**
     * Runs a freshly created {@link LeftToRightSplitterAlgorithm} on the given word.
     *
     * @param dict
     *            dictionary handed to the splitter
     * @param morphemes
     *            linking morphemes handed to the splitter
     * @param word
     *            word to split
     * @return the list obtained from {@code split(word).getAllSplits()}
     */
    private static List<DecompoundedWord> allSplits(Dictionary dict, LinkingMorphemes morphemes,
            String word)
    {
        LeftToRightSplitterAlgorithm algo = new LeftToRightSplitterAlgorithm(dict, morphemes);
        return algo.split(word).getAllSplits();
    }

    /**
     * Looks up a classpath resource relative to this test class and converts it to a file.
     * Fails the test with a descriptive message when the resource is missing, because
     * {@link Class#getResource(String)} returns {@code null} in that case.
     *
     * @param path
     *            resource path as accepted by {@link Class#getResource(String)}
     * @return the file obtained from {@code ResourceUtils.getUrlAsFile(url, false)}
     * @throws IOException
     *             if the resource cannot be made available as a file
     */
    private File resourceFile(String path)
        throws IOException
    {
        URL url = getClass().getResource(path);
        Assert.assertNotNull("Test resource not found on classpath: " + path, url);
        return ResourceUtils.getUrlAsFile(url, false);
    }
}