/*
* Copyright 2010
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package de.tudarmstadt.ukp.dkpro.core.decompounding.splitter;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
import java.io.File;
import java.io.IOException;
import java.util.List;
import junit.framework.Assert;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils;
import de.tudarmstadt.ukp.dkpro.core.decompounding.dictionary.LinkingMorphemes;
import de.tudarmstadt.ukp.dkpro.core.decompounding.dictionary.SimpleDictionary;
/**
 * Tests for {@link DataDrivenSplitterAlgorithm}: one case against a small in-memory dictionary
 * and one against dictionary and linking-morpheme files loaded from the classpath.
 */
public class DataDrivenAlgorithmTest
{
    /**
     * Splits "friedenspolitik" using a hand-built dictionary and the linking morphemes
     * "en", "s" and "ens", and checks that exactly two splits are returned: the unsplit word
     * first, followed by "friedens+politik".
     */
    @Test
    public void testSplit()
    {
        SimpleDictionary dict = new SimpleDictionary("friedens", "politik", "friedenspolitik",
                "friedensverhaltungen", "friedenshaltung", "frittieren", "friseur", "außenpolitik",
                "innenpolitik");
        LinkingMorphemes morphemes = new LinkingMorphemes("en", "s", "ens");
        DataDrivenSplitterAlgorithm algo = new DataDrivenSplitterAlgorithm(dict, morphemes);

        List<DecompoundedWord> result = algo.split("friedenspolitik").getAllSplits();

        // Same assertion style as testSplit2; avoids the deprecated junit.framework.Assert.
        assertThat(result.size(), is(2));
        assertThat(result.get(0).toString(), is("friedenspolitik"));
        assertThat(result.get(1).toString(), is("friedens+politik"));
    }

    /**
     * Splits "geräteelektronik" using the dictionary and linking-morpheme files shipped as
     * classpath resources, and checks that exactly one split is returned.
     *
     * @throws IOException
     *             declared by this test; presumably raised when a resource file cannot be
     *             obtained or read.
     */
    @Test
    public void testSplit2()
        throws IOException
    {
        final File dictFile = ResourceUtils.getUrlAsFile(getClass().getResource(
                "/de/tudarmstadt/ukp/dkpro/core/decompounding/lib/spelling-de-igerman98.dic"), false);
        SimpleDictionary dict = new SimpleDictionary(dictFile);

        final File morphemesFile = ResourceUtils.getUrlAsFile(getClass().getResource(
                "/de/tudarmstadt/ukp/dkpro/core/decompounding/lib/spelling-de-linking.linking"), false);
        LinkingMorphemes morphemes = new LinkingMorphemes(morphemesFile);

        DataDrivenSplitterAlgorithm splitter = new DataDrivenSplitterAlgorithm(dict, morphemes);
        List<DecompoundedWord> result = splitter.split("geräteelektronik").getAllSplits();

        assertThat(result.size(), is(1));
    }
}