/*
 * Copyright 2014
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.tudarmstadt.ukp.dkpro.core.ldweb1t;

import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.factory.ExternalResourceFactory.createExternalResourceDescription;
import static org.apache.uima.fit.pipeline.SimplePipeline.runPipeline;
import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.List;

import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ExternalResourceDescription;
import org.junit.Test;

import de.tudarmstadt.ukp.dkpro.core.frequency.resources.Web1TInMemoryFrequencyCountResource;
import de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter;

/**
 * Test for {@link LanguageDetectorWeb1T} that feeds it English and German
 * Web1T frequency resources taken from {@code src/test/resources/web1t/}.
 */
public class LanguageDetectorWeb1TTest
{
    /**
     * Runs a segmenter followed by the language detector over a short English
     * sentence and expects the document language to be set to {@code "en"}.
     *
     * @throws Exception
     *             if building the descriptions or running the pipeline fails.
     */
    @Test
    public void web1tLanguageDetectorTest()
        throws Exception
    {
        // One frequency provider per candidate language; English is registered first.
        List<ExternalResourceDescription> frequencyProviders =
                new ArrayList<ExternalResourceDescription>();
        frequencyProviders.add(describeWeb1TResource("src/test/resources/web1t/en/", "en"));
        frequencyProviders.add(describeWeb1TResource("src/test/resources/web1t/de/", "de"));

        AnalysisEngineDescription segmenter =
                createEngineDescription(BreakIteratorSegmenter.class);
        AnalysisEngineDescription detector = createEngineDescription(
                LanguageDetectorWeb1T.class,
                LanguageDetectorWeb1T.PARAM_MAX_NGRAM_SIZE, 2,
                LanguageDetectorWeb1T.PARAM_FREQUENCY_PROVIDER_RESOURCES, frequencyProviders);

        // Aggregate: segmentation runs before language detection.
        AnalysisEngineDescription pipeline = createEngineDescription(segmenter, detector);

        JCas document = JCasFactory.createJCas();
        document.setDocumentText("This is an English example.");

        runPipeline(document, pipeline);

        assertEquals("en", document.getDocumentLanguage());
    }

    /**
     * Builds the description of an in-memory Web1T frequency resource with the
     * maximum n-gram level set to {@code "2"}.
     *
     * @param modelLocation
     *            value passed as the resource's model location.
     * @param language
     *            value passed as the resource's language.
     * @return the external resource description for the given model and language.
     */
    private static ExternalResourceDescription describeWeb1TResource(String modelLocation,
            String language)
    {
        return createExternalResourceDescription(
                Web1TInMemoryFrequencyCountResource.class,
                Web1TInMemoryFrequencyCountResource.PARAM_MODEL_LOCATION, modelLocation,
                Web1TInMemoryFrequencyCountResource.PARAM_LANGUAGE, language,
                Web1TInMemoryFrequencyCountResource.PARAM_MAX_NGRAM_LEVEL, "2");
    }
}