/*
 * Copyright 2010
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.tudarmstadt.ukp.dkpro.core.jazzy;

import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.util.JCasUtil.select;
import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.List;

import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.fit.testing.factory.TokenBuilder;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ExternalResourceDescription;
import org.junit.Test;

import de.tudarmstadt.ukp.dkpro.core.api.anomaly.type.SpellingAnomaly;
import de.tudarmstadt.ukp.dkpro.core.api.frequency.TestFrequencyCountResource;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;

/**
 * Tests for {@link JazzyChecker}, both on its own and followed by
 * {@link CorrectionsContextualizer} in an aggregate engine.
 */
public class JazzyCheckerTest
{
    /**
     * Runs {@link JazzyChecker} with the dictionary at
     * {@code src/test/resources/testdict.txt} over a short English text and expects
     * exactly four {@link SpellingAnomaly} annotations covering "sta", "errosr",
     * "morre" and "tink", in the order in which {@code select} returns them.
     *
     * @throws Exception
     *             if the engine cannot be created or processing fails.
     */
    @Test
    public void spellCheckerTest()
        throws Exception
    {
        String testDocumentEnglish = "The cat sta on the mat . Some errosr occur in user "
                + "discourse morre often . What do you tink ?";

        List<String> errorsEnglish = new ArrayList<String>();
        errorsEnglish.add("sta");
        errorsEnglish.add("errosr");
        errorsEnglish.add("morre");
        errorsEnglish.add("tink");

        AnalysisEngine engine = createEngine(JazzyChecker.class,
                JazzyChecker.PARAM_MODEL_LOCATION, "src/test/resources/testdict.txt");

        JCas aJCas = processTokenizedText(engine, testDocumentEnglish);

        // Gather everything first and compare the lists as a whole: a surplus anomaly
        // then shows up as a readable assertion failure (with the full actual list)
        // rather than an IndexOutOfBoundsException from indexing the expected list.
        List<String> detectedErrors = new ArrayList<String>();
        for (SpellingAnomaly errorAnnotation : select(aJCas, SpellingAnomaly.class)) {
            detectedErrors.add(errorAnnotation.getCoveredText());
        }

        assertEquals(errorsEnglish, detectedErrors);
    }

    /**
     * Runs {@link JazzyChecker} followed by {@link CorrectionsContextualizer} and expects
     * a single {@link SpellingAnomaly} whose first suggestion is "sat".
     *
     * @throws Exception
     *             if the engine cannot be created or processing fails.
     */
    @Test
    public void contextualizedSpellCheckerTest()
        throws Exception
    {
        String testDocumentEnglish = "The cat sta on the mat .";

        // The test uses the self-contained TestFrequencyCountResource. A Web1T-backed
        // configuration was sketched here before and is kept for reference:
        //
        // String context = DkproContext.getContext().getWorkspace("web1t").getAbsolutePath();
        // String workspace = "en";
        // ExternalResourceDescription resource =
        //     ExternalResourceFactory.createExternalResourceDescription(
        //         Web1TFrequencyCountResource.class,
        //         Web1TFrequencyCountResource.PARAM_MIN_NGRAM_LEVEL, "1",
        //         Web1TFrequencyCountResource.PARAM_MAX_NGRAM_LEVEL, "3",
        //         Web1TFrequencyCountResource.PARAM_INDEX_PATH,
        //         new File(context, workspace).getAbsolutePath()
        //     );
        ExternalResourceDescription resource = ExternalResourceFactory
                .createExternalResourceDescription(TestFrequencyCountResource.class);

        // Aggregate of two components: the checker first, then the contextualizer.
        // NOTE(review): the meaning of the score threshold value 3 is defined by
        // JazzyChecker and is not visible from this test.
        AnalysisEngine engine = createEngine(
                createEngineDescription(
                        createEngineDescription(
                                JazzyChecker.class,
                                JazzyChecker.PARAM_SCORE_THRESHOLD, 3,
                                JazzyChecker.PARAM_MODEL_LOCATION,
                                "src/test/resources/testdict_variants.txt"
                        ),
                        createEngineDescription(
                                CorrectionsContextualizer.class,
                                CorrectionsContextualizer.FREQUENCY_PROVIDER_RESOURCE, resource
                        )
                )
        );

        JCas aJCas = processTokenizedText(engine, testDocumentEnglish);

        // Record the top-ranked replacement of every anomaly, then check count and value.
        List<String> topReplacements = new ArrayList<String>();
        for (SpellingAnomaly errorAnnotation : select(aJCas, SpellingAnomaly.class)) {
            topReplacements.add(errorAnnotation.getSuggestions(0).getReplacement());
        }

        assertEquals(1, topReplacements.size());
        assertEquals("sat", topReplacements.get(0));
    }

    /**
     * Creates a new CAS from the engine, lets {@link TokenBuilder} add the text with
     * {@link Token} and {@link Sentence} annotations, and runs the engine on it.
     *
     * @param engine
     *            the analysis engine to run.
     * @param text
     *            the pre-tokenized document text handed to {@code TokenBuilder.buildTokens}.
     * @return the processed CAS.
     * @throws Exception
     *             if CAS creation or processing fails.
     */
    private static JCas processTokenizedText(AnalysisEngine engine, String text)
        throws Exception
    {
        JCas jcas = engine.newJCas();

        TokenBuilder<Token, Sentence> tokenBuilder =
                TokenBuilder.create(Token.class, Sentence.class);
        tokenBuilder.buildTokens(jcas, text);

        engine.process(jcas);
        return jcas;
    }
}