/* * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.castransformation; import de.tudarmstadt.ukp.dkpro.core.api.transform.type.SofaChangeAnnotation; import de.tudarmstadt.ukp.dkpro.core.io.text.TextReader; import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiWriter; import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext; import de.tudarmstadt.ukp.dkpro.core.testing.dumper.CasDumpWriter; import de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter; import org.apache.commons.io.FileUtils; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.CAS; import org.apache.uima.collection.CollectionReaderDescription; import org.apache.uima.fit.component.JCasAnnotator_ImplBase; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.fit.factory.AggregateBuilder; import org.apache.uima.fit.pipeline.SimplePipeline; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.cas.AnnotationBase; import org.junit.Rule; import org.junit.Test; import java.io.File; import java.io.FileWriter; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; import static org.junit.Assert.assertEquals; public class ApplyChangesBackmapperTest { public static final String TARGET_VIEW = "TargetView"; @Test public void test() throws Exception { File output = testContext.getTestOutputFolder(); File inputFile = new File("src/test/resources/input.txt"); File dumpFile = new File(output, "output.txt"); String pipelineFilePath = new File(output, "pipeline.xml").getPath(); CollectionReaderDescription reader = createReaderDescription( TextReader.class, TextReader.PARAM_SOURCE_LOCATION, inputFile, TextReader.PARAM_LANGUAGE, "en"); AnalysisEngineDescription deletes = createEngineDescription( SofaDeleteAnnotator.class); AnalysisEngineDescription applyChanges = createEngineDescription( ApplyChangesAnnotator.class); AnalysisEngineDescription segmenter = createEngineDescription( BreakIteratorSegmenter.class); AnalysisEngineDescription backMapper = createEngineDescription( Backmapper.class, Backmapper.PARAM_CHAIN, new String[]{TARGET_VIEW, CAS.NAME_DEFAULT_SOFA}); AnalysisEngineDescription xmiWriter = createEngineDescription( XmiWriter.class, XmiWriter.PARAM_TARGET_LOCATION, output); AnalysisEngineDescription dumpWriter = createEngineDescription( CasDumpWriter.class, CasDumpWriter.PARAM_TARGET_LOCATION, dumpFile); AggregateBuilder builder = new AggregateBuilder(); builder.add(deletes); // Removing some lines to make sure to confuse the backmapper builder.add( applyChanges, ApplyChangesAnnotator.VIEW_TARGET, TARGET_VIEW, ApplyChangesAnnotator.VIEW_SOURCE, CAS.NAME_DEFAULT_SOFA); builder.add(segmenter, CAS.NAME_DEFAULT_SOFA, TARGET_VIEW); builder.add(backMapper); builder.add(xmiWriter, CAS.NAME_DEFAULT_SOFA, TARGET_VIEW); builder.add(dumpWriter, CAS.NAME_DEFAULT_SOFA, TARGET_VIEW); AnalysisEngineDescription pipeline = builder.createAggregateDescription(); try (FileWriter writer = new FileWriter(pipelineFilePath)) { pipeline.toXML(writer); } SimplePipeline.runPipeline(reader, pipeline); assertEquals( FileUtils.readFileToString(new File("src/test/resources/output.txt"), "UTF-8"), FileUtils.readFileToString(dumpFile, "UTF-8")); } public static class SofaDeleteAnnotator extends JCasAnnotator_ImplBase { @Override public void process(JCas jCas) throws AnalysisEngineProcessException { try { // Removes some "sentences" in a deterministic way. Assumes there are at least 5 // sentences though :-) String text = jCas.getDocumentText(); int previousPunctuation = -1; int sentenceCount = 0; for (int i = 0; i < text.length(); i++) { if (text.charAt(i) == '.') { if (sentenceCount % 5 == 0) { SofaChangeAnnotation delete = new SofaChangeAnnotation(jCas); delete.setOperation("delete"); delete.setBegin(previousPunctuation + 1); delete.setEnd(i + 1); delete.addToIndexes(); } previousPunctuation = i; sentenceCount++; } } } catch (Exception e) { throw new AnalysisEngineProcessException(e); } } } @Rule public DkproTestContext testContext = new DkproTestContext(); @Test public void testBackMappingOfGeneralFeatureStructures() throws Exception { File inputFile = new File("src/test/resources/input.txt"); CollectionReaderDescription reader = createReaderDescription( TextReader.class, TextReader.PARAM_SOURCE_LOCATION, inputFile, TextReader.PARAM_LANGUAGE, "en"); AnalysisEngineDescription applyChanges = createEngineDescription( ApplyChangesAnnotator.class); AnalysisEngineDescription fsCreator = createEngineDescription(CreateFeatureStructure.class); AnalysisEngineDescription backMapper = createEngineDescription( Backmapper.class, Backmapper.PARAM_CHAIN, new String[]{TARGET_VIEW, CAS.NAME_DEFAULT_SOFA}); AnalysisEngineDescription assertNotYetMappedBack = createEngineDescription( AssertFeatureStructureCount.class, AssertFeatureStructureCount.PARAM_EXPECTED_COUNT, 0 ); AnalysisEngineDescription assertMappedBack = createEngineDescription( AssertFeatureStructureCount.class, AssertFeatureStructureCount.PARAM_EXPECTED_COUNT, 1 ); AggregateBuilder builder = new AggregateBuilder(); builder.add( applyChanges, ApplyChangesAnnotator.VIEW_TARGET, TARGET_VIEW, ApplyChangesAnnotator.VIEW_SOURCE, CAS.NAME_DEFAULT_SOFA); builder.add(fsCreator, CAS.NAME_DEFAULT_SOFA, TARGET_VIEW); builder.add(assertNotYetMappedBack); // Should only exist in target view builder.add(backMapper); builder.add(assertMappedBack); // Should now be present in initial view AnalysisEngineDescription pipeline = builder.createAggregateDescription(); SimplePipeline.runPipeline(reader, pipeline); } public static class CreateFeatureStructure extends JCasAnnotator_ImplBase { @Override public void process(JCas jCas) throws AnalysisEngineProcessException { new AnnotationBase(jCas).addToIndexes(); } } public static class AssertFeatureStructureCount extends JCasAnnotator_ImplBase { public static final String PARAM_EXPECTED_COUNT = "expectedCount"; @ConfigurationParameter(name = PARAM_EXPECTED_COUNT, mandatory = true) private int expectedCount; @Override public void process(JCas jCas) throws AnalysisEngineProcessException { int fsCount = (int) JCasUtil.select(jCas, AnnotationBase.class).stream() .filter(t -> t.getClass().equals(AnnotationBase.class)) .count(); assertEquals (fsCount, expectedCount); } } }