/* * Copyright 2010 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.castransformation; import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.CASException; import org.apache.uima.cas.FSIndex; import org.apache.uima.cas.FSIterator; import org.apache.uima.fit.component.JCasAnnotator_ImplBase; import org.apache.uima.fit.descriptor.TypeCapability; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; import de.tudarmstadt.ukp.dkpro.core.api.transform.alignment.AlignedString; import de.tudarmstadt.ukp.dkpro.core.api.transform.type.SofaChangeAnnotation; import de.tudarmstadt.ukp.dkpro.core.castransformation.internal.AlignmentStorage; /** * Applies changes annotated using a {@link SofaChangeAnnotation}. * * @since 1.1.0 * @see Backmapper */ @TypeCapability( inputs={ "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData", "de.tudarmstadt.ukp.dkpro.core.api.transform.type.SofaChangeAnnotation"}, outputs={ "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData", "de.tudarmstadt.ukp.dkpro.core.api.transform.type.SofaChangeAnnotation"}) public class ApplyChangesAnnotator extends JCasAnnotator_ImplBase { public static final String VIEW_SOURCE = "source"; public static final String VIEW_TARGET = "target"; public static final String OP_INSERT = "insert"; public static final String OP_REPLACE = "replace"; public static final String OP_DELETE = "delete"; @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { try { JCas sourceView = aJCas.getView(VIEW_SOURCE); JCas targetView = aJCas.createView(VIEW_TARGET); DocumentMetaData.copy(sourceView, targetView); applyChanges(sourceView, targetView); } catch (CASException e) { throw new AnalysisEngineProcessException(e); } } protected void applyChanges(JCas aSourceView, JCas aTargetView) { FSIndex<Annotation> idx = aSourceView.getAnnotationIndex(SofaChangeAnnotation.type); getLogger().info("Found " + idx.size() + " changes"); // Apply all the changes AlignedString as = new AlignedString(aSourceView.getDocumentText()); // Collect all those edits that are going to be executed. // // | A | C1 C2 R // BBBBBB + - - // BBBBBBBBBB + + + // BBBBBBBBBBBBBBBBB + + + // BBBBBBB - + - // BBBBBBBBBBBBB - + - // BBBBBBBB - + - // if (idx.size() > 0) { List<SofaChangeAnnotation> edits = new ArrayList<SofaChangeAnnotation>(); { // Get an iterator over all the change annotations. Per UIMA default // this iterator is sorted first by begin and then by end offsets. // We will make use of this fact here to skip change annotations that // are covered by others. The earliest longest change wins - this means // the one with the smallest begin offset and the largest end offset. FSIterator<Annotation> it = idx.iterator(); SofaChangeAnnotation top = (SofaChangeAnnotation) it.get(); edits.add(top); it.moveToNext(); while (it.isValid()) { SofaChangeAnnotation b = (SofaChangeAnnotation) it.get(); if (((top.getBegin() <= b.getBegin()) && // C1 (top.getEnd() > b.getBegin()) // C2 ) || ((top.getBegin() == b.getBegin()) && (top.getEnd() == b.getEnd()))) { // Found annotation covering current annotation. Skipping // current annotation. } else { top = b; edits.add(top); } it.moveToNext(); } } // If we remove or add stuff all offsets right of the change location // will change and thus the offsets in the change annotation are no // longer valid. If we move from right to left it works better because // the left offsets remain stable. Collections.reverse(edits); for (SofaChangeAnnotation a : edits) { if (OP_INSERT.equals(a.getOperation())) { // getLogger().debug("Performing insert[" + a.getBegin() + "-" + a.getEnd() + "]: [" // + a.getCoveredText() + "]"); as.insert(a.getBegin(), a.getValue()); } if (OP_DELETE.equals(a.getOperation())) { // getLogger().debug("Performing delete[" + a.getBegin() + "-" + a.getEnd() + "]: [" // + a.getCoveredText() + "]"); as.delete(a.getBegin(), a.getEnd()); } if (OP_REPLACE.equals(a.getOperation())) { // getLogger().debug("Performing replace[" + a.getBegin() + "-" + a.getEnd() + "]: [" // + a.getCoveredText() + "]"); as.replace(a.getBegin(), a.getEnd(), a.getValue()); } } } // Set the text of the new Sofa aTargetView.setDocumentText(as.get()); // Set document language aTargetView.setDocumentLanguage(aSourceView.getDocumentLanguage()); // Optionally we may want to remember the AlignedString for the backmapper. AlignmentStorage.getInstance().put(aSourceView.getCasImpl().getBaseCAS(), aSourceView.getViewName(), aTargetView.getViewName(), as); } }