/*
* Copyright 2014
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.api.transform;
import static org.apache.uima.fit.util.CasUtil.getType;
import static org.apache.uima.fit.util.CasUtil.selectFS;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.ListIterator;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.AnnotationBaseFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.jcas.JCas;
import org.apache.uima.util.CasCopier;
import de.tudarmstadt.ukp.dkpro.core.api.transform.alignment.AlignedString;
import de.tudarmstadt.ukp.dkpro.core.api.transform.alignment.ImmutableInterval;
import de.tudarmstadt.ukp.dkpro.core.api.transform.alignment.Interval;
/**
 * Base-class for normalizers that do insert/delete/replace operations. Subclasses record their
 * edits through {@link #insert(int, String)}, {@link #delete(int, int)} and
 * {@link #replace(int, int, String)}; the recorded changes are applied to the input document text
 * in {@link #afterProcess(JCas, JCas)}. Please mind that these operations must not overlap!
 */
public abstract class JCasTransformerChangeBased_ImplBase
    extends JCasTransformer_ImplBase
{
    private JCas input;
    private List<Change> changes;

    /**
     * Starts a fresh list of changes and remembers the input CAS for the current document.
     *
     * @param aInput
     *            the CAS holding the original document.
     * @param aOutput
     *            the CAS that will receive the transformed document.
     * @throws AnalysisEngineProcessException
     *             if the super-class implementation fails.
     */
    @Override
    public void beforeProcess(JCas aInput, JCas aOutput)
        throws AnalysisEngineProcessException
    {
        super.beforeProcess(aInput, aOutput);

        changes = new ArrayList<Change>();

        // Remember the input CAS so that we can access its text in replace()
        input = aInput;
    }

    /**
     * Applies all recorded changes to the input document text, sets the result as the document
     * text of the output CAS and copies the configured annotation types to the output CAS with
     * their begin/end offsets mapped through the alignment.
     *
     * @param aInput
     *            the CAS holding the original document.
     * @param aOutput
     *            the CAS that receives the transformed text and the copied annotations.
     * @throws IllegalStateException
     *             if two recorded changes overlap or a change has an unknown action.
     */
    @Override
    public void afterProcess(JCas aInput, JCas aOutput)
    {
        AlignedString alignedString = new AlignedString(aInput.getDocumentText());

        Collections.sort(changes, Interval.SEG_START_CMP);
        assertNoOverlappingChanges(changes);
        applyChanges(alignedString, changes);

        aOutput.setDocumentText(alignedString.get());

        // Below we repeat the super.afterProcess() code but inject adjustments of the offsets.
        // We do NOT call super.afterProcess()
        copyAnnotationsWithAdjustedOffsets(aInput, aOutput, alignedString);
    }

    /**
     * Fails if any change overlaps with the change preceding it in the given list. Overlapping
     * changes would cause corrupt output.
     *
     * @param aSortedChanges
     *            the changes, already sorted with {@code Interval.SEG_START_CMP}.
     * @throws IllegalStateException
     *             if a change overlaps with its predecessor.
     */
    private static void assertNoOverlappingChanges(List<Change> aSortedChanges)
    {
        Change previousChange = null;
        for (Change change : aSortedChanges) {
            if (previousChange != null && change.overlaps(previousChange)) {
                throw new IllegalStateException("Change " + change + " must not overlap with "
                        + previousChange);
            }
            previousChange = change;
        }
    }

    /**
     * Applies the given changes to the aligned string, walking the list from its last element to
     * its first.
     *
     * @param aAlignedString
     *            the string to modify.
     * @param aSortedChanges
     *            the changes, already sorted with {@code Interval.SEG_START_CMP}.
     * @throws IllegalStateException
     *             if a change has an unknown action.
     */
    private static void applyChanges(AlignedString aAlignedString, List<Change> aSortedChanges)
    {
        // Apply changes in reverse order so that offsets of unprocessed changes remain valid
        ListIterator<Change> li = aSortedChanges.listIterator(aSortedChanges.size());
        while (li.hasPrevious()) {
            Change change = li.previous();
            switch (change.getAction()) {
            case INSERT:
                aAlignedString.insert(change.getStart(), change.getText());
                break;
            case DELETE:
                aAlignedString.delete(change.getStart(), change.getEnd());
                break;
            case REPLACE:
                aAlignedString.replace(change.getStart(), change.getEnd(), change.getText());
                break;
            default:
                throw new IllegalStateException("Unknown change action [" + change.getAction()
                        + "]");
            }
        }
    }

    /**
     * Copies the feature structures of the types returned by {@code getTypesToCopy()} from the
     * input CAS to the output CAS, adds them to the indexes of the output CAS, and rewrites the
     * begin/end offsets of annotations using the alignment.
     * <p>
     * We have to do this in the afterProcess() phase, because otherwise the SofA in the target CAS
     * does not exist yet.
     *
     * @param aInput
     *            the CAS to copy from.
     * @param aOutput
     *            the CAS to copy to.
     * @param aAlignedString
     *            the alignment between the original and the transformed text.
     */
    private void copyAnnotationsWithAdjustedOffsets(JCas aInput, JCas aOutput,
            AlignedString aAlignedString)
    {
        CAS inputCas = aInput.getCas();
        CAS outputCas = aOutput.getCas();
        CasCopier copier = new CasCopier(inputCas, outputCas);

        Feature destSofaFeature = aOutput.getTypeSystem()
                .getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);

        // The annotation type and its offset features do not depend on the type being copied,
        // so they are resolved once instead of once per type.
        Type annotationType = getType(outputCas, CAS.TYPE_NAME_ANNOTATION);
        Feature beginFeature = annotationType.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_BEGIN);
        Feature endFeature = annotationType.getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);

        for (String typeName : getTypesToCopy()) {
            for (FeatureStructure fs : selectFS(inputCas, getType(inputCas, typeName))) {
                // NOTE(review): if copyFs() also copies feature structures referenced by fs,
                // those would be skipped here without offset adjustment or indexing - confirm.
                if (copier.alreadyCopied(fs)) {
                    continue;
                }

                FeatureStructure fsCopy = copier.copyFs(fs);

                // Make sure that the sofa annotation in the copy is set
                if (fs instanceof AnnotationBaseFS) {
                    FeatureStructure sofa = fsCopy.getFeatureValue(destSofaFeature);
                    if (sofa == null) {
                        fsCopy.setFeatureValue(destSofaFeature, aOutput.getSofa());
                    }
                }

                // Update the begin/end offsets
                if (fs instanceof AnnotationFS) {
                    AnnotationFS annoFs = (AnnotationFS) fs;
                    Interval i = aAlignedString.inverseResolve(new ImmutableInterval(annoFs
                            .getBegin(), annoFs.getEnd()));
                    fsCopy.setIntValue(beginFeature, i.getStart());
                    fsCopy.setIntValue(endFeature, i.getEnd());
                }

                aOutput.addFsToIndexes(fsCopy);
            }
        }
    }

    /**
     * Records the insertion of a text.
     *
     * @param aBegin
     *            the offset at which the text is to be inserted.
     * @param aText
     *            the text to insert.
     */
    public void insert(int aBegin, String aText)
    {
        // NOTE(review): the end offset of an insertion is recorded as -1. How the interval
        // base class orders and compares such an interval in the overlap check is not visible
        // here - confirm that insertions sort and validate as intended.
        changes.add(new Change(ChangeAction.INSERT, aBegin, -1, aText));
    }

    /**
     * Records the deletion of a span.
     *
     * @param aBegin
     *            the begin offset of the span.
     * @param aEnd
     *            the end offset of the span.
     */
    public void delete(int aBegin, int aEnd)
    {
        changes.add(new Change(ChangeAction.DELETE, aBegin, aEnd, null));
    }

    /**
     * Records the replacement of a span, unless the replacement text is equal to the text that the
     * input document already has at that span.
     *
     * @param aBegin
     *            the begin offset of the span.
     * @param aEnd
     *            the end offset of the span.
     * @param aText
     *            the replacement text.
     */
    public void replace(int aBegin, int aEnd, String aText)
    {
        // Create a change action only if the new text differs from the old text.
        // This avoids clutter in the changes list and improves performance when applying the
        // changes later.
        if (!aText.equals(input.getDocumentText().substring(aBegin, aEnd))) {
            changes.add(new Change(ChangeAction.REPLACE, aBegin, aEnd, aText));
        }
    }

    /**
     * The kinds of change that can be recorded.
     */
    private enum ChangeAction
    {
        INSERT,
        DELETE,
        REPLACE
    }

    /**
     * A single recorded change: an action, the interval it was recorded with and the text to
     * insert or to replace the interval with ({@code null} for deletions).
     */
    private static final class Change
        extends ImmutableInterval
    {
        private final ChangeAction action;
        private final String text;

        public Change(ChangeAction aAction, int aBegin, int aEnd, String aText)
        {
            super(aBegin, aEnd);
            action = aAction;
            text = aText;
        }

        public ChangeAction getAction()
        {
            return action;
        }

        public String getText()
        {
            return text;
        }

        @Override
        public String toString()
        {
            return "[action=" + action + ", text=" + text + ", begin=" + getStart()
                    + ", end=" + getEnd() + "]";
        }
    }
}