/**
* Copyright 2007-2014
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Apache UIMA
* Copyright 2006, 2011 The Apache Software Foundation
*
* This product includes software developed at
* The Apache Software Foundation (http://www.apache.org/).
*
* Portions of UIMA were originally developed by
* International Business Machines Corporation and are
* licensed to the Apache Software Foundation under the
* "Software Grant License Agreement", informally known as the
* "IBM UIMA License Agreement".
* Copyright (c) 2003, 2006 IBM Corporation.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.uima.UIMARuntimeException;
import org.apache.uima.cas.ArrayFS;
import org.apache.uima.cas.BooleanArrayFS;
import org.apache.uima.cas.ByteArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.DoubleArrayFS;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.FloatArrayFS;
import org.apache.uima.cas.IntArrayFS;
import org.apache.uima.cas.LongArrayFS;
import org.apache.uima.cas.ShortArrayFS;
import org.apache.uima.cas.SofaFS;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.impl.CASImpl;
import org.apache.uima.cas.impl.LowLevelCAS;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.tcas.Annotation;
/**
* Utility class for doing deep copies of FeatureStructures from one CAS to another. To handle cases
* where the source CAS has multiple references to the same FS, you can create one instance of
* CasCopier and use it to copy multiple FeatureStructures. The CasCopier will remember previously
* copied FeatureStructures, so if you later copy another FS that has a reference to a previously
* copied FS, it will not duplicate the multiply-referenced FS.
*/
public class CasCopier
{
// CAS that feature structures are copied from.
private final CAS mSrcCas;
// CAS that feature structures are copied into.
private final CAS mDestCas;
// Low-level interface of the destination CAS; copyFs uses it to create FSs by type code.
private final LowLevelCAS mLowLevelDestCas;
// The feature named CAS.FEATURE_FULL_NAME_SOFA, looked up in the destination type system.
private final Feature mDestSofaFeature;
// Maps each source FS that was already copied to its copy, so that an FS which is
// referenced several times is only copied once.
private final Map<FeatureStructure, FeatureStructure> mFsMap = new HashMap<FeatureStructure, FeatureStructure>();
// The annotations handed to batchCopyAnnotations while that call is running; copyFeatures
// consults it to resolve references to annotations of the same batch. Null when no batch
// copy has been started, and set back to null when a batch completes.
private List<Annotation> batchCopyAnnoList = null;
/**
 * Builds a copier that transfers FeatureStructures from {@code aSrcCas} into
 * {@code aDestCas}. A copier is bound to exactly one source CAS: when data from several
 * CASes is merged, a separate CasCopier has to be created per source CAS.
 *
 * @param aSrcCas
 *            the CAS to copy from.
 * @param aDestCas
 *            the CAS to copy into.
 */
public CasCopier(CAS aSrcCas, CAS aDestCas)
{
    this.mSrcCas = aSrcCas;
    this.mDestCas = aDestCas;
    // cached once; copyFs creates every new FS through the low-level interface
    this.mLowLevelDestCas = aDestCas.getLowLevelCAS();
    // the Sofa feature as known to the destination type system
    this.mDestSofaFeature = aDestCas.getTypeSystem().getFeatureByFullName(
            CAS.FEATURE_FULL_NAME_SOFA);
}
/**
 * Deep-copies a complete CAS into another CAS, view by view. Every view of the source CAS
 * that is reachable through its Sofa iterator is copied into the view of the same name in
 * the destination CAS; missing destination views are created. FeatureStructures that are
 * indexed in a source view end up indexed in the corresponding destination view.
 *
 * @param aSrcCas
 *            the CAS to copy from
 * @param aDestCas
 *            the CAS to copy to
 * @param aCopySofa
 *            if true, the sofa data and mimeType of each view are copied as well; if
 *            false they are left untouched.
 */
public static void copyCas(CAS aSrcCas, CAS aDestCas, boolean aCopySofa)
{
    // a single copier is shared by all views so that FSs referenced from several views
    // are copied only once
    final CasCopier casCopier = new CasCopier(aSrcCas, aDestCas);
    for (Iterator<SofaFS> sofas = aSrcCas.getSofaIterator(); sofas.hasNext();) {
        CAS srcView = aSrcCas.getView(sofas.next());
        casCopier.copyCasView(srcView, aCopySofa);
    }
}
/**
 * Does a deep copy of the contents of one CAS View into another CAS. If a view with the same
 * name as <code>aSrcCasView</code> exists in the destination CAS, then it will be the target
 * of the copy. Otherwise, a new view will be created with that name and will become the
 * target of the copy. All FeatureStructures that are indexed in the source CAS view will
 * become indexed in the target view.
 *
 * @param aSrcCasView
 *            the CAS to copy from
 * @param aCopySofa
 *            if true, the sofa data and mimeType will be copied. If false they will not.
 *            Nothing is copied when the source view has no Sofa.
 */
public void copyCasView(CAS aSrcCasView, boolean aCopySofa)
{
    // get or create the target view
    CAS targetView = getOrCreateView(mDestCas, aSrcCasView.getViewName());

    if (aCopySofa) {
        // can't copy the SofaFS - just copy the sofa data and mime type.
        // A view can exist without a Sofa (e.g. an initial view whose Sofa data was never
        // set); in that case there is nothing to copy and dereferencing the Sofa would fail.
        SofaFS srcSofa = aSrcCasView.getSofa();
        if (srcSofa != null) {
            String sofaMime = srcSofa.getSofaMime();
            String documentText = aSrcCasView.getDocumentText();
            if (documentText != null) {
                targetView.setSofaDataString(documentText, sofaMime);
            }
            else if (aSrcCasView.getSofaDataURI() != null) {
                targetView.setSofaDataURI(aSrcCasView.getSofaDataURI(), sofaMime);
            }
            else if (aSrcCasView.getSofaDataArray() != null) {
                targetView.setSofaDataArray(copyFs(aSrcCasView.getSofaDataArray()), sofaMime);
            }
        }
    }

    // now copy indexed FS, but keep track so we don't index anything more than once
    // (the same FS is usually reachable through several indexes)
    Set<FeatureStructure> indexedFs = new HashSet<FeatureStructure>();
    Iterator<FSIndex<FeatureStructure>> indexes = aSrcCasView.getIndexRepository().getIndexes();
    while (indexes.hasNext()) {
        Iterator<FeatureStructure> iter = indexes.next().iterator();
        while (iter.hasNext()) {
            FeatureStructure fs = iter.next();
            if (!indexedFs.add(fs)) {
                // already handled via another index
                continue;
            }
            FeatureStructure copyOfFs = copyFs(fs);
            // check for annotations with null Sofa reference - this can happen if the
            // annotations were created with the Low Level CAS API. If the Sofa reference
            // isn't set, attempting to add the FS to the indexes will fail.
            if (fs instanceof AnnotationFS) {
                FeatureStructure sofa = ((AnnotationFS) copyOfFs)
                        .getFeatureValue(mDestSofaFeature);
                if (sofa == null) {
                    copyOfFs.setFeatureValue(mDestSofaFeature, targetView.getSofa());
                }
            }
            // also don't index the DocumentAnnotation (it's indexed by default)
            if (!isDocumentAnnotation(copyOfFs)) {
                targetView.addFsToIndexes(copyOfFs);
            }
        }
    }
}
/**
 * Copies an FS from the source CAS to the destination CAS. Also copies any referenced FS,
 * except that previously copied FS will not be copied again.
 * <p>
 * Some kinds of FS are treated specially:
 * <ul>
 * <li>a Sofa is never created directly; the Sofa of the destination view with the same Sofa
 * ID is returned instead (the view is created if necessary);</li>
 * <li>the DocumentAnnotation of a view is not duplicated; its features are copied onto the
 * DocumentAnnotation of the same-named destination view (the view is created if
 * necessary);</li>
 * <li>arrays are copied element by element.</li>
 * </ul>
 *
 * @param aFS
 *            the FS to copy. Must be contained within the source CAS.
 * @return the copy of <code>aFS</code> in the target CAS.
 * @throws UIMARuntimeException
 *             if the type of <code>aFS</code> does not exist in the destination type system.
 */
public FeatureStructure copyFs(FeatureStructure aFS)
{
    // FS must be in the source CAS
    assert ((CASImpl) aFS.getCAS()).getBaseCAS() == ((CASImpl) mSrcCas).getBaseCAS();

    // check if we already copied this FS
    FeatureStructure copy = mFsMap.get(aFS);
    if (copy != null) {
        return copy;
    }

    // get the type of the FS
    Type srcType = aFS.getType();

    // Certain types need to be handled specially

    // Sofa - cannot be created by normal methods. Instead, we return the Sofa with the
    // same Sofa ID in the target CAS. If it does not exist it will be created.
    if (aFS instanceof SofaFS) {
        String sofaId = ((SofaFS) aFS).getSofaID();
        return getOrCreateView(mDestCas, sofaId).getSofa();
    }

    // DocumentAnnotation - instead of creating a new instance, reuse the automatically
    // created instance in the destination view. The view is looked up with
    // getOrCreateView (like the Sofa case above) so that copying a DocumentAnnotation
    // into a destination CAS that does not have the view yet does not fail.
    if (isDocumentAnnotation(aFS)) {
        String viewName = ((AnnotationFS) aFS).getView().getViewName();
        CAS destView = getOrCreateView(mDestCas, viewName);
        FeatureStructure destDocAnnot = destView.getDocumentAnnotation();
        if (destDocAnnot != null) {
            copyFeatures(aFS, destDocAnnot);
        }
        return destDocAnnot;
    }

    // Arrays - need to be created and populated differently than "normal" FS
    if (srcType.isArray()) {
        copy = copyArray(aFS);
        mFsMap.put(aFS, copy);
        return copy;
    }

    // create a new FS of the same type in the target CAS
    Type destType;
    if (mDestCas.getTypeSystem() == mSrcCas.getTypeSystem()) {
        // optimize type lookup if type systems are identical
        destType = srcType;
    }
    else {
        destType = mDestCas.getTypeSystem().getType(srcType.getName());
    }
    if (destType == null) {
        throw new UIMARuntimeException(UIMARuntimeException.TYPE_NOT_FOUND_DURING_CAS_COPY,
                new Object[] { srcType.getName() });
    }

    // We need to use the LowLevel CAS interface to create the FS, because the usual
    // CAS.createFS() call doesn't allow us to create subtypes of AnnotationBase from a
    // base CAS. In any case we don't need the Sofa reference to be automatically set
    // because we'll set it manually in the copyFeatures method.
    int typeCode = mLowLevelDestCas.ll_getTypeSystem().ll_getCodeForType(destType);
    int destFsAddr = mLowLevelDestCas.ll_createFS(typeCode);
    FeatureStructure destFs = mLowLevelDestCas.ll_getFSForRef(destFsAddr);

    // add to map BEFORE copying the features so that cyclic references back to aFS
    // resolve to destFs instead of triggering another copy
    mFsMap.put(aFS, destFs);
    copyFeatures(aFS, destFs);
    return destFs;
}
/**
 * Copy feature values from one FS to another. Primitive-valued features are transferred via
 * their string representation.
 *
 * <p>
 * NOTE: the handling of reference-valued features differs from the original UIMA CasCopier.
 * A referenced FS is only deep-copied if no suitable counterpart can be found. For a
 * referenced {@link Annotation} the following is tried in order:
 * </p>
 * <ol>
 * <li>if a batch copy is running and the referenced annotation is part of the batch, the
 * reference is set to the copy of that batch member;</li>
 * <li>otherwise the annotation index of the destination CAS is searched for an annotation of
 * the same type (resolved by name in the destination type system) with identical begin, end
 * and view name; if one exists, the reference is pointed at it instead of copying;</li>
 * <li>otherwise the referenced FS is deep-copied from the source CAS.</li>
 * </ol>
 * <p>
 * All other (non-annotation) references are always deep-copied.
 * </p>
 *
 * @param aSrcFS
 *            FeatureStructure to copy from
 * @param aDestFS
 *            FeatureStructure to copy to
 * @throws UIMARuntimeException
 *             if a feature of the source type has no counterpart in the destination type.
 */
private void copyFeatures(FeatureStructure aSrcFS, FeatureStructure aDestFS)
{
    Type srcType = aSrcFS.getType();
    Type destType = aDestFS.getType();
    for (Feature srcFeat : srcType.getFeatures()) {
        Feature destFeat;
        if (destType == srcType) {
            // sharing same type system, so destFeat == srcFeat
            destFeat = srcFeat;
        }
        else {
            // not sharing same type system, so do a name lookup in destination type system
            destFeat = destType.getFeatureByBaseName(srcFeat.getShortName());
            if (destFeat == null) {
                throw new UIMARuntimeException(
                        UIMARuntimeException.TYPE_NOT_FOUND_DURING_CAS_COPY,
                        new Object[] { srcFeat.getName() });
            }
        }

        // copy primitive values using their string representation
        // TODO: could be optimized but this code would be very messy if we have to
        // enumerate all possible primitive types. Maybe LowLevel CAS API could help?
        if (srcFeat.getRange().isPrimitive()) {
            aDestFS.setFeatureValueFromString(destFeat, aSrcFS.getFeatureValueAsString(srcFeat));
            continue;
        }

        FeatureStructure refFS = aSrcFS.getFeatureValue(srcFeat);
        if (refFS == null) {
            // nothing referenced, leave the destination feature unset
            continue;
        }

        boolean foundExisting = false;
        if (refFS instanceof Annotation) {
            // 1) Is the referenced annotation part of the batch that is currently being
            // copied (i.e. possibly not recovered YET)? Addresses can be compared here
            // because the batch holds the original instances, not recreated ones.
            if (batchCopyAnnoList != null) {
                int refAddress = ((Annotation) refFS).getAddress();
                for (Annotation curAnno : batchCopyAnnoList) {
                    if (curAnno.getAddress() == refAddress) {
                        aDestFS.setFeatureValue(destFeat, copyFs(curAnno));
                        foundExisting = true;
                        // copyFs is memoized, further hits could only yield the same copy
                        break;
                    }
                }
            }

            // 2) Otherwise check whether the referenced annotation is a parser annotation
            // that has been recreated during the deserialization of the transformed
            // Stanford tree object. In this case the reference is changed to the new
            // annotation, which should already be present in the destination CAS.
            if (!foundExisting) {
                CAS destCas = aDestFS.getCAS();
                // The index must be queried with a type of the DESTINATION type system;
                // when both CASes share one type system this yields the very same type.
                Type destRefType = destCas.getTypeSystem().getType(refFS.getType().getName());
                if (destRefType != null) {
                    int refBegin = ((AnnotationFS) refFS).getBegin();
                    int refEnd = ((AnnotationFS) refFS).getEnd();
                    AnnotationIndex<AnnotationFS> annoIndex = destCas
                            .getAnnotationIndex(destRefType);
                    FSIterator<AnnotationFS> it = annoIndex.iterator();
                    while (it.hasNext()) {
                        AnnotationFS fs = it.next();
                        // TODO Caution: the following check cannot identify reference
                        // targets if their span has changed during transformation, e.g.
                        // due to some TSurgeon operation. Annotation ids cannot be
                        // compared here, because we are dealing with different instances.
                        if (fs.getBegin() == refBegin
                                && fs.getEnd() == refEnd
                                && fs.getView()
                                        .getViewName()
                                        .equals(((AnnotationFS) refFS).getView()
                                                .getViewName())) {
                            // deliberately no break: if several candidates match, the
                            // last one in index order is kept
                            aDestFS.setFeatureValue(destFeat, fs);
                            foundExisting = true;
                        }
                    }
                }
            }
        }

        // 3) If the referenced FS has neither been recreated nor been recovered from the
        // Stanford tree nodes, copy it directly from the old CAS.
        if (!foundExisting) {
            aDestFS.setFeatureValue(destFeat, copyFs(refFS));
        }
    }
}
/**
 * Tells whether this CasCopier has already recorded a copy for the given FS.
 *
 * @param aFS
 *            a feature structure.
 * @return true if the copier's map of copied FSs has an entry for <code>aFS</code>.
 */
public boolean alreadyCopied(FeatureStructure aFS)
{
    final boolean known = mFsMap.containsKey(aFS);
    return known;
}
/**
 * Creates an array FS of the same kind and length in the destination CAS and fills it with
 * the contents of the given source array. Primitive and String elements are copied by
 * value; the elements of an FS array are copied with {@link #copyFs(FeatureStructure)},
 * null elements are left unset.
 *
 * @param aSrcFs
 *            the array FS to copy.
 * @return the new array in the destination CAS, or null if <code>aSrcFs</code> is none of
 *         the known array kinds (and assertions are disabled).
 */
private FeatureStructure copyArray(FeatureStructure aSrcFs)
{
    // TODO: there should be a way to do this without enumerating all the array types!
    FeatureStructure result = null;
    if (aSrcFs instanceof StringArrayFS) {
        StringArrayFS src = (StringArrayFS) aSrcFs;
        final int n = src.size();
        StringArrayFS dst = mDestCas.createStringArrayFS(n);
        for (int k = 0; k < n; k++) {
            dst.set(k, src.get(k));
        }
        result = dst;
    }
    else if (aSrcFs instanceof IntArrayFS) {
        IntArrayFS src = (IntArrayFS) aSrcFs;
        final int n = src.size();
        IntArrayFS dst = mDestCas.createIntArrayFS(n);
        for (int k = 0; k < n; k++) {
            dst.set(k, src.get(k));
        }
        result = dst;
    }
    else if (aSrcFs instanceof ByteArrayFS) {
        ByteArrayFS src = (ByteArrayFS) aSrcFs;
        final int n = src.size();
        ByteArrayFS dst = mDestCas.createByteArrayFS(n);
        for (int k = 0; k < n; k++) {
            dst.set(k, src.get(k));
        }
        result = dst;
    }
    else if (aSrcFs instanceof ShortArrayFS) {
        ShortArrayFS src = (ShortArrayFS) aSrcFs;
        final int n = src.size();
        ShortArrayFS dst = mDestCas.createShortArrayFS(n);
        for (int k = 0; k < n; k++) {
            dst.set(k, src.get(k));
        }
        result = dst;
    }
    else if (aSrcFs instanceof LongArrayFS) {
        LongArrayFS src = (LongArrayFS) aSrcFs;
        final int n = src.size();
        LongArrayFS dst = mDestCas.createLongArrayFS(n);
        for (int k = 0; k < n; k++) {
            dst.set(k, src.get(k));
        }
        result = dst;
    }
    else if (aSrcFs instanceof FloatArrayFS) {
        FloatArrayFS src = (FloatArrayFS) aSrcFs;
        final int n = src.size();
        FloatArrayFS dst = mDestCas.createFloatArrayFS(n);
        for (int k = 0; k < n; k++) {
            dst.set(k, src.get(k));
        }
        result = dst;
    }
    else if (aSrcFs instanceof DoubleArrayFS) {
        DoubleArrayFS src = (DoubleArrayFS) aSrcFs;
        final int n = src.size();
        DoubleArrayFS dst = mDestCas.createDoubleArrayFS(n);
        for (int k = 0; k < n; k++) {
            dst.set(k, src.get(k));
        }
        result = dst;
    }
    else if (aSrcFs instanceof BooleanArrayFS) {
        BooleanArrayFS src = (BooleanArrayFS) aSrcFs;
        final int n = src.size();
        BooleanArrayFS dst = mDestCas.createBooleanArrayFS(n);
        for (int k = 0; k < n; k++) {
            dst.set(k, src.get(k));
        }
        result = dst;
    }
    else if (aSrcFs instanceof ArrayFS) {
        ArrayFS src = (ArrayFS) aSrcFs;
        final int n = src.size();
        ArrayFS dst = mDestCas.createArrayFS(n);
        for (int k = 0; k < n; k++) {
            FeatureStructure element = src.get(k);
            if (element != null) {
                // deep copy; elements that were copied before are reused by copyFs
                dst.set(k, copyFs(element));
            }
        }
        result = dst;
    }
    else {
        // the set of array types should be exhaustive, so we should never get here
        assert false;
    }
    return result;
}
/**
 * Looks up the view with the given name in the given CAS and creates it when the lookup
 * fails with a CASRuntimeException.
 *
 * @param aCas
 *            the CAS that holds (or will hold) the view.
 * @param aViewName
 *            name of the requested view.
 * @return the existing or newly created view.
 */
private static CAS getOrCreateView(CAS aCas, String aViewName)
{
    // TODO: there should be some way to do this without the try...catch
    CAS view;
    try {
        view = aCas.getView(aViewName);
    }
    catch (CASRuntimeException lookupFailed) {
        // the lookup failed, so create the view
        view = aCas.createView(aViewName);
    }
    return view;
}
/**
 * Checks whether the given FS is THE DocumentAnnotation of its view. A mere type check is
 * not enough: the FS has to be equal to the instance that
 * <code>getDocumentAnnotation()</code> returns for the view the FS belongs to.
 *
 * @param aFS
 *            the FS to test.
 * @return true if <code>aFS</code> is an AnnotationFS and equals the DocumentAnnotation of
 *         its own view.
 */
private static boolean isDocumentAnnotation(FeatureStructure aFS)
{
    if (!(aFS instanceof AnnotationFS)) {
        return false;
    }
    AnnotationFS candidate = (AnnotationFS) aFS;
    return aFS.equals(candidate.getView().getDocumentAnnotation());
}
/**
 * Performs batch-copying of Annotations (could also be generalized to FeatureStructures).
 *
 * While copying the annotations, the whole batch is held in a class attribute. This way, we
 * can cope with referenced annotations that have not been copied/recovered yet. The
 * attribute is cleared again when the method is left, also when copying fails with an
 * exception, so that a failed batch cannot influence later copy operations of this copier.
 *
 * @param annoList
 *            the list of annotations that is to be batch-copied
 * @return the list of copied annotations, in the order of <code>annoList</code>
 */
public List<Annotation> batchCopyAnnotations(List<Annotation> annoList)
{
    batchCopyAnnoList = annoList;
    try {
        List<Annotation> returnList = new ArrayList<Annotation>(annoList.size());
        for (Annotation anno : annoList) {
            returnList.add((Annotation) copyFs(anno));
        }
        return returnList;
    }
    finally {
        // always reset the batch, otherwise a stale list would keep steering
        // reference resolution in copyFeatures after a failure
        batchCopyAnnoList = null;
    }
}
}