/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.uima.util; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; import org.apache.uima.UIMARuntimeException; import org.apache.uima.cas.ArrayFS; import org.apache.uima.cas.BooleanArrayFS; import org.apache.uima.cas.ByteArrayFS; import org.apache.uima.cas.CAS; import org.apache.uima.cas.CASRuntimeException; import org.apache.uima.cas.DoubleArrayFS; import org.apache.uima.cas.FSIndex; import org.apache.uima.cas.Feature; import org.apache.uima.cas.FeatureStructure; import org.apache.uima.cas.FloatArrayFS; import org.apache.uima.cas.IntArrayFS; import org.apache.uima.cas.LongArrayFS; import org.apache.uima.cas.ShortArrayFS; import org.apache.uima.cas.SofaFS; import org.apache.uima.cas.StringArrayFS; import org.apache.uima.cas.Type; import org.apache.uima.cas.impl.CASImpl; import org.apache.uima.cas.impl.LowLevelCAS; import org.apache.uima.cas.text.AnnotationFS; /** * Utility class for doing deep copies of FeatureStructures from one CAS to another. To handle cases * where the source CAS has multiple references to the same FS, you can create one instance of * CasCopier and use it to copy multiple FeatureStructures. The CasCopier will remember previously * copied FeatureStructures, so if you later copy another FS that has a reference to a previously * copied FS, it will not duplicate the multiply-referenced FS. */ public class CasCopier { private CAS mSrcCas; private CAS mDestCas; private LowLevelCAS mLowLevelDestCas; private Feature mDestSofaFeature; private Map<FeatureStructure, FeatureStructure> mFsMap = new HashMap<FeatureStructure, FeatureStructure>(); /** * feature structures whose slots need copying are put on this list, together with their source * List is operated as a stack, from the end, for efficiency */ private ArrayList<FeatureStructure> fsToDo = new ArrayList<FeatureStructure>(); /** * Creates a new CasCopier that can be used to copy FeatureStructures from one CAS to another. * Note that if you are merging data from multiple CASes, you must create a new CasCopier * for each source CAS. * * @param aSrcCas * the CAS to copy from. * @param aDestCas * the CAS to copy into. */ public CasCopier(CAS aSrcCas, CAS aDestCas) { mSrcCas = aSrcCas; mDestCas = aDestCas; mLowLevelDestCas = aDestCas.getLowLevelCAS(); mDestSofaFeature = aDestCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA); } /** * Does a complete deep copy of one CAS into another CAS. The contents of each view * in the source CAS will be copied to the same-named view in the destination CAS. If * the view does not already exist it will be created. All FeatureStructures that are * indexed in a view in the source CAS will become indexed in the same-named view in the * destination CAS. * * @param aSrcCas * the CAS to copy from * @param aDestCas * the CAS to copy to * @param aCopySofa * if true, the sofa data and mimeType of each view will be copied. If false they will not. */ public static void copyCas(CAS aSrcCas, CAS aDestCas, boolean aCopySofa) { CasCopier copier = new CasCopier(aSrcCas, aDestCas); Iterator<SofaFS> sofaIter = aSrcCas.getSofaIterator(); while (sofaIter.hasNext()) { SofaFS sofa = sofaIter.next(); CAS view = aSrcCas.getView(sofa); copier.copyCasView(view, aCopySofa); } } /** * Does a deep copy of the contents of one CAS View into another CAS. * If a view with the same name as <code>aSrcCasView</code> exists in the destination CAS, * then it will be the target of the copy. Otherwise, a new view will be created with * that name and will become the target of the copy. All FeatureStructures that are indexed * in the source CAS view will become indexed in the target view. * * @param aSrcCasView * the CAS to copy from * @param aCopySofa * if true, the sofa data and mimeType will be copied. If false they will not. */ public void copyCasView(CAS aSrcCasView, boolean aCopySofa) { //get or create the target view CAS targetView = getOrCreateView(mDestCas, aSrcCasView.getViewName()); if (aCopySofa) { // can't copy the SofaFS - just copy the sofa data and mime type String sofaMime = aSrcCasView.getSofa().getSofaMime(); if (aSrcCasView.getDocumentText() != null) { targetView.setSofaDataString(aSrcCasView.getDocumentText(), sofaMime); } else if (aSrcCasView.getSofaDataURI() != null) { targetView.setSofaDataURI(aSrcCasView.getSofaDataURI(), sofaMime); } else if (aSrcCasView.getSofaDataArray() != null) { targetView.setSofaDataArray(copyFs(aSrcCasView.getSofaDataArray()), sofaMime); } } // now copy indexed FS, but keep track so we don't index anything more than once Set<FeatureStructure> indexedFs = new HashSet<FeatureStructure>(); Iterator<FSIndex<FeatureStructure>> indexes = aSrcCasView.getIndexRepository().getIndexes(); while (indexes.hasNext()) { FSIndex<FeatureStructure> index = indexes.next(); Iterator<FeatureStructure> iter = index.iterator(); while (iter.hasNext()) { FeatureStructure fs = iter.next(); if (!indexedFs.contains(fs)) { FeatureStructure copyOfFs = copyFs(fs); //check for annotations with null Sofa reference - this can happen //if the annotations were created with the Low Level CAS API. If the //Sofa reference isn't set, attempting to add the FS to the indexes //will fail. if (fs instanceof AnnotationFS) { FeatureStructure sofa =((AnnotationFS)copyOfFs).getFeatureValue(mDestSofaFeature); if (sofa == null) { copyOfFs.setFeatureValue(mDestSofaFeature, targetView.getSofa()); } } // also don't index the DocumentAnnotation (it's indexed by default) if (!isDocumentAnnotation(copyOfFs)) { targetView.addFsToIndexes(copyOfFs); } indexedFs.add(fs); } } } } /** * For long lists, and other structures, the straight-forward impl with recursion can * nest too deep, causing a Java failure - out of stack space. * * This is a non-recursive impl, making use of an aux object: featureStructuresWithSlotsToSet to * hold copied FSs whose slots need to be scanned and set with values. * * The main loop dequeues one element, and copies the features. * * The copying of a FS copies the FS without setting the slots; instead it queues the * copied FS together with its source instance on featureStructuresWithSlotsToSet * for later processing. * */ public FeatureStructure copyFs(FeatureStructure aFS) { FeatureStructure copy = copyFsInner(aFS); // doesn't copy the slot values, but enqueues them while (!fsToDo.isEmpty()) { FeatureStructure copyToFillSlots = fsToDo.remove(fsToDo.size()-1); FeatureStructure srcToFillSlots = fsToDo.remove(fsToDo.size()-1); copyFeatures(srcToFillSlots, copyToFillSlots); } return copy; } /** * Copies an FS from the source CAS to the destination CAS. Also copies any referenced FS, except * that previously copied FS will not be copied again. * * @param aFS * the FS to copy. Must be contained within the source CAS. * @return the copy of <code>aFS</code> in the target CAS. */ public FeatureStructure copyFsInner(FeatureStructure aFS) { //FS must be in the source CAS assert ((CASImpl)aFS.getCAS()).getBaseCAS() == ((CASImpl)mSrcCas).getBaseCAS(); // check if we already copied this FS FeatureStructure copy = (FeatureStructure) mFsMap.get(aFS); if (copy != null) return copy; // get the type of the FS Type srcType = aFS.getType(); // Certain types need to be handled specially // Sofa - cannot be created by normal methods. Instead, we return the Sofa with the // same Sofa ID in the target CAS. If it does not exist it will be created. if (aFS instanceof SofaFS) { String sofaId = ((SofaFS)aFS).getSofaID(); return getOrCreateView(mDestCas, sofaId).getSofa(); } // DocumentAnnotation - instead of creating a new instance, reuse the automatically created // instance in the destination view. if (isDocumentAnnotation(aFS)) { String viewName = ((AnnotationFS)aFS).getView().getViewName(); CAS destView = mDestCas.getView(viewName); FeatureStructure destDocAnnot = destView.getDocumentAnnotation(); if (destDocAnnot != null) { copyFeatures(aFS, destDocAnnot); } return destDocAnnot; } // Arrays - need to be created a populated differently than "normal" FS if (aFS.getType().isArray()) { copy = copyArray(aFS); mFsMap.put(aFS, copy); return copy; } // create a new FS of the same type in the target CAS Type destType; if (mDestCas.getTypeSystem() == mSrcCas.getTypeSystem()) { //optimize type lookup if type systems are identical destType = srcType; } else { destType = mDestCas.getTypeSystem().getType(srcType.getName()); } if (destType == null) { throw new UIMARuntimeException(UIMARuntimeException.TYPE_NOT_FOUND_DURING_CAS_COPY, new Object[] { srcType.getName() }); } //We need to use the LowLevel CAS interface to create the FS, because the usual //CAS.createFS() call doesn't allow us to create subtypes of AnnotationBase from //a base CAS. In any case we don't need the Sofa reference to be automatically //set because we'll set it manually when in the copyFeatures method. int typeCode = mLowLevelDestCas.ll_getTypeSystem().ll_getCodeForType(destType); int destFsAddr = mLowLevelDestCas.ll_createFS(typeCode); FeatureStructure destFs = mDestCas.getLowLevelCAS().ll_getFSForRef(destFsAddr); // add to map so we don't try to copy this more than once mFsMap.put(aFS, destFs); fsToDo.add(aFS); // order important fsToDo.add(destFs); return destFs; } /** * Copy feature values from one FS to another. For reference-valued features, this does a deep * copy. * * @param aSrcFS * FeatureStructure to copy from * @param aDestFS * FeatureStructure to copy to */ private void copyFeatures(FeatureStructure aSrcFS, FeatureStructure aDestFS) { // set feature values Type srcType = aSrcFS.getType(); Type destType = aDestFS.getType(); for (Feature srcFeat : srcType.getFeatures()) { Feature destFeat; if (destType == aSrcFS.getType()) { // sharing same type system, so destFeat == srcFeat destFeat = srcFeat; } else { // not sharing same type system, so do a name loop up in destination type system destFeat = destType.getFeatureByBaseName(srcFeat.getShortName()); if (destFeat == null) { throw new UIMARuntimeException(UIMARuntimeException.TYPE_NOT_FOUND_DURING_CAS_COPY, new Object[] { srcFeat.getName() }); } } // copy primitive values using their string representation // TODO: could be optimized but this code would be very messy if we have to // enumerate all possible primitive types. Maybe LowLevel CAS API could help? String srcRangeName = srcFeat.getRange().getName(); if (srcRangeName.equals(CAS.TYPE_NAME_STRING)) { aDestFS.setStringValue(destFeat, aSrcFS.getStringValue(srcFeat)); } else if (srcRangeName.equals(CAS.TYPE_NAME_INTEGER)) { aDestFS.setIntValue(destFeat, aSrcFS.getIntValue(srcFeat)); } else if (srcFeat.getRange().isPrimitive()) { aDestFS.setFeatureValueFromString(destFeat, aSrcFS.getFeatureValueAsString(srcFeat)); } else { // recursive copy no longer done recursively, to avoid blowing the stack FeatureStructure refFS = aSrcFS.getFeatureValue(srcFeat); if (refFS != null) { FeatureStructure copyRefFs = copyFsInner(refFS); aDestFS.setFeatureValue(destFeat, copyRefFs); } } } } /** * @param aFS a feature structure * @return true if the given FS has already been copied using this CasCopier. */ public boolean alreadyCopied(FeatureStructure aFS) { return mFsMap.containsKey(aFS); } /** * @param arrayFS * @return */ private FeatureStructure copyArray(FeatureStructure aSrcFs) { // TODO: there should be a way to do this without enumerating all the array types! if (aSrcFs instanceof StringArrayFS) { StringArrayFS arrayFs = (StringArrayFS) aSrcFs; int len = arrayFs.size(); StringArrayFS destFS = mDestCas.createStringArrayFS(len); for (int i = 0; i < len; i++) { destFS.set(i, arrayFs.get(i)); } return destFS; } if (aSrcFs instanceof IntArrayFS) { IntArrayFS arrayFs = (IntArrayFS) aSrcFs; int len = arrayFs.size(); IntArrayFS destFS = mDestCas.createIntArrayFS(len); for (int i = 0; i < len; i++) { destFS.set(i, arrayFs.get(i)); } return destFS; } if (aSrcFs instanceof ByteArrayFS) { ByteArrayFS arrayFs = (ByteArrayFS) aSrcFs; int len = arrayFs.size(); ByteArrayFS destFS = mDestCas.createByteArrayFS(len); for (int i = 0; i < len; i++) { destFS.set(i, arrayFs.get(i)); } return destFS; } if (aSrcFs instanceof ShortArrayFS) { ShortArrayFS arrayFs = (ShortArrayFS) aSrcFs; int len = arrayFs.size(); ShortArrayFS destFS = mDestCas.createShortArrayFS(len); for (int i = 0; i < len; i++) { destFS.set(i, arrayFs.get(i)); } return destFS; } if (aSrcFs instanceof LongArrayFS) { LongArrayFS arrayFs = (LongArrayFS) aSrcFs; int len = arrayFs.size(); LongArrayFS destFS = mDestCas.createLongArrayFS(len); for (int i = 0; i < len; i++) { destFS.set(i, arrayFs.get(i)); } return destFS; } if (aSrcFs instanceof FloatArrayFS) { FloatArrayFS arrayFs = (FloatArrayFS) aSrcFs; int len = arrayFs.size(); FloatArrayFS destFS = mDestCas.createFloatArrayFS(len); for (int i = 0; i < len; i++) { destFS.set(i, arrayFs.get(i)); } return destFS; } if (aSrcFs instanceof DoubleArrayFS) { DoubleArrayFS arrayFs = (DoubleArrayFS) aSrcFs; int len = arrayFs.size(); DoubleArrayFS destFS = mDestCas.createDoubleArrayFS(len); for (int i = 0; i < len; i++) { destFS.set(i, arrayFs.get(i)); } return destFS; } if (aSrcFs instanceof BooleanArrayFS) { BooleanArrayFS arrayFs = (BooleanArrayFS) aSrcFs; int len = arrayFs.size(); BooleanArrayFS destFS = mDestCas.createBooleanArrayFS(len); for (int i = 0; i < len; i++) { destFS.set(i, arrayFs.get(i)); } return destFS; } if (aSrcFs instanceof ArrayFS) { ArrayFS arrayFs = (ArrayFS) aSrcFs; int len = arrayFs.size(); ArrayFS destFS = mDestCas.createArrayFS(len); for (int i = 0; i < len; i++) { FeatureStructure srcElem = arrayFs.get(i); if (srcElem != null) { FeatureStructure copyElem = copyFsInner(arrayFs.get(i)); destFS.set(i, copyElem); } } return destFS; } assert false; // the set of array types should be exhaustive, so we should never get here return null; } /** * Gets the named view; if the view doesn't exist it will be created. */ private static CAS getOrCreateView(CAS aCas, String aViewName) { //TODO: there should be some way to do this without the try...catch try { return aCas.getView(aViewName); } catch(CASRuntimeException e) { //create the view return aCas.createView(aViewName); } } /** * Determines whether the given FS is the DocumentAnnotation for its view. * This is more than just a type check; we actually check if it is the one "special" * DocumentAnnotation that CAS.getDocumentAnnotation() would return. */ private static boolean isDocumentAnnotation(FeatureStructure aFS) { return (aFS instanceof AnnotationFS) && aFS.equals(((AnnotationFS)aFS).getView().getDocumentAnnotation()); } }