/*
* Copyright 2015
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.testing.validation;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.impl.LowLevelCAS;
/**
 * Static helpers for inspecting which feature structures of a CAS are indexed and which are only
 * reachable by following feature references starting from indexed feature structures.
 * <p>
 * All returned collections are sorted by the low-level FS reference obtained from
 * {@link LowLevelCAS#ll_getFSRef(FeatureStructure)}.
 */
public class CasAnalysisUtils
{
    /**
     * Collect every feature structure returned by the index repository for the top type.
     *
     * @param aCas
     *            the CAS to inspect.
     * @return the indexed feature structures, ordered by their low-level FS reference.
     */
    public static Set<FeatureStructure> collectIndexed(CAS aCas)
    {
        LowLevelCAS llcas = aCas.getLowLevelCAS();
        Set<FeatureStructure> fses = new TreeSet<>(
                (fs1, fs2) -> compareByFSRef(llcas, fs1, fs2));

        FSIterator<FeatureStructure> i = aCas.getIndexRepository().getAllIndexedFS(
                aCas.getTypeSystem().getTopType());
        i.forEachRemaining(fses::add);

        return fses;
    }

    /**
     * Collect every indexed feature structure plus everything transitively reachable from them
     * through non-primitive features (the sofa feature is not followed).
     *
     * @param aCas
     *            the CAS to inspect.
     * @return the reachable feature structures, ordered by their low-level FS reference.
     */
    public static Set<FeatureStructure> collectReachable(CAS aCas)
    {
        LowLevelCAS llcas = aCas.getLowLevelCAS();
        Set<FeatureStructure> fses = new TreeSet<>(
                (fs1, fs2) -> compareByFSRef(llcas, fs1, fs2));

        FSIterator<FeatureStructure> i = aCas.getIndexRepository().getAllIndexedFS(
                aCas.getTypeSystem().getTopType());
        i.forEachRemaining(fs -> collect(fses, fs));

        return fses;
    }

    /**
     * Recursively add a feature structure and all feature structures it references to the given
     * set. Feature structures already contained in the set are not visited again, which also
     * terminates the recursion on cyclic references.
     *
     * @param aFSes
     *            set in which to collect the feature structures (updated by the method).
     * @param aFS
     *            the current feature structure; {@code null} is ignored.
     */
    public static void collect(Set<FeatureStructure> aFSes, FeatureStructure aFS)
    {
        if (aFS == null || aFSes.contains(aFS)) {
            return;
        }

        aFSes.add(aFS);

        for (Feature f : aFS.getType().getFeatures()) {
            if (isFollowedReference(f)) {
                collect(aFSes, aFS.getFeatureValue(f));
            }
        }
    }

    /**
     * Recursively collect referenced FSes and also record for each the last indexed FS that
     * refers to them.
     *
     * @param aFSes
     *            map in which to collect the feature structures and through which FS they are
     *            reachable (updated by the method).
     * @param aIndexed
     *            set of all indexed feature structures.
     * @param aFS
     *            the current feature structure; {@code null} is ignored.
     * @param aLastIndexed
     *            the last indexed feature structure through which the current feature structure
     *            was reachable.
     */
    public static void collect(Map<FeatureStructure, FeatureStructure> aFSes,
            Set<FeatureStructure> aIndexed, FeatureStructure aFS, FeatureStructure aLastIndexed)
    {
        if (aFS == null || aFSes.containsKey(aFS)) {
            return;
        }

        // We might find an annotation indirectly. In that case make sure we consider it as
        // an indexed annotation (owned by itself) instead of wrongly recording it as non-indexed.
        // The same owner is handed down to everything referenced from this feature structure.
        FeatureStructure owner = aIndexed.contains(aFS) ? aFS : aLastIndexed;
        aFSes.put(aFS, owner);

        for (Feature f : aFS.getType().getFeatures()) {
            if (isFollowedReference(f)) {
                collect(aFSes, aIndexed, aFS.getFeatureValue(f), owner);
            }
        }
    }

    /**
     * Get all annotations that are reachable from indexed feature structures but are not
     * indexed themselves.
     *
     * @param aCas
     *            the CAS to inspect.
     * @return the non-indexed annotations, ordered by their low-level FS reference.
     */
    public static Set<FeatureStructure> getNonIndexedFSes(CAS aCas)
    {
        TypeSystem ts = aCas.getTypeSystem();

        Set<FeatureStructure> allIndexedFS = collectIndexed(aCas);
        Set<FeatureStructure> allReachableFS = collectReachable(aCas);

        // Remove all that are indexed
        allReachableFS.removeAll(allIndexedFS);

        // Remove all that are not annotations
        allReachableFS.removeIf(fs -> !ts.subsumes(aCas.getAnnotationType(), fs.getType()));

        // All that is left are non-index annotations
        return allReachableFS;
    }

    /**
     * Get all annotations that are reachable from indexed feature structures but are not
     * indexed themselves, together with the indexed feature structure through which each of
     * them was reached.
     *
     * @param aCas
     *            the CAS to inspect.
     * @return map from non-indexed annotation to the indexed feature structure recorded as its
     *         owner, ordered by the low-level FS reference of the key.
     */
    public static Map<FeatureStructure, FeatureStructure> getNonIndexedFSesWithOwner(CAS aCas)
    {
        TypeSystem ts = aCas.getTypeSystem();
        LowLevelCAS llcas = aCas.getLowLevelCAS();

        Set<FeatureStructure> allIndexedFS = collectIndexed(aCas);

        Map<FeatureStructure, FeatureStructure> allReachableFS = new TreeMap<>(
                (fs1, fs2) -> compareByFSRef(llcas, fs1, fs2));

        FSIterator<FeatureStructure> i = aCas.getIndexRepository().getAllIndexedFS(
                aCas.getTypeSystem().getTopType());
        i.forEachRemaining(fs -> collect(allReachableFS, allIndexedFS, fs, fs));

        // Remove all that are not annotations
        allReachableFS.entrySet().removeIf(
                e -> !ts.subsumes(aCas.getAnnotationType(), e.getKey().getType()));

        // Remove all that are indexed: collect(...) records indexed feature structures with
        // themselves as owner, so key and value are the very same object for those entries.
        allReachableFS.entrySet().removeIf(e -> e.getKey() == e.getValue());

        // All that is left are non-index annotations
        return allReachableFS;
    }

    /**
     * Order two feature structures by their low-level FS reference. Uses
     * {@link Integer#compare(int, int)} rather than subtraction so that the result cannot be
     * corrupted by integer overflow.
     */
    private static int compareByFSRef(LowLevelCAS aLlCas, FeatureStructure aFS1,
            FeatureStructure aFS2)
    {
        return Integer.compare(aLlCas.ll_getFSRef(aFS1), aLlCas.ll_getFSRef(aFS2));
    }

    /**
     * Check whether the traversal follows the given feature: its range must not be primitive
     * and it must not be the sofa feature.
     */
    private static boolean isFollowedReference(Feature aFeature)
    {
        return !aFeature.getRange().isPrimitive()
                && !CAS.FEATURE_BASE_NAME_SOFA.equals(aFeature.getShortName());
    }
}