/*
* Copyright 2012
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.testing;
import static de.tudarmstadt.ukp.dkpro.core.testing.validation.Message.Level.ERROR;
import static java.util.Arrays.asList;
import static org.apache.commons.lang.StringUtils.normalizeSpace;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription;
import static org.apache.uima.fit.util.JCasUtil.select;
import static org.apache.uima.fit.util.JCasUtil.toText;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import junit.framework.Assert;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.UIMAException;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.CasCopier;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import de.tudarmstadt.ukp.dkpro.core.api.anomaly.type.Anomaly;
import de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceChain;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.morph.MorphologicalFeaturesParser;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.morph.internal.AnalysisMapping;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.Morpheme;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagDescription;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription;
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingUtils;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink;
import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred;
import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticArgument;
import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticField;
import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticPredicate;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.PennTree;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
import de.tudarmstadt.ukp.dkpro.core.testing.validation.CasValidator;
import de.tudarmstadt.ukp.dkpro.core.testing.validation.Message;
import de.tudarmstadt.ukp.dkpro.core.testing.validation.checks.Check;
public class AssertAnnotations
{
/**
 * Compares the given anomalies against their expected string renderings.
 * <p>
 * Each anomaly is rendered as {@code "[begin,end] TypeShortName (description)"}. Both sides are
 * de-duplicated and sorted before being printed to stdout and compared.
 *
 * @param aExpected
 *            expected renderings; if {@code null}, the check is skipped
 * @param aActual
 *            the anomalies to check
 */
public static void assertAnomaly(String[] aExpected, Collection<? extends Anomaly> aActual)
{
    // Previously a null expectation crashed in asList() before the null guards were reached.
    if (aExpected == null) {
        return;
    }

    List<String> actual = new ArrayList<String>();
    for (Anomaly a : aActual) {
        actual.add(String.format("[%3d,%3d] %s (%s)", a.getBegin(), a.getEnd(),
                a.getType().getShortName(), a.getDescription()));
    }

    List<String> sortedExpectedOriginal = deduplicateAndSort(asList(aExpected));
    List<String> sortedActualOriginal = deduplicateAndSort(actual);

    System.out.printf("%-20s - Expected: %s%n", "Anomalies (orig.)",
            asCopyableString(sortedExpectedOriginal));
    System.out.printf("%-20s - Actual : %s%n", "Anomalies (orig.)",
            asCopyableString(sortedActualOriginal));

    assertEquals(asCopyableString(sortedExpectedOriginal, true),
            asCopyableString(sortedActualOriginal, true));
}
/**
 * Checks the covered text of the given tokens against the expected strings, in order.
 * Both lists are printed to stdout before the comparison.
 *
 * @param aExpected
 *            expected token texts; if {@code null}, the check is skipped
 * @param aActual
 *            the tokens to check
 */
public static void assertToken(String[] aExpected, Collection<Token> aActual)
{
    if (aExpected != null) {
        final String label = "Tokens";
        List<String> wanted = asList(aExpected);
        List<String> found = toText(aActual);

        System.out.printf("%-20s - Expected: %s%n", label, asCopyableString(wanted));
        System.out.printf("%-20s - Actual : %s%n", label, asCopyableString(found));

        String wantedText = asCopyableString(wanted, true);
        String foundText = asCopyableString(found, true);
        assertEquals(wantedText, foundText);
    }
}
/**
 * Checks the covered text of the given sentences against the expected strings, in order.
 * Both lists are printed to stdout before the comparison.
 *
 * @param aExpected
 *            expected sentence texts; if {@code null}, the check is skipped
 * @param aActual
 *            the sentences to check
 */
public static void assertSentence(String[] aExpected, Collection<Sentence> aActual)
{
    if (aExpected != null) {
        final String label = "Sentences";
        List<String> wanted = asList(aExpected);
        List<String> found = toText(aActual);

        System.out.printf("%-20s - Expected: %s%n", label, asCopyableString(wanted));
        System.out.printf("%-20s - Actual : %s%n", label, asCopyableString(found));

        String wantedText = asCopyableString(wanted, true);
        String foundText = asCopyableString(found, true);
        assertEquals(wantedText, foundText);
    }
}
/**
 * Checks part-of-speech annotations against expected original tag values and/or expected
 * mapped type names.
 * <p>
 * The original value is read from {@code getPosValue()}; the mapped value is the short name
 * of the annotation's UIMA type. All requested comparisons are printed to stdout before any
 * assertion is evaluated.
 *
 * @param aExpectedMapped
 *            expected type short names; if {@code null}, this comparison is skipped
 * @param aExpectedOriginal
 *            expected original tag values; if {@code null}, this comparison is skipped
 * @param actual
 *            the POS annotations to check
 */
public static void assertPOS(String[] aExpectedMapped, String[] aExpectedOriginal,
        Collection<POS> actual)
{
    final boolean checkOriginal = aExpectedOriginal != null;
    final boolean checkMapped = aExpectedMapped != null;

    List<String> wantedOriginal = null;
    if (checkOriginal) {
        wantedOriginal = asList(aExpectedOriginal);
    }
    List<String> wantedMapped = null;
    if (checkMapped) {
        wantedMapped = asList(aExpectedMapped);
    }

    List<String> foundOriginal = new ArrayList<String>();
    List<String> foundMapped = new ArrayList<String>();
    for (POS pos : actual) {
        foundOriginal.add(pos.getPosValue());
        foundMapped.add(pos.getType().getShortName());
    }

    // Report everything first so that a failing assertion does not hide the other layer.
    if (checkOriginal) {
        System.out.printf("%-20s - Expected: %s%n", "POS (original)",
                asCopyableString(wantedOriginal));
        System.out.printf("%-20s - Actual : %s%n", "POS (original)",
                asCopyableString(foundOriginal));
    }
    if (checkMapped) {
        System.out.printf("%-20s - Expected: %s%n", "POS (mapped)",
                asCopyableString(wantedMapped));
        System.out.printf("%-20s - Actual : %s%n", "POS (mapped)",
                asCopyableString(foundMapped));
    }

    if (checkOriginal) {
        assertEquals(asCopyableString(wantedOriginal, true),
                asCopyableString(foundOriginal, true));
    }
    if (checkMapped) {
        assertEquals(asCopyableString(wantedMapped, true),
                asCopyableString(foundMapped, true));
    }
}
/**
 * Checks the values of the given lemmas against the expected strings, in order.
 * Both lists are printed to stdout before the comparison.
 *
 * @param aExpected
 *            expected lemma values; if {@code null}, the check is skipped
 * @param aActual
 *            the lemmas to check
 */
public static void assertLemma(String[] aExpected, Collection<Lemma> aActual)
{
    if (aExpected != null) {
        List<String> wanted = asList(aExpected);
        List<String> found = aActual.stream()
                .map(lemma -> lemma.getValue())
                .collect(Collectors.toCollection(ArrayList::new));

        System.out.printf("%-20s - Expected: %s%n", "Lemmas", asCopyableString(wanted));
        System.out.printf("%-20s - Actual : %s%n", "Lemmas", asCopyableString(found));
        assertEquals(asCopyableString(wanted, true), asCopyableString(found, true));
    }
}
/**
 * Checks the morphological tags of the given morphemes against the expected strings, in
 * order. Both lists are printed to stdout before the comparison.
 *
 * @param aExpected
 *            expected morph tags; if {@code null}, the check is skipped
 * @param aActual
 *            actual morph tags
 * @deprecated Use {@link #assertMorph(String[], Collection)}
 */
@Deprecated
public static void assertMorpheme(String[] aExpected, Collection<Morpheme> aActual)
{
    if (aExpected != null) {
        List<String> wanted = asList(aExpected);
        List<String> found = aActual.stream()
                .map(morpheme -> morpheme.getMorphTag())
                .collect(Collectors.toCollection(ArrayList::new));

        System.out.printf("%-20s - Expected: %s%n", "Morphemes", asCopyableString(wanted));
        System.out.printf("%-20s - Actual : %s%n", "Morphemes", asCopyableString(found));
        assertEquals(asCopyableString(wanted, true), asCopyableString(found, true));
    }
}
/**
 * Checks morphological feature annotations against expected string renderings, in order.
 * <p>
 * Each annotation is rendered as its offsets, followed by seventeen fixed-width feature
 * columns (a {@code "-"} stands in for unset features), the covered text and the raw value
 * in parentheses. Both lists are printed to stdout before the comparison.
 *
 * @param aExpected
 *            expected renderings; if {@code null}, the check is skipped
 * @param aActual
 *            the morphological feature annotations to check
 */
public static void assertMorph(String[] aExpected, Collection<MorphologicalFeatures> aActual)
{
    if (aExpected == null) {
        return;
    }

    // Positional arguments: 1-17 are the features, 18/19 the offsets, 20 the text, 21 the value.
    final String pattern =
            "[%18$3d,%19$3d] %1$5s %2$5s %3$4s %4$4s %5$4s %6$5s %7$4s %8$4s %9$5s %10$6s %11$2s %12$4s %13$4s %14$4s %15$5s %16$6s %17$5s %20$s (%21$s)";

    List<String> wanted = asList(aExpected);
    List<String> found = new ArrayList<String>();
    for (MorphologicalFeatures feats : aActual) {
        String rendered = String.format(pattern,
                nd(feats.getAnimacy()),      //  1
                nd(feats.getAspect()),       //  2
                nd(feats.getCase()),         //  3
                nd(feats.getDefiniteness()), //  4
                nd(feats.getDegree()),       //  5
                nd(feats.getGender()),       //  6
                nd(feats.getMood()),         //  7
                nd(feats.getNegative()),     //  8
                nd(feats.getNumber()),       //  9
                nd(feats.getNumType()),      // 10
                nd(feats.getPerson()),       // 11
                nd(feats.getPossessive()),   // 12
                nd(feats.getPronType()),     // 13
                nd(feats.getReflex()),       // 14
                nd(feats.getTense()),        // 15
                nd(feats.getVerbForm()),     // 16
                nd(feats.getVoice()),        // 17
                feats.getBegin(),            // 18
                feats.getEnd(),              // 19
                feats.getCoveredText(),      // 20
                feats.getValue());           // 21
        found.add(rendered);
    }

    System.out.printf("%-20s - Expected: %s%n", "Morph. feats.", asCopyableString(wanted));
    System.out.printf("%-20s - Actual : %s%n", "Morph. feats.", asCopyableString(found));
    assertEquals(asCopyableString(wanted, true), asCopyableString(found, true));
}
/**
 * Substitutes a dash for a missing value.
 *
 * @param aValue
 *            the value to render, may be {@code null}
 * @return {@code "-"} if the value is {@code null}, otherwise the value itself
 */
private static String nd(String aValue)
{
    if (aValue != null) {
        return aValue;
    }
    return "-";
}
/**
 * Checks the values of the given stems against the expected strings, in order.
 * Both lists are printed to stdout before the comparison.
 *
 * @param aExpected
 *            expected stem values; if {@code null}, the check is skipped
 * @param aActual
 *            the stems to check
 */
public static void assertStem(String[] aExpected, Collection<Stem> aActual)
{
    if (aExpected != null) {
        List<String> wanted = asList(aExpected);
        List<String> found = aActual.stream()
                .map(stem -> stem.getValue())
                .collect(Collectors.toCollection(ArrayList::new));

        System.out.printf("%-20s - Expected: %s%n", "Stems", asCopyableString(wanted));
        System.out.printf("%-20s - Actual : %s%n", "Stems", asCopyableString(found));
        assertEquals(asCopyableString(wanted, true), asCopyableString(found, true));
    }
}
/**
 * Compares the given named entities against their expected string renderings.
 * <p>
 * Each entity is rendered as {@code "[begin,end]ClassName(value) (coveredText)"}. Both sides
 * are sorted before being printed to stdout and compared, so the order of the annotations
 * does not matter.
 *
 * @param aExpected
 *            expected renderings; if {@code null}, the check is skipped
 * @param aActual
 *            the named entities to check
 */
public static void assertNamedEntity(String[] aExpected, Collection<NamedEntity> aActual)
{
    // Previously a null expectation crashed in asList() before the null guards were reached.
    if (aExpected == null) {
        return;
    }

    List<String> expected = new ArrayList<String>(asList(aExpected));
    List<String> actual = new ArrayList<String>();
    for (NamedEntity a : aActual) {
        actual.add(String.format("[%3d,%3d]%s(%s) (%s)", a.getBegin(), a.getEnd(),
                a.getClass().getSimpleName(), a.getValue(), a.getCoveredText()));
    }

    Collections.sort(actual);
    Collections.sort(expected);

    System.out.printf("%-20s - Expected: %s%n", "Named entities (orig.)",
            asCopyableString(expected));
    System.out.printf("%-20s - Actual : %s%n", "Named entities (orig.)",
            asCopyableString(actual));

    assertEquals(asCopyableString(expected, true), asCopyableString(actual, true));
}
/**
 * Compares constituents against expected original tags and/or expected mapped type names.
 * <p>
 * Original entries are rendered as {@code "constituentType begin,end"}, mapped entries as
 * {@code "TypeShortName begin,end"}. Each side is de-duplicated and sorted. All requested
 * comparisons are printed to stdout before any assertion is evaluated.
 *
 * @param aExpectedMapped
 *            expected mapped renderings; if {@code null}, this comparison is skipped
 * @param aExpectedOriginal
 *            expected original renderings; if {@code null}, this comparison is skipped
 * @param aActual
 *            the constituents to check
 */
public static void assertConstituents(String[] aExpectedMapped, String[] aExpectedOriginal,
        Collection<Constituent> aActual)
{
    List<String> actualTags = new ArrayList<String>();
    List<String> actualClasses = new ArrayList<String>();
    for (Constituent a : aActual) {
        actualTags.add(String.format("%s %d,%d", a.getConstituentType(), a.getBegin(),
                a.getEnd()));
        actualClasses.add(String.format("%s %d,%d", a.getType().getShortName(), a.getBegin(),
                a.getEnd()));
    }

    // asList() rejects a null array, so a skipped expectation must not be wrapped; previously
    // passing null for either expectation crashed here instead of skipping that comparison.
    List<String> sortedExpectedOriginal = aExpectedOriginal != null
            ? deduplicateAndSort(asList(aExpectedOriginal)) : null;
    List<String> sortedExpectedMapped = aExpectedMapped != null
            ? deduplicateAndSort(asList(aExpectedMapped)) : null;
    List<String> sortedActualOriginal = deduplicateAndSort(actualTags);
    List<String> sortedActualMapped = deduplicateAndSort(actualClasses);

    if (aExpectedOriginal != null) {
        System.out.printf("%-20s - Expected: %s%n", "Constituents (orig.)",
                asCopyableString(sortedExpectedOriginal));
        System.out.printf("%-20s - Actual : %s%n", "Constituents (orig.)",
                asCopyableString(sortedActualOriginal));
    }
    if (aExpectedMapped != null) {
        System.out.printf("%-20s - Expected: %s%n", "Constituents (map.)",
                asCopyableString(sortedExpectedMapped));
        System.out.printf("%-20s - Actual : %s%n", "Constituents (map.)",
                asCopyableString(sortedActualMapped));
    }

    if (aExpectedOriginal != null) {
        assertEquals(asCopyableString(sortedExpectedOriginal, true),
                asCopyableString(sortedActualOriginal, true));
    }
    if (aExpectedMapped != null) {
        assertEquals(asCopyableString(sortedExpectedMapped, true),
                asCopyableString(sortedActualMapped, true));
    }
}
/**
 * Compares the given chunks against their expected string renderings.
 * <p>
 * Each chunk is rendered as {@code "[begin,end]ClassName(chunkValue) (coveredText)"}. Both
 * sides are sorted before being printed to stdout and compared.
 *
 * @param aExpected
 *            expected renderings
 * @param aActual
 *            the chunks to check
 */
public static void assertChunks(String[] aExpected,
        Collection<Chunk> aActual)
{
    List<String> wanted = new ArrayList<String>(asList(aExpected));
    List<String> found = new ArrayList<String>();
    for (Chunk chunk : aActual) {
        String rendered = String.format("[%3d,%3d]%s(%s) (%s)", chunk.getBegin(),
                chunk.getEnd(), chunk.getClass().getSimpleName(), chunk.getChunkValue(),
                chunk.getCoveredText());
        found.add(rendered);
    }

    Collections.sort(wanted);
    Collections.sort(found);

    final String label = "Chunks";
    System.out.printf("%-20s - Expected: %s%n", label, asCopyableString(wanted));
    System.out.printf("%-20s - Actual : %s%n", label, asCopyableString(found));
    assertEquals(asCopyableString(wanted, true), asCopyableString(found, true));
}
/**
 * Compares the syntactic functions of the given constituents against expected renderings.
 * <p>
 * Only constituents with a non-null syntactic function are considered; each is rendered as
 * {@code "syntacticFunction begin,end"}. Both sides are de-duplicated and sorted before being
 * printed to stdout and compared.
 *
 * @param aExpectedOriginal
 *            expected renderings; if {@code null}, the check is skipped
 * @param aActual
 *            the constituents to check
 */
public static void assertSyntacticFunction(String[] aExpectedOriginal,
        Collection<Constituent> aActual)
{
    // Previously a null expectation crashed in asList() before the null guards were reached.
    if (aExpectedOriginal == null) {
        return;
    }

    List<String> actualTags = new ArrayList<String>();
    for (Constituent a : aActual) {
        if (a.getSyntacticFunction() != null) {
            actualTags.add(String.format("%s %d,%d", a.getSyntacticFunction(),
                    a.getBegin(), a.getEnd()));
        }
    }

    List<String> sortedExpectedOriginal = deduplicateAndSort(asList(aExpectedOriginal));
    List<String> sortedActualOriginal = deduplicateAndSort(actualTags);

    System.out.printf("%-20s - Expected: %s%n", "Syn. func. (orig.)",
            asCopyableString(sortedExpectedOriginal));
    System.out.printf("%-20s - Actual : %s%n", "Syn. func. (orig.)",
            asCopyableString(sortedActualOriginal));

    assertEquals(asCopyableString(sortedExpectedOriginal, true),
            asCopyableString(sortedActualOriginal, true));
}
/**
 * Produces a new list holding the distinct elements of the given collection in their natural
 * order.
 *
 * @param <T>
 *            the element type
 * @param aCollection
 *            the elements to process, may be {@code null}
 * @return a new sorted list without duplicates, or {@code null} if the input was {@code null}
 */
public static <T extends Comparable<T>> List<T> deduplicateAndSort(Collection<T> aCollection)
{
    if (aCollection != null) {
        // The set drops duplicates (by equals/hashCode); ordering is established afterwards.
        HashSet<T> distinct = new HashSet<T>(aCollection);
        List<T> ordered = new ArrayList<T>(distinct);
        Collections.sort(ordered);
        return ordered;
    }
    return null;
}
/**
 * Compares the given dependencies against their expected string renderings and verifies that
 * every dependency annotation has the same offsets as its dependent.
 * <p>
 * Each dependency is rendered with its own offsets, class name, dependency type and flavor,
 * followed by the offsets and covered text of the dependent ({@code D}) and of the governor
 * ({@code G}). Both sides are sorted before being printed to stdout and compared.
 *
 * @param aExpected
 *            expected renderings
 * @param aActual
 *            the dependencies to check
 */
public static void assertDependencies(String[] aExpected, Collection<Dependency> aActual)
{
    List<String> wanted = new ArrayList<String>(asList(aExpected));
    List<String> found = new ArrayList<String>();
    boolean allOffsetsMatch = true;

    for (Dependency dep : aActual) {
        String rendered = String.format("[%3d,%3d]%s(%s,%s) D[%d,%d](%s) G[%d,%d](%s)",
                dep.getBegin(), dep.getEnd(), dep.getClass().getSimpleName(),
                dep.getDependencyType(), dep.getFlavor(),
                dep.getDependent().getBegin(), dep.getDependent().getEnd(),
                dep.getDependent().getCoveredText(),
                dep.getGovernor().getBegin(), dep.getGovernor().getEnd(),
                dep.getGovernor().getCoveredText());
        found.add(rendered);

        if (dep.getBegin() != dep.getDependent().getBegin()
                || dep.getEnd() != dep.getDependent().getEnd()) {
            allOffsetsMatch = false;
        }
    }

    Collections.sort(wanted);
    Collections.sort(found);

    final String label = "Dependencies";
    System.out.printf("%-20s - Expected: %s%n", label, asCopyableString(wanted));
    System.out.printf("%-20s - Actual : %s%n", label, asCopyableString(found));

    assertEquals(asCopyableString(wanted, true), asCopyableString(found, true));
    assertTrue("Dependency offsets must match dependent offsets", allOffsetsMatch);
}
/**
 * Compares the bracketed tree stored in a {@link PennTree} annotation with the expected
 * string after normalizing whitespace on both sides. A missing annotation is compared as
 * {@code "<none>"}. Both strings are printed to stdout before the comparison.
 *
 * @param aExpected
 *            the expected tree
 * @param aActual
 *            the annotation to check, may be {@code null}
 */
public static void assertPennTree(String aExpected, PennTree aActual)
{
    String rawActual;
    if (aActual == null) {
        rawActual = "<none>";
    }
    else {
        rawActual = aActual.getPennTree();
    }

    String wanted = normalizeSpace(aExpected);
    String found = normalizeSpace(rawActual);

    System.out.printf("%-20s - Expected: \"%s\"%n", "Penn tree", wanted);
    System.out.printf("%-20s - Actual : \"%s\"%n", "Penn tree", found);
    assertEquals(wanted, found);
}
/**
 * Compares a sequence of {@link PennTree} annotations with the expected trees, pairwise and
 * in iteration order. The number of trees must match.
 *
 * @param aExpected
 *            the expected trees
 * @param aActual
 *            the annotations to check
 */
public static void assertPennTree(String[] aExpected, Collection<PennTree> aActual)
{
    List<PennTree> trees = new ArrayList<PennTree>(aActual);
    assertEquals(aExpected.length, aActual.size());

    int position = 0;
    for (PennTree tree : trees) {
        assertPennTree(aExpected[position], tree);
        position++;
    }
}
/**
 * Compares two bracketed trees given as strings after normalizing whitespace on both sides.
 * A {@code null} actual value is compared as {@code "<none>"}. Both strings are printed to
 * stdout before the comparison.
 *
 * @param aExpected
 *            the expected tree
 * @param aActual
 *            the actual tree, may be {@code null}
 */
public static void assertPennTree(String aExpected, String aActual)
{
    String rawActual = aActual;
    if (rawActual == null) {
        rawActual = "<none>";
    }

    String wanted = normalizeSpace(aExpected);
    String found = normalizeSpace(rawActual);

    System.out.printf("%-20s - Expected: %s%n", "Penn tree", wanted);
    System.out.printf("%-20s - Actual : %s%n", "Penn tree", found);
    assertEquals(wanted, found);
}
/**
 * Compares semantic predicates against their expected string renderings.
 * <p>
 * Each predicate is rendered as {@code "text (category): [(role:argText)...]"} with the
 * arguments in the order in which they are stored. Both sides are sorted before being
 * printed to stdout and compared.
 *
 * @param aExpected
 *            expected semantic predicates
 * @param aActual
 *            actual semantic predicates
 * @deprecated Use {@link #assertSemPred(String[], Collection)}
 */
@Deprecated
public static void assertSemanticPredicates(String[] aExpected,
        Collection<SemanticPredicate> aActual)
{
    List<String> wanted = new ArrayList<String>(asList(aExpected));
    List<String> found = new ArrayList<String>();

    for (SemanticPredicate predicate : aActual) {
        StringBuilder rendered = new StringBuilder();
        rendered.append(predicate.getCoveredText());
        rendered.append(" (");
        rendered.append(predicate.getCategory());
        rendered.append("): [");
        for (SemanticArgument arg : select(predicate.getArguments(), SemanticArgument.class)) {
            rendered.append('(');
            rendered.append(arg.getRole());
            rendered.append(':');
            rendered.append(arg.getCoveredText());
            rendered.append(')');
        }
        rendered.append(']');
        found.add(rendered.toString());
    }

    Collections.sort(wanted);
    Collections.sort(found);

    final String label = "Semantic predicates";
    System.out.printf("%-20s - Expected: %s%n", label, asCopyableString(wanted));
    System.out.printf("%-20s - Actual : %s%n", label, asCopyableString(found));
    assertEquals(asCopyableString(wanted, true), asCopyableString(found, true));
}
/**
 * Compares semantic predicates against their expected string renderings.
 * <p>
 * Each predicate is rendered as {@code "text (category): [(role:targetText)...]"} with the
 * argument links ordered by role. Both sides are sorted before being printed to stdout and
 * compared.
 *
 * @param aExpected
 *            expected renderings
 * @param aActual
 *            the semantic predicates to check
 */
public static void assertSemPred(String[] aExpected, Collection<SemPred> aActual)
{
    List<String> wanted = new ArrayList<String>(asList(aExpected));
    List<String> found = new ArrayList<String>();

    for (SemPred predicate : aActual) {
        List<SemArgLink> links = new ArrayList<>(
                select(predicate.getArguments(), SemArgLink.class));
        // Ordering by role keeps the rendering independent of the (possibly unstable) order
        // in which annotation tools emit the arguments.
        links.sort((left, right) -> ObjectUtils.compare(left.getRole(), right.getRole()));

        StringBuilder rendered = new StringBuilder();
        rendered.append(predicate.getCoveredText());
        rendered.append(" (");
        rendered.append(predicate.getCategory());
        rendered.append("): [");
        for (SemArgLink link : links) {
            rendered.append('(');
            rendered.append(link.getRole());
            rendered.append(':');
            rendered.append(link.getTarget().getCoveredText());
            rendered.append(')');
        }
        rendered.append(']');
        found.add(rendered.toString());
    }

    Collections.sort(wanted);
    Collections.sort(found);

    final String label = "Semantic predicates";
    System.out.printf("%-20s - Expected: %s%n", label, asCopyableString(wanted));
    System.out.printf("%-20s - Actual : %s%n", label, asCopyableString(found));
    assertEquals(asCopyableString(wanted, true), asCopyableString(found, true));
}
/**
 * Checks the values of the given semantic field annotations against the expected strings,
 * in order. Both lists are printed to stdout before the comparison.
 *
 * @param aExpected
 *            expected values; if {@code null}, the check is skipped
 * @param aActual
 *            the semantic field annotations to check
 */
public static void assertSemanticField(String[] aExpected, Collection<SemanticField> aActual)
{
    if (aExpected != null) {
        List<String> wanted = asList(aExpected);
        List<String> found = aActual.stream()
                .map(field -> field.getValue())
                .collect(Collectors.toCollection(ArrayList::new));

        final String label = "Semantic field values";
        System.out.printf("%-20s - Expected: %s%n", label, asCopyableString(wanted));
        System.out.printf("%-20s - Actual : %s%n", label, asCopyableString(found));
        assertEquals(asCopyableString(wanted, true), asCopyableString(found, true));
    }
}
/**
 * Compares coreference chains against the expected chains, pairwise and in iteration order.
 * <p>
 * Every chain is represented by the covered texts of its links. All expected and actual
 * chains are printed to stdout first; the check then fails if the number of chains differs
 * or if any pair of chains differs.
 *
 * @param aExpected
 *            the expected chains, one array of link texts per chain
 * @param aActual
 *            the chains to check
 */
public static void assertCoreference(String[][] aExpected, Collection<CoreferenceChain> aActual)
{
    List<CoreferenceChain> actual = new ArrayList<CoreferenceChain>(aActual);

    for (String[] chain : aExpected) {
        System.out.printf("%-20s - Expected: %s%n", "Coreference",
                asCopyableString(asList(chain)));
    }
    for (CoreferenceChain chain : actual) {
        System.out.printf("%-20s - Actual : %s%n", "Coreference",
                asCopyableString(toText(chain.links())));
    }

    if (aExpected.length != actual.size()) {
        // The closing bracket used to be unmatched in this message.
        fail("Expected [" + aExpected.length + "] chains but found [" + actual.size() + "]");
    }

    for (int i = 0; i < actual.size(); i++) {
        assertEquals(asCopyableString(asList(aExpected[i]), true),
                asCopyableString(toText(actual.get(i).links()), true));
    }
}
/**
 * Checks the tags recorded for a tagset without restricting the check to a particular
 * component. Delegates to the five-argument variant with a {@code null} component.
 *
 * @param aLayer
 *            the annotation type whose name identifies the layer
 * @param aName
 *            the tagset name
 * @param aExpected
 *            the expected tags
 * @param aJCas
 *            the CAS holding the tagset descriptions
 */
public static void assertTagset(Class<?> aLayer, String aName, String[] aExpected, JCas aJCas)
{
assertTagset(null, aLayer, aName, aExpected, aJCas);
}
/**
 * Checks the tags recorded in the first {@link TagsetDescription} that matches the given
 * layer, tagset name and (optionally) component.
 * <p>
 * Expected and actual tags are sorted before comparison. Details of the matching description
 * are printed to stdout. If no description matches, the descriptions that were found are
 * printed and the check fails.
 *
 * @param aComponent
 *            the component class whose name must match; {@code null} matches any component
 * @param aLayer
 *            the annotation type whose name identifies the layer
 * @param aName
 *            the tagset name
 * @param aExpected
 *            the expected tags
 * @param aJCas
 *            the CAS holding the tagset descriptions
 */
public static void assertTagset(Class<?> aComponent, Class<?> aLayer, String aName,
        String[] aExpected, JCas aJCas)
{
    List<String> expected = new ArrayList<String>(asList(aExpected));
    Collections.sort(expected);

    // Collects what is in the CAS so it can be reported if nothing matches.
    StringBuilder sb = new StringBuilder();

    for (TagsetDescription tsd : select(aJCas, TagsetDescription.class)) {
        sb.append('\t');
        sb.append(tsd.getComponentName());
        sb.append(" - ");
        sb.append(tsd.getLayer());
        sb.append(" - ");
        sb.append(tsd.getName());
        sb.append('\n');

        boolean layerMatch = StringUtils.equals(aLayer.getName(), tsd.getLayer());
        boolean tagsetMatch = StringUtils.equals(aName, tsd.getName());
        boolean optComponentMatch = aComponent == null
                || aComponent.getName().equals(tsd.getComponentName());

        if (layerMatch && tagsetMatch && optComponentMatch) {
            List<String> actual = new ArrayList<String>();
            for (TagDescription td : select(tsd.getTags(), TagDescription.class)) {
                actual.add(td.getName());
            }
            Collections.sort(actual);

            System.out.printf("%-20s : %s%n", "Layer", tsd.getLayer());
            System.out.printf("%-20s : %s%n", "Tagset", tsd.getName());
            System.out.printf("%-20s : %s%n", "Component", tsd.getComponentName());
            System.out.printf("%-20s : %s%n", "Model location", tsd.getModelLocation());
            System.out.printf("%-20s : %s%n", "Model language", tsd.getModelLanguage());
            System.out.printf("%-20s : %s%n", "Model variant", tsd.getModelVariant());
            System.out.printf("%-20s : %s%n", "Model version", tsd.getModelVersion());
            System.out.printf("%-20s : %b%n", "Input", tsd.getInput());
            System.out.printf("%-20s - Expected: %s%n", "Tags", asCopyableString(expected));
            System.out.printf("%-20s - Actual : %s%n", "Tags", asCopyableString(actual));

            assertEquals(asCopyableString(expected, true), asCopyableString(actual, true));
            return;
        }
    }

    System.out.println("The CAS does not contain a description for layer [" + aLayer.getName()
            + "] tagset [" + aName + "]");
    System.out.println("What has been found is:\n" + sb);
    fail("No tagset definition found for layer [" + aLayer.getName() + "] tagset [" + aName
            + "]");
}
/**
 * Checks the unmapped tags of a tagset without restricting the check to a particular
 * component and without the exact check. Delegates to the six-argument variant with a
 * {@code null} component and {@code aExact = false}.
 *
 * @param aLayer
 *            the annotation type identifying the layer
 * @param aName
 *            the tagset name
 * @param aDefaultMapped
 *            the tags expected to have no entry in the mapping
 * @param aJCas
 *            the CAS holding the tagset descriptions
 * @throws AnalysisEngineProcessException
 *             declared by the delegate
 */
public static void assertTagsetMapping(Class<?> aLayer, String aName, String[] aDefaultMapped,
JCas aJCas) throws AnalysisEngineProcessException
{
assertTagsetMapping(null, aLayer, aName, aDefaultMapped, aJCas, false);
}
/**
 * Checks the unmapped tags of a tagset without restricting the check to a particular
 * component. Delegates to the six-argument variant with a {@code null} component.
 *
 * @param aLayer
 *            the annotation type identifying the layer
 * @param aName
 *            the tagset name
 * @param aDefaultMapped
 *            the tags expected to have no entry in the mapping
 * @param aJCas
 *            the CAS holding the tagset descriptions
 * @param aExact
 *            whether to additionally require that every mapped tag is present in the model
 * @throws AnalysisEngineProcessException
 *             declared by the delegate
 */
public static void assertTagsetMapping(Class<?> aLayer, String aName,
        String[] aDefaultMapped, JCas aJCas, boolean aExact)
    throws AnalysisEngineProcessException
{
    // The flag used to be dropped here (a hard-coded false was passed on), so callers asking
    // for the exact check silently got the lenient one.
    assertTagsetMapping(null, aLayer, aName, aDefaultMapped, aJCas, aExact);
}
/**
 * Checks the unmapped tags of a tagset for a given component without the exact check.
 * Delegates to the six-argument variant with {@code aExact = false}.
 *
 * @param aComponent
 *            the component class passed on to the delegate
 * @param aLayer
 *            the annotation type identifying the layer
 * @param aName
 *            the tagset name
 * @param aDefaultMapped
 *            the tags expected to have no entry in the mapping
 * @param aJCas
 *            the CAS holding the tagset descriptions
 * @throws AnalysisEngineProcessException
 *             declared by the delegate
 */
public static void assertTagsetMapping(Class<?> aComponent, Class<?> aLayer, String aName,
String[] aDefaultMapped, JCas aJCas)
throws AnalysisEngineProcessException
{
assertTagsetMapping(aComponent, aLayer, aName, aDefaultMapped, aJCas, false);
}
/**
 * Checks which tags of a model's tagset have no entry in the tagset mapping for the given
 * layer.
 * <p>
 * The mapping is loaded through a {@link MappingProvider} from a layer-specific classpath
 * location; metadata keys and the wildcard key {@code "*"} are removed from it. The tags of
 * the first {@link TagsetDescription} matching layer, tagset name and (optionally) component
 * are then compared with the mapping keys: tags without a mapping entry must equal
 * {@code aDefaultMapped}. With {@code aExact}, the mapping must additionally not contain any
 * tag that is absent from the model. Details are printed to stdout. If no description
 * matches, the descriptions that were found are printed and the check fails.
 *
 * @param aComponent
 *            the component class whose name must match; {@code null} matches any component
 * @param aLayer
 *            the annotation type identifying the layer; one of {@link POS},
 *            {@link Dependency}, {@link Constituent} or {@link Chunk}
 * @param aName
 *            the tagset name
 * @param aDefaultMapped
 *            the tags expected to have no entry in the mapping
 * @param aJCas
 *            the CAS holding the tagset descriptions; also used to configure the mapping
 *            provider
 * @param aExact
 *            whether to also require that every mapped tag is present in the model
 * @throws AnalysisEngineProcessException
 *             if configuring the mapping provider fails
 * @throws IllegalArgumentException
 *             if the layer is not supported
 */
public static void assertTagsetMapping(Class<?> aComponent, Class<?> aLayer, String aName,
        String[] aDefaultMapped, JCas aJCas, boolean aExact)
    throws AnalysisEngineProcessException
{
    String pattern;
    if (aLayer == POS.class) {
        pattern = "classpath:/de/tudarmstadt/ukp/dkpro/"
                + "core/api/lexmorph/tagset/${language}-${tagset}-pos.map";
    }
    else if (aLayer == Dependency.class) {
        pattern = "classpath:/de/tudarmstadt/ukp/dkpro/"
                + "core/api/syntax/tagset/${language}-${tagset}-dependency.map";
    }
    else if (aLayer == Constituent.class) {
        pattern = "classpath:/de/tudarmstadt/ukp/dkpro/"
                + "core/api/syntax/tagset/${language}-${tagset}-constituency.map";
    }
    else if (aLayer == Chunk.class) {
        pattern = "classpath:/de/tudarmstadt/ukp/dkpro/"
                + "core/api/syntax/tagset/${language}-${tagset}-chunk.map";
    }
    else {
        throw new IllegalArgumentException("Unsupported layer: " + aLayer.getName());
    }

    MappingProvider mp = new MappingProvider();
    mp.setDefault(MappingProvider.LOCATION, pattern);
    mp.setDefault("tagset", aName);
    mp.configure(aJCas.getCas());
    Map<String, String> mapping = mp.getResource();

    Assert.assertNotNull("No mapping found for layer [" + aLayer.getName() + "] tagset ["
            + aName + "]", mapping);

    mapping.keySet().retainAll(MappingUtils.stripMetadata(mapping.keySet()));
    mapping.remove("*"); // Remove wildcard

    List<String> expected = new ArrayList<String>(asList(aDefaultMapped));
    Collections.sort(expected);

    List<String> mappedTags = new ArrayList<String>(mapping.keySet());
    Collections.sort(mappedTags);

    // Typed empty list instead of the raw Collections.EMPTY_LIST (avoids unchecked conversion).
    List<String> none = Collections.<String>emptyList();

    // Collects what is in the CAS so it can be reported if nothing matches.
    StringBuilder sb = new StringBuilder();

    for (TagsetDescription tsd : select(aJCas, TagsetDescription.class)) {
        sb.append('\t');
        sb.append(tsd.getComponentName());
        sb.append(" - ");
        sb.append(tsd.getLayer());
        sb.append(" - ");
        sb.append(tsd.getName());
        sb.append('\n');

        boolean layerMatch = StringUtils.equals(aLayer.getName(), tsd.getLayer());
        boolean tagsetMatch = StringUtils.equals(aName, tsd.getName());
        boolean optComponentMatch = aComponent == null
                || aComponent.getName().equals(tsd.getComponentName());

        if (layerMatch && tagsetMatch && optComponentMatch) {
            List<String> actual = new ArrayList<String>();
            for (TagDescription td : select(tsd.getTags(), TagDescription.class)) {
                actual.add(td.getName());
            }
            Collections.sort(actual);

            // Keep only the unmapped tags
            List<String> unmapped = new ArrayList<>(actual);
            unmapped.removeAll(mappedTags);

            // Keep the mapped tags that are not in the model
            List<String> notInModel = new ArrayList<>(mappedTags);
            notInModel.removeAll(actual);

            System.out.printf("%-20s : %s%n", "Layer", tsd.getLayer());
            System.out.printf("%-20s : %s%n", "Tagset", tsd.getName());
            System.out.printf("%-20s : %s%n", "Component", tsd.getComponentName());
            System.out.printf("%-20s : %s%n", "Model location", tsd.getModelLocation());
            System.out.printf("%-20s : %s%n", "Model language", tsd.getModelLanguage());
            System.out.printf("%-20s : %s%n", "Model variant", tsd.getModelVariant());
            System.out.printf("%-20s : %s%n", "Model version", tsd.getModelVersion());
            System.out.printf("%-20s : %b%n", "Input", tsd.getInput());
            System.out.printf("%-20s - Expected: %s%n", "Unmapped tags",
                    asCopyableString(expected));
            System.out.printf("%-20s - Actual : %s%n", "Unmapped tags",
                    asCopyableString(unmapped));
            if (aExact) {
                System.out.printf("%-20s - Expected: %s%n", "Tags not in model",
                        asCopyableString(none));
                System.out.printf("%-20s - Actual : %s%n", "Tags not in model",
                        asCopyableString(notInModel));
            }

            assertEquals(asCopyableString(expected, true), asCopyableString(unmapped, true));
            if (aExact) {
                assertEquals(asCopyableString(none, true),
                        asCopyableString(notInModel, true));
            }
            return;
        }
    }

    System.out.println("The CAS does not contain a description for layer [" + aLayer.getName()
            + "] tagset [" + aName + "]");
    System.out.println("What has been found is:\n" + sb);
    fail("No tagset definition found for layer [" + aLayer.getName() + "] tagset [" + aName
            + "]");
}
/**
 * Checks which tags of a model's tagset cannot be parsed by the morphological features
 * parser configured for that tagset.
 * <p>
 * The parser is configured from a layer-specific classpath location. The tags of the first
 * {@link TagsetDescription} matching layer and tagset name are collected and sorted; those
 * for which {@code canParse} returns {@code false} must equal {@code aDefaultMapped}.
 * Details are printed to stdout. If no description matches, the descriptions that were found
 * are printed and the check fails.
 *
 * @param aLayer
 *            the annotation type identifying the layer; only {@link MorphologicalFeatures}
 *            is supported
 * @param aName
 *            the tagset name
 * @param aDefaultMapped
 *            the tags expected to be unparsable
 * @param aJCas
 *            the CAS holding the tagset descriptions; also used to configure the parser
 * @throws AnalysisEngineProcessException
 *             if configuring the parser fails
 * @throws IllegalArgumentException
 *             if the layer is not supported
 */
public static void assertTagsetParser(Class<?> aLayer, String aName, String[] aDefaultMapped,
        JCas aJCas) throws AnalysisEngineProcessException
{
    String pattern;
    if (aLayer == MorphologicalFeatures.class) {
        pattern = "classpath:/de/tudarmstadt/ukp/dkpro/"
                + "core/api/lexmorph/tagset/${language}-${tagset}-morph.map";
    }
    else {
        throw new IllegalArgumentException("Unsupported layer: " + aLayer.getName());
    }

    MorphologicalFeaturesParser mp = new MorphologicalFeaturesParser();
    mp.setDefault(MappingProvider.LOCATION, pattern);
    mp.setDefault("tagset", aName);
    mp.configure(aJCas.getCas());

    List<AnalysisMapping> mapping = mp.getResource();
    Assert.assertNotNull("No mapping found for layer [" + aLayer.getName() + "] tagset ["
            + aName + "]", mapping);

    List<String> expected = new ArrayList<String>(asList(aDefaultMapped));
    Collections.sort(expected);

    // Collects what is in the CAS so it can be reported if nothing matches.
    StringBuilder sb = new StringBuilder();

    for (TagsetDescription tsd : select(aJCas, TagsetDescription.class)) {
        sb.append('\t');
        sb.append(tsd.getLayer());
        sb.append(" - ");
        sb.append(tsd.getName());
        sb.append('\n');

        if (StringUtils.equals(aLayer.getName(), tsd.getLayer())
                && StringUtils.equals(aName, tsd.getName())) {
            List<String> actual = new ArrayList<String>();
            for (TagDescription td : select(tsd.getTags(), TagDescription.class)) {
                actual.add(td.getName());
            }
            Collections.sort(actual);

            // Keep only the tags the parser cannot handle; order follows the sorted tags.
            List<String> unmapped = new ArrayList<String>();
            for (String tag : actual) {
                if (!mp.canParse(tag)) {
                    unmapped.add(tag);
                }
            }

            System.out.printf("%-20s - Layer : %s%n", "Layer", tsd.getLayer());
            System.out.printf("%-20s - Tagset : %s%n", "Tagset", tsd.getName());
            System.out.printf("%-20s - Expected: %s%n", "Unmapped tags",
                    asCopyableString(expected));
            System.out.printf("%-20s - Actual : %s%n", "Unmapped tags",
                    asCopyableString(unmapped));

            assertEquals(asCopyableString(expected, true), asCopyableString(unmapped, true));
            return;
        }
    }

    System.out.println("The CAS does not contain a description for layer [" + aLayer.getName()
            + "] tagset [" + aName + "]");
    System.out.println("What has been found is:\n" + sb);
    fail("No tagset definition found for layer [" + aLayer.getName() + "] tagset [" + aName
            + "]");
}
/**
 * Runs the given engines over a single document containing {@code inputText} and checks the
 * document text of the CAS captured at the end of the pipeline.
 *
 * @param normalizedText
 *            the expected document text after processing
 * @param inputText
 *            the document text fed into the pipeline
 * @param language
 *            the document language set by the reader
 * @param aEngines
 *            the engines to run, in order
 * @throws ResourceInitializationException
 *             if the reader or an engine description cannot be created
 */
public static void assertTransformedText(String normalizedText, String inputText,
        String language, AnalysisEngineDescription... aEngines)
    throws ResourceInitializationException
{
    CollectionReaderDescription source = createReaderDescription(InternalStringReader.class,
            InternalStringReader.PARAM_DOCUMENT_TEXT, inputText,
            InternalStringReader.PARAM_LANGUAGE, language);

    // The caller's engines, followed by the holder that captures the final CAS.
    List<AnalysisEngineDescription> pipeline = new ArrayList<AnalysisEngineDescription>(
            asList(aEngines));
    pipeline.add(createEngineDescription(InternalJCasHolder.class));
    AnalysisEngineDescription[] pipelineArray = pipeline
            .toArray(new AnalysisEngineDescription[pipeline.size()]);

    for (JCas jcas : SimplePipeline.iteratePipeline(source, pipelineArray)) {
        // iteratePipeline does not support CAS multipliers. jcas is not updated after the
        // multiplier. In order to access the new CAS, we use the JCasHolder (not thread-safe!)
        JCas captured = InternalJCasHolder.get();
        assertEquals(normalizedText, captured.getDocumentText());
    }
}
/**
 * Validates the CAS with all checks of a {@link CasValidator} plus the given extra checks
 * and fails if any message at error level is reported. Error messages are printed to stdout.
 *
 * @param jcas
 *            the CAS to validate
 * @param aExtras
 *            additional checks to register with the validator
 * @return all messages produced by the validator, regardless of level
 */
@SafeVarargs
public static List<Message> assertValid(JCas jcas, Class<? extends Check>... aExtras)
{
    CasValidator validator = CasValidator.createWithAllChecks();
    for (Class<? extends Check> check : aExtras) {
        validator.addCheck(check);
    }

    List<Message> report = validator.analyze(jcas);

    List<String> problems = new ArrayList<String>();
    for (Message message : report) {
        if (message.level == ERROR) {
            problems.add(message.toString());
        }
    }
    for (String problem : problems) {
        System.out.println(problem);
    }

    List<String> noProblems = Collections.emptyList();
    assertEquals(asCopyableString(noProblems, true), asCopyableString(problems, true));

    return report;
}
/**
 * Fails if the given validation messages contain any message at error level. All messages
 * are printed to stdout first.
 *
 * @param messages
 *            the validation messages to inspect
 */
public static void assertValid(Collection<Message> messages)
{
    for (Message message : messages) {
        System.out.println(message);
    }

    List<String> problems = new ArrayList<String>();
    for (Message message : messages) {
        if (message.level == ERROR) {
            problems.add(message.toString());
        }
    }

    List<String> noProblems = Collections.emptyList();
    assertEquals(asCopyableString(noProblems, true), asCopyableString(problems, true));
}
/**
 * Renders a collection of strings as a list of quoted literals.
 * <p>
 * An empty collection is rendered as <code>{}</code>. Otherwise every element is wrapped in
 * double quotes ({@code null} elements are rendered as the bare word {@code null}) and the
 * elements are joined by a comma followed by either a space or a line break. Finally, every
 * backslash in the result is doubled.
 *
 * @param aCollection
 *            the strings to render
 * @param aLinebreak
 *            {@code true} to separate elements with {@code ",\n"}, {@code false} for
 *            {@code ", "}
 * @return the rendered string
 */
public static String asCopyableString(Collection<String> aCollection, boolean aLinebreak)
{
    if (aCollection.isEmpty()) {
        return "{}";
    }

    final String delimiter;
    if (aLinebreak) {
        delimiter = ",\n";
    }
    else {
        delimiter = ", ";
    }

    StringBuilder rendered = new StringBuilder();
    boolean first = true;
    for (String item : aCollection) {
        if (!first) {
            rendered.append(delimiter);
        }
        first = false;

        if (item == null) {
            rendered.append("null");
        }
        else {
            rendered.append('"').append(item).append('"');
        }
    }

    return rendered.toString().replace("\\", "\\\\");
}
/**
 * Renders a collection of strings on a single line. Delegates to
 * {@link #asCopyableString(Collection, boolean)} with line breaks disabled.
 *
 * @param aCollection
 *            the strings to render
 * @return the rendered string
 */
private static String asCopyableString(Collection<String> aCollection)
{
return asCopyableString(aCollection, false);
}
/**
 * Collection reader that produces exactly one document whose text and language are taken
 * from configuration parameters.
 */
public static class InternalStringReader
    extends JCasCollectionReader_ImplBase
{
    /** Name of the parameter holding the language to set on the document. */
    public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE;
    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = true)
    private String language;

    /** Name of the parameter holding the text to set on the document. */
    public static final String PARAM_DOCUMENT_TEXT = "documentText";
    @ConfigurationParameter(name = PARAM_DOCUMENT_TEXT, mandatory = true)
    private String documentText;

    // Whether the single document has already been handed out.
    private boolean isDone = false;

    @Override
    public void initialize(UimaContext aContext)
        throws ResourceInitializationException
    {
        super.initialize(aContext);
        isDone = false;
    }

    /**
     * Fills the given CAS with the configured language and text and marks the reader as
     * exhausted.
     */
    @Override
    public void getNext(JCas sJCas)
        throws IOException
    {
        isDone = true;
        sJCas.setDocumentLanguage(language);
        sJCas.setDocumentText(documentText);
    }

    @Override
    public boolean hasNext()
        throws IOException, CollectionException
    {
        return !isDone;
    }

    /**
     * Reports progress as completed entities out of a total of one.
     */
    @Override
    public Progress[] getProgress()
    {
        // First argument is the number of completed entities: one once the document has been
        // read, zero before. (This used to be inverted.)
        return new Progress[] { new ProgressImpl(isDone ? 1 : 0, 1, Progress.ENTITIES) };
    }
}
/**
 * Annotator that keeps a copy of the most recently processed CAS in a static field so that
 * it can be retrieved after the pipeline has run. The single static slot is shared by all
 * instances.
 */
public static class InternalJCasHolder extends JCasAnnotator_ImplBase
{
    // Copy of the CAS seen by the latest process() call.
    private static JCas captured;

    /**
     * Replaces the held CAS with a freshly created one and copies the document metadata (if
     * present) and the contents of the given CAS into it.
     */
    @Override
    public void process(JCas aJCas)
        throws AnalysisEngineProcessException
    {
        try {
            captured = JCasFactory.createJCas();
        }
        catch (UIMAException creationFailure) {
            throw new AnalysisEngineProcessException(creationFailure);
        }

        try {
            DocumentMetaData.copy(aJCas, captured);
        }
        catch (IllegalArgumentException ignored) {
            // Ignore missing DocumentMetaData
        }

        CasCopier.copyCas(aJCas.getCas(), captured.getCas(), true);
    }

    /**
     * @return the CAS captured by the latest {@code process} call, or {@code null} if none
     *         has been captured yet
     */
    public static JCas get()
    {
        return captured;
    }
}
}