/* * Copyright 2013 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.io.tiger; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.CollectionReaderFactory.*; import static org.apache.uima.fit.pipeline.SimplePipeline.*; import static org.apache.uima.fit.util.JCasUtil.selectSingle; import static org.apache.uima.fit.util.JCasUtil.select; import static org.apache.uima.fit.util.JCasUtil.selectCovered; import static org.junit.Assert.assertEquals; import static de.tudarmstadt.ukp.dkpro.core.testing.IOTestRunner.*; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.collection.CollectionReader; import org.apache.uima.collection.CollectionReaderDescription; import org.apache.uima.fit.factory.JCasFactory; import org.apache.uima.jcas.JCas; import org.junit.Rule; import org.junit.Test; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred; import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.PennTree; import de.tudarmstadt.ukp.dkpro.core.io.conll.Conll2012Writer; import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations; import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext; public class TigerXmlReaderTest { @Test public void test() throws Exception { CollectionReader reader = createReader(TigerXmlReader.class, TigerXmlReader.PARAM_SOURCE_LOCATION, "src/test/resources/", TigerXmlReader.PARAM_PATTERNS, "[+]tiger-sample.xml", TigerXmlReader.PARAM_LANGUAGE, "de", TigerXmlReader.PARAM_READ_PENN_TREE, true); JCas jcas = JCasFactory.createJCas(); reader.getNext(jcas.getCas()); String pennTree = "(VROOT ($( ``) (S (PN-SB (NE Ross) (NE Perot)) (VAFIN wäre) " + "(ADV vielleicht) (NP-PD (ART ein) (ADJA prächtiger) (NN Diktator))) ($( ''))"; AssertAnnotations.assertPennTree(pennTree, selectSingle(jcas, PennTree.class)); } @Test(expected=IllegalStateException.class) public void test2() throws Exception { CollectionReaderDescription reader = createReaderDescription(TigerXmlReader.class, TigerXmlReader.PARAM_SOURCE_LOCATION, "src/test/resources/", TigerXmlReader.PARAM_PATTERNS, "[+]simple-broken-sentence.xml", TigerXmlReader.PARAM_LANGUAGE, "de", TigerXmlReader.PARAM_READ_PENN_TREE, true); for (JCas cas : iteratePipeline(reader, new AnalysisEngineDescription[] {})) { System.out.printf("%s %n", DocumentMetaData.get(cas).getDocumentId()); } } @Test public void tigerSampleTest() throws Exception { testOneWay( createReaderDescription(TigerXmlReader.class, TigerXmlReader.PARAM_LANGUAGE, "de", TigerXmlReader.PARAM_READ_PENN_TREE, true), "tiger-sample.xml.dump", "tiger-sample.xml"); } @Test public void semevalSampleTest() throws Exception { testOneWay( createReaderDescription(TigerXmlReader.class, TigerXmlReader.PARAM_LANGUAGE, "en", TigerXmlReader.PARAM_READ_PENN_TREE, true), "semeval1010-sample.xml.dump", "semeval1010-en-sample.xml"); } @Test public void semevalSampleTest2() throws Exception { testOneWay( createReaderDescription(TigerXmlReader.class, TigerXmlReader.PARAM_LANGUAGE, "en", TigerXmlReader.PARAM_READ_PENN_TREE, true), createEngineDescription(Conll2012Writer.class), "semeval1010-en-sample.conll", "semeval1010-en-sample.xml"); } @Test public void testNoncontiguousFrameTarget() throws Exception { CollectionReaderDescription reader = createReaderDescription(TigerXmlReader.class, TigerXmlReader.PARAM_SOURCE_LOCATION, "src/test/resources/", TigerXmlReader.PARAM_PATTERNS, "[+]tiger-sample-noncontiguousframe.xml", TigerXmlReader.PARAM_LANGUAGE, "de", TigerXmlReader.PARAM_READ_PENN_TREE, true); int[][] frameRanges = new int[][] {{4, 11}, {33, 47}, {71, 74}, {112, 138}, {143, 147}, {246, 255}}; for (JCas cas : iteratePipeline(reader, new AnalysisEngineDescription[] {})) { for (Sentence sentence : select(cas, Sentence.class)){ for(SemPred frame: selectCovered(SemPred.class, sentence)){ System.out.println("frame boundary " + frame.getBegin() + " : " + frame.getEnd()); boolean found = false; for(int[] element:frameRanges){ if(element[0] == frame.getBegin() && element[1] == frame.getEnd()){ found = true; break; } } assertEquals(true, found); } } } } @Test public void testFrameTargetHavingMultipleChildren() throws Exception { CollectionReaderDescription reader = createReaderDescription(TigerXmlReader.class, TigerXmlReader.PARAM_SOURCE_LOCATION, "src/test/resources/", TigerXmlReader.PARAM_PATTERNS, "[+]tiger-sample-complexframe.xml", TigerXmlReader.PARAM_LANGUAGE, "de", TigerXmlReader.PARAM_READ_PENN_TREE, true); int[][] frameRanges = new int[][] {{26, 41}, {54, 61}, {64, 85}, {97, 104}, {120, 130}, {135, 151}, {152, 169}}; /* Frame targets: * Glaubwürdigkeit * wichtig * ein Zeichen zu setzen * gewillt * Erreichung * Millenniumsziele * <eine aktive Rolle> ... <übernehmen> (Noncontiguous frame target) * **/ for (JCas cas : iteratePipeline(reader, new AnalysisEngineDescription[] {})) { for (Sentence sentence : select(cas, Sentence.class)){ for(SemPred frame: selectCovered(SemPred.class, sentence)){ System.out.println("frame target text [" +frame.getCoveredText() + "], frame boundary " + frame.getBegin() + " : " + frame.getEnd()); boolean found = false; for(int[] element:frameRanges){ if(element[0] == frame.getBegin() && element[1] == frame.getEnd()){ found = true; break; } } assertEquals(true, found); } } } } @Test public void testContiguousFrameTarget() throws Exception { CollectionReaderDescription reader = createReaderDescription(TigerXmlReader.class, TigerXmlReader.PARAM_SOURCE_LOCATION, "src/test/resources/", TigerXmlReader.PARAM_PATTERNS, "[+]tiger-sample-contiguousframe.xml", TigerXmlReader.PARAM_LANGUAGE, "de", TigerXmlReader.PARAM_READ_PENN_TREE, true); /** * first element is contiguous * it spans over 2 tokens "schlage" and "mit", so the boundary should be * schlage.begin and mit.end ==> (4, 15) */ int[][] frameRanges = new int[][] {{4, 15}, {33, 47}, {71, 74}, {112, 138}, {143, 147}, {246, 255}}; for (JCas cas : iteratePipeline(reader, new AnalysisEngineDescription[] {})) { for (Sentence sentence : select(cas, Sentence.class)){ for(SemPred frame: selectCovered(SemPred.class, sentence)){ System.out.println("frame boundary " + frame.getBegin() + " : " + frame.getEnd()); boolean found = false; for(int[] element:frameRanges){ if(element[0] == frame.getBegin() && element[1] == frame.getEnd()){ found = true; break; } } assertEquals(true, found); } } } } @Rule public DkproTestContext testContext = new DkproTestContext(); }