/* * Copyright 2012 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.io.penntree; import static de.tudarmstadt.ukp.dkpro.core.io.penntree.PennTreeUtils.convertPennTree; import static de.tudarmstadt.ukp.dkpro.core.io.penntree.PennTreeUtils.parsePennTree; import static de.tudarmstadt.ukp.dkpro.core.io.penntree.PennTreeUtils.selectDfs; import static de.tudarmstadt.ukp.dkpro.core.io.penntree.PennTreeUtils.toPennTree; import static de.tudarmstadt.ukp.dkpro.core.io.penntree.PennTreeUtils.toPrettyPennTree; import static de.tudarmstadt.ukp.dkpro.core.io.penntree.PennTreeUtils.toText; import static org.apache.uima.fit.util.JCasUtil.selectSingle; import static org.junit.Assert.assertEquals; import org.apache.uima.fit.factory.JCasFactory; import org.apache.uima.jcas.JCas; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.ROOT; import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations; import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext; /** */ public class PennTreeUtilsTest { @Test public void testParseSerialize() { doTest("(S (NP a) (VP b) (PUNC .))"); doTest("(ROOT (S (NP (PRP It)) (VP (VBZ is) (PP (IN for) (NP (NP (DT this) (NN reason)) " + "(SBAR (IN that) (S (NP (NN deconstruction)) (VP (VP (VBZ remains) (NP (NP " + "(DT a) (JJ fundamental) (NN threat)) (PP (TO to) (NP (NNP Marxism))))) (, ,) " + "(CC and) (VP (PP (IN by) (NP (NP (NN implication)) (PP (TO to) (NP (JJ other) " + "(ADJP (JJ culturalist) (CC and) (JJ contextualizing)) " + "(NNS approaches)))))))))))) (. .)))"); } @Test public void testPrettySerialize() { String tree = "(ROOT\n" + " (S\n" + " (S\n" + " (NP\n" + " (NP (DT The) (JJS strongest) (NN rain))\n" + " (VP\n" + " (ADVP (RB ever))\n" + " (VBN recorded)\n" + " (PP (IN in)\n" + " (NP (NNP India)))))\n" + " (VP\n" + " (VP (VBD shut)\n" + " (PRT (RP down))\n" + " (NP\n" + " (NP (DT the) (JJ financial) (NN hub))\n" + " (PP (IN of)\n" + " (NP (NNP Mumbai)))))\n" + " (, ,)\n" + " (VP (VBD snapped)\n" + " (NP (NN communication) (NNS lines)))\n" + " (, ,)\n" + " (VP (VBD closed)\n" + " (NP (NNS airports)))\n" + " (CC and)\n" + " (VP (VBD forced)\n" + " (NP\n" + " (NP (NNS thousands))\n" + " (PP (IN of)\n" + " (NP (NNS people))))\n" + " (S\n" + " (VP (TO to)\n" + " (VP\n" + " (VP (VB sleep)\n" + " (PP (IN in)\n" + " (NP (PRP$ their) (NNS offices))))\n" + " (CC or)\n" + " (VP (VB walk)\n" + " (NP (NN home))\n" + " (PP (IN during)\n" + " (NP (DT the) (NN night))))))))))\n" + " (, ,)\n" + " (NP (NNS officials))\n" + " (VP (VBD said)\n" + " (NP-TMP (NN today)))\n" + " (. .)))"; PennTreeNode n = parsePennTree(tree); String actual = toPrettyPennTree(n); assertEquals(tree, actual); } private static void doTest(String aBracket) { String expected = aBracket; PennTreeNode n = parsePennTree(expected); String actual = n.toString(); assertEquals(expected, actual); } @Test @Ignore("No asserts yet!") public void testSelectDfs() { PennTreeNode n = parsePennTree( "(ROOT (S (NP (PRP It)) (VP (VBZ is) (PP (IN for) (NP (NP (DT this) (NN reason)) " + "(SBAR (IN that) (S (NP (NN deconstruction)) (VP (VP (VBZ remains) (NP (NP " + "(DT a) (JJ fundamental) (NN threat)) (PP (TO to) (NP (NNP Marxism))))) (, ,) " + "(CC and) (VP (PP (IN by) (NP (NP (NN implication)) (PP (TO to) (NP (JJ other) " + "(ADJP (JJ culturalist) (CC and) (JJ contextualizing)) " + "(NNS approaches)))))))))))) (. .)))"); System.out.println(selectDfs(n, 1)); System.out.println(selectDfs(n, 2)); System.out.println(selectDfs(n, 3)); System.out.println(selectDfs(n, 4)); System.out.println(selectDfs(n, 5)); System.out.println(selectDfs(n, 6)); System.out.println(selectDfs(n, 7)); System.out.println(selectDfs(n, 8)); System.out.println(selectDfs(n, 9)); System.out.println(selectDfs(n, 10)); System.out.println(selectDfs(n, 11)); System.out.println(selectDfs(n, 12)); } @Test public void testFromUimaConversion() throws Exception { String documentEnglish = "It is for this reason that deconstruction remains a ( fundamental ) threat to " + "Marxism , and by implication to other culturalist and contextualizing " + "approaches ."; String pennTree = "(ROOT (S (S (NP (PRP It)) (VP (VBZ is) (PP (IN for) (NP (DT this) " + "(NN reason))) (SBAR (IN that) (S (NP (NN deconstruction)) (VP (VBZ remains) " + "(NP (NP (DT a) (PRN (-LRB- -LRB-) (NN fundamental) (-RRB- -RRB-)) (NN threat)) " + "(PP (TO to) (NP (NNP Marxism))))))))) (, ,) (CC and) (S (PP (IN by) (NP " + "(NN implication))) (PP (TO to) (NP (NP (JJ other) (NN culturalist)) (CC and) " + "(NP (VBG contextualizing) (NNS approaches))))) (. .)))"; PennTreeToJCasConverter converter = new PennTreeToJCasConverter(null, null); converter.setInternTags(true); converter.setWriteTracesToText(false); converter.setCreatePosTags(true); converter.setRootLabel("ROOT"); JCas jcas = JCasFactory.createJCas(); StringBuilder text = new StringBuilder(); converter.convertPennTree(jcas, text, PennTreeUtils.parsePennTree(pennTree)); jcas.setDocumentText(text.toString()); ROOT root = selectSingle(jcas, ROOT.class); PennTreeNode r = convertPennTree(root); assertEquals(documentEnglish.trim(), toText(r).trim()); AssertAnnotations.assertPennTree(pennTree, toPennTree(r)); } @Rule public DkproTestContext testContext = new DkproTestContext(); }