/*
 * Copyright 2015
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.tudarmstadt.ukp.dkpro.core.io.penntree;

import java.util.Collection;

import org.apache.uima.UIMAException;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.junit.Assert;
import org.junit.Test;

import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProviderFactory;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent;
import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations;
import de.tudarmstadt.ukp.dkpro.core.testing.validation.extras.AllTokensHavePos;

/**
 * Tests for {@link PennTreeToJCasConverter}.
 */
public class PennTreeToJCasConverterTest {

    /**
     * Converts a bracketed parse tree given as a string and checks the parent links of the
     * resulting {@link Constituent} annotations: every constituent whose type is not
     * {@code "ROOT"} must have a parent, and the {@code "ROOT"} constituent must have none.
     *
     * <p>The test also validates the CAS with {@link AllTokensHavePos} after the conversion,
     * which is run with POS tag creation switched on.
     *
     * @throws UIMAException
     *             declared because JCas creation is declared to throw it
     */
    @Test
    public void whenConvertingFromStringThenTheParentOfConstituensAreSet() throws UIMAException {
        // Both mapping providers are created with all-null arguments.
        MappingProvider posMappingProvider =
                MappingProviderFactory.createPosMappingProvider(null, null, (String) null);
        MappingProvider constituentMappingProvider =
                MappingProviderFactory.createConstituentMappingProvider(null, null, (String) null);

        PennTreeToJCasConverter converter =
                new PennTreeToJCasConverter(posMappingProvider, constituentMappingProvider);

        // The literal must stay on a single physical line: a Java string literal cannot
        // contain a line terminator.
        String parseTree = "(ROOT (S (NP (PRP It)) (VP (VBZ is) (NP (DT a) (NN test))) (. .)))";

        JCas jcas = JCasFactory.createJCas();
        posMappingProvider.configure(jcas.getCas());
        constituentMappingProvider.configure(jcas.getCas());

        // The document text is derived from the parsed tree itself.
        PennTreeNode parsePennTree = PennTreeUtils.parsePennTree(parseTree);
        String sent = PennTreeUtils.toText(parsePennTree);

        jcas.setDocumentText(sent);
        jcas.setDocumentLanguage("en");

        // One sentence spanning the whole document text.
        Sentence aSentence = new Sentence(jcas, 0, sent.length());
        aSentence.addToIndexes();

        // One token per space-separated word. The offset arithmetic assumes that words in
        // the text are separated by exactly one space character.
        int pos = 0;
        for (String tokenStr : sent.split(" ")) {
            new Token(jcas, pos, pos + tokenStr.length()).addToIndexes();
            pos += tokenStr.length() + 1;
        }

        converter.setCreatePosTags(true);
        converter.convertPennTree(aSentence, parsePennTree);

        AssertAnnotations.assertValid(jcas, AllTokensHavePos.class);

        Collection<Constituent> constituents = JCasUtil.select(jcas, Constituent.class);

        // Guard against a vacuous pass: without this the loop below would succeed even if
        // the conversion had produced no constituents at all.
        Assert.assertFalse("Conversion produced no constituents", constituents.isEmpty());

        for (Constituent constituent : constituents) {
            if (!constituent.getConstituentType().equals("ROOT")) {
                Assert.assertNotNull(constituent.getParent());
            }
            else {
                Assert.assertNull(constituent.getParent());
            }
        }
    }
}