/*
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.tudarmstadt.ukp.dkpro.core.api.io;

import static de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations.assertChunks;
import static org.apache.uima.fit.util.JCasUtil.select;

import java.util.ArrayList;
import java.util.List;

import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.fit.factory.JCasBuilder;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.junit.Test;

import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk;

/**
 * Checks that {@link BilouDecoder} creates the expected {@link Chunk} annotations from a
 * sequence of per-token BILOU labels.
 */
public class BilouDecoderTest
{
    /** Whitespace-separated fixture sentence; every space-delimited piece becomes one token. */
    private static final String SENTENCE = "We need a very complicated example sentence , which "
            + "contains as many constituents and dependencies as possible .";

    /**
     * Decodes one label per token of the fixture sentence and compares the resulting chunks
     * with the expected ones: the {@code B-NP ... L-NP} run is expected to produce a single
     * chunk spanning four tokens, and each {@code U-NP} label a chunk covering one token.
     *
     * @throws Exception
     *             if the CAS cannot be created or the mapping provider cannot be configured.
     */
    @Test
    public void bilouEncoderTest()
        throws Exception
    {
        // One label per token, in token order.
        String[] labels = {
                "O", "O", "O",
                "B-NP", "I-NP", "I-NP", "L-NP",
                "O", "O", "O", "O", "O",
                "U-NP",
                "O",
                "U-NP",
                "O", "O", "O" };

        // Expected chunks in the textual form compared by assertChunks.
        String[] expectedChunks = {
                "[ 10, 43]Chunk(NP) (very complicated example sentence)",
                "[ 69, 81]Chunk(NP) (constituents)",
                "[ 86, 98]Chunk(NP) (dependencies)" };

        JCas document = createTokenizedJCas();

        // The provider is set up purely from defaults: Chunk as base type, placeholder location.
        MappingProvider chunkMapping = new MappingProvider();
        chunkMapping.setDefault(MappingProvider.BASE_TYPE, Chunk.class.getName());
        chunkMapping.setDefault(MappingProvider.LOCATION, "dummy");
        chunkMapping.configure(document.getCas());

        // Feature of the Chunk type that is handed to the decoder.
        Feature valueFeature = JCasUtil.getType(document, Chunk.class)
                .getFeatureByBaseName("chunkValue");

        BilouDecoder bilouDecoder = new BilouDecoder(document.getCas(), valueFeature,
                chunkMapping);

        // Snapshot the tokens into a list before the decoder is run on the CAS.
        List<Token> tokenList = new ArrayList<>(select(document, Token.class));
        bilouDecoder.decode(tokenList, labels);

        assertChunks(expectedChunks, JCasUtil.select(document, Chunk.class));
    }

    /**
     * Builds a fresh CAS holding the fixture sentence, annotating every space-delimited piece
     * as a {@link Token} and appending a single space after each one.
     *
     * @return the populated JCas.
     * @throws Exception
     *             if the JCas cannot be created.
     */
    private JCas createTokenizedJCas()
        throws Exception
    {
        JCasBuilder builder = new JCasBuilder(JCasFactory.createJCas());

        String[] words = SENTENCE.split(" ");
        for (int i = 0; i < words.length; i++) {
            builder.add(words[i], Token.class);
            builder.add(" ");
        }

        builder.close();
        return builder.getJCas();
    }
}