/* * Copyright 2011 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.io.imscwb; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription; import static org.apache.uima.fit.pipeline.SimplePipeline.iteratePipeline; import static org.apache.uima.fit.util.JCasUtil.select; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import org.apache.uima.collection.CollectionReaderDescription; import org.apache.uima.jcas.JCas; import org.junit.Rule; import org.junit.Test; import de.tudarmstadt.ukp.dkpro.core.api.io.ResourceCollectionReaderBase; import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext; public class ImsCwbReaderTest { @Test public void wackyTest() throws Exception { CollectionReaderDescription reader = createReaderDescription( ImsCwbReader.class, ImsCwbReader.PARAM_SOURCE_LOCATION, "src/test/resources/wacky/", ImsCwbReader.PARAM_LANGUAGE, "de", ImsCwbReader.PARAM_SOURCE_ENCODING, "ISO-8859-15", ResourceCollectionReaderBase.PARAM_PATTERNS, "[+]test.txt"); String firstSentence = "Nikita ( La Femme Nikita ) Dieser Episodenf\u00FChrer wurde von " + "September 1998 bis Mai 1999 von Konstantin C.W. Volkmann geschrieben und im Mai " + "2000 von Stefan B\u00F6rzel \u00FCbernommen . "; int i = 0; for (JCas jcas : iteratePipeline(reader)) { // System.out.println(jcas.getDocumentText()); if (i == 0) { assertEquals(11406, select(jcas, Token.class).size()); assertEquals(11406, select(jcas, Lemma.class).size()); assertEquals(11406, select(jcas, POS.class).size()); assertEquals(717, select(jcas, Sentence.class).size()); assertEquals(firstSentence, select(jcas, Sentence.class).iterator().next() .getCoveredText()); assertEquals("http://www.epguides.de/nikita.htm", DocumentMetaData.get(jcas) .getDocumentTitle()); } i++; } assertEquals(4, i); } @Test public void wackyTest_noAnnotations() throws Exception { CollectionReaderDescription reader = createReaderDescription( ImsCwbReader.class, ImsCwbReader.PARAM_SOURCE_LOCATION, "src/test/resources/wacky/", ImsCwbReader.PARAM_PATTERNS, "[+]test.txt", ImsCwbReader.PARAM_LANGUAGE, "de", ImsCwbReader.PARAM_SOURCE_ENCODING, "ISO-8859-15", ImsCwbReader.PARAM_READ_TOKEN, false, ImsCwbReader.PARAM_READ_LEMMA, false, ImsCwbReader.PARAM_READ_POS, false, ImsCwbReader.PARAM_READ_SENTENCES, false); int i = 0; for (JCas jcas : iteratePipeline(reader)) { if (i == 0) { assertEquals(0, select(jcas, Token.class).size()); assertEquals(0, select(jcas, POS.class).size()); assertEquals(0, select(jcas, Sentence.class).size()); } i++; } assertEquals(4, i); } @Test(expected = IllegalStateException.class) public void wackyTest__expectedException() throws Exception { CollectionReaderDescription reader = createReaderDescription( ImsCwbReader.class, ImsCwbReader.PARAM_SOURCE_LOCATION, "src/test/resources/wacky", ImsCwbReader.PARAM_LANGUAGE, "de", ImsCwbReader.PARAM_SOURCE_ENCODING, "ISO-8859-15", ImsCwbReader.PARAM_READ_TOKEN, false, ImsCwbReader.PARAM_READ_LEMMA, true, ImsCwbReader.PARAM_READ_POS, false, ImsCwbReader.PARAM_READ_SENTENCES, false); for (JCas jcas : iteratePipeline(reader)) { // should never get here fail("no Exception!"); } fail("no Exception!"); } @Rule public DkproTestContext testContext = new DkproTestContext(); }