/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.formats;
import java.io.IOException;
import java.nio.charset.Charset;
import org.junit.Test;
import opennlp.tools.postag.POSSample;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
public class ConllXPOSSampleStreamTest {
@Test
public void testParsingSample() throws IOException {
InputStreamFactory in = new ResourceAsStreamFactory(ConllXPOSSampleStreamTest.class,
"/opennlp/tools/formats/conllx.sample");
ObjectStream<POSSample> sampleStream = new ConllXPOSSampleStream(in,Charset.forName("UTF-8"));
POSSample a = sampleStream.read();
String[] aSentence = a.getSentence();
String[] aTags = a.getTags();
assertEquals(22, aSentence.length);
assertEquals(22, aTags.length);
assertEquals("To", aSentence[0]);
assertEquals("AC", aTags[0]);
assertEquals("kendte", aSentence[1]);
assertEquals("AN", aTags[1]);
assertEquals("russiske", aSentence[2]);
assertEquals("AN", aTags[2]);
assertEquals("historikere", aSentence[3]);
assertEquals("NC", aTags[3]);
assertEquals("Andronik", aSentence[4]);
assertEquals("NP", aTags[4]);
assertEquals("Andronik", aSentence[5]);
assertEquals("NP", aTags[5]);
assertEquals("og", aSentence[6]);
assertEquals("CC", aTags[6]);
assertEquals("Igor", aSentence[7]);
assertEquals("NP", aTags[7]);
assertEquals("Klamkin", aSentence[8]);
assertEquals("NP", aTags[8]);
assertEquals("tror", aSentence[9]);
assertEquals("VA", aTags[9]);
assertEquals("ikke", aSentence[10]);
assertEquals("RG", aTags[10]);
assertEquals(",", aSentence[11]);
assertEquals("XP", aTags[11]);
assertEquals("at", aSentence[12]);
assertEquals("CS", aTags[12]);
assertEquals("Rusland", aSentence[13]);
assertEquals("NP", aTags[13]);
assertEquals("kan", aSentence[14]);
assertEquals("VA", aTags[14]);
assertEquals("udvikles", aSentence[15]);
assertEquals("VA", aTags[15]);
assertEquals("uden", aSentence[16]);
assertEquals("SP", aTags[16]);
assertEquals("en", aSentence[17]);
assertEquals("PI", aTags[17]);
assertEquals("\"", aSentence[18]);
assertEquals("XP", aTags[18]);
assertEquals("jernnæve", aSentence[19]);
assertEquals("NC", aTags[19]);
assertEquals("\"", aSentence[20]);
assertEquals("XP", aTags[20]);
assertEquals(".", aSentence[21]);
assertEquals("XP", aTags[21]);
POSSample b = sampleStream.read();
String[] bSentence = b.getSentence();
String[] bTags = b.getTags();
assertEquals(12, bSentence.length);
assertEquals(12, bTags.length);
assertEquals("De", bSentence[0]);
assertEquals("PP", bTags[0]);
assertEquals("hævder", bSentence[1]);
assertEquals("VA", bTags[1]);
assertEquals(",", bSentence[2]);
assertEquals("XP", bTags[2]);
assertEquals("at", bSentence[3]);
assertEquals("CS", bTags[3]);
assertEquals("Ruslands", bSentence[4]);
assertEquals("NP", bTags[4]);
assertEquals("vej", bSentence[5]);
assertEquals("NC", bTags[5]);
assertEquals("til", bSentence[6]);
assertEquals("SP", bTags[6]);
assertEquals("demokrati", bSentence[7]);
assertEquals("NC", bTags[7]);
assertEquals("går", bSentence[8]);
assertEquals("VA", bTags[8]);
assertEquals("gennem", bSentence[9]);
assertEquals("SP", bTags[9]);
assertEquals("diktatur", bSentence[10]);
assertEquals("NC", bTags[10]);
assertEquals(".", bSentence[11]);
assertEquals("XP", bTags[11]);
assertNull(sampleStream.read());
}
}