/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.uima.util;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.junit.Assert;
import org.junit.Test;
public class AnnotationComboIteratorTest {

  /**
   * Ensures that the bug observed in OPENNLP-676 is fixed. The described
   * bug occurs if there are tokens which are outside of the sentence bounds.
   * In that case an uncommon code path in the iterator is used to skip the
   * out-of-sentence tokens until it again finds tokens which are inside a sentence.
   * <p>
   * The iterator was either crashing with a NoSuchElementException or it just left
   * out the first token in the next sentence.
   *
   * @throws IOException if reading or closing one of the test resources fails
   */
  @Test
  public void OPENNLP_676() throws IOException {
    // Close the resource streams here instead of relying on the CasUtil
    // helpers to do so; a missing resource is reported with a clear message.
    final TypeSystemDescription ts;
    try (InputStream typeSystemIn = AnnotationComboIteratorTest.class
        .getResourceAsStream("/test-descriptors/TypeSystem.xml")) {
      Assert.assertNotNull(
          "Test resource /test-descriptors/TypeSystem.xml not found on classpath",
          typeSystemIn);
      ts = CasUtil.createTypeSystemDescription(typeSystemIn);
    }

    CAS cas = CasUtil.createEmptyCAS(ts);

    try (InputStream xmiIn = AnnotationComboIteratorTest.class
        .getResourceAsStream("/cas/OPENNLP-676.xmi")) {
      Assert.assertNotNull(
          "Test resource /cas/OPENNLP-676.xmi not found on classpath", xmiIn);
      CasUtil.deserializeXmiCAS(cas, xmiIn);
    }

    AnnotationComboIterator comboIterator = new AnnotationComboIterator(cas,
        cas.getTypeSystem().getType("opennlp.uima.Sentence"),
        cas.getTypeSystem().getType("opennlp.uima.Token"));

    // Collect the covered text of every token, grouped by the sentence
    // the combo iterator reports it under.
    List<List<String>> tokensBySentence = new ArrayList<>();

    for (AnnotationIteratorPair annotationIteratorPair : comboIterator) {
      final List<String> tokens = new ArrayList<>();

      for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) {
        tokens.add(tokenAnnotation.getCoveredText());
      }

      tokensBySentence.add(tokens);
    }

    // Fail with an assertion message rather than an IndexOutOfBoundsException
    // if the iterator yields fewer sentences than the checks below inspect.
    Assert.assertTrue(
        "Expected at least two sentences but found " + tokensBySentence.size(),
        tokensBySentence.size() >= 2);

    Assert.assertEquals(Collections.singletonList("A"), tokensBySentence.get(0));
    Assert.assertEquals(Arrays.asList("H", "I"), tokensBySentence.get(1));
  }
}