/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.namefind;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.junit.Assert;
import org.junit.Test;

import opennlp.tools.util.Span;

/**
 * Unit tests for {@link BioCodec}.
 * <p>
 * The tests are grouped by the codec operation they exercise: encoding name
 * spans into outcome tags, decoding outcome tags back into spans, and checking
 * whether a set of outcomes is compatible with the codec.
 */
public class BioCodecTest {

  private static final BioCodec codec = new BioCodec();

  private static final String A_TYPE = "atype";
  private static final String A_START = A_TYPE + "-" + BioCodec.START;
  private static final String A_CONTINUE = A_TYPE + "-" + BioCodec.CONTINUE;

  private static final String B_TYPE = "btype";
  private static final String B_START = B_TYPE + "-" + BioCodec.START;
  private static final String B_CONTINUE = B_TYPE + "-" + BioCodec.CONTINUE;

  private static final String C_TYPE = "ctype";
  private static final String C_START = C_TYPE + "-" + BioCodec.START;

  private static final String OTHER = BioCodec.OTHER;

  // ---------------------------------------------------------------------
  // Shared fixtures
  // ---------------------------------------------------------------------

  /**
   * Splits {@code text} on single spaces, wraps the tokens and {@code names}
   * in a {@link NameSample} and runs the codec's encoder over it.
   *
   * @param text  the space separated sentence
   * @param names the name spans to annotate the sentence with (may be none)
   * @return the outcome tags produced by the codec, one per token
   */
  private static String[] encode(String text, Span... names) {
    NameSample sample = new NameSample(text.split(" "), names, true);
    int tokenCount = sample.getSentence().length;
    return codec.encode(sample.getNames(), tokenCount);
  }

  /**
   * Runs the codec's decoder over the given outcome tags.
   *
   * @param tags the outcome tags, in sentence order
   * @return the spans returned by the codec
   */
  private static Span[] decode(String... tags) {
    List<String> outcomes = Arrays.asList(tags);
    return codec.decode(outcomes);
  }

  /**
   * Asks the codec whether the given outcomes are compatible.
   *
   * @param outcomes the outcome tags to check (may be none)
   * @return the codec's verdict
   */
  private static boolean compatible(String... outcomes) {
    return codec.areOutcomesCompatible(outcomes);
  }

  // ---------------------------------------------------------------------
  // encode
  // ---------------------------------------------------------------------

  /**
   * A sentence without any name span yields only 'other' tags.
   */
  @Test
  public void testEncodeNoNames() {
    String[] tags = encode("Once upon a time.");

    String[] allOther = {OTHER, OTHER, OTHER, OTHER};
    Assert.assertArrayEquals("Only 'Other' is expected.", allOther, tags);
  }

  /**
   * A span covering one token yields a single 'start' tag.
   */
  @Test
  public void testEncodeSingleTokenSpan() {
    String[] tags = encode("I called Julie again.", new Span(2, 3, A_TYPE));

    String[] wanted = {OTHER, OTHER, A_START, OTHER};
    Assert.assertArrayEquals("'Julie' should be 'start' only, the rest should be 'other'.",
        wanted, tags);
  }

  /**
   * A span covering two tokens yields 'start' followed by 'continue'.
   */
  @Test
  public void testEncodeDoubleTokenSpan() {
    String[] tags = encode("I saw Stefanie Schmidt today.", new Span(2, 4, A_TYPE));

    String[] wanted = {OTHER, OTHER, A_START, A_CONTINUE, OTHER};
    Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is "
        + "'continue' and the rest should be 'other'.", wanted, tags);
  }

  /**
   * A span whose type is {@code null} is expected to be tagged with the
   * "default" type prefix.
   */
  @Test
  public void testEncodeDoubleTokenSpanNoType() {
    final String defaultStart = "default" + "-" + BioCodec.START;
    final String defaultContinue = "default" + "-" + BioCodec.CONTINUE;

    String[] tags = encode("I saw Stefanie Schmidt today.", new Span(2, 4, null));

    String[] wanted = {OTHER, OTHER, defaultStart, defaultContinue, OTHER};
    Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is "
        + "'continue' and the rest should be 'other'.", wanted, tags);
  }

  /**
   * Two directly adjacent single-token spans each get their own 'start' tag.
   */
  @Test
  public void testEncodeAdjacentSingleSpans() {
    String[] tags = encode("something PersonA PersonB Something",
        new Span(1, 2, A_TYPE), new Span(2, 3, A_TYPE));

    String[] wanted = {OTHER, A_START, A_START, OTHER};
    Assert.assertArrayEquals(wanted, tags);
  }

  /**
   * A two-token span directly followed by a single-token span: the second
   * span begins with a fresh 'start' tag.
   */
  @Test
  public void testEncodeAdjacentSpans() {
    String[] tags = encode("something PersonA PersonA PersonB Something",
        new Span(1, 3, A_TYPE), new Span(3, 4, A_TYPE));

    String[] wanted = {OTHER, A_START, A_CONTINUE, A_START, OTHER};
    Assert.assertArrayEquals(wanted, tags);
  }

  // ---------------------------------------------------------------------
  // createSequenceValidator
  // ---------------------------------------------------------------------

  /**
   * The codec hands out a {@link NameFinderSequenceValidator}.
   */
  @Test
  public void testCreateSequenceValidator() {
    Object validator = codec.createSequenceValidator();
    Assert.assertTrue(validator instanceof NameFinderSequenceValidator);
  }

  // ---------------------------------------------------------------------
  // decode
  // ---------------------------------------------------------------------

  /**
   * No tags in, no spans out.
   */
  @Test
  public void testDecodeEmpty() {
    List<String> noTags = new ArrayList<>();

    Span[] spans = codec.decode(noTags);

    Assert.assertArrayEquals(new Span[] {}, spans);
  }

  /**
   * Start, Other
   */
  @Test
  public void testDecodeSingletonFirst() {
    Span[] spans = decode(B_START, OTHER);

    Span[] wanted = {new Span(0, 1, B_TYPE)};
    Assert.assertArrayEquals(wanted, spans);
  }

  /**
   * Start, Start, Other
   */
  @Test
  public void testDecodeAdjacentSingletonFirst() {
    Span[] spans = decode(B_START, B_START, OTHER);

    Span[] wanted = {new Span(0, 1, B_TYPE), new Span(1, 2, B_TYPE)};
    Assert.assertArrayEquals(wanted, spans);
  }

  /**
   * Start, Continue, Other
   */
  @Test
  public void testDecodePairFirst() {
    Span[] spans = decode(B_START, B_CONTINUE, OTHER);

    Span[] wanted = {new Span(0, 2, B_TYPE)};
    Assert.assertArrayEquals(wanted, spans);
  }

  /**
   * Start, Continue, Continue, Other
   */
  @Test
  public void testDecodeTripletFirst() {
    Span[] spans = decode(B_START, B_CONTINUE, B_CONTINUE, OTHER);

    Span[] wanted = {new Span(0, 3, B_TYPE)};
    Assert.assertArrayEquals(wanted, spans);
  }

  /**
   * Start, Continue, Start, Other
   */
  @Test
  public void testDecodeAdjacentPairSingleton() {
    Span[] spans = decode(B_START, B_CONTINUE, B_START, OTHER);

    Span[] wanted = {new Span(0, 2, B_TYPE), new Span(2, 3, B_TYPE)};
    Assert.assertArrayEquals(wanted, spans);
  }

  /**
   * Other, Start, Other
   */
  @Test
  public void testDecodeOtherFirst() {
    Span[] spans = decode(OTHER, B_START, OTHER);

    Span[] wanted = {new Span(1, 2, B_TYPE)};
    Assert.assertArrayEquals(wanted, spans);
  }

  /**
   * Other, A-Start, A-Continue, A-Continue, Other, B-Start, B-Continue, Other,
   * C-Start, Other
   */
  @Test
  public void testDecodeMultiClass() {
    Span[] spans = decode(
        OTHER,
        A_START, A_CONTINUE, A_CONTINUE,
        OTHER,
        B_START, B_CONTINUE,
        OTHER,
        C_START,
        OTHER);

    Span[] wanted = {
        new Span(1, 4, A_TYPE),
        new Span(5, 7, B_TYPE),
        new Span(8, 9, C_TYPE)
    };
    Assert.assertArrayEquals(wanted, spans);
  }

  // ---------------------------------------------------------------------
  // areOutcomesCompatible
  // ---------------------------------------------------------------------

  /**
   * An empty outcome set is rejected.
   */
  @Test
  public void testCompatibilityEmpty() {
    Assert.assertFalse(compatible());
  }

  /**
   * A lone 'start' outcome is accepted.
   */
  @Test
  public void testCompatibilitySingleStart() {
    Assert.assertTrue(compatible(A_START));
  }

  /**
   * A 'continue' outcome without a 'start' of the same type is rejected,
   * even if a 'start' of another type is present.
   */
  @Test
  public void testCompatibilitySingleContinue() {
    Assert.assertFalse(compatible(A_CONTINUE));
    Assert.assertFalse(compatible(B_START, A_CONTINUE));
  }

  /**
   * A lone 'other' outcome is rejected.
   */
  @Test
  public void testCompatibilitySingleOther() {
    Assert.assertFalse(compatible(OTHER));
  }

  /**
   * 'start' plus 'continue' of the same type is accepted.
   */
  @Test
  public void testCompatibilityStartContinue() {
    Assert.assertTrue(compatible(A_START, A_CONTINUE));
  }

  /**
   * 'start' plus 'other' is accepted.
   */
  @Test
  public void testCompatibilityStartOther() {
    Assert.assertTrue(compatible(A_START, OTHER));
  }

  /**
   * 'continue' plus 'other' without a matching 'start' is rejected, even if
   * a 'start' of another type is present.
   */
  @Test
  public void testCompatibilityContinueOther() {
    Assert.assertFalse(compatible(A_CONTINUE, OTHER));
    Assert.assertFalse(compatible(B_START, A_CONTINUE, OTHER));
  }

  /**
   * 'start', 'continue' and 'other' together are accepted.
   */
  @Test
  public void testCompatibilityStartContinueOther() {
    Assert.assertTrue(compatible(A_START, A_CONTINUE, OTHER));
  }

  /**
   * Outcomes of several types are accepted when each 'continue' has its 'start'.
   */
  @Test
  public void testCompatibilityMultiClass() {
    Assert.assertTrue(compatible(A_START, A_CONTINUE, B_START, OTHER));
  }

  /**
   * An outcome that is not a codec tag makes the set incompatible.
   */
  @Test
  public void testCompatibilityBadTag() {
    Assert.assertFalse(compatible(A_START, A_CONTINUE, "BAD"));
  }

  /**
   * Repeated outcomes do not affect compatibility.
   */
  @Test
  public void testCompatibilityRepeated() {
    Assert.assertTrue(compatible(
        A_START, A_START,
        A_CONTINUE, A_CONTINUE,
        B_START, B_START,
        OTHER, OTHER));
  }
}