/*******************************************************************************
* Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*******************************************************************************/
package eu.project.ttc.test.unit.io;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.tuple;
import static org.junit.Assert.assertEquals;
import org.apache.uima.resource.ResourceInitializationException;
import org.hamcrest.core.StringContains;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import eu.project.ttc.engines.morpho.Segmentation;
import eu.project.ttc.io.SegmentationParser;
import fr.univnantes.julestar.uima.resources.ResourceFormatException;
/**
 * Unit tests for {@link SegmentationParser}.
 *
 * <p>The fixtures use a bracket notation in which each segment is written as
 * {@code [substring]} or {@code [substring:lemma]}. The tests check the
 * bracket-free string reported by {@link Segmentation#getString()}, the
 * begin/end/lemma/substring values of the parsed segments, and the error
 * messages raised on malformed input.
 */
public class SegmentationParserSpec {

    // ---- Well-formed inputs -------------------------------------------------

    /** Two adjacent segments, no explicit lemma. */
    String s1 = "[re][inscription]";
    /** Two adjacent segments, each with an explicit lemma after the colon. */
    String s2 = "[re:toto][inscription:tata]";
    /** Two segments separated by a hyphen that lies outside any bracket. */
    String s3 = "[homme:toto]-[grenouille]";
    /** Two segments surrounded by text that lies outside any bracket. */
    String s4 = "bla[homme:toto]blabla[grenouille]blablabla";
    /** Variant with extra whitespace; not referenced by any test in this class. */
    String s5 = " [homme : toto]- [ grenouille] ";

    // ---- Malformed inputs ---------------------------------------------------

    /** Blank segment following a regular one: expected to be rejected. */
    String se1 = "[homme:toto][ ]";
    /** Segment ending with a colon (missing lemma). */
    String se2 = "[homme:]-[grenouille]";
    /** Segment starting with a colon (missing substring). */
    String se3 = "[homme:ho]-[:grenouille]";
    /** Opening bracket encountered while a segment is still open. */
    String se4 = "[homme:ho-[hoih:grenouille]";
    /** Closing bracket encountered while no segment is open. */
    String se5 = "[homme:ho]]-[jpo:grenouille]";
    /** Last segment never closed. */
    String se6 = "[homme:ho]-[hoih:grenouille";

    /** Rule used by the error tests to declare the expected exception. */
    @Rule
    public ExpectedException thrown = ExpectedException.none();

    /** Parser under test; a fresh instance is created before each test. */
    SegmentationParser parser;

    @Before
    public void setUp() {
        parser = new SegmentationParser();
    }

    /**
     * Declares that the remainder of the current test must throw a
     * {@link ResourceFormatException} whose message contains the given text.
     *
     * @param messageFragment text that must appear in the exception message
     */
    private void expectFormatError(String messageFragment) {
        thrown.expect(ResourceFormatException.class);
        thrown.expectMessage(StringContains.containsString(messageFragment));
    }

    // ---- Successful parsing -------------------------------------------------

    /**
     * The string of a parsed segmentation is the input with the brackets and
     * the lemma annotations removed; text outside the brackets is kept.
     */
    @Test
    public void testGetTargetString() throws ResourceInitializationException {
        // Each row is {input, expected string}; rows are checked in order.
        String[][] cases = {
            {s1, "reinscription"},
            {s2, "reinscription"},
            {s3, "homme-grenouille"},
            {s4, "blahommeblablagrenouilleblablabla"},
        };
        for (String[] row : cases) {
            assertEquals(row[1], parser.parse(row[0]).getString());
        }
    }

    /** Without an explicit lemma, the expected lemma equals the substring. */
    @Test
    public void testParseWithoutLemma() throws ResourceInitializationException {
        assertThat(parser.parse(s1).getSegments())
            .hasSize(2)
            .extracting("begin", "end", "lemma", "substring")
            .containsExactly(
                tuple(0, 2, "re", "re"),
                tuple(2, 13, "inscription", "inscription"));
    }

    /** With an explicit lemma, the text after the colon is the expected lemma. */
    @Test
    public void testParseWithLemma() throws ResourceInitializationException {
        assertThat(parser.parse(s2).getSegments())
            .hasSize(2)
            .extracting("begin", "end", "lemma", "substring")
            .containsExactly(
                tuple(0, 2, "toto", "re"),
                tuple(2, 13, "tata", "inscription"));
    }

    /**
     * The hyphen between the two segments occupies one position in the
     * expected offsets: the first segment ends at 5, the second begins at 6.
     */
    @Test
    public void testParseWithHyphen() throws ResourceInitializationException {
        assertThat(parser.parse(s3).getSegments())
            .hasSize(2)
            .extracting("begin", "end", "lemma", "substring")
            .containsExactly(
                tuple(0, 5, "toto", "homme"),
                tuple(6, 16, "grenouille", "grenouille"));
    }

    /**
     * Text outside the brackets shifts the expected offsets ("bla" puts the
     * first segment at 3) but produces no segment of its own.
     */
    @Test
    public void testParseWithUnsegmentedText() throws ResourceInitializationException {
        assertThat(parser.parse(s4).getSegments())
            .hasSize(2)
            .extracting("begin", "end", "lemma", "substring")
            .containsExactly(
                tuple(3, 8, "toto", "homme"),
                tuple(14, 24, "grenouille", "grenouille"));
    }

    /**
     * A lone empty pair of brackets, with or without surrounding blanks, is
     * expected to yield a segmentation of size zero. Compare with
     * {@link #shouldRaiseErrorOnEmptySegment()}, where a blank segment after
     * a regular one is expected to fail.
     */
    @Test
    public void testParseEmpty() throws ResourceInitializationException {
        String[] emptyInputs = {"[]", " [ ] "};
        for (String input : emptyInputs) {
            assertEquals(0, parser.parse(input).size());
        }
    }

    // ---- Error reporting ----------------------------------------------------

    @Test
    public void shouldRaiseErrorOnEmptySegment() throws ResourceInitializationException {
        expectFormatError("Empty segment not allowed");
        parser.parse(se1);
    }

    @Test
    public void shouldRaiseErrorOnEndSegmentWithCol() throws ResourceInitializationException {
        expectFormatError("Cannot end segment with \":\"");
        parser.parse(se2);
    }

    @Test
    public void shouldRaiseErrorOnStartSegmentWithCol() throws ResourceInitializationException {
        expectFormatError("Cannot start segment with \":\"");
        parser.parse(se3);
    }

    @Test
    public void shouldRaiseErrorOnIllegalOpeningBracket() throws ResourceInitializationException {
        expectFormatError("Illegal character \"[\"");
        parser.parse(se4);
    }

    @Test
    public void shouldRaiseErrorOnIllegalClosingBracket() throws ResourceInitializationException {
        expectFormatError("Illegal character \"]\"");
        parser.parse(se5);
    }

    @Test
    public void shouldRaiseErrorOnMissingClosingBracket() throws ResourceInitializationException {
        expectFormatError("Expected \"]\"");
        parser.parse(se6);
    }
}