/******************************************************************************* * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique) * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * *******************************************************************************/ package eu.project.ttc.test.unit.models; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.tuple; import static org.junit.Assert.fail; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import eu.project.ttc.models.OccurrenceType; import eu.project.ttc.models.Term; import eu.project.ttc.models.TermClass; import eu.project.ttc.models.TermIndex; import eu.project.ttc.models.VariationType; import eu.project.ttc.models.index.TermIndexes; import eu.project.ttc.models.index.TermValueProvider; import eu.project.ttc.models.index.TermValueProviders; import eu.project.ttc.test.unit.Fixtures; public class TermSpec { private Term term1; private Term term2; private Term term3; private Term term4; private Term term5; @Before public void setTerms() throws NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException { this.term1 = Fixtures.term1(); this.term2 = Fixtures.term2(); this.term3 = Fixtures.term3(); this.term4 = Fixtures.term4(); this.term5 = Fixtures.term5(); } private Term termWithContext1; private Term termWithContext2; private Term termWithContext3; private TermIndex termIndex; @Before public void initContexts() { termIndex = Fixtures.termIndexWithOccurrences(); termIndex.createOccurrenceIndex(); termWithContext1 = termIndex.getTermByGroupingKey("n: énergie"); termWithContext2 = termIndex.getTermByGroupingKey("a: éolien"); termWithContext3 = termIndex.getTermByGroupingKey("n: accès"); } private void initTermClasses() { TermClass termClass1 = new TermClass(termWithContext1, ImmutableSet.of(termWithContext1)); TermClass termClass2 = new TermClass(termWithContext2, ImmutableSet.of(termWithContext2, termWithContext3)); termWithContext1.setTermClass(termClass1); termWithContext2.setTermClass(termClass2); termWithContext3.setTermClass(termClass2); } @Test public void testGetLemmaStemKeys() { TermValueProvider provider = TermValueProviders.get(TermIndexes.WORD_COUPLE_LEMMA_STEM); Assert.assertEquals( ImmutableList.of("energie+eol"), provider.getClasses(termIndex, term1)); Assert.assertEquals( ImmutableList.of(), provider.getClasses(termIndex, term2)); Assert.assertEquals( ImmutableList.of("acces+radioelectriq", "acces+recouvr", "radioelectrique+recouvr"), provider.getClasses(termIndex, term3)); } @Test public void computeContextVectorScope1() { termWithContext1.computeContextVector(OccurrenceType.SINGLE_WORD, 1, 1, false); termWithContext2.computeContextVector(OccurrenceType.SINGLE_WORD, 1, 1, false); termWithContext3.computeContextVector(OccurrenceType.SINGLE_WORD, 1, 1, false); // T1 T2 T3 T1 T3 T3 T1 assertThat(termWithContext1.getContextVector().getEntries()) .hasSize(2) .extracting("coTerm.groupingKey", "nbCooccs", "assocRate") .contains(tuple("a: éolien", 1, 0d), tuple("n: accès", 3, 0d)); assertThat(termWithContext2.getContextVector().getEntries()) .hasSize(2) .extracting("coTerm.groupingKey", "nbCooccs", "assocRate") .contains(tuple("n: énergie", 1, 0d), tuple("n: accès", 1, 0d)); assertThat(termWithContext3.getContextVector().getEntries()) .hasSize(2) .extracting("coTerm.groupingKey", "nbCooccs", "assocRate") .contains(tuple("n: énergie", 3, 0d), tuple("a: éolien", 1, 0d)); } @Test public void computeContextVectorScope3() { termWithContext1.computeContextVector(OccurrenceType.SINGLE_WORD, 3, 1, false); termWithContext2.computeContextVector(OccurrenceType.SINGLE_WORD, 3, 1, false); termWithContext3.computeContextVector(OccurrenceType.SINGLE_WORD, 3, 1, false); // T1 T2 T3 T1 T3 T3 T1 assertThat(termWithContext1.getContextVector().getEntries()) .hasSize(2) .extracting("coTerm.groupingKey", "nbCooccs", "assocRate") .contains(tuple("a: éolien", 2, 0d), tuple("n: accès", 6, 0d)); assertThat(termWithContext2.getContextVector().getEntries()) .hasSize(2) .extracting("coTerm.groupingKey", "nbCooccs", "assocRate") .contains(tuple("n: énergie", 2, 0d), tuple("n: accès", 2, 0d)); assertThat(termWithContext3.getContextVector().getEntries()) .hasSize(2) .extracting("coTerm.groupingKey", "nbCooccs", "assocRate") .contains(tuple("n: énergie", 6, 0d), tuple("a: éolien", 2, 0d)); } @Test public void computeContextVectorWithTermClassesRaiseErrorIfNoTermClass() { try { termWithContext1.computeContextVector(OccurrenceType.SINGLE_WORD, 3, 1, true); fail("should raise error"); } catch(IllegalStateException e) { // ok } catch(Exception e) { fail("Unexpected exception"); } initTermClasses(); // should not raise error termWithContext1.computeContextVector(OccurrenceType.SINGLE_WORD, 3, 1, true); } @Test public void computeContextVectorWithTermClasses() { initTermClasses(); termWithContext1.computeContextVector(OccurrenceType.SINGLE_WORD, 3, 1, true); termWithContext2.computeContextVector(OccurrenceType.SINGLE_WORD, 3, 1, true); termWithContext3.computeContextVector(OccurrenceType.SINGLE_WORD, 3, 1, true); assertThat(termWithContext1.getContextVector().getEntries()) .hasSize(1) .extracting("coTerm.groupingKey", "nbCooccs", "assocRate") .contains(tuple("a: éolien", 8, 0d)); assertThat(termWithContext2.getContextVector().getEntries()) .hasSize(1) .extracting("coTerm.groupingKey", "nbCooccs", "assocRate") .contains(tuple("n: énergie", 2, 0d)); assertThat(termWithContext3.getContextVector().getEntries()) .hasSize(1) .extracting("coTerm.groupingKey", "nbCooccs", "assocRate") .contains(tuple("n: énergie", 6, 0d)); } @Test public void testAddTermVariation() { assertThat(this.term5.getVariations()).hasSize(0); assertThat(this.term5.getBases()).hasSize(0); assertThat(this.term3.getVariations()).hasSize(0); assertThat(this.term3.getBases()).hasSize(0); assertThat(this.term4.getVariations()).hasSize(0); assertThat(this.term4.getBases()).hasSize(0); term5.addTermVariation(term3, VariationType.SYNTACTICAL, "Tata"); assertThat(this.term5.getVariations()).hasSize(1); assertThat(this.term5.getBases()).hasSize(0); assertThat(this.term3.getVariations()).hasSize(0); assertThat(this.term3.getBases()).hasSize(1); assertThat(this.term3.getBases()).extracting("info").containsExactly("Tata"); term5.addTermVariation(term4, VariationType.SYNTACTICAL, "Tata"); assertThat(this.term5.getVariations()).hasSize(2); assertThat(this.term5.getBases()).hasSize(0); assertThat(this.term3.getVariations()).hasSize(0); assertThat(this.term3.getBases()).hasSize(1); assertThat(this.term4.getVariations()).hasSize(0); assertThat(this.term4.getBases()).hasSize(1); assertThat(this.term5.getVariations()).extracting("info").containsExactly("Tata","Tata"); term5.addTermVariation(term3, VariationType.SYNTACTICAL, "Tata"); assertThat(this.term5.getVariations()).hasSize(2); assertThat(this.term5.getBases()).hasSize(0); assertThat(this.term3.getVariations()).hasSize(0); assertThat(this.term3.getBases()).hasSize(1); assertThat(this.term4.getVariations()).hasSize(0); assertThat(this.term4.getBases()).hasSize(1); assertThat(this.term5.getVariations()).extracting("info").containsExactly("Tata","Tata"); } @Test public void getVariationPaths() { assertThat(term5.getVariationPaths(0)).isEmpty(); term5.addTermVariation(term3, VariationType.SYNTACTICAL, "Tata"); assertThat(term5.getVariationPaths(0)).isEmpty(); assertThat(term5.getVariationPaths(1)).hasSize(1).extracting("variant").contains(term3); assertThat(term5.getVariationPaths(10)).hasSize(1).extracting("variant").contains(term3); term3.addTermVariation(term4, VariationType.SYNTACTICAL, "Toto"); assertThat(term5.getVariationPaths(0)).isEmpty(); assertThat(term5.getVariationPaths(1)).hasSize(1).extracting("variant").contains(term3); assertThat(term5.getVariationPaths(2)).hasSize(2).extracting("variant").contains(term3, term4); assertThat(term5.getVariationPaths(10)).hasSize(2).extracting("variant").contains(term3, term4); term4.addTermVariation(term5, VariationType.SYNTACTICAL, "Toto"); assertThat(term5.getVariationPaths(0)).isEmpty(); assertThat(term5.getVariationPaths(1)).hasSize(1).extracting("variant").contains(term3); assertThat(term5.getVariationPaths(2)).hasSize(2).extracting("variant").contains(term3, term4); assertThat(term5.getVariationPaths(3)).hasSize(3).extracting("variant").contains(term3, term4, term5); // handles cycles assertThat(term5.getVariationPaths(10)).hasSize(3).extracting("variant").contains(term3, term4, term5); } @Test public void testGetLemmaKeys() { TermValueProvider provider = TermValueProviders.get(TermIndexes.WORD_LEMMA); assertThat(provider.getClasses(termIndex, term1)) .hasSize(2) .contains("énergie", "éolien"); assertThat(provider.getClasses(termIndex, term2)) .hasSize(1) .contains("radioélectrique"); assertThat(provider.getClasses(termIndex, term3)) .hasSize(4) .contains("accès", "radioélectrique", "de", "recouvrement"); } }