/*******************************************************************************
* Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*******************************************************************************/
package eu.project.ttc.test.unit.engines;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.tuple;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.resource.ExternalResourceDescription;
import org.junit.Before;
import org.junit.Test;
import eu.project.ttc.engines.GraphicalVariantGatherer;
import eu.project.ttc.engines.desc.Lang;
import eu.project.ttc.history.TermHistory;
import eu.project.ttc.history.TermHistoryResource;
import eu.project.ttc.models.Term;
import eu.project.ttc.models.TermIndex;
import eu.project.ttc.models.VariationType;
import eu.project.ttc.resources.TermIndexResource;
import eu.project.ttc.test.unit.Fixtures;
import eu.project.ttc.test.unit.TermFactory;
import eu.project.ttc.tools.TermSuiteResourceManager;
/**
 * Unit tests for {@link GraphicalVariantGatherer}.
 *
 * <p>Each test works on a fresh term index holding six single-word terms,
 * runs the gatherer with a given similarity threshold by calling
 * {@code collectionProcessComplete()}, and then inspects the bases and
 * variations attached to the fixture terms.
 */
public class GraphicalVariantGathererSpec {

    /** Name under which the term history is registered in the resource manager. */
    private static final String HISTORY_RESOURCE_NAME = "Toto";

    private TermIndex termIndex;

    /* Fixture terms; each field is named after the lemma it was created with. */
    private Term tetetete;
    private Term tetetetx;
    private Term teteteteAccent;
    private Term abcdefghijkl;
    private Term abcdefghijkx;
    private Term abcdefghijklCapped;

    /** Rebuilds the term index and the fixture terms before every test. */
    @Before
    public void setup() {
        this.termIndex = termIndex();
    }

    /**
     * Clears the shared resource manager, registers an empty term index in it
     * and populates that index with the fixture terms.
     *
     * @return the populated term index
     */
    private TermIndex termIndex() {
        TermSuiteResourceManager resources = TermSuiteResourceManager.getInstance();
        resources.clear();

        TermIndex index = Fixtures.emptyTermIndex();
        resources.register(index.getName(), index);

        // Creation order is deliberately kept stable.
        TermFactory factory = new TermFactory(index);
        tetetete = factory.create("N:tetetete|tetetete");
        tetetetx = factory.create("N:tetetetx|tetetetx");
        teteteteAccent = factory.create("N:tétetete|tétetete");
        abcdefghijklCapped = factory.create("N:Abcdefghijkl|Abcdefghijkl");
        abcdefghijkl = factory.create("N:abcdefghijkl|abcdefghijkl");
        abcdefghijkx = factory.create("N:abcdefghijkx|abcdefghijkx");
        return index;
    }

    /**
     * Builds a {@link GraphicalVariantGatherer} engine bound to a new term
     * history and to the term index of the current test.
     *
     * <p>The resource manager is cleared first, so both resources are
     * (re-)registered here.
     *
     * @param lang the language whose code is passed to the engine
     * @param similarityThreshold the value given to the engine's
     *        {@code SIMILARITY_THRESHOLD} parameter
     * @return the instantiated analysis engine
     * @throws Exception if the engine description or the engine cannot be created
     */
    private AnalysisEngine makeAE(Lang lang, float similarityThreshold) throws Exception {
        TermSuiteResourceManager resources = TermSuiteResourceManager.getInstance();
        resources.clear();

        AnalysisEngineDescription description = AnalysisEngineFactory.createEngineDescription(
                GraphicalVariantGatherer.class,
                GraphicalVariantGatherer.LANG, lang.getCode(),
                GraphicalVariantGatherer.SIMILARITY_THRESHOLD, similarityThreshold);

        /*
         * The history resource
         */
        resources.register(HISTORY_RESOURCE_NAME, new TermHistory());
        ExternalResourceDescription historyResource =
                ExternalResourceFactory.createExternalResourceDescription(
                        TermHistoryResource.TERM_HISTORY,
                        TermHistoryResource.class,
                        HISTORY_RESOURCE_NAME);
        ExternalResourceFactory.bindResource(description, historyResource);

        /*
         * The term index resource
         */
        String termIndexName = this.termIndex.getName();
        resources.register(termIndexName, this.termIndex);
        ExternalResourceDescription termIndexResource =
                ExternalResourceFactory.createExternalResourceDescription(
                        TermIndexResource.TERM_INDEX,
                        TermIndexResource.class,
                        this.termIndex.getName());
        ExternalResourceFactory.bindResource(description, termIndexResource);

        return AnalysisEngineFactory.createEngine(description);
    }

    /**
     * Creates a French engine with the given threshold and triggers its
     * end-of-collection processing.
     */
    private void gatherWithThreshold(float similarityThreshold) throws Exception {
        AnalysisEngine engine = makeAE(Lang.FR, similarityThreshold);
        engine.collectionProcessComplete();
    }

    /**
     * Threshold 1.0: the capitalised term must be the only base of its
     * lower-case counterpart, and the lower-case term its only variant.
     */
    @Test
    public void testCaseInsensitive() throws Exception {
        gatherWithThreshold(1.0f);

        assertThat(abcdefghijkl.getBases())
                .hasSize(1)
                .extracting("base")
                .contains(abcdefghijklCapped);
        assertThat(abcdefghijkl.getVariations()).isEmpty();

        assertThat(abcdefghijklCapped.getVariations())
                .hasSize(1)
                .extracting("variant")
                .contains(abcdefghijkl);
        assertThat(abcdefghijklCapped.getBases()).isEmpty();
    }

    /**
     * Threshold 1.0: the accented term must be the single graphical variant
     * of the unaccented one.
     */
    @Test
    public void testWithDiacritics() throws AnalysisEngineProcessException, Exception {
        gatherWithThreshold(1.0f);

        assertThat(tetetete.getVariations())
                .hasSize(1)
                .extracting("variationType", "variant")
                .contains(tuple(VariationType.GRAPHICAL, teteteteAccent));
    }

    /**
     * Threshold 0.9: the twelve-letter terms differing by one character are
     * gathered, while the eight-letter term still only has its accented variant.
     */
    @Test
    public void testWith0_9() throws AnalysisEngineProcessException, Exception {
        gatherWithThreshold(0.9f);

        assertThat(abcdefghijklCapped.getVariations())
                .hasSize(2)
                .extracting("variant")
                .contains(abcdefghijkl, abcdefghijkx);

        assertThat(tetetete.getVariations())
                .hasSize(1)
                .extracting("variationType", "variant")
                .contains(tuple(VariationType.GRAPHICAL, teteteteAccent));
    }

    /**
     * Threshold 0.8: in addition to the 0.9 expectations, the eight-letter
     * term differing by one character is gathered as a graphical variant.
     */
    @Test
    public void testWith0_8() throws AnalysisEngineProcessException, Exception {
        gatherWithThreshold(0.8f);

        assertThat(abcdefghijklCapped.getVariations())
                .hasSize(2)
                .extracting("variant")
                .contains(abcdefghijkl, abcdefghijkx);

        assertThat(tetetete.getVariations())
                .hasSize(2)
                .extracting("variationType", "variant")
                .contains(
                        tuple(VariationType.GRAPHICAL, teteteteAccent),
                        tuple(VariationType.GRAPHICAL, tetetetx));
    }
}