/******************************************************************************* * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique) * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * *******************************************************************************/ package eu.project.ttc.test.func; import static eu.project.ttc.test.TermSuiteAssertions.assertThat; import static org.assertj.core.api.Assertions.tuple; import java.util.List; import org.assertj.core.api.iterable.Extractor; import org.assertj.core.groups.Tuple; import org.assertj.core.util.Lists; import org.junit.Before; import org.junit.Test; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import eu.project.ttc.engines.cleaner.TermProperty; import eu.project.ttc.engines.desc.Lang; import eu.project.ttc.engines.desc.TermSuiteCollection; import eu.project.ttc.models.Term; import eu.project.ttc.models.TermIndex; import eu.project.ttc.models.VariationType; import eu.project.ttc.tools.TermSuitePipeline; import eu.project.ttc.tools.TermSuiteResourceManager; import eu.project.ttc.tools.utils.ControlFilesGenerator; public abstract class WindEnergySpec { protected TermIndex termIndex = null; protected Lang lang; protected List<String> syntacticMatchingRules = Lists.newArrayList(); protected List<String> syntacticNotMatchingRules = Lists.newArrayList(); public WindEnergySpec() { super(); this.lang = getLang(); this.syntacticMatchingRules = getSyntacticMatchingRules(); this.syntacticNotMatchingRules = getSyntacticNotMatchingRules(); } protected abstract Lang getLang(); protected abstract List<String> getSyntacticMatchingRules(); protected abstract List<String> getSyntacticNotMatchingRules(); protected void expectNotMatchingRules(String... rules) { for(String rule:rules) syntacticNotMatchingRules.add(rule); } protected void expectMatchingRules(String... rules) { for(String rule:rules) syntacticMatchingRules.add(rule); } private static final LoadingCache<Lang, TermIndex> TERM_INDEX_CACHE = CacheBuilder.newBuilder() .maximumSize(1) .build(new CacheLoader<Lang, TermIndex>() { @Override public TermIndex load(Lang lang) throws Exception { return runPipeline(lang); } }); @Before public void setup() { this.termIndex = TERM_INDEX_CACHE.getUnchecked(lang); } protected static TermIndex runPipeline(Lang lang) { TermSuiteResourceManager manager = TermSuiteResourceManager.getInstance(); manager.clear(); TermSuitePipeline pipeline = TermSuitePipeline.create(lang.getCode()) .setCollection(TermSuiteCollection.TXT, FunctionalTests.getCorpusWEPath(lang), "UTF-8") .aeWordTokenizer() .setTreeTaggerHome(FunctionalTests.getTaggerPath()) .aeTreeTagger() .aeUrlFilter() .aeStemmer() .aeRegexSpotter() .aeStopWordsFilter() .aeSpecificityComputer() .aeCompostSplitter() .aePrefixSplitter() .aeSuffixDerivationDetector() .aeSyntacticVariantGatherer() .aeGraphicalVariantGatherer() .aeExtensionDetector() .aeRanker(TermProperty.WR, true) .run(); return pipeline.getTermIndex(); } @Test public void weControlSyntacticMatchingRules() { assertThat(termIndex) .asMatchingRules() .containsOnlyElementsOf(syntacticMatchingRules) .doesNotContainAnyElementsOf(syntacticNotMatchingRules); } @Test public void weControlPrefixes() { assertThat(termIndex) .asTermVariations(VariationType.IS_PREFIX_OF) .extracting("base.groupingKey", "variant.groupingKey") .containsOnly( ControlFiles.prefixVariationTuples(lang, "we") ); } @Test public void weControlDerivates() { assertThat(termIndex) .asTermVariations(VariationType.DERIVES_INTO) .extracting("info", "base.groupingKey", "variant.groupingKey") .containsOnly( ControlFiles.derivateVariationTuples(lang, "we") ); } @Test public void weCompounds() { assertThat(termIndex) .asCompoundList() .extracting(new Extractor<Term, Tuple>() { @Override public Tuple extract(Term compoundTerm) { return tuple( compoundTerm.getWords().get(0).getWord().getCompoundType().getShortName(), compoundTerm.getGroupingKey(), ControlFilesGenerator.toCompoundString(compoundTerm) ); } }) .containsOnly( ControlFiles.compoundTuples(lang, "we") ); } }