package eu.project.ttc.test.func.tools.builders; import static eu.project.ttc.test.TermSuiteAssertions.assertThat; import java.nio.file.Paths; import java.util.List; import org.junit.Before; import org.junit.Test; import com.google.common.collect.Lists; import eu.project.ttc.api.Document; import eu.project.ttc.api.TerminoExtractor; import eu.project.ttc.engines.desc.Lang; import eu.project.ttc.models.TermIndex; import eu.project.ttc.test.func.FunctionalTests; public class TerminoExtractorSpec { Lang lang; List<Document> documents; Document document1; Document document2; @Before public void setup() { lang = Lang.FR; documents = Lists.newArrayList(); document1 = new Document(lang, "url1", "L'énergie éolienne est l'énergie de demain."); documents.add(document1); document2 = new Document(lang, "url2", "Une éolienne produit de l'énergie."); documents.add(document2); } @Test public void fromPreprocessedJsonFiles() { String jsonDirPath = Paths.get(FunctionalTests.CORPUS2_PATH.toString(), "json").toString(); TermIndex termIndex = TerminoExtractor .fromPreprocessedJsonFiles(Lang.FR, jsonDirPath) .setTreeTaggerHome(FunctionalTests.getTaggerPath()) .execute(); assertTermIndex(termIndex); } @Test public void fromPreprocessedXmiFiles() { String jsonDirPath = Paths.get(FunctionalTests.CORPUS2_PATH.toString(), "xmi").toString(); TermIndex termIndex = TerminoExtractor .fromPreprocessedXmiFiles(Lang.FR, jsonDirPath) .setTreeTaggerHome(FunctionalTests.getTaggerPath()) .execute(); assertTermIndex(termIndex); } @Test public void fromTxtCorpus() { TermIndex termIndex = TerminoExtractor .fromTxtCorpus(Lang.FR, FunctionalTests.CORPUS1_PATH.toString(), "**/*.txt", "UTF-8") .setTreeTaggerHome(FunctionalTests.getTaggerPath()) .execute(); assertTermIndex(termIndex); } private void assertTermIndex(TermIndex termIndex) { assertThat(termIndex) .hasSize(7) .containsTerm("n: énergie", 4); } @Test public void fromCustomDocumentStream() { TermIndex termIndex = TerminoExtractor.fromDocumentStream(Lang.FR, documents.stream(), 2) .setTreeTaggerHome(FunctionalTests.getTaggerPath()) .execute(); assertThat(termIndex) .hasSize(7) .containsTerm("n: énergie", 3); } }