package eu.project.ttc.test.func.tools.builders;
import static eu.project.ttc.test.TermSuiteAssertions.assertThat;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.assertFalse;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Iterator;
import java.util.List;
import org.apache.uima.jcas.JCas;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import com.google.common.collect.Lists;
import eu.project.ttc.api.Document;
import eu.project.ttc.api.TermSuitePreprocessor;
import eu.project.ttc.engines.desc.Lang;
import eu.project.ttc.test.func.FunctionalTests;
import eu.project.ttc.types.TermOccAnnotation;
import eu.project.ttc.types.WordAnnotation;
/**
 * Functional tests for {@link TermSuitePreprocessor}.
 *
 * <p>The tests cover the three ways of feeding the preprocessor that are
 * used below (a text corpus directory, a single text string and a stream of
 * {@link Document}s) as well as the JSON export of the preprocessed CASes.
 *
 * <p>Every test configures the preprocessor with
 * {@link FunctionalTests#getTaggerPath()} as TreeTagger home.
 */
public class TermSuitePreprocessorSpec {

	/** Glob given to {@code fromTxtCorpus} to select every file of the corpus. */
	private static final String ALL_FILES_GLOB = "**/*";

	/** Glob given to {@code fromTxtCorpus} to select {@code .txt} and {@code .document} files. */
	private static final String TXT_AND_DOCUMENT_GLOB = "**/*.{txt,document}";

	/** Output directory of the JSON export test; managed by JUnit. */
	@Rule
	public TemporaryFolder folder = new TemporaryFolder();

	Lang lang;
	List<Document> documents;
	Document document1;
	Document document2;

	/**
	 * Creates the two in-memory French documents used by the string-based and
	 * stream-based tests.
	 */
	@Before
	public void setup() {
		lang = Lang.FR;
		document1 = new Document(lang, "url1", "L'énergie éolienne est l'énergie de demain.");
		document2 = new Document(lang, "url2", "Une éolienne produit de l'énergie.");
		documents = Lists.newArrayList();
		documents.add(document1);
		documents.add(document2);
	}

	/**
	 * Preprocesses the test corpus and exports it as JSON into the temporary
	 * folder, then checks that one JSON file exists for {@code file1} and one
	 * for {@code dir1/file3}.
	 */
	@Test
	public void testFromTxtToJson() {
		// NOTE(review): the export uses the platform default charset, so the
		// written bytes depend on the machine running the test.
		TermSuitePreprocessor
			.fromTxtCorpus(lang, FunctionalTests.CORPUS1_PATH.toString())
			.setTreeTaggerHome(FunctionalTests.getTaggerPath())
			.toJson(folder.getRoot().getAbsolutePath(), Charset.defaultCharset().name())
			.execute();

		Path outputDir = Paths.get(folder.getRoot().getAbsolutePath());
		assertThat(outputDir.resolve("file1.json").toFile()).exists();
		assertThat(outputDir.resolve("dir1").resolve("file3.json").toFile()).exists();
	}

	/**
	 * Without an explicit glob, the corpus yields exactly two CASes:
	 * {@code file1.txt} and {@code dir1/file3.txt}.
	 */
	@Test
	public void testFromTxtCorpusExtTxt() {
		Iterator<JCas> it = TermSuitePreprocessor
			.fromTxtCorpus(lang, FunctionalTests.CORPUS1_PATH.toString())
			.setTreeTaggerHome(FunctionalTests.getTaggerPath())
			.stream().iterator();

		JCas cas1 = it.next();
		JCas cas2 = it.next();
		assertFalse(it.hasNext());

		assertThat(cas1).hasUrl(corpusFileUrl("file1.txt"));
		assertThat(cas2).hasUrl(corpusFileUrl("dir1", "file3.txt"));
	}

	/**
	 * A glob restricted to the {@code txt} and {@code document} extensions
	 * yields the three corpus documents.
	 */
	@Test
	public void testFromTxtCorpusExtTxtAndDocument() {
		assertAllDocuments(corpusIterator(TXT_AND_DOCUMENT_GLOB));
	}

	/**
	 * A catch-all wildcard glob yields the three corpus documents.
	 */
	@Test
	public void testFromTxtCorpusExtWildcard() {
		assertAllDocuments(corpusIterator(ALL_FILES_GLOB));
	}

	/**
	 * Preprocessing a single text string yields exactly one CAS carrying the
	 * expected term occurrence and word annotations.
	 */
	@Test
	public void testPreprocessorFromTextString() {
		Iterator<JCas> iterator = TermSuitePreprocessor
			.fromTextString(lang, document1.getText())
			.setTreeTaggerHome(FunctionalTests.getTaggerPath())
			.stream().iterator();

		JCas cas = iterator.next();
		assertFalse(iterator.hasNext());

		assertFirstDocumentAnnotations(cas);
	}

	/**
	 * Preprocessing a stream of two documents yields exactly two CASes, in
	 * stream order, each carrying the expected annotations.
	 */
	@Test
	public void testPreprocessorFromDocumentStream() {
		// NOTE(review): the last argument is presumably the number of
		// documents in the stream - confirm against fromDocumentStream.
		Iterator<JCas> it = TermSuitePreprocessor
			.fromDocumentStream(lang, documents.stream(), 2)
			.setTreeTaggerHome(FunctionalTests.getTaggerPath())
			.stream().iterator();

		JCas cas1 = it.next();
		JCas cas2 = it.next();
		assertFalse(it.hasNext());

		assertFirstDocumentAnnotations(cas1);
		// Character span [13,33) of document2 is "produit de l'énergie",
		// span [4,12) is "éolienne".
		assertThat(cas2)
			.containsAnnotation(TermOccAnnotation.class, 13, 33)
			.containsAnnotation(WordAnnotation.class, 4, 12);
	}

	/**
	 * Opens the test corpus with the given glob and returns an iterator over
	 * the preprocessed CASes.
	 *
	 * @param glob the file selection pattern handed to {@code fromTxtCorpus}
	 * @return an iterator over the CAS stream produced by the preprocessor
	 */
	private Iterator<JCas> corpusIterator(String glob) {
		return TermSuitePreprocessor
			.fromTxtCorpus(lang, FunctionalTests.CORPUS1_PATH.toString(), glob)
			.setTreeTaggerHome(FunctionalTests.getTaggerPath())
			.stream().iterator();
	}

	/**
	 * Asserts that the iterator yields exactly three CASes whose URLs are, in
	 * order, {@code file2.document}, {@code file1.txt} and
	 * {@code dir1/file3.txt} of the test corpus.
	 *
	 * @param it the CAS iterator to consume
	 */
	private void assertAllDocuments(Iterator<JCas> it) {
		JCas cas1 = it.next();
		JCas cas2 = it.next();
		JCas cas3 = it.next();
		assertFalse(it.hasNext());

		assertThat(cas1).hasUrl(corpusFileUrl("file2.document"));
		assertThat(cas2).hasUrl(corpusFileUrl("file1.txt"));
		assertThat(cas3).hasUrl(corpusFileUrl("dir1", "file3.txt"));
	}

	/**
	 * Asserts the annotations expected on the CAS built from
	 * {@link #document1}: character span [2,18) is "énergie éolienne" and
	 * span [2,9) is "énergie".
	 *
	 * @param cas the CAS produced for the text of {@code document1}
	 */
	private static void assertFirstDocumentAnnotations(JCas cas) {
		assertThat(cas)
			.containsAnnotation(TermOccAnnotation.class, 2, 18)
			.containsAnnotation(WordAnnotation.class, 2, 9);
	}

	/**
	 * Builds the URL expected for a corpus file: the working directory
	 * ({@code user.dir}), followed by the corpus path, followed by the given
	 * segments.
	 *
	 * @param segments path segments of the file, relative to the corpus root
	 * @return the expected URL as a path string
	 */
	private static String corpusFileUrl(String... segments) {
		Path path = Paths.get(
				System.getProperty("user.dir"),
				FunctionalTests.CORPUS1_PATH.toString());
		for (String segment : segments) {
			path = path.resolve(segment);
		}
		return path.toString();
	}
}