package eu.project.ttc.test.func.tools.cmd;
import static eu.project.ttc.test.TermSuiteAssertions.assertThat;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import org.junit.FixMethodOrder;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runners.MethodSorters;
import com.google.common.base.Splitter;
import eu.project.ttc.api.TermIndexIO;
import eu.project.ttc.api.Traverser;
import eu.project.ttc.engines.desc.Lang;
import eu.project.ttc.models.TermIndex;
import eu.project.ttc.models.VariationType;
import eu.project.ttc.test.TermSuiteAssertions;
import eu.project.ttc.test.func.FunctionalTests;
import eu.project.ttc.tools.cli.TermSuiteTerminoCLI;
import eu.project.ttc.utils.FileUtils;
/**
 * Functional tests that drive {@link TermSuiteTerminoCLI#main(String[])} with
 * various command-line options and verify the files it writes.
 *
 * <p>Every test writes its outputs into a per-test {@link TemporaryFolder}.
 * Test methods run in ascending name order.
 */
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
public class TermSuiteTerminoCLISpec {

    /** Scratch directory that receives the files exported by the CLI. */
    @Rule
    public TemporaryFolder folder = new TemporaryFolder();

    /**
     * Launches the CLI with the three export options ({@code --tsv},
     * {@code --tbx}, {@code --json}) and checks that each of the three
     * requested files is created, then inspects the JSON export.
     */
    @Test
    public void testTerminoEnBasic() throws Exception {
        Path jsonPath = outputPath("termino.json");
        Path tbxPath = outputPath("tbx.json");
        Path tsvPath = outputPath("tsv.json");

        // None of the three outputs may exist before the run.
        assertThat(jsonPath.toFile()).doesNotExist();
        assertThat(tbxPath.toFile()).doesNotExist();
        assertThat(tsvPath.toFile()).doesNotExist();

        launch(String.format("-t %s -c %s -l %s --tsv %s --tbx %s --json %s" ,
                FunctionalTests.getTaggerPath(),
                FunctionalTests.getCorpusWEShortPath(Lang.EN),
                Lang.EN.getCode(),
                tsvPath.toString(),
                tbxPath.toString(),
                jsonPath.toString()
            ));

        // Each export option must have produced its own file.
        assertThat(jsonPath.toFile()).exists();
        assertThat(tbxPath.toFile()).exists();
        assertThat(tsvPath.toFile()).exists();

        TermIndex termindex = TermIndexIO.fromJson(jsonPath);
        assertThat(termindex).containsTerm("nn: wind energy").hasSize(974);
        // No --contextualize option was passed, so no context vector is expected.
        assertFalse(termindex.getTermByGroupingKey("nn: wind energy").isContextVectorComputed());
    }

    /**
     * Launches the CLI with a custom list of TSV properties and
     * {@code --tsv-show-scores}, then checks the first two lines of the
     * exported TSV file.
     */
    @Test
    public void testTerminoEnWithSpecificTsvExport() throws Exception {
        Path tsvPath = outputPath("tsv.json");
        assertThat(tsvPath.toFile()).doesNotExist();

        launch(String.format("-t %s -c %s -l %s --tsv %s "
                + "--tsv-properties pilot,frequency,dfreq,pattern,spottingRule "
                + "--tsv-show-scores" ,
                FunctionalTests.getTaggerPath(),
                FunctionalTests.getCorpusWEShortPath(Lang.EN),
                Lang.EN.getCode(),
                tsvPath.toString()
            ));

        assertThat(tsvPath.toFile()).exists();
        String tsvContent = FileUtils.readFile(tsvPath.toString(), Charset.defaultCharset());
        TermSuiteAssertions.assertThat(tsvContent)
            .tsvLineEquals(1, "#","type","pilot","f","dfreq","p", "rule")
            .tsvLineEquals(2, "1","T","rotor","96","2", "N", "n")
            ;
    }

    /**
     * Launches the CLI with a threshold filter on the {@code dfreq} property
     * and checks the content and size of the resulting term index.
     */
    @Test
    public void testTerminoEnWithFilterTh() throws Exception {
        Path jsonPath = outputPath("termino.json");
        assertThat(jsonPath.toFile()).doesNotExist();

        launch(String.format("-t %s -c %s -l %s --json %s --filter-property dfreq --filter-th 2" ,
                FunctionalTests.getTaggerPath(),
                FunctionalTests.getCorpusWEShortPath(Lang.EN),
                Lang.EN.getCode(),
                jsonPath.toString()
            ));

        assertThat(jsonPath.toFile()).exists();
        TermIndex termindex = TermIndexIO.fromJson(jsonPath);
        assertThat(termindex)
            .containsTerm("nn: wind energy")
            .containsVariation("nn: wind energy", VariationType.SYNTACTICAL, "ann: offshore wind energy")
            .hasSize(144);
    }

    /**
     * Launches the CLI with a top-100 filter on the {@code specificity}
     * property (with {@code --filter-variants}) and checks that the exported
     * index holds exactly 100 terms.
     */
    @Test
    public void testTerminoEnWithFilterTop100() throws Exception {
        Path jsonPath = outputPath("termino.json");
        assertThat(jsonPath.toFile()).doesNotExist();

        launch(String.format("-t %s -c %s -l %s --json %s --filter-property specificity --filter-top-n 100 --filter-variants" ,
                FunctionalTests.getTaggerPath(),
                FunctionalTests.getCorpusWEShortPath(Lang.EN),
                Lang.EN.getCode(),
                jsonPath.toString()
            ));

        assertThat(jsonPath.toFile()).exists();
        TermIndex termindex = TermIndexIO.fromJson(jsonPath);
        assertThat(termindex).containsTerm("nn: wind turbine").hasSize(100);
    }

    /**
     * Launches the CLI with {@code --contextualize} but without
     * {@code --contextualize-all-terms}, and checks that the term
     * {@code "n: wind"} has a context vector while {@code "nn: wind energy"}
     * does not.
     */
    @Test
    public void testTerminoEnContextualizeSWTOnly() throws Exception {
        Path jsonPath = outputPath("termino1.json");
        assertThat(jsonPath.toFile()).doesNotExist();
        // Sanity check: the target directory must be writable before launching.
        assertTrue(jsonPath.toFile().getAbsoluteFile().getParentFile().canWrite());

        launch(String.format("-t %s -c %s -l %s --json %s "
                + "--contextualize --context-scope 4 --allow-mwts-in-contexts" ,
                FunctionalTests.getTaggerPath(),
                FunctionalTests.getCorpusWEShortPath(Lang.EN),
                Lang.EN.getCode(),
                jsonPath.toString()
            ));

        assertThat(jsonPath.toFile()).exists();
        TermIndex termindex = TermIndexIO.fromJson(jsonPath);
        assertThat(termindex).containsTerm("nn: wind energy");
        assertTrue(termindex.getTermByGroupingKey("n: wind").isContextVectorComputed());
        assertFalse(termindex.getTermByGroupingKey("nn: wind energy").isContextVectorComputed());
    }

    /**
     * Launches the CLI with {@code --contextualize} and
     * {@code --contextualize-all-terms}, and checks that both
     * {@code "n: wind"} and {@code "nn: wind energy"} have a context vector.
     */
    @Test
    public void testTerminoEnContextualizeAllTerms() throws Exception {
        Path jsonPath = outputPath("termino2.json");
        assertThat(jsonPath.toFile()).doesNotExist();

        launch(String.format("-t %s -c %s -l %s --json %s "
                + "--contextualize --context-scope 2 --contextualize-all-terms --allow-mwts-in-contexts" ,
                FunctionalTests.getTaggerPath(),
                FunctionalTests.getCorpusWEShortPath(Lang.EN),
                Lang.EN.getCode(),
                jsonPath.toString()
            ));

        assertThat(jsonPath.toFile()).exists();
        TermIndex termindex = TermIndexIO.fromJson(jsonPath);
        assertThat(termindex).containsTerm("nn: wind energy");
        assertTrue(termindex.getTermByGroupingKey("n: wind").isContextVectorComputed());
        assertTrue(termindex.getTermByGroupingKey("nn: wind energy").isContextVectorComputed());
    }

    /**
     * Resolves a file name against the root of the temporary folder.
     *
     * @param fileName name of the file inside the temporary folder
     * @return the path of that file (the file itself is not created)
     */
    private Path outputPath(String fileName) {
        return Paths.get(folder.getRoot().getAbsolutePath(), fileName);
    }

    /**
     * Splits the given command line on single spaces and passes the resulting
     * tokens to {@link TermSuiteTerminoCLI#main(String[])}.
     *
     * <p>NOTE: because the split is done on every space character, an argument
     * value that itself contains a space (e.g. a path) would be broken into
     * several arguments.
     *
     * @param args the space-separated command-line arguments
     * @throws Exception whatever the CLI entry point throws
     */
    private void launch(String args) throws Exception {
        List<String> argList = Splitter.on(" ").splitToList(args);
        TermSuiteTerminoCLI.main(argList.toArray(new String[argList.size()]));
    }
}