/*******************************************************************************
* Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*******************************************************************************/
package eu.project.ttc.test.func;
import static org.assertj.core.api.Assertions.tuple;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.List;
import org.assertj.core.groups.Tuple;
import org.assertj.core.util.Lists;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import eu.project.ttc.engines.desc.Lang;
import eu.project.ttc.models.CompoundType;
import eu.project.ttc.models.VariationType;
import eu.project.ttc.tools.utils.ControlFilesGenerator;
/**
 * Test helper that reads the tab-separated control files stored under
 * {@code src/test/resources/eu/project/ttc/test/corpus/<corpus>/<lang name>/control}
 * and converts their rows into AssertJ {@link Tuple}s.
 *
 * All failures are reported through unchecked exceptions
 * ({@link IllegalStateException} / {@link IllegalArgumentException}).
 */
public class ControlFiles {

    /** Root directory under which the test corpora (and their control directories) live. */
    private static final Path CONTROL_DIRECTORY_PATH = Paths.get("src", "test", "resources", "eu", "project", "ttc", "test", "corpus");

    /** Column separator used for every control file parsed by this class. */
    private static final String COLUMN_SEPARATOR = "\t";

    /**
     * Resolves the control directory of a corpus for a given language, i.e.
     * {@code <corpus root>/<corpus>/<lang name>/control}.
     *
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @return
     *          the path of the existing control directory
     * @throws IllegalStateException
     *          if the resolved path does not exist or is not a directory
     */
    public static Path getControlDirectory(Lang lang, String corpus) {
        Path controlDirectory = CONTROL_DIRECTORY_PATH.resolve(corpus).resolve(lang.getName()).resolve("control");
        File asFile = controlDirectory.toFile();
        Preconditions.checkState(asFile.exists(),
                "Control directory does not exist: %s", controlDirectory);
        Preconditions.checkState(asFile.isDirectory(),
                "Control path is not a directory: %s", controlDirectory);
        return controlDirectory;
    }

    /**
     * Reads the prefix variation control file and returns tuples of the form:
     *
     * tuple("source gKey", "target gKey")
     *
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @return
     *          the array of tuples parsed from control file
     */
    public static Tuple[] prefixVariationTuples(Lang lang, String corpus) {
        Path path = getControlDirectory(lang, corpus).resolve(ControlFilesGenerator.getPrefixFileName());
        return array(getPrefixVariationTuples(path.toFile()));
    }

    /**
     * Reads the derivation control file and returns tuples of the form:
     *
     * tuple("Deriv pattern", "source gKey", "target gKey")
     *
     * e.g. tuple("N A", "n: éolienne", "a: éolien")
     *
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @return
     *          the array of tuples parsed from control file
     */
    public static Tuple[] derivateVariationTuples(Lang lang, String corpus) {
        Path path = getControlDirectory(lang, corpus).resolve(ControlFilesGenerator.getDerivatesFileName());
        return array(getDerivateVariationTuples(path.toFile()));
    }

    /**
     * Reads the control file of one syntactic rule and returns tuples of the form:
     *
     * tuple("source gKey", "target gKey", VariationType)
     *
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @param ruleName
     *          the name of the syntactic rule to control
     * @return
     *          the list of tuples parsed from control file
     */
    public static Iterable<Tuple> syntacticVariationTuples(Lang lang, String corpus, String ruleName) {
        Path path = syntacticVariationControlFilePath(lang, corpus, ruleName);
        return getSyntacticVariationTuples(path.toFile(), ruleName);
    }

    /**
     * Returns an array of tuples having the following format :
     *
     * tuple("Compound type short name", "term gKey", "compound string")
     *
     * e.g. tuple("nat", "n: interconnexion", "inter:inter|connexion:connexion")
     *
     * @see CompoundType#getShortName()
     * @see ControlFilesGenerator#toCompoundString(eu.project.ttc.models.Term)
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @return
     *          the array of tuples parsed from control file
     */
    public static Tuple[] compoundTuples(Lang lang, String corpus) {
        Path path = getControlDirectory(lang, corpus).resolve(ControlFilesGenerator.getCompoundsFileName());
        return array(getCompoundTuples(path.toFile()));
    }

    /**
     * Reads the meaningful lines of a control file: every line is trimmed, then
     * blank lines and lines starting with {@code #} are dropped.
     *
     * The file is decoded as UTF-8 regardless of the platform default charset, so
     * that accented gKeys are read identically on every machine.
     * NOTE(review): assumes control files are stored as UTF-8 - confirm against
     * the writer side (ControlFilesGenerator).
     *
     * Because lines are trimmed before being returned, a leading or trailing
     * separator (i.e. an empty first or last column) does not survive.
     *
     * @param file
     *          the control file to read
     * @return
     *          the trimmed, non-empty, non-comment lines in file order
     * @throws IllegalArgumentException
     *          if the file does not exist
     * @throws IllegalStateException
     *          if the file cannot be read
     */
    public static List<String> getLines(File file) {
        Preconditions.checkArgument(file.exists(), "No such control file: %s", file);
        List<String> lines = Lists.newArrayList();
        // InputStreamReader (rather than FileReader) lets us pin the charset while
        // keeping the same lenient handling of malformed bytes.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                String trimmed = line.trim();
                if (!trimmed.isEmpty() && !trimmed.startsWith("#")) {
                    lines.add(trimmed);
                }
            }
        } catch (IOException e) {
            throw new IllegalStateException("Could not read control file: " + file, e);
        }
        return lines;
    }

    /**
     * Splits every meaningful line of a control file (see {@link #getLines(File)})
     * into exactly {@code nbColumns} values.
     *
     * @param file
     *          the control file to read
     * @param nbColumns
     *          the number of columns every row must have
     * @param sep
     *          the column separator
     * @return
     *          one {@code String[nbColumns]} per meaningful line, in file order
     * @throws IllegalArgumentException
     *          if the file does not exist or a line does not have exactly
     *          {@code nbColumns} columns
     */
    public static List<String[]> getRows(File file, int nbColumns, String sep) {
        List<String[]> rows = Lists.newArrayList();
        for (String line : getLines(file)) {
            List<String> valuesAsList = Splitter.on(sep).splitToList(line);
            Preconditions.checkArgument(valuesAsList.size() == nbColumns,
                    "Bad row format for line: \"%s\". Expected %s columns, got %s", line, nbColumns, valuesAsList.size());
            rows.add(valuesAsList.toArray(new String[nbColumns]));
        }
        return rows;
    }

    /**
     * Parses a syntactic variation control file made of 4 tab-separated columns:
     * source gKey, target gKey, variation type name, rule name.
     *
     * @param file
     *          the control file to read
     * @param ruleName
     *          the rule name every row is expected to carry in its 4th column
     * @return
     *          one tuple(source gKey, target gKey, VariationType) per row
     * @throws IllegalStateException
     *          if a row carries a rule name different from {@code ruleName}
     * @throws IllegalArgumentException
     *          if a row is malformed or its 3rd column is not a {@link VariationType} constant name
     */
    public static List<Tuple> getSyntacticVariationTuples(File file, String ruleName) {
        List<Tuple> tuples = Lists.newArrayList();
        for (String[] row : getRows(file, 4, COLUMN_SEPARATOR)) {
            Preconditions.checkState(row[3].equals(ruleName),
                    "Unexpected rule name in %s: expected \"%s\", got \"%s\"", file, ruleName, row[3]);
            tuples.add(tuple(row[0], row[1], VariationType.valueOf(row[2])));
        }
        return tuples;
    }

    /**
     * Parses a prefix control file made of 3 tab-separated columns whose 3rd column
     * must equal {@code VariationType.IS_PREFIX_OF.toString()}.
     *
     * @return one tuple(column 1, column 2) per row
     */
    private static List<Tuple> getPrefixVariationTuples(File file) {
        String expectedType = VariationType.IS_PREFIX_OF.toString();
        List<Tuple> tuples = Lists.newArrayList();
        for (String[] row : getRows(file, 3, COLUMN_SEPARATOR)) {
            Preconditions.checkState(row[2].equals(expectedType),
                    "Unexpected variation type in %s: expected \"%s\", got \"%s\"", file, expectedType, row[2]);
            tuples.add(tuple(row[0], row[1]));
        }
        return tuples;
    }

    /** Copies the tuples into a new array, as expected by AssertJ var-arg assertions. */
    private static Tuple[] array(Collection<Tuple> tuples) {
        return tuples.toArray(new Tuple[tuples.size()]);
    }

    /**
     * Parses a derivation control file made of 4 tab-separated columns whose 3rd
     * column must equal {@code VariationType.DERIVES_INTO.toString()}.
     *
     * @return one tuple(column 4, column 1, column 2) per row
     */
    private static List<Tuple> getDerivateVariationTuples(File file) {
        String expectedType = VariationType.DERIVES_INTO.toString();
        List<Tuple> tuples = Lists.newArrayList();
        for (String[] row : getRows(file, 4, COLUMN_SEPARATOR)) {
            Preconditions.checkState(row[2].equals(expectedType),
                    "Unexpected variation type in %s: expected \"%s\", got \"%s\"", file, expectedType, row[2]);
            tuples.add(tuple(row[3], row[0], row[1]));
        }
        return tuples;
    }

    /**
     * Parses a compound control file made of 3 tab-separated columns.
     *
     * @return one tuple(column 2, column 1, column 3) per row
     */
    private static List<Tuple> getCompoundTuples(File file) {
        List<Tuple> tuples = Lists.newArrayList();
        for (String[] row : getRows(file, 3, COLUMN_SEPARATOR)) {
            // The first two columns are swapped in the resulting tuple.
            tuples.add(tuple(row[1], row[0], row[2]));
        }
        return tuples;
    }

    /**
     * Resolves the control file of a syntactic rule inside the control directory
     * of the given corpus and language.
     *
     * @param lang
     *          the language
     * @param corpus
     *          the test corpus
     * @param ruleName
     *          the name of the syntactic rule
     * @return
     *          the path of the rule's control file (its existence is not checked here)
     * @throws IllegalStateException
     *          if the control directory does not exist or is not a directory
     */
    public static Path syntacticVariationControlFilePath(Lang lang, String corpus, String ruleName) {
        return getControlDirectory(lang, corpus).resolve(ControlFilesGenerator.getSyntacticRuleFileName(ruleName));
    }
}