package eu.project.ttc.eval; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Properties; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Preconditions; import eu.project.ttc.api.JsonOptions; import eu.project.ttc.api.TermIndexIO; import eu.project.ttc.engines.desc.Lang; import eu.project.ttc.models.TermIndex; import eu.project.ttc.test.func.FunctionalTests; import eu.project.ttc.tools.TermSuiteResourceManager; public class TermSuiteEvals { private static final Logger LOGGER = LoggerFactory.getLogger(TermSuiteEvals.class); private static final String EVAL_CONFIG = "termsuite-eval.properties"; private static final String PROP_TREETAGGER_HOME_PATH = "treetagger.home.path"; private static final String PROP_OUTPUTDIR = "eval.output"; private static final String PROP_DICTIONARIES = "eval.dictionaries"; /** * The path to cached computed terminologies. * * @return */ public static Path getTerminoDirectory() { return getSubDir("terminologies"); } public static Path getAlignmentDirectory() { return getSubDir("alignment"); } private static Path getSubDir(String other) { Path alignmentDir = getEvalOuputDirectory().resolve(other); if(!alignmentDir.toFile().exists()) alignmentDir.toFile().mkdirs(); return alignmentDir; } public static Object getCheckedProperty(String propertyName) { Object configProperty = getConfigProperty(propertyName); if(configProperty == null) configProperty = System.getProperty(propertyName); Preconditions.checkNotNull(configProperty, "No such property set: %s", propertyName); return configProperty; } public static Path getTreeTaggerPath() { String treeTaggerPathValue = getCheckedProperty(PROP_TREETAGGER_HOME_PATH).toString(); return Paths.get(treeTaggerPathValue.toString()); } public static Path getDictionariesPath() { String dicoPathValue = getCheckedProperty(PROP_DICTIONARIES).toString(); return Paths.get(dicoPathValue.toString()); } public static Path getEvalOuputDirectory() { String outputDirValue = getCheckedProperty(PROP_OUTPUTDIR).toString(); return Paths.get(outputDirValue.toString()); } private static Object getConfigProperty( String propName) { InputStream is = FunctionalTests.class.getClassLoader().getResourceAsStream(EVAL_CONFIG); Properties properties = new Properties(); try { properties.load(is); is.close(); return properties.get(propName); } catch (IOException e) { throw new RuntimeException(e); } } public static TermIndex getTerminology(Corpus corpus, Lang lang, TerminoConfig config) { Path path = getTerminologyPath(lang, corpus, config); if(!path.toFile().isFile()) { LOGGER.info("Terminology {} not found in cache", getTerminologyFileName(lang, corpus, config)); TermSuiteResourceManager.getInstance().clear(); TermIndex termIndex = config.toExtractor(lang, corpus).execute(); try(FileWriter writer = new FileWriter(path.toFile())){ TermIndexIO.toJson(termIndex, writer, new JsonOptions().withOccurrences(false).withContexts(true)); } catch (IOException e) { LOGGER.error("Could not create terminology {}", getTerminologyFileName(lang, corpus, config)); throw new RuntimeException(e); } } else LOGGER.info("Terminology {} found in cache", getTerminologyFileName(lang, corpus, config)); return TermIndexIO.fromJson(path); } public static String getTerminologyFileName(Lang lang, Corpus corpus, TerminoConfig config) { return String.format("%s-%s-th%s-scope%d-%s.json", corpus.getShortName(), lang.getCode(), Integer.toString(config.getFrequencyTh()), config.getScope(), config.isSwtOnly() ? "swtonly" : "allterms"); } public static Path getTerminologyPath(Lang lang, Corpus corpus, TerminoConfig config) { return TermSuiteEvals.getTerminoDirectory().resolve(getTerminologyFileName(lang, corpus, config)); } /** * * Loads a bilingual dictionary from the directory denoted by system property {@link #PROP_DICTIONARIES} * * @param langPair * lang pair * @return * The path to the existing bilingual dico. */ public static Path getDictionaryPath(LangPair langPair) { return getDictionariesPath().resolve(String.format("%s-%s.txt", langPair.getSource().getCode(), langPair.getTarget().getCode())); } public static String getRunName(Lang source, Lang target, Corpus corpus, TerminoConfig config) { return String.format("alignment-results-%s-%s-%s-th%s-scope%d", corpus.getShortName(), source.getCode(), target.getCode(), Integer.toString((int)config.getFrequencyTh()), config.getScope() ); } }