/******************************************************************************* * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique) * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * *******************************************************************************/ package eu.project.ttc.tools.utils; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.util.Collection; import java.util.List; import java.util.Set; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import eu.project.ttc.models.Component; import eu.project.ttc.models.Term; import eu.project.ttc.models.TermIndex; import eu.project.ttc.models.TermVariation; import eu.project.ttc.models.VariationType; import eu.project.ttc.models.Word; import eu.project.ttc.utils.TermIndexUtils; /** * * A tool that generates all control files required for * functional tests from a {@link TermIndex}. * * @author Damien Cram * */ public class ControlFilesGenerator { private TermIndex termIndex; public ControlFilesGenerator(TermIndex termIndex) { super(); this.termIndex = termIndex; } /** * * @param directory * the directory where to create the files. */ public void generate(File directory) throws IOException { if(!directory.exists()) directory.mkdirs(); Set<String> distinctRuleNames = Sets.newHashSet(); Set<TermVariation> variations = TermIndexUtils.getVariations(termIndex); for(TermVariation tv:variations) if(tv.getVariationType() == VariationType.SYNTACTICAL || tv.getVariationType() == VariationType.MORPHOLOGICAL) distinctRuleNames.add((String)tv.getInfo()); /* * Write syntactic rules */ for(String ruleName:distinctRuleNames) { String pathname = directory.getAbsolutePath() + "/" + getSyntacticRuleFileName(ruleName); writeVariations(pathname, TermIndexUtils.selectTermVariationsByInfo(termIndex, ruleName)); } /* * Write prefix variations */ String prefixPath = directory.getAbsolutePath() + "/" + getPrefixFileName(); writeVariations(prefixPath, TermIndexUtils.selectTermVariations(termIndex, VariationType.IS_PREFIX_OF)); /* * Write derivative variations */ String derivativePath = directory.getAbsolutePath() + "/" + getDerivatesFileName(); writeVariations(derivativePath, TermIndexUtils.selectTermVariations(termIndex, VariationType.DERIVES_INTO)); /* * Write compounds */ String compoundPath = directory.getAbsolutePath() + "/" + getCompoundsFileName(); writeCompounds(compoundPath); } public static String getCompoundsFileName() { return "compounds.tsv"; } public static String getDerivatesFileName() { return "derivates.tsv"; } public static String getPrefixFileName() { return "prefixes.tsv"; } public static String getSyntacticRuleFileName(String ruleName) { return "syntactic-" + ruleNametoFileName(ruleName) + ".tsv"; } public static String ruleNametoFileName(String ruleName) { return ruleName.replaceAll("\\|", "-or-"); } private void writeCompounds(String filePath) throws IOException { Writer writer = new FileWriter(filePath); for(Term t:termIndex.getTerms()) { if(t.isSingleWord() && t.isCompound()) { writer.append(String.format("%s\t%s\t%s%n", t.getGroupingKey(), t.getWords().get(0).getWord().getCompoundType(), toCompoundString(t) ) ); } } writer.flush(); writer.close(); } public static String toCompoundString(Term t) { Preconditions.checkArgument(t.isSingleWord(), "Term %s should be a single-word term", t); Preconditions.checkArgument(t.isCompound(), "Term %s should be compound", t); List<String> componentStrings = Lists.newArrayList(); Word word = t.getWords().get(0).getWord(); for(Component c:word.getComponents()) { componentStrings.add(String.format("%s:%s", word.getLemma().substring(c.getBegin(), c.getEnd()), c.getLemma() )); } return Joiner.on("|").join(componentStrings); } private void writeVariations(String path, Collection<TermVariation> variations) throws IOException { Writer writer = new FileWriter(path); for(TermVariation tv:variations) { writer.append(String.format("%s\t%s\t%s\t%s%n", tv.getBase().getGroupingKey(), tv.getVariant().getGroupingKey(), tv.getVariationType(), tv.getInfo() )); } writer.flush(); writer.close(); } }