package eu.project.ttc.termino.export; import java.io.IOException; import java.io.Writer; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.TreeSet; import com.google.common.collect.ComparisonChain; import com.google.common.collect.HashMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; import eu.project.ttc.api.TermSuiteException; import eu.project.ttc.engines.variant.VariantRule; import eu.project.ttc.models.Term; import eu.project.ttc.models.TermIndex; import eu.project.ttc.models.TermOccurrence; import eu.project.ttc.models.TermVariation; import eu.project.ttc.models.VariationType; import eu.project.ttc.resources.YamlVariantRules; import eu.project.ttc.utils.TermOccurrenceUtils; public class VariationRuleExamplesExporter { private TermIndex termIndex; private Writer writer; private YamlVariantRules yamlVariantRules; class TermPair implements Comparable<TermPair> { Term source; Term target; public TermPair(Term source, Term target) { super(); this.source = source; this.target = target; } @Override public int compareTo(TermPair o) { return ComparisonChain.start().compare(o.target.getFrequency(), this.target.getFrequency()).result(); } } private VariationRuleExamplesExporter(TermIndex termIndex, Writer writer, YamlVariantRules yamlVariantRules) { this.termIndex = termIndex; this.writer = writer; this.yamlVariantRules = yamlVariantRules; } public static void export(TermIndex termIndex, Writer writer, YamlVariantRules yamlVariantRules) { new VariationRuleExamplesExporter(termIndex, writer, yamlVariantRules).doExport(); } private void doExport() { final Multimap<String, TermPair> pairs = HashMultimap.create(); for (Term t : termIndex.getTerms()) { for (TermVariation v : t.getVariations(VariationType.MORPHOLOGICAL, VariationType.SYNTACTICAL)) pairs.put(v.getInfo().toString(), new TermPair(t, v.getVariant())); } // gets all variant rules (event size-0) and sorts them TreeSet<VariantRule> varianRules = new TreeSet<VariantRule>(new Comparator<VariantRule>() { @Override public int compare(VariantRule o1, VariantRule o2) { return ComparisonChain.start().compare(pairs.get(o2.getName()).size(), pairs.get(o1.getName()).size()) .compare(o1.getName(), o2.getName()).result(); } }); varianRules.addAll(yamlVariantRules.getVariantRules()); try { /* * Display Summary */ int total = 0; int nbMatchingRules = 0; String summaryLine = "%-16s: %d\n"; for (VariantRule rule : varianRules) { int nbPairs = pairs.get(rule.getName()).size(); total += nbPairs; if (nbPairs > 0) nbMatchingRules++; writer.write(String.format(summaryLine, rule.getName(), nbPairs)); } writer.write("---\n"); writer.write(String.format(summaryLine, "TOTAL", total)); writer.write(String.format("%-16s: %d / %d\n", "nb matching rules", nbMatchingRules, varianRules.size())); writer.write("\n---\n"); /* * Display variant rules' matches. */ for (VariantRule rule : varianRules) { List<TermPair> sortedPairs = Lists.newArrayList(pairs.get(rule.getName())); Collections.sort(sortedPairs); int nbOverlappingOccs = 0; int nbStrictOccs = 0; List<String> lines = Lists.newArrayList(); for (TermPair pair : sortedPairs) { List<TermOccurrence> targetStrictOccurrences = Lists.newLinkedList(pair.target.getOccurrences()); TermOccurrenceUtils.removeOverlaps(pair.source.getOccurrences(), targetStrictOccurrences); nbOverlappingOccs += pair.target.getFrequency(); nbStrictOccs += targetStrictOccurrences.size(); lines.add(String.format("%14d%14d%35s || %-35s\n", pair.target.getFrequency(), targetStrictOccurrences.size(), pair.source.getGroupingKey(), pair.target.getGroupingKey())); } writer.write( "\n----------------------------------------------------------------------------------------------------------------\n"); writer.write(String.format( "--------------------- %s [nb_terms: %d, total_occs: %d, total_strict_occurrences: %d] \n", rule.getName(), sortedPairs.size(), nbOverlappingOccs, nbStrictOccs)); writer.write( "----------------------------------------------------------------------------------------------------------------\n"); writer.write(String.format("%14s%14s\n", "fr_overlaps", "fr_strict")); for (String line : lines) writer.write(line); writer.write(String.format("TOTAL: %7s%14s\n", nbOverlappingOccs, nbStrictOccs)); } } catch (IOException e) { throw new TermSuiteException(e); } } }