package eu.project.ttc.termino.export; import java.io.IOException; import java.io.Writer; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import com.google.common.base.Joiner; import com.google.common.collect.ComparisonChain; import com.google.common.collect.HashMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; import eu.project.ttc.api.TermSuiteException; import eu.project.ttc.models.Component; import eu.project.ttc.models.Term; import eu.project.ttc.models.TermIndex; import eu.project.ttc.models.Word; public class CompoundExporter { private static final String LINE_FORMAT = "%-30s %-10s %-35s %d\n"; private TermIndex termIndex; private Writer writer; private CompoundExporter(TermIndex termIndex, Writer writer) { super(); this.termIndex = termIndex; this.writer = writer; } public static void export(TermIndex termIndex, Writer writer) { new CompoundExporter(termIndex, writer).doExport(); } private void doExport() { try { Multimap<Word,Term> terms = HashMultimap.create(); Set<Word> compounds = Sets.newHashSet(); for(Term t:termIndex.getTerms()) { Word w = t.getWords().get(0).getWord(); if(t.getWords().size() == 1 && w.isCompound()) { compounds.add(w); terms.put(w, t); } } final Map<Word,Integer> frequencies = Maps.newHashMap(); for(Word w: terms.keySet()) { int f = 0; for(Term t:terms.get(w)) f += t.getFrequency(); frequencies.put(w, f); } Set<Word> sortedCompounds = new TreeSet<Word>(new Comparator<Word>() { @Override public int compare(Word o1, Word o2) { return ComparisonChain.start() .compare(frequencies.get(o2), frequencies.get(o1)) .result(); } }); sortedCompounds.addAll(compounds); for(Word w:sortedCompounds) { List<String> compLemmas = Lists.newArrayList(); for(Component c:w.getComponents()) compLemmas.add(c.getLemma()); writer.write(String.format(LINE_FORMAT, w.getLemma(), w.getCompoundType(), Joiner.on('|').join(compLemmas), frequencies.get(w) )); } } catch (IOException e) { throw new TermSuiteException(e); } } }