/* LanguageTool, a natural language style checker * Copyright (C) 2013 Daniel Naber (http://www.danielnaber.de) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 * USA */ package org.languagetool.synthesis; import org.jetbrains.annotations.NotNull; import org.languagetool.AnalyzedToken; import org.languagetool.tokenizers.de.GermanCompoundTokenizer; import org.languagetool.tools.StringTools; import java.io.IOException; import java.util.*; /** * German word form synthesizer. Also supports compounds. * * @since 2.4 */ public class GermanSynthesizer extends BaseSynthesizer { private final GermanCompoundTokenizer splitter; public GermanSynthesizer() { super("/de/german_synth.dict", "/de/german_tags.txt"); try { splitter = new GermanCompoundTokenizer(); } catch (IOException e) { throw new RuntimeException(e); } } @Override public String[] synthesize(AnalyzedToken token, String posTag) throws IOException { String[] result = super.synthesize(token, posTag); if (result.length == 0) { return getCompoundForms(token, posTag, false); } return result; } @Override public String[] synthesize(AnalyzedToken token, String posTag, boolean posTagRegExp) throws IOException { String[] result = super.synthesize(token, posTag, posTagRegExp); if (result.length == 0) { return getCompoundForms(token, posTag, posTagRegExp); } return result; } @NotNull private String[] getCompoundForms(AnalyzedToken token, String posTag, boolean posTagRegExp) throws IOException { List<String> parts = splitter.tokenize(token.getToken()); String firstPart = String.join("", parts.subList(0, parts.size() - 1)); String lastPart = StringTools.uppercaseFirstChar(parts.get(parts.size() - 1)); AnalyzedToken lastPartToken = new AnalyzedToken(lastPart, posTag, lastPart); String[] lastPartForms; if (posTagRegExp) { lastPartForms = super.synthesize(lastPartToken, posTag, true); } else { lastPartForms = super.synthesize(lastPartToken, posTag); } Set<String> results = new LinkedHashSet<>(); // avoid dupes for (String part : lastPartForms) { results.add(firstPart + StringTools.lowercaseFirstChar(part)); } return results.toArray(new String[results.size()]); } }