/**
* Copyright 2005, 2011 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.language.de.phonemiser;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Locale;
import marytts.language.de.JPhonemiser;
import marytts.modules.phonemiser.Allophone;
import marytts.modules.phonemiser.AllophoneSet;
import marytts.util.MaryUtils;
import org.apache.log4j.Logger;
/**
* @author steigner
*/
public class PhonemiseDenglish {
// Logger used for all debug output of this class.
private static Logger logger = MaryUtils.getLogger("PhonemiseDenglish");
// Orthographic vowels used by the vowel checks on stems and endings.
private String[] vowels = { "a", "e", "i", "o", "u" };
// Suffixes tested against the English transcription in transcribeFlection() to decide on an extra syllable.
private String[] dentalPlosives = { "t", "d" };
// Loaded from flectionsPhonLex.xml; queried by endingTranscriptionLookup() (ending/affix -> transcription).
private Hashtable<String, String> flectionToPhon = null;
// Loaded from PrefixLex.xml; queried by prefixLexiconLookup() (prefix -> transcription).
private Hashtable<String, String> prefixLexicon = null;
// Loaded from germanEndings.xml; values are '/'-separated lists, see getEndingsAndAffixes().
private Hashtable<String, String> endingsAndAffixes = null;
// Loaded from terminal_voicing_for_german.xml; queried by getVoicedFinal().
private Hashtable<String, String> terminalVoicings = null;
// Set of the entries stored under the "flections" key; filled in the constructor, used by knowEnding().
private HashSet<String> endingSet = null;
// Length of the longest entry in endingSet (computed in the constructor).
private int maxEndingLength = 0;
// Length of the longest key of prefixLexicon (computed in the constructor).
private int maxPrefixLength = 0;
// NOTE(review): never assigned anywhere in this file, so getLocale() returns null - confirm whether this is intended.
private Locale locale;
// JPhonemiser instance supplying the userdict/lexicon lookups, the English phonemiser and the allophone set
// used by this class.
private JPhonemiser jphon = null;
// Reader used in the constructor to load the XML resources into the tables above.
private MorphologyReader mr = new MorphologyReader();
/**
 * Creates a Denglish phonemiser that loads its morphology tables from the classpath.
 *
 * @param jphon
 *            the phonemiser providing userdict/lexicon lookup, the English phonemiser and the allophone set
 * @throws Exception
 *             if one of the resources cannot be found on the classpath or cannot be loaded
 */
public PhonemiseDenglish(JPhonemiser jphon) throws Exception {
    this.jphon = jphon;
    this.flectionToPhon = loadModel("flectionsPhonLex.xml");
    this.prefixLexicon = loadModel("PrefixLex.xml");
    for (String prefix : this.prefixLexicon.keySet()) {
        this.maxPrefixLength = Math.max(this.maxPrefixLength, prefix.length());
    }
    this.endingsAndAffixes = loadModel("germanEndings.xml");
    this.terminalVoicings = loadModel("terminal_voicing_for_german.xml");

    // Always create the set, even if no flection endings are configured, so that knowEnding()
    // never dereferences a null set.
    this.endingSet = new HashSet<String>(50);
    String[] endingList = getEndingsAndAffixes("flections");// list of flection endings of a specific language
    if (endingList != null) {
        for (String ending : endingList) {
            this.maxEndingLength = Math.max(this.maxEndingLength, ending.length());
            this.endingSet.add(ending);
        }
    }
}

/**
 * Loads one of the Denglish XML resources from the classpath and closes the stream afterwards.
 *
 * @param resourceName
 *            file name of the resource inside the denglish lexicon directory
 * @return the table produced by the morphology reader
 * @throws Exception
 *             if the resource is missing or the reader fails
 */
private Hashtable<String, String> loadModel(String resourceName) throws Exception {
    String resourcePath = "/marytts/language/de/lexicon/denglish/" + resourceName;
    java.io.InputStream in = getClass().getResourceAsStream(resourcePath);
    if (in == null) {
        // getResourceAsStream signals a missing resource with null; fail with a readable message
        throw new java.io.FileNotFoundException("Cannot find resource on classpath: " + resourcePath);
    }
    try {
        return mr.loadInputModel(in);
    } finally {
        in.close();
    }
}
/**
 * Method that is called from JPhonemiser - managing all the processing.
 * <p>
 * The word is first analysed as a (possibly prefixed or inflected) single word, then as a compound made of native
 * lexicon entries only, and finally - if allowed - as a compound that may contain other-language parts.
 *
 * @param toBePhonemised
 *            The input word
 * @param allowOtherLanguage
 *            whether parts of the word may be phonemised with the other-language (English) phonemiser
 * @return a Result object; its transcription is set only if one could be built
 */
public Result processWord(String toBePhonemised, boolean allowOtherLanguage) {
    Word currentWord = new Word(toBePhonemised);
    Result currentResult = new Result();
    long startTime = System.currentTimeMillis();
    String input = currentWord.getToBePhonemised();
    if (input.equals("")) {
        logger.debug("Empty String!");
        return currentResult;
    }
    // word or item must at least have length 3 to be a denglish item
    if (input.length() <= 2) {
        logger.debug("Input to short to be a deng item");
        logger.debug("Now using letter to sound rules");
        return currentResult;
    }
    // try cutting off inflection ending and/or prefix:
    String transcription = processFlection(currentWord, currentResult, allowOtherLanguage);
    // try compound analysis, first without other language ...
    if (transcription == null) {
        transcription = compoundAnalysis(currentWord, currentResult, false);
    }
    // ... then with other language; if it is not allowed, this would only repeat the failed analysis above
    if (transcription == null && allowOtherLanguage) {
        transcription = compoundAnalysis(currentWord, currentResult, true);
    }
    if (transcription != null) {
        long elapsed = System.currentTimeMillis() - startTime;
        logger.debug("Processing took: " + elapsed + " ms");
        currentResult.setTranscription(transcription);
    }
    return currentResult;
}
/**
 * Try to process the input word, as it stands, or by cutting off prefixes or inflectional suffixes.
 *
 * @param word
 *            the input word
 * @param currentResult
 *            result object that is passed on to the flection analysis, which records other-language use in it
 * @param allowOtherLanguage
 *            whether the other-language flection analysis may be tried
 * @return the transcription of the word, or null if the word could not be transcribed
 */
private String processFlection(Word word, Result currentResult, boolean allowOtherLanguage) {
    String toBePhonemised = word.getToBePhonemised();
    logger.debug("processFlection is starting with: " + toBePhonemised);
    // First of all, make sure there is no userdict/lexicon entry:
    String transcription = jphon.userdictLookup(toBePhonemised, null);
    if (transcription != null) {
        return transcription;
    }
    transcription = jphon.lexiconLookup(toBePhonemised, null);
    if (transcription != null) {
        return transcription;
    }
    // Try to process by cutting off endings only, without cutting off prefix:
    if (allowOtherLanguage) {
        transcription = processFlectionEnding(word, currentResult);
        if (transcription != null) {
            return transcription;
        }
    }
    // try removing prefix:
    // Enforce at least 3 characters in the stem (the part of the word that comes after the prefix):
    int maxPrefLen = Math.min(this.maxPrefixLength, word.getToBePhonemised().length() - 3);
    for (int i = maxPrefLen; i > 0; i--) {
        // Lower-case with an explicit locale: the default-locale variant would e.g. turn 'I' into a
        // dotless i under a Turkish default locale and make the prefix lookup fail.
        String prefix = word.getToBePhonemised().substring(0, i).toLowerCase(Locale.GERMAN);
        String prefixPhon = prefixLexiconLookup(prefix);
        if (prefixPhon == null) {
            continue;
        }
        logger.debug("Prefix found: " + prefix + " [" + prefixPhon + "]");
        Word partialWord = new Word(word.getToBePhonemised().substring(i));
        // recursively call this method, i.e. allow multiple prefixes:
        String restTranscription = processFlection(partialWord, currentResult, allowOtherLanguage);
        if (restTranscription != null) { // yes, found valid analysis
            if (prefixPhon.indexOf("'") != -1) {
                // the prefix carries a stress mark itself, so remove the marks from the rest
                restTranscription = restTranscription.replaceAll("'", "");
            }
            return prefixPhon + "-" + restTranscription;
        }
    }
    return null;
}
/**
 * Try to process the input word as a verbal or adjective flection.
 *
 * @param word
 *            the input word
 * @param currentResult
 *            result object handed on to transcribeFlection
 * @return the transcription of the word, or null if no analysis succeeded
 */
private String processFlectionEnding(Word word, Result currentResult) {
    String input = word.getToBePhonemised();
    logger.debug("processFlectionEnding is starting with: " + input);
    String transcription = attemptFlectionAnalysis(word, currentResult, input);
    logger.debug("processFlection: " + transcription);
    return transcription;
}

/**
 * Runs up to three attempts at splitting off a flection ending, each one restricted to endings shorter than the
 * ending found by the previous attempt (case of 'upgedatete'). If the second split finds no ending at all, the
 * complete word is tried as something that is already without flection ending (like 'check').
 *
 * @param word
 *            the word object that collects the analysis
 * @param currentResult
 *            result object handed on to transcribeFlection
 * @param input
 *            the string to be phonemised
 * @return the transcription, or null if no attempt produced an other-language base form
 */
private String attemptFlectionAnalysis(Word word, Result currentResult, String input) {
    // separateFlectionEndings returns null if no valid flection ending can be found,
    // otherwise {word without flection, flection}
    String[] stemAndEnding = separateFlectionEndings(input, this.maxEndingLength);
    if (stemAndEnding == null) {
        return null;
    }
    word = transformWordToEnBaseForm(stemAndEnding[0], stemAndEnding[1], word);// language-dependent
    if (word.getOtherLanguageBaseForm() != null) {
        word.setFlectionEnding(stemAndEnding[1]);
        return transcribeFlection(word, currentResult);// language-dependent
    }

    // second attempt: only allow endings shorter than the one just tried
    stemAndEnding = separateFlectionEndings(input, stemAndEnding[1].length() - 1);
    if (stemAndEnding == null) {
        // we have sth. that is already without flection ending like 'check'
        word = transformWordToEnBaseForm(input, null, word);// language-dependent
        if (word.getOtherLanguageBaseForm() != null) {
            return transcribeFlection(word, currentResult);// language-dependent
        }
        logger.debug("Unable to transcribe flection. Returning null.");
        return null;
    }
    word = transformWordToEnBaseForm(stemAndEnding[0], stemAndEnding[1], word);// language-dependent
    if (word.getOtherLanguageBaseForm() != null) {
        word.setFlectionEnding(stemAndEnding[1]);
        return transcribeFlection(word, currentResult);// language-dependent
    }

    // third and last attempt, again with a shorter ending
    stemAndEnding = separateFlectionEndings(input, stemAndEnding[1].length() - 1);
    if (stemAndEnding == null) {
        return null;
    }
    word = transformWordToEnBaseForm(stemAndEnding[0], stemAndEnding[1], word);// language-dependent
    if (word.getOtherLanguageBaseForm() != null) {
        word.setFlectionEnding(stemAndEnding[1]);
        return transcribeFlection(word, currentResult);// language-dependent
    }
    return null;
}
/**
 * Analyses parts of input word for affixes, compounds etc.
 * <p>
 * The word is split into a first part and a rest, trying the longest first part first; both parts must have at
 * least three characters. The first part must be known to the userdict, the lexicon or (if allowed) the
 * other-language phonemiser. The rest is then tried, in this order, as: prefix lexicon entry, userdict/lexicon
 * entry, other-language word, word plus noun ending, Fuge plus word/compound, flection, compound.
 *
 * @param word
 *            the input word
 * @param currentResult
 *            result object in which the use of the other language is recorded
 * @param allowOtherLanguage
 *            whether to allow component words from other language in compound analysis
 * @return If a transcription for the input can be found, then it is returned. Otherwise returns null.
 */
private String compoundAnalysis(Word word, Result currentResult, boolean allowOtherLanguage) {
    String input = word.getToBePhonemised();
    logger.debug("compoundAnalysis is starting with: " + input);
    // The ending list does not depend on the split position, so fetch it only once.
    // should be 's' and 'n' for german
    String[] nounEndings = getEndingsAndAffixes("noun_genitive_accusative_and_plural_endings");

    // Chop off longest possible prefixes and try to look them up in the lexicon.
    for (int i = input.length() - 3; i >= 3; i--) {
        // Remember other-language use of THIS split only; it is recorded in currentResult only when the
        // split really leads to a transcription, so discarded attempts do not leave the flag set.
        boolean usedOtherLanguage = false;

        String prefix = input.substring(0, i);
        logger.debug("Pre: " + prefix);
        String firstPhon = lookupInUserdictOrLexicon(prefix);
        if (firstPhon == null && allowOtherLanguage) {
            firstPhon = jphon.phonemiseEn(prefix);
            if (firstPhon != null) {
                usedOtherLanguage = true;
            }
        }
        if (firstPhon == null) {
            continue; // no valid prefix at this position
        }

        // TODO: shouldn't this call processFlection()?
        String rest = input.substring(i);
        logger.debug("Rest is: " + rest);
        String fugePhon = null;

        // Is the rest a simple lexical entry?
        String restPhon = prefixLexiconLookup(rest);
        logger.debug("RestPhon: " + restPhon);
        if (restPhon == null) {
            restPhon = lookupInUserdictOrLexicon(rest);
        }
        if (restPhon == null && allowOtherLanguage) {
            restPhon = jphon.phonemiseEn(rest);
            if (restPhon != null) {
                usedOtherLanguage = true;
            }
        }

        // Is the rest a known word followed by a noun ending?
        if (restPhon == null && nounEndings != null) {
            for (String ending : nounEndings) {
                if (!rest.endsWith(ending)) {
                    continue;
                }
                logger.debug("rest ends with: " + ending);
                String endingPhon = endingTranscriptionLookup(ending);
                if (endingPhon == null) {
                    // without a transcription for the ending, the text "null" would be appended
                    continue;
                }
                // strip the ending itself, not a fixed single character
                String restWithoutEnding = rest.substring(0, rest.length() - ending.length());
                String stemPhon = lookupInUserdictOrLexicon(restWithoutEnding);
                if (stemPhon == null && allowOtherLanguage) {
                    stemPhon = jphon.phonemiseEn(restWithoutEnding);
                    if (stemPhon != null) {
                        usedOtherLanguage = true;
                    }
                }
                if (stemPhon != null) {
                    restPhon = stemPhon + endingPhon;
                    break;
                }
            }
        }

        // Or does it help if we cut off a Fuge?
        if (restPhon == null) {
            String[] helper = fugeSearch(rest);
            if (helper != null && helper.length == 2) {
                String restAfterFuge = helper[1];
                restPhon = lookupInUserdictOrLexicon(restAfterFuge);
                if (restPhon == null && allowOtherLanguage) {
                    restPhon = jphon.phonemiseEn(restAfterFuge);
                    if (restPhon != null) {
                        usedOtherLanguage = true;
                    }
                }
                if (restPhon == null) {
                    restPhon = compoundAnalysis(new Word(restAfterFuge), currentResult, allowOtherLanguage);
                }
                if (restPhon != null) {
                    // Keep the Fuge only if the part behind it could be analysed. Otherwise the analyses
                    // below work on the complete rest, which still contains the Fuge letters.
                    fugePhon = helper[0];
                }
            }
        }

        // Maybe rest is a flection
        if (restPhon == null) {
            restPhon = processFlection(new Word(rest), currentResult, allowOtherLanguage);
        }
        // Or can the rest be analysed as a compound?
        if (restPhon == null) {
            restPhon = compoundAnalysis(new Word(rest), currentResult, allowOtherLanguage);
        }

        if (restPhon != null) {
            if (usedOtherLanguage) {
                currentResult.setUsedOtherLanguageToPhonemise(true);
            }
            // In restPhon, delete stress signs:
            restPhon = restPhon.replaceAll("'", "");
            return firstPhon + (fugePhon != null ? fugePhon : "") + "-" + restPhon;
        }
    }
    return null;
}

/**
 * Looks a word part up in the userdict first and, if it is not found there, in the lexicon.
 *
 * @param part
 *            the word part to look up
 * @return the transcription, or null if neither lookup knows the part
 */
private String lookupInUserdictOrLexicon(String part) {
    String phon = jphon.userdictLookup(part, null);
    if (phon == null) {
        phon = jphon.lexiconLookup(part, null);
    }
    return phon;
}
/**
 * Try to cut off a Fuge morpheme at the beginning of suffix.
 * <p>
 * The configured Fuges are tried in the order in which they are listed; the first one that matches the beginning
 * of the suffix and has a transcription wins.
 *
 * @param suffix
 *            a part of a word with a prefix already removed.
 * @return a two-item String array. First string is the transcription of the Fuge found, second is the suffix after
 *         the Fuge was removed. Returns null if no Fuge was found.
 */
private String[] fugeSearch(String suffix) {
    String[] validFuges = getEndingsAndAffixes("compound_fuge");
    if (suffix == null || validFuges == null) {
        return null;
    }
    for (String fuge : validFuges) {
        // an empty entry would match every suffix without removing anything
        if (fuge.length() == 0 || !suffix.startsWith(fuge)) {
            continue;
        }
        String fugePhon = endingTranscriptionLookup(fuge);
        if (fugePhon != null) { // found a Fuge
            return new String[] { fugePhon, suffix.substring(fuge.length()) };
        }
        // no transcription for this Fuge: keep looking instead of giving up
    }
    return null;
}
/**
 * Separates flection ending from input word.
 *
 * @param toBePhonemised
 *            the input word
 * @param endingLength
 *            maximum length of the ending to look for
 * @return when valid flection ending is found, returns a string array of two elements (first is stem, second is
 *         ending); else, returns null.
 */
private String[] separateFlectionEndings(String toBePhonemised, int endingLength) {
    String ending = knowEnding(toBePhonemised, endingLength);
    if (ending == null) {
        return null;
    }
    int stemEnd = toBePhonemised.length() - ending.length();
    return new String[] { toBePhonemised.substring(0, stemEnd), ending };
}
/**
 * Try to find baseform of otherLanguageWord (i.e. english infinitive in denglish word).
 * <p>
 * Only stems with more than three characters are analysed. The following analyses are tried in this order until
 * one of them sets the other-language base form of the word:
 * <ol>
 * <li>the stem itself is an English base form (boot, scroll, date),</li>
 * <li>the stem minus its last one or two characters is an English base form and the stem ends like a participle
 * (boot&gt;et&lt;, scroll&gt;t&lt;),</li>
 * <li>the stem ends in a doubled character and the stem minus one character is an English base form
 * (sca&gt;nn&lt;),</li>
 * <li>the stem is a gerund (updat&gt;end&lt;).</li>
 * </ol>
 *
 * @param wordMinusFlectionEnding
 *            the word without its flection ending
 * @param flectionEnding
 *            the flection ending that was cut off, or null if none was cut off
 * @param word
 *            the word object in which base form and analysis flags are stored
 * @return the word object that was passed in
 */
private Word transformWordToEnBaseForm(String wordMinusFlectionEnding, String flectionEnding, Word word) {
    logger.debug("getEnBaseForm is starting with...: " + wordMinusFlectionEnding);
    int stemLength = wordMinusFlectionEnding.length();
    if (stemLength > 3) {
        String[] participleBaseLong = getEndingsAndAffixes("participle_base_long");// 'et' for german
        String[] participleBaseShort = getEndingsAndAffixes("participle_base_short");// 't' for german
        String penultimateChar = wordMinusFlectionEnding.substring(stemLength - 2, stemLength - 1);
        String ultimateChar = wordMinusFlectionEnding.substring(stemLength - 1);
        String lastTwo = penultimateChar + ultimateChar;

        if (knowEnBaseForm(wordMinusFlectionEnding)) {// item without ending is already en base form like >boot<
            word.setOtherLanguageBaseForm(wordMinusFlectionEnding);
            if (flectionEnding == null) {// scroll
                logger.debug("wordMinusFlectionEnding is already enBaseForm");
            } else if (isLongParticipleBaseEnding(flectionEnding, participleBaseLong)
                    || isShortParticipleBaseEnding(flectionEnding, participleBaseShort)) {// 'boot >et< or 'scroll>t<'
                word.setCouldBeParticiple(true);
                word.setCouldBeParticipleInBaseForm(true);
            } else if (endsWithVowel(wordMinusFlectionEnding)) {// special case for words like dat>e<>te<
                word.setWordMinusFlectionEndsWithVowel(true);
                word.setCouldBeParticiple(true);
            }
            // otherwise: downloaden - nothing but the base form to record
        }

        // (up | ge) | date >t< (em) (j = 1)
        // (ge) | boot >et< (em) (j =2)
        // scroll >t< (en) (j=1)
        // scan >nt< (en) (j=2)
        if (word.getOtherLanguageBaseForm() == null) {
            for (int j = 1; j < 3; j++) {// chop off 1 or 2 chars from end of word
                String candidate = wordMinusFlectionEnding.substring(0, stemLength - j);
                logger.debug("new(2a): " + candidate);
                // The stem must end in the long participle ending ('et'), or in the short one ('t') not
                // preceded by another short one: sth like 'cha>tt<en' is left to the geminate clause below.
                if (knowEnBaseForm(candidate)
                        && (isLongParticipleBaseEnding(lastTwo, participleBaseLong)
                                || (isShortParticipleBaseEnding(ultimateChar, participleBaseShort)
                                        && !isShortParticipleBaseEnding(penultimateChar, participleBaseShort)))) {
                    word.setOtherLanguageBaseForm(candidate);
                    word.setCouldBeParticiple(true);
                    word.setCutOffCharacter(true);
                    logger.debug("new(2a)");
                }
                if (word.getOtherLanguageBaseForm() != null) {
                    break;
                }
            }
        }

        if (word.getOtherLanguageBaseForm() == null) {
            // check for geminates -> scannen
            logger.debug("in geminate clause: " + wordMinusFlectionEnding);
            if (ultimateChar.equals(penultimateChar)) {// sca>nn<
                String degeminated = wordMinusFlectionEnding.substring(0, stemLength - 1);
                if (knowEnBaseForm(degeminated)) {
                    word.setOtherLanguageBaseForm(degeminated);
                    logger.debug("geminate.......");
                }
            }
        }

        if (word.getOtherLanguageBaseForm() == null) {
            // try to test if it is a gerund -> updatend
            String gerundBaseForm = null;
            if (checkIfGerund(wordMinusFlectionEnding)) {
                gerundBaseForm = transformWordToEnBaseFormGerund(wordMinusFlectionEnding);
            }
            // Mark the word as gerund only if a base form was really found. The same Word object is
            // reused for further attempts with shorter endings, and a flag left over from a failed gerund
            // analysis would make transcribeFlection() add a gerund ending to an unrelated base form.
            word.setIsVerbalGerund(gerundBaseForm != null);
            if (gerundBaseForm != null) {
                word.setOtherLanguageBaseForm(gerundBaseForm);
            }
        }
    }
    logger.debug("finally enBaseForm: " + word.getOtherLanguageBaseForm());
    return word;
}
/**
 * Derives the English base form from a gerund, i.e. from a word whose last three characters are the gerund
 * ending.
 * <p>
 * Tried in this order: stem plus a flection fuge (updat&gt;e&lt;nd), the stem itself (download&gt;end&lt;), the
 * stem without its doubled final character (scan&gt;n&lt;end).
 *
 * @param word
 *            the gerund form
 * @return the English base form, or null if none of the candidates is known to the English phonemiser
 */
private String transformWordToEnBaseFormGerund(String word) {
    logger.debug("getBaseFormGerund called with: " + word);
    logger.debug("found gerund..........");
    if (word == null || word.length() < 3) {
        return null; // too short to contain a three-letter ending
    }
    int length = word.length();
    String stem = word.substring(0, length - 3);

    String[] flectionFuge = getEndingsAndAffixes("flection_fuge");
    if (flectionFuge != null) {
        for (String fuge : flectionFuge) {
            String candidate = stem + fuge;// like updat >e< nd
            if (knowEnBaseForm(candidate)) {
                logger.debug("gerund case 3");
                return candidate;
            }
        }
    }
    if (knowEnBaseForm(stem)) {// download>end<: item without 'end' is base
        logger.debug("gerund case 1");
        return stem;
    }
    // scan>n< end: needs two characters in front of the ending, otherwise charAt would fail
    if (length >= 5 && word.charAt(length - 4) == word.charAt(length - 5)) {
        String degeminated = word.substring(0, length - 4);
        if (knowEnBaseForm(degeminated)) {
            logger.debug("gerund case 2");
            return degeminated;
        }
    }
    return null;
}
/**
* Builds the transcription and syllabification of a flection.
* <p>
* The other-language base form stored in the word is phonemised with the English phonemiser; if that fails, null
* is returned and currentResult is left untouched. Otherwise currentResult is marked as having used the other
* language, and the result is assembled from the English transcription and the transcriptions of the endings
* involved. Which pieces are used is decided by the flags stored in the word (participle, gerund, extra
* syllable, ...); the numbers in the debug messages identify the rule that fired.
*
* @param word
* the word under analysis; its other-language base form is expected to be set. The method also sets
* the extraSyll flag of the word.
* @param currentResult
* result object in which the use of the other language is recorded
* @return transcription of complete input word, or null if the base form cannot be phonemised or the flection
* ending has no transcription
*/
private String transcribeFlection(Word word, Result currentResult) {
String result = null;
String otherLanguageTranscription = null;
String endingTranscription = null;
String gerundEndingTrans = null;
String participleBaseShortEndingTrans = null;
String flectionFugeTrans = null;
otherLanguageTranscription = jphon.phonemiseEn(word.getOtherLanguageBaseForm());
if (otherLanguageTranscription != null) {
currentResult.setUsedOtherLanguageToPhonemise(true);
// an English transcription ending in 't' or 'd' needs an extra syllable
for (int j = 0; j < this.dentalPlosives.length; j++) {
if (otherLanguageTranscription.endsWith(this.dentalPlosives[j])) {
word.setExtraSyll(true);
logger.debug("extraSyll true");
}
}
// for cases like 'scrollet' where 'et' is flection ending and NOT ending of
// participleBaseForm; otherwise 'scrollet' would sound like 'scrollt'
String[] participleBaseLongEndings = getEndingsAndAffixes("participle_base_long");
for (int j = 0; j < participleBaseLongEndings.length; j++) {
if (word.getFlectionEnding() != null && word.getFlectionEnding().equals(participleBaseLongEndings[j])
&& !(word.getCutOffCharacter())) {// 'et'
word.setExtraSyll(true);
}
}
// The three loops below keep the transcription of the LAST listed ending that has one.
// NOTE(review): if no listed ending has a transcription, the variable stays null and is later passed to
// rebuildTrans()/voiceFinal() or concatenated into the result - confirm that the resources rule this out.
String[] gerundEndings = getEndingsAndAffixes("gerund_ending");// should be 'end' -> bootend
for (int j = 0; j < gerundEndings.length; j++) {
if (endingTranscriptionLookup(gerundEndings[j]) != null) {
gerundEndingTrans = endingTranscriptionLookup(gerundEndings[j]);// should be '@nt'
}
}
String[] participleBaseShortEndings = getEndingsAndAffixes("participle_base_short");
// If the participle ends with 'ed' or 'et' doesn't matter -> you get the same transcription
for (int j = 0; j < participleBaseShortEndings.length; j++) {
if (endingTranscriptionLookup(participleBaseShortEndings[j]) != null) {
participleBaseShortEndingTrans = endingTranscriptionLookup(participleBaseShortEndings[j]);// gives you 't'
}
}
String[] flectionFuge = getEndingsAndAffixes("flection_fuge");// gives you 'e'
for (int j = 0; j < flectionFuge.length; j++) {
if (endingTranscriptionLookup(flectionFuge[j]) != null) {
flectionFugeTrans = endingTranscriptionLookup(flectionFuge[j]);
}
}
endingTranscription = endingTranscriptionLookup(word.getFlectionEnding());
// variants with the syllable boundary reset by rebuildTrans()
String newEnTranscription = rebuildTrans(otherLanguageTranscription);
String newGerundEndingTrans = rebuildTrans(gerundEndingTrans);// should then be '@n-t'
String voicedNewGerundEndingTrans = voiceFinal(newGerundEndingTrans);// should be '@n-d'
logger.debug("enTrans: " + otherLanguageTranscription);
if (word.getFlectionEnding() != null) {
// a flection ending without transcription leaves the result null
if (endingTranscriptionLookup(word.getFlectionEnding()) != null) {
// special rule in case of enBaseForm's last char equals valid flection ending i.e. 't'
// in this case give us back the enBaseForm aka enInfinitive
// testing for participle because of date>te< enBaseForm ends with found ending
if (otherLanguageTranscription.endsWith(word.getFlectionEnding()) && !(word.getIsVerbalGerund())
&& !(word.getCouldBeParticiple())) {
result = otherLanguageTranscription;
logger.debug("(0)");
} else {
// participle rules (1)-(7); the order matters, the first matching rule wins
if (word.getCouldBeParticiple() && isShortSuperlative(word.getFlectionEnding()) && word.getExtraSyll()) {// i.e. downgeloadetsten
result = newEnTranscription + flectionFugeTrans + participleBaseShortEndingTrans
+ endingTranscription;
logger.debug("(1)");
} else if (word.getCouldBeParticiple() && word.getCouldBeParticipleInBaseForm() && word.getExtraSyll()) {// scrollet or downloadet
result = newEnTranscription + flectionFugeTrans + participleBaseShortEndingTrans;
logger.debug("(2)");
} else if (word.getCouldBeParticiple() && word.getExtraSyll() && word.getWordMinusFlectionEndsWithVowel()) {
result = newEnTranscription + flectionFugeTrans + "-" + endingTranscription;
logger.debug("(3)");
} else if (word.getCouldBeParticiple() && word.getExtraSyll()) {// i.e. downgeloadetere
result = newEnTranscription + flectionFugeTrans + "-" + participleBaseShortEndingTrans
+ endingTranscription;
logger.debug("(4)");
} else if (word.getCouldBeParticiple() && isShortSuperlative(word.getFlectionEnding())) {// i.e. gescrolltstem
result = otherLanguageTranscription + participleBaseShortEndingTrans + endingTranscription;
logger.debug("(5)");
} else if (word.getCouldBeParticiple() && word.getCouldBeParticipleInBaseForm()) {
result = otherLanguageTranscription + participleBaseShortEndingTrans;
logger.debug("(6)");
} else if (word.getCouldBeParticiple()) {// i.e. gescrolltestem
result = otherLanguageTranscription + "-" + participleBaseShortEndingTrans + endingTranscription;
logger.debug("(7)");
} else {
// not a participle: gerund or plain inflected form
if (word.getIsVerbalGerund()) {
logger.debug("isVerbalGerund");
if (isShortSuperlative(word.getFlectionEnding())) {
result = newEnTranscription + gerundEndingTrans + endingTranscription;
} else {
result = newEnTranscription + voicedNewGerundEndingTrans + endingTranscription;
}
} else {
if (isShortSuperlative(word.getFlectionEnding())) {
result = otherLanguageTranscription + endingTranscription;
} else {// no Gerund, no superlative but maybe something like 'scannst'
if (word.getExtraSyll()) {// means: word ends on 't' or 'd'
logger.debug("extraSyll is true here...");
result = newEnTranscription + endingTranscription;
} else {// means: word ends on something else
if (endingContainsVowel(word.getFlectionEnding())
&& (!(endingBeginsWithVowel(word.getFlectionEnding())))) {
result = otherLanguageTranscription + "-" + endingTranscription;
} else {
if (endingContainsVowel(word.getFlectionEnding())
&& endingBeginsWithVowel(word.getFlectionEnding())) {
result = newEnTranscription + endingTranscription;
} else {
result = otherLanguageTranscription + endingTranscription;
}
}
}
}
}
}
}
}
} else {// flection ending is null: two possibilities: en-Word like boot or ger gerund like bootend
if (word.getIsVerbalGerund()) {
result = newEnTranscription + gerundEndingTrans;
logger.debug("(((1)))");
} else {// scann, date
result = otherLanguageTranscription;
logger.debug("(((2)))");
}
}
}
return result;
}
/**
 * Checks if input word has valid ending from ending list. Longer endings are preferred, and the ending is always
 * shorter than the word itself.
 *
 * @param toBePhonemised
 *            The input word
 * @param endingLength
 *            maximum length of the ending to look for
 * @return Flection ending if one can be found, null otherwise
 */
private String knowEnding(String toBePhonemised, int endingLength) {
    logger.debug("in knowEnding: " + toBePhonemised);
    int wordLength = toBePhonemised.length();
    // never consider the complete word as its own ending
    int longestCandidate = Math.min(endingLength, wordLength - 1);
    for (int len = longestCandidate; len > 0; len--) {
        String candidate = toBePhonemised.substring(wordLength - len, wordLength);
        logger.debug("currentEnding: " + candidate);
        if (this.endingSet.contains(candidate)) {
            logger.debug("foundEnding....: " + candidate);
            return candidate;
        }
    }
    return null;
}
/**
 * Voices the final consonant of an ending. Only the last character of the ending is looked at.
 *
 * @param ending
 *            the input ending; may be null or empty
 * @return voiced ending if the last character has a voiced counterpart, input ending (also if null or empty)
 *         otherwise
 */
private String voiceFinal(String ending) {
    if (ending == null || ending.length() == 0) {
        return ending; // nothing that could be voiced
    }
    int lastIndex = ending.length() - 1;
    String voicedFinalPhoneme = getVoicedFinal(ending.substring(lastIndex));
    if (voicedFinalPhoneme == null) {
        // if there is no voiced value for the last phone, return ending as it came in
        return ending;
    }
    return ending.substring(0, lastIndex) + voicedFinalPhoneme;
}
/**
 * If the given string ends with a consonant, insert a syllable boundary before that consonant. Otherwise, append a
 * syllable boundary. The boundary is also appended if no allophone set is available or the string cannot be split
 * into allophones.
 *
 * @param s
 *            input syllable
 * @return syllable with boundaries reset
 */
private String rebuildTrans(String s) {
    AllophoneSet allophoneSet = jphon.getAllophoneSet();
    Allophone[] segments = (allophoneSet == null) ? null : allophoneSet.splitIntoAllophones(s);
    if (segments == null || segments.length == 0) {
        return s + "-";
    }
    Allophone finalSegment = segments[segments.length - 1];
    if (!finalSegment.isConsonant()) {
        return s + "-";
    }
    // insert a syllable boundary before final consonant
    String finalName = finalSegment.name();
    String head = s.substring(0, s.length() - finalName.length());
    return head + "-" + finalName;
}
/**
 * Checks if input string is a gerund - means: input ends with one of the gerund endings ('end' for german) and is
 * longer than that ending.
 * <p>
 * Only three-letter endings are recognised: the original check compared the last three characters, and
 * transformWordToEnBaseFormGerund() strips exactly three characters from a gerund.
 *
 * @param s
 *            input string
 * @return true if s is longer than three characters and ends with a three-letter gerund ending, false otherwise
 */
private boolean checkIfGerund(String s) {
    String[] gerundEndings = getEndingsAndAffixes("gerund_ending");// should be 'end' for german
    if (s == null || gerundEndings == null) {
        return false;
    }
    for (String ending : gerundEndings) {
        // endsWith never fails on short input, unlike cutting off the last three characters by hand
        if (ending.length() == 3 && s.length() > 3 && s.endsWith(ending)) {// we have a gerund
            return true;
        }
    }
    return false;
}
/**
 * Checks if flection ending is a short superlative ending, i.e. one of the entries stored under
 * "superlative_short".
 *
 * @param flectionEnding
 *            flection ending, may be null
 * @return true if ending is short superlative, false otherwise
 */
private boolean isShortSuperlative(String flectionEnding) {
    String[] candidates = getEndingsAndAffixes("superlative_short");
    for (String candidate : candidates) {
        if (flectionEnding != null && flectionEnding.equals(candidate)) {
            return true;
        }
    }
    return false;
}
/**
 * Checks if flection ending begins with a vowel.
 *
 * @param ending
 *            flection ending; may be null or empty
 * @return true if ending begins with a vowel, false otherwise (also for a null or empty ending)
 */
private boolean endingBeginsWithVowel(String ending) {
    if (ending == null) {
        return false;
    }
    for (String vowel : this.vowels) {
        // startsWith simply answers false for an empty ending, where substring(0, 1) would fail
        if (ending.startsWith(vowel)) {
            return true;
        }
    }
    return false;
}
/**
 * Checks if flection ending contains a vowel.
 *
 * @param ending
 *            flection ending
 * @return true if ending contains a vowel, false otherwise
 */
private boolean endingContainsVowel(String ending) {
    for (String vowel : this.vowels) {
        // every vowel is a single character, so a substring search equals the per-character comparison
        if (ending.indexOf(vowel) >= 0) {
            return true;
        }
    }
    return false;
}
/**
 * Checks if the given string ends with a vowel.
 *
 * @param s
 *            the string to check
 * @return true if the last character of s is a vowel, false otherwise (also for an empty string)
 */
private boolean endsWithVowel(String s) {
    // One comparison per vowel is enough: the former inner loop over all positions of s repeated the very
    // same comparison of the last character.
    for (String vowel : this.vowels) {
        if (s.endsWith(vowel)) {
            return true;
        }
    }
    return false;
}
/**
 * Checks if the given string is one of the long participle base endings ('et').
 *
 * @param s
 *            the string to check
 * @param participleBaseLong
 *            the list of long participle base endings
 * @return true if s equals one of the list entries, false otherwise
 */
private boolean isLongParticipleBaseEnding(String s, String[] participleBaseLong) {
    for (String candidate : participleBaseLong) {
        if (s.equals(candidate)) {
            return true;
        }
    }
    return false;
}
/**
 * Checks if the given string is one of the short participle base endings ('t').
 *
 * @param s
 *            the string to check
 * @param participleBaseShort
 *            the list of short participle base endings
 * @return true if s equals one of the list entries, false otherwise
 */
private boolean isShortParticipleBaseEnding(String s, String[] participleBaseShort) {
    for (String candidate : participleBaseShort) {
        if (s.equals(candidate)) {
            return true;
        }
    }
    return false;
}
/**
 * Returns the locale stored in this object.
 *
 * @return the value of the locale field; no code in this class assigns that field, so this is null unless it is
 *         set by other means
 */
public Locale getLocale() {
    return locale;
}
/**
 * Checks if item is known to the English phonemiser.
 *
 * @param s
 *            english base form
 * @return true if the English phonemiser returns a transcription for s, false if not
 */
private boolean knowEnBaseForm(String s) {
    return jphon.phonemiseEn(s) != null;
}
// public boolean usedOtherLanguageToPhonemise(boolean usedOtherLanguageToPhonemise) {
// return usedOtherLanguageToPhonemise;
// }
/**
******************************************************************************
Hashtable lookup methods for morphology & phonology information
******************************************************************************
**/
/**
 * Looking for item in german prefix lexicon.
 *
 * @param s
 *            item to be found; may be null
 * @return Transcription of item, or null if s is null or not contained in the prefix lexicon
 */
private String prefixLexiconLookup(String s) {
    // Hashtable rejects null keys with an exception; check explicitly instead of catching it
    if (s == null || this.prefixLexicon == null) {
        return null;
    }
    return this.prefixLexicon.get(s);
}
/**
 * Looks up in terminal voicing hash if there is a match for a unvoiced consonant.
 *
 * @param phon
 *            The unvoiced consonant; may be null
 * @return Voiced consonant if one can be found, null otherwise
 */
private String getVoicedFinal(String phon) {
    // Hashtable rejects null keys with an exception; check explicitly instead of catching it
    if (phon == null || this.terminalVoicings == null) {
        return null;
    }
    return this.terminalVoicings.get(phon);
}
/**
 * Looks up list of all endings and affixes for specific language.
 * <p>
 * The table stores the entries of one key as a single string in which the entries are separated by '/'.
 *
 * @param key
 *            key of the affixes/endings you want to get, i.e. superlative_short
 * @return list of endings or affixes if key is valid; an empty array (never null) otherwise, so that callers can
 *         iterate over the result without a null check
 */
private String[] getEndingsAndAffixes(String key) {
    if (key == null || this.endingsAndAffixes == null) {
        return new String[0];
    }
    String value = this.endingsAndAffixes.get(key);
    if (value == null) {
        return new String[0];
    }
    return value.split("/");
}
/**
 * Try to get transcription for ending.
 *
 * @param s
 *            The ending to be phonemised; may be null
 * @return Transcription of ending, or null if s is null or has no entry
 */
private String endingTranscriptionLookup(String s) {
    // Hashtable rejects null keys with an exception; check explicitly instead of catching it
    if (s == null || this.flectionToPhon == null) {
        return null;
    }
    return this.flectionToPhon.get(s);
}
}