/* * Copyright 2012 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. **/ package de.tudarmstadt.ukp.dkpro.core.api.phonetics.util; import java.util.Collections; import java.util.HashMap; import java.util.Map; public class SoundUtils { public static int differenceEncoded(String es1, String es2) { if (es1 == null || es2 == null) { return 0; } int lengthToMatch = Math.min(es1.length(), es2.length()); int diff = 0; for (int i = 0; i < lengthToMatch; i++) { if (es1.charAt(i) == es2.charAt(i)) { diff++; } } return diff; } /** * Converts an Arpabet phonemic transcription to an IPA phonemic * transcription. Note that, somewhat unusually, the stress symbol will * precede the vowel rather than the syllable. This is because Arpabet does * not mark syllable boundaries. * * @param s * The Darpabet phonemic transcription to convert. * @return The IPA equivalent of s. * @throws IllegalArgumentException if a phoneme is unknown. */ public static String arpabetToIPA(String s) throws IllegalArgumentException { String[] arpaPhonemes = s.trim().split("[ \\t]+"); StringBuffer ipaPhonemes = new StringBuffer(s.length()); for (String arpaPhoneme : arpaPhonemes) { char stressChar = arpaPhoneme.charAt(arpaPhoneme.length() - 1); if (stressChar == '0' || stressChar == '1' || stressChar == '2') { arpaPhoneme = arpaPhoneme.substring(0, arpaPhoneme.length() - 1); ipaPhonemes.append(arpabetMap.get(Character.toString(stressChar))); } String ipaPhoneme = arpabetMap.get(arpaPhoneme); if (ipaPhoneme == null) { throw new IllegalArgumentException(); } ipaPhonemes.append(ipaPhoneme); } return ipaPhonemes.toString(); } private static final Map<String, String> arpabetMap; static { Map<String, String> aMap = new HashMap<String, String>(); aMap.put("0", ""); aMap.put("1", "ˈ"); aMap.put("2", "ˌ"); aMap.put("AA", "ɑ"); aMap.put("AE", "æ"); aMap.put("AH", "ʌ"); aMap.put("AO", "ɔ"); aMap.put("AW", "aʊ"); aMap.put("AX", "ə"); aMap.put("AY", "aɪ"); aMap.put("B", "b"); aMap.put("CH", "tʃ"); aMap.put("D", "d"); aMap.put("DH", "ð"); aMap.put("DX", "?"); aMap.put("EH", "ɛ"); aMap.put("ER", "ɚ"); aMap.put("EY", "eɪ"); aMap.put("F", "f"); aMap.put("G", "?"); aMap.put("HH", "h"); aMap.put("IH", "ɪ"); aMap.put("IY", "i"); aMap.put("JH", "dʒ"); aMap.put("K", "k"); aMap.put("L", "l"); aMap.put("M", "m"); aMap.put("NG", "ŋ"); aMap.put("N", "n"); aMap.put("OW", "oʊ"); aMap.put("OY", "ɔɪ"); aMap.put("P", "p"); aMap.put("R", "ɹ"); aMap.put("SH", "ʃ"); aMap.put("S", "s"); aMap.put("TH", "θ"); aMap.put("T", "t"); aMap.put("UH", "ʊ"); aMap.put("UW", "u"); aMap.put("V", "v"); aMap.put("W", "w"); aMap.put("Y", "j"); aMap.put("ZH", "ʒ"); aMap.put("Z", "z"); arpabetMap = Collections.unmodifiableMap(aMap); } }