/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is part of dcm4che, an implementation of DICOM(TM) in
* Java(TM), hosted at https://github.com/gunterze/dcm4che.
*
* The Initial Developer of the Original Code is
* Agfa Healthcare.
* Portions created by the Initial Developer are Copyright (C) 2011
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* See listed authors below.
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
package org.dcm4che3.soundex;
/**
 * {@link FuzzyStr} implementation that encodes a word with a variant of the
 * Metaphone phonetic algorithm, so that words which sound alike in English
 * map to the same code.
 * <p>
 * The class has no fields; every call to {@link #toFuzzy(String)} works
 * only on its argument and local buffers.
 *
 * @author Gunter Zeilinger <gunterze@gmail.com>
 */
public class Metaphone implements FuzzyStr {

    /**
     * Computes the Metaphone code of the given word.
     * <p>
     * The input is upper-cased and scanned left to right with a sliding
     * window of the previous, the current and the next two characters.
     * Characters other than {@code A}-{@code Z} do not contribute to the
     * code. The digit {@code '0'} in the result stands for the "th" sound.
     *
     * @param s the word to encode; may be {@code null}
     * @return the Metaphone code, or an empty string if {@code s} is
     *         {@code null} or empty
     */
    @Override
    public String toFuzzy(String s) {
        if (s == null || s.length() == 0) {
            return "";
        }

        // Locale.ROOT keeps the mapping locale independent; with the default
        // locale 'i' would become U+0130 under Turkish/Azeri settings and
        // silently drop out of the code.
        char[] in = s.toUpperCase(java.util.Locale.ROOT).toCharArray();

        // Every input letter yields at most one output character, except
        // 'X' which expands to "KS" - reserve one extra slot per 'X'.
        int countX = 0;
        for (char c : in) {
            if (c == 'X') {
                countX++;
            }
        }
        char[] out = new char[in.length + countX];

        int i = 0;
        int j = 0;
        char prev = 0;
        char cur = 0;
        char next1 = in[0];
        char next2 = in.length > 1 ? in[1] : 0;

        if (next2 == 'N' && (next1 == 'K' || next1 == 'G' || next1 == 'P')
                || next1 == 'A' && next2 == 'E'
                || next1 == 'W' && next2 == 'R') {
            // Initial kn-, gn-, pn-, ae- or wr- -> drop first letter
            next1 = next2;
            next2 = in.length > 2 ? in[2] : 0;
            i++;
        } else if (next1 == 'X') {
            // Initial x- -> change to "s"
            next1 = 'S';
        } else if (next1 == 'W' && next2 == 'H') {
            // Initial wh- -> change to "w": keep 'W' as the next character
            // to process and step over the 'H'
            next2 = in.length > 2 ? in[2] : 0;
            i++;
        }

        for (; i < in.length; i++) {
            // Slide the window; from here on cur corresponds to in[i]
            prev = cur;
            cur = next1;
            next1 = next2;
            next2 = i + 2 < in.length ? in[i + 2] : 0;

            // Doubled letters except "c" -> drop 2nd letter.
            if (cur == prev && cur != 'C') {
                continue;
            }

            switch (cur) {
            // Vowels are only kept when they are the first letter.
            case 'A':
            case 'E':
            case 'I':
            case 'O':
            case 'U':
                if (j == 0) {
                    out[j++] = cur;
                }
                break;
            // B -> B unless at the end of a word after "m" as in "dumb"
            case 'B':
                if (!(next1 == 0 && prev == 'M')) {
                    out[j++] = cur;
                }
                break;
            // C -> X (sh) if -cia- or -ch-
            //      S if -ci-, -ce- or -cy-
            //      SILENT if "-sci-", "-sce-", or "-scy-"
            //      K otherwise, including -sch-
            case 'C':
                if (softener(next1)) {
                    if (prev != 'S') {
                        out[j++] = next1 == 'I' && next2 == 'A' ? 'X' : 'S';
                    }
                } else {
                    out[j++] = next1 == 'H' && prev != 'S' ? 'X' : 'K';
                }
                break;
            // D -> J if in -dge-, -dgy- or -dgi-
            //      T otherwise
            case 'D':
                out[j++] = next1 == 'G' && softener(next2) ? 'J' : 'T';
                break;
            // F, J, L, M, N, R are kept unchanged
            case 'F':
            case 'J':
            case 'L':
            case 'M':
            case 'N':
            case 'R':
                out[j++] = cur;
                break;
            // G -> silent if in -gh- and not at end or before a vowel
            //      silent in trailing -gn or -gned
            //      silent after "d" before i, e or y (already coded as J)
            //      J if before i or e or y
            //      K otherwise (a double gg reaches here only for its
            //      first g, whose next letter is 'G', hence K)
            case 'G': {
                boolean silentGh =
                        next1 == 'H' && next2 != 0 && !vowel(next2);
                // "-gned" must end the word: the 'D' sits three positions
                // after the 'G' and is the last character.
                boolean silentGn = next1 == 'N'
                        && (next2 == 0
                            || next2 == 'E'
                                && in.length == i + 4
                                && in[i + 3] == 'D');
                boolean partOfDg = prev == 'D' && softener(next1);
                if (!(silentGh || silentGn || partOfDg)) {
                    out[j++] = softener(next1) ? 'J' : 'K';
                }
                break;
            }
            // H -> silent if after vowel and no vowel follows
            //      or in "-ch-", "-sh-", "-ph-", "-th-", "-gh-"
            //      H otherwise
            case 'H': {
                boolean silentH = vowel(prev)
                        ? !vowel(next1)
                        : prev == 'C' || prev == 'S' || prev == 'P'
                            || prev == 'T' || prev == 'G';
                if (!silentH) {
                    out[j++] = cur;
                }
                break;
            }
            // K -> silent if after "c"
            //      K otherwise
            case 'K':
                if (prev != 'C') {
                    out[j++] = cur;
                }
                break;
            // P -> F if before "h"
            //      P otherwise
            case 'P':
                out[j++] = next1 == 'H' ? 'F' : 'P';
                break;
            // Q -> K
            case 'Q':
                out[j++] = 'K';
                break;
            // S -> X (sh) if before "h" or in -sio- or -sia-
            //      S otherwise
            case 'S':
                out[j++] = next1 == 'H'
                        || next1 == 'I' && (next2 == 'O' || next2 == 'A')
                        ? 'X' : 'S';
                break;
            // T -> silent if in -tch-
            //      X (sh) if -tia- or -tio-
            //      0 (th) if before "h"
            //      T otherwise
            case 'T':
                // Both letters must match: a lone following 'C', or an 'H'
                // two letters ahead, does not silence the 'T'.
                if (!(next1 == 'C' && next2 == 'H')) {
                    out[j++] = next1 == 'I' && (next2 == 'A' || next2 == 'O')
                            ? 'X'
                            : next1 == 'H' ? '0' : 'T';
                }
                break;
            // V -> F
            case 'V':
                out[j++] = 'F';
                break;
            // W, Y -> kept if followed by a vowel, silent otherwise
            case 'W':
            case 'Y':
                if (vowel(next1)) {
                    out[j++] = cur;
                }
                break;
            // X -> KS
            case 'X':
                out[j++] = 'K';
                out[j++] = 'S';
                break;
            // Z -> S
            case 'Z':
                out[j++] = 'S';
                break;
            // Anything else (digits, punctuation, non-ASCII letters, ...)
            // does not contribute to the code.
            default:
                break;
            }
        }
        return new String(out, 0, j);
    }

    /**
     * Tells whether the given upper-case character is one of A, E, I, O, U.
     */
    private static boolean vowel(char ch) {
        return ch == 'A' || ch == 'E' || ch == 'I' || ch == 'O' || ch == 'U';
    }

    /**
     * Tells whether the given upper-case character is I, E or Y, the
     * letters that soften a preceding C or G.
     */
    private static boolean softener(char ch) {
        return ch == 'I' || ch == 'E' || ch == 'Y';
    }

    /**
     * Prints the Metaphone code of each command-line argument to standard
     * output, one per line.
     *
     * @param args the words to encode
     */
    public static void main(String[] args) {
        Metaphone inst = new Metaphone();
        for (String arg : args) {
            System.out.println(inst.toFuzzy(arg));
        }
    }
}