// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.windowkey; import java.util.StringTokenizer; import org.talend.dataquality.record.linkage.utils.AsciiUtils; /** * FIXME this class should not provide static utilities. * * FIXME all internal strings should be made constant. * */ public class AlgoBox { private static final FingerprintKeyer FINGERPRINTKEYER = new FingerprintKeyer(); private static final NGramFingerprintKeyer NGRAMKEYER = new NGramFingerprintKeyer(); private static final org.apache.commons.codec.language.Soundex soundex = new org.apache.commons.codec.language.Soundex(); private static final org.apache.commons.codec.language.DoubleMetaphone doublemetaphone = new org.apache.commons.codec.language.DoubleMetaphone(); private static final org.apache.commons.codec.language.Metaphone metaphone = new org.apache.commons.codec.language.Metaphone(); private static final org.apache.commons.codec.language.ColognePhonetic colognePhonetic = new org.apache.commons.codec.language.ColognePhonetic(); /** * DOC ytao Comment method "main". * * @param args */ public static void main(String[] args) { String sInput = null; // key algos (notice that it is incorrect to return null, since the operation +) System.out.println("first_Char_EW:" + AlgoBox.first_Char_EW(sInput) + "-"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("first_N_Char_EW:" + AlgoBox.first_N_Char_EW(sInput, 2) + "-"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("first_N_Char:" + AlgoBox.first_N_Char(sInput, 5) + "-"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("last_N_Char:" + AlgoBox.last_N_Char(sInput, 3) + "-"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("first_N_Consonants:" + AlgoBox.first_N_Consonants(sInput, 2000) + "-"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("first_N_Vowels:" + AlgoBox.first_N_Vowels(sInput, 1000000) + "-"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("add_Left_Char:" + AlgoBox.add_Left_Char(sInput, "<") + "-"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ System.out.println("pick_Char:" + AlgoBox.pick_Char(sInput, "1-2;40;0-5") + "-"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ System.out.println("subStr:" + AlgoBox.subStr(sInput, "1;100") + "-"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ System.out.println("metaphone:" + AlgoBox.metaphone(sInput) + "-"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("soundex:" + AlgoBox.soundex(sInput) + "-"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("doublemetaphone:" + AlgoBox.doublemetaphone(sInput) + "-"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("exact:" + AlgoBox.exact(sInput) + "-"); //$NON-NLS-1$ //$NON-NLS-2$ // optional algos (notice that it is no pbm to return null) System.out.println("removeDiacriticalMarks:" + AlgoBox.removeDiacriticalMarks(sInput) + "-"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("removeDMAndLowerCase: " + AlgoBox.removeDMAndLowerCase(sInput)); //$NON-NLS-1$ System.out.println("removeDMAndUpperCase: " + AlgoBox.removeDMAndUpperCase(sInput)); //$NON-NLS-1$ System.out.println("useDefault: " + AlgoBox.useDefault(sInput, "ytao")); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("add_Right_Char:" + AlgoBox.add_Right_Char(sInput, ">") + "-"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ System.out.println("lowerCase: " + AlgoBox.lowerCase(sInput)); //$NON-NLS-1$ System.out.println("upperCase: " + AlgoBox.upperCase(sInput)); //$NON-NLS-1$ } // Pick characters public static String pick_Char(String sInput, String pattern) { if (sInput == null || "".equals(sInput.trim())) { //$NON-NLS-1$ return ""; //$NON-NLS-1$ } if (pattern == null || "".equals(pattern.trim())) { //$NON-NLS-1$ return ""; //$NON-NLS-1$ } String d_pattern = "^[0-9--;]*"; //$NON-NLS-1$ if (!pattern.matches(d_pattern)) { return ""; //$NON-NLS-1$ } StringBuffer sb = new StringBuffer(); String[] arr_1 = pattern.split(";"); //$NON-NLS-1$ for (String valueOf_arr_1 : arr_1) { if (!"".equals(valueOf_arr_1)) { //$NON-NLS-1$ String[] arr_2 = valueOf_arr_1.split("-"); //$NON-NLS-1$ int len_arr_2 = arr_2.length; if (len_arr_2 == 2) { if ("".equals(arr_2[0]) || "".equals(arr_2[1])) { //$NON-NLS-1$ //$NON-NLS-2$ ; } else { sb.append(subStr(sInput, arr_2[0] + ";" + arr_2[1])); //$NON-NLS-1$ } } else if (len_arr_2 == 1) { if (Integer.parseInt(arr_2[0]) < sInput.length()) { sb.append(sInput.charAt(Integer.parseInt(arr_2[0]))); } } else { ; } } } return sb.toString(); } // First N vowels of the string public static String first_N_Consonants(String sInput, int nb) { if (sInput == null || "".equals(sInput.trim())) { //$NON-NLS-1$ return ""; //$NON-NLS-1$ } String d_pattern = "[a-zA-Z&&[^aeiouAEIOU]]"; //$NON-NLS-1$ StringBuffer sb = new StringBuffer(); int s_len = sInput.length(); String s = null; for (int i = 0; i < s_len; i++) { s = sInput.substring(i, i + 1); if (!" ".equals(s) && s.matches(d_pattern) && ((--nb) >= 0)) { //$NON-NLS-1$ sb.append(s); } } return sb.toString(); } // First N consonants of the string public static String first_N_Vowels(String sInput, int nb) { if (sInput == null || "".equals(sInput.trim())) { //$NON-NLS-1$ return ""; //$NON-NLS-1$ } String d_pattern = "[aeiouAEIOU]"; //$NON-NLS-1$ StringBuffer sb = new StringBuffer(); int s_len = sInput.length(); String s = null; for (int i = 0; i < s_len; i++) { s = sInput.substring(i, i + 1); if (!" ".equals(s) && s.matches(d_pattern) && ((--nb) >= 0)) { //$NON-NLS-1$ sb.append(s); } } return sb.toString(); } // substring public static String subStr(String sInput, String pattern) { if (sInput == null || "".equals(sInput)) { //$NON-NLS-1$ return ""; //$NON-NLS-1$ } if (pattern == null) { return ""; //$NON-NLS-1$ } else { String d_pattern = "^[0-9]*[;][0-9]*"; //$NON-NLS-1$ if (pattern.matches(d_pattern)) { int beginIndex = Integer.parseInt(pattern.substring(0, pattern.indexOf(";"))); //$NON-NLS-1$ int endIndex = Integer.parseInt(pattern.substring(pattern.indexOf(";") + 1)); //$NON-NLS-1$ if (sInput.length() < endIndex) { endIndex = sInput.length(); } if (beginIndex <= endIndex) { return sInput.substring(beginIndex, endIndex); } } } return ""; //$NON-NLS-1$ } // first N characters of the string public static String first_N_Char(String sInput, int nb) { if (sInput == null || "".equals(sInput)) { //$NON-NLS-1$ return ""; //$NON-NLS-1$ } if (nb < 0) { return sInput; } if (sInput.length() < nb) { nb = sInput.length(); } return sInput.substring(0, nb); } // last N characters of the string public static String last_N_Char(String sInput, int nb) { if (sInput == null || "".equals(sInput)) { //$NON-NLS-1$ return ""; //$NON-NLS-1$ } int s_len = sInput.length(); if (s_len < nb) { nb = s_len; } return sInput.substring(s_len - nb); } // N first characters of each word public static String first_N_Char_EW(String sInput, int nb) { if (sInput == null || "".equals(sInput)) { //$NON-NLS-1$ return ""; //$NON-NLS-1$ } StringBuffer sb = new StringBuffer(); StringTokenizer tok = new StringTokenizer(sInput); while (tok.hasMoreTokens()) { String word = tok.nextToken(); int len_word = word.length(); for (int i = 0; i < nb && i < len_word; i++) { sb.append(word.charAt(i)); } } return sb.toString(); } // First character of each word public static String first_Char_EW(String sInput) { if (sInput == null || "".equals(sInput)) { //$NON-NLS-1$ return ""; //$NON-NLS-1$ } StringBuffer sb = new StringBuffer(); StringTokenizer tok = new StringTokenizer(sInput); while (tok.hasMoreTokens()) { String word = tok.nextToken(); sb.append(word.charAt(0)); } return sb.toString(); } public static String soundex(String sInput) { if (sInput == null) { return ""; //$NON-NLS-1$ } return soundex.soundex(sInput); } public static String doublemetaphone(String sInput) { if (sInput == null) { return ""; //$NON-NLS-1$ } return doublemetaphone.doubleMetaphone(sInput); } public static String metaphone(String sInput) { if (sInput == null) { return ""; //$NON-NLS-1$ } return metaphone.metaphone(sInput); } /*-----------------------optional algo---------------------*/ // Add left position character public static String add_Left_Char(String sInput, String position) { if (position == null || "".equals(position)) { //$NON-NLS-1$ return sInput; } if (sInput == null) { sInput = ""; //$NON-NLS-1$ } return position + sInput; } // Add right position character public static String add_Right_Char(String sInput, String position) { if (position == null || "".equals(position)) { //$NON-NLS-1$ return sInput; } if (sInput == null) { sInput = ""; //$NON-NLS-1$ } return sInput + position; } // Remove diacritical marks public static String removeDiacriticalMarks(String sInput) { if (sInput == null) { return null; } return AsciiUtils.removeDiacriticalMarks(sInput); } public static String exact(String sInput) { // must set it to "" when it is null. otherwise use + to contact will get "null" return sInput == null ? "" : sInput; //$NON-NLS-1$ } public static String useDefault(String sInput, String insteadOf) { if (sInput == null || "".equals(sInput)) { //$NON-NLS-1$ return insteadOf; } else { return sInput; } } public static String lowerCase(String sInput) { if (sInput == null) { return null; } return sInput.toLowerCase(); } public static String upperCase(String sInput) { if (sInput == null) { return null; } return sInput.toUpperCase(); } public static String removeDMAndLowerCase(String sInput) { if (sInput == null) { return null; } return lowerCase(removeDiacriticalMarks(sInput)); } public static String removeDMAndUpperCase(String sInput) { if (sInput == null) { return null; } return upperCase(removeDiacriticalMarks(sInput)); } public static String fingerPrintKey(String sInput) { if (sInput == null) { return null; } return FINGERPRINTKEYER.key(sInput); } public static String nGramKey(String sInput) { if (sInput == null) { return null; } return NGRAMKEYER.key(sInput); } public static String colognePhonetic(String sInput) { if (sInput == null) { return null; } return colognePhonetic.colognePhonetic(sInput); } }