/**
 * Copyright Intellectual Reserve, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gedcomx.util;

import org.gedcomx.common.TextValue;

import java.util.Collection;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Class for helping to choose the "best" locale that is available, given a preferred locale (e.g., that is being
 * used by a particular user at a web site) and a default locale (e.g., English).
 * User: Randy Wilson
 * Date: 7/31/2014
 * Time: 11:44 AM
 */
public class LocaleUtil {

  /**
   * Return the textValue whose language is closest to the localeToMatch, falling back to English.
   *
   * @param textValues - Collection of TextValues, each with a value and a language.
   * @param localeToMatch - Preferred locale to match against.
   * @return TextValue in the given collection that is closest to the given locale; or null if there are no values.
   */
  public static TextValue findClosestLocale(Collection<TextValue> textValues, Locale localeToMatch) {
    return findClosestLocale(textValues, localeToMatch, Locale.ENGLISH);
  }

  /**
   * Return the textValue whose language is closest to the localeToMatch.
   * Null entries in the collection are skipped. A TextValue without a language is treated as having the
   * root (empty) locale, so it remains a candidate but never beats a value that actually matches.
   *
   * @param textValues - Collection of TextValues, each with a value and a language.
   * @param localeToMatch - Preferred locale to match against. If null, the default locale is used instead.
   * @param defaultLocale - Default locale to match against, if none are close to the preferred locale.
   *                        If null, {@link Locale#getDefault()} is used.
   * @return TextValue in the given collection that is closest to the given locale; or null if there are no values.
   * @throws IllegalArgumentException if a TextValue carries a language string that cannot be parsed.
   */
  public static TextValue findClosestLocale(Collection<TextValue> textValues, Locale localeToMatch, Locale defaultLocale) {
    if (textValues == null) {
      return null;
    }
    // Apply the same null-defaulting as the Set<Locale> overload so both behave consistently.
    if (defaultLocale == null) {
      defaultLocale = Locale.getDefault();
    }
    if (localeToMatch == null) {
      localeToMatch = defaultLocale;
    }

    TextValue bestTextValue = null;
    Locale bestLocale = null;
    for (TextValue textValue : textValues) {
      if (textValue == null) {
        continue;
      }
      String lang = textValue.getLang();
      // The language is optional on a TextValue; an absent one is modeled as the root locale.
      Locale locale = (lang == null) ? Locale.ROOT : getSimpleLocale(lang);
      if (bestTextValue == null || isBetterLocaleMatch(localeToMatch, locale, bestLocale, defaultLocale)) {
        bestTextValue = textValue;
        bestLocale = locale;
      }
    }
    return bestTextValue;
  }

  /**
   * Return the locale in the given set that is closest to the localeToMatch, falling back to English.
   *
   * @param locales - Set of available locales.
   * @param localeToMatch - Preferred locale to match against.
   * @return Locale in the given set that is closest to the given locale; or null if there are none.
   */
  public static Locale findClosestLocale(Set<Locale> locales, Locale localeToMatch) {
    return findClosestLocale(locales, localeToMatch, Locale.ENGLISH);
  }

  /**
   * Return the locale in the given set that is closest to the localeToMatch.
   * An exact match is returned immediately. Null entries in the set are skipped.
   *
   * @param locales - Set of available locales.
   * @param localeToMatch - Preferred locale to match against. If null, the default locale is used instead.
   * @param defaultLocale - Default locale to match against, if none are close to the preferred locale.
   *                        If null, {@link Locale#getDefault()} is used.
   * @return Locale in the given set that is closest to the given locale; or null if the set is null or
   *         contains no usable locale.
   */
  public static Locale findClosestLocale(Set<Locale> locales, Locale localeToMatch, Locale defaultLocale) {
    if (locales == null) {
      return null;
    }
    if (defaultLocale == null) {
      defaultLocale = Locale.getDefault();
    }
    if (localeToMatch == null) {
      localeToMatch = defaultLocale;
    }

    Locale currentBest = null;
    for (Locale locale : locales) {
      if (locale == null) {
        continue;
      }
      if (locale.equals(localeToMatch)) {
        // Cannot do better than an exact match.
        return locale;
      }
      if (isBetterLocaleMatch(localeToMatch, locale, currentBest, defaultLocale)) {
        currentBest = locale;
      }
    }
    return currentBest;
  }

  /**
   * Decide if the current locale is a better match to the preferred one than the 'best' one found so far.
   *
   * @param preferredLocale - The preferred locale that is being matched against.
   * @param currentLocale - The current locale to check to see if it is closer to the preferred locale than the 'best' one so far.
   * @param bestLocale - The best locale (i.e., closest to 'preferredLocale') found so far, or null if this is the first one.
   * @param defaultLocale - Default locale to fall back on when neither 'currentLocale' nor 'bestLocale'
   *                        matches the preferred locale at all.
   * @return true if 'currentLocale' is closer to preferredLocale (or, if best and current are both too far, if it is
   *   closer to defaultLocale) than 'best'; false otherwise.
   */
  private static boolean isBetterLocaleMatch(Locale preferredLocale, Locale currentLocale, Locale bestLocale, Locale defaultLocale) {
    if (bestLocale == null) {
      // pick the first one you come to
      return true;
    }
    int currentMatch = matchCount(preferredLocale, currentLocale);
    int bestMatch = matchCount(preferredLocale, bestLocale);

    // if the new locale is better than current best we have
    if (currentMatch > bestMatch) {
      // take the new one
      return true;
    }

    // if neither the current best, nor the current value match the preferred locale at all...
    // (meaning that the preferred locale is something other than the default, and we don't have any string in that locale at all)
    // pick the one that is closest to the default
    return bestMatch == 0
        && !defaultLocale.equals(preferredLocale)
        && matchCount(defaultLocale, currentLocale) > matchCount(defaultLocale, bestLocale);
  }

  /**
   * Get a score for "nearness" between two locales.
   * Parts are compared in order (language, country, variant) and comparison stops at the first part that differs.
   *
   * @param locale1 the first locale
   * @param locale2 the second locale
   * @return the score (10 for each match on part, 1 for mismatched parts when either one is an empty string,
   *         so "en_US" matches "en" better than "en_CA")
   */
  private static int matchCount(Locale locale1, Locale locale2) {
    int value = 0;
    String l1 = locale1.getLanguage();
    String l2 = locale2.getLanguage();
    if (l1.equals(l2)) {
      value += 10;
      String c1 = locale1.getCountry();
      String c2 = locale2.getCountry();
      if (c1.equals(c2)) {
        value += 10;
        String v1 = locale1.getVariant();
        String v2 = locale2.getVariant();
        if (v1.equals(v2)) {
          value += 10;
        }
        else if (v1.isEmpty() || v2.isEmpty()) {
          // one side is unspecified: slightly better than two conflicting variants
          value++;
        }
      }
      else if (c1.isEmpty() || c2.isEmpty()) {
        // one side is unspecified: slightly better than two conflicting countries
        value++;
      }
    }
    return value;
  }

  // Regex for BCP47 language tags (ignoring case, and allowing "_" instead of "-", since Java does that).
  // Full regex looks like this:
  // ^
  // (
  //   (
  //     (?<language>
  //       ([A-Za-z]{2,3}
  //         (-(?<extlang>[A-Za-z]{3}
  //         (-[A-Za-z]{3}){0,2}))?
  //       )
  //       |[A-Za-z]{4}|[A-Za-z]{5,8}
  //     )
  //     (-(?<script>[A-Za-z]{4}))?
  //     (-(?<region>[A-Za-z]{2}|[0-9]{3}))?
  //     (-(?<variant>[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*
  //     (-(?<extension>[0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*
  //     (-(?<privateUse>x(-[A-Za-z0-9]{1,8})+))?
  //   )
  //   |(?<privateUse>x(-[A-Za-z0-9]{1,8})+)
  //   |(?<grandfathered>(en-GB-oed|i-ami|i-bnn|i-default|i-enochian
  //     |i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay
  //     |i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)
  //   |(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang))
  // )$
  // But that's a little beyond the scope here. We'll handle language, script and region:
  private static final Pattern bcp47 = Pattern.compile(
      "([A-Za-z]{2,3}(?:-[A-Za-z]{3}(?:-[A-Za-z]{3}){0,2})?|[A-Za-z]{4,8})" + // language
      "(?:[-_]([A-Za-z]{4}))?" + // script
      "(?:[-_]([A-Za-z]{2}|[0-9]{3}))?" + //region
      "(?:[-_]([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*" + // variants
      "(?:[-_](?:[0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*" + // extensions
      "(?:[-_](?:x(-[A-Za-z0-9]{1,8})+))?"); // private use

  /**
   * Parse the given languageString (e.g., "en-us", "en-US", "en_us", "en_US") and create a Locale from it.
   * The script, extensions and private use subtags are parsed but then ignored.
   * The language is always kept; the region is kept when present; the last variant subtag is kept only
   * when a region is also present (a variant without a region is dropped).
   * This should not be needed in Java 1.7, which has Locale.forLanguageTag(languageString), which does the same thing only better.
   *
   * @param languageString - BCP47 or Java language string ("en", "en-us", "en_US", etc.)
   * @return Locale for that language.
   * @throws NullPointerException if languageString is null.
   * @throws IllegalArgumentException if languageString does not look like a language tag.
   */
  public static Locale getSimpleLocale(String languageString) {
    if (languageString == null) {
      throw new NullPointerException("languageString must not be null");
    }
    Matcher m = bcp47.matcher(languageString);
    if (!m.matches()) {
      throw new IllegalArgumentException("Unrecognized language tag: '" + languageString + "'");
    }

    // Group 2 (script) is deliberately not read: Java 1.6 doesn't have a constructor that supports script...
    String language = m.group(1);
    String region = m.group(3);
    String variant = m.group(4); // the variants group repeats, so this is the last variant matched

    if (region == null) {
      return new Locale(language);
    }
    if (variant == null) {
      return new Locale(language, region);
    }
    return new Locale(language, region, variant);
  }
}