package com.commafeed.backend.feed; import java.util.regex.Pattern; /** * This code is copied and simplified from GWT * https://github.com/google-web-toolkit/gwt/blob/master/user/src/com/google/gwt/i18n/shared/BidiUtils.java Released under Apache 2.0 * license, credit of it goes to Google and please use GWT wherever possible instead of this */ class EstimateDirection { private static final float RTL_DETECTION_THRESHOLD = 0.40f; private static final String LTR_CHARS = "A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u0300-\u0590\u0800-\u1FFF" + "\u2C00-\uFB1C\uFDFE-\uFE6F\uFEFD-\uFFFF"; private static final String RTL_CHARS = "\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC"; private static final Pattern WORD_SEPARATOR_RE = Pattern.compile("\\s+"); private static final Pattern FIRST_STRONG_IS_RTL_RE = Pattern.compile("^[^" + LTR_CHARS + "]*[" + RTL_CHARS + ']'); private static final Pattern IS_REQUIRED_LTR_RE = Pattern.compile("^http://.*"); private static final Pattern HAS_ANY_LTR_RE = Pattern.compile("[" + LTR_CHARS + ']'); private static boolean startsWithRtl(String str) { return FIRST_STRONG_IS_RTL_RE.matcher(str).matches(); } private static boolean hasAnyLtr(String str) { return HAS_ANY_LTR_RE.matcher(str).matches(); } static boolean isRTL(String str) { int rtlCount = 0; int total = 0; String[] tokens = WORD_SEPARATOR_RE.split(str, 20); // limit splits to 20, usually enough for (int i = 0; i < tokens.length; i++) { String token = tokens[i]; if (startsWithRtl(token)) { rtlCount++; total++; } else if (IS_REQUIRED_LTR_RE.matcher(token).matches()) { // do nothing } else if (hasAnyLtr(token)) { total++; } } return total == 0 ? false : ((float) rtlCount / total > RTL_DETECTION_THRESHOLD ? true : false); } }