Java Examples for java.text.Normalizer

The following Java examples will help you understand how java.text.Normalizer is used. These source code samples are taken from different open source projects.

Example 1
Project: QMAClone-master  File: Normalizer.java View source code
/**
 * Normalizes a string for comparison: applies Unicode NFKC normalization,
 * folds any remaining full-width ASCII variants (U+FF01..U+FF5E) to their
 * half-width counterparts and lower-cases the result.
 *
 * @param s the text to normalize; must not be {@code null}
 * @return the normalized, lower-cased text
 */
public static String normalize(String s) {
    s = java.text.Normalizer.normalize(s, Form.NFKC);
    final char[] charArray = s.toCharArray();
    for (int i = 0; i < charArray.length; ++i) {
        final char c = charArray[i];
        // Full-width forms sit exactly 0xFEE0 above the ASCII characters they mirror.
        if (0xFF01 <= c && c <= 0xFF5E) {
            charArray[i] = (char) (c - 0xFEE0);
        }
    }
    // Locale.ROOT keeps the result independent of the JVM default locale
    // (a Turkish default would otherwise turn 'I' into a dotless i).
    return new String(charArray).toLowerCase(java.util.Locale.ROOT);
}
Example 2
Project: QueryHighlighter-master  File: Normalizer.java View source code
/**
 * Builds the search form of a term: the term is NFD-decomposed, every match of
 * {@code PATTERN_DIACRITICS} is removed, every match of
 * {@code PATTERN_NON_LETTER_DIGIT_TO_SPACES} becomes a single space, and the
 * result is lower-cased with the root locale.
 *
 * @param searchTerm the term to convert, may be {@code null}
 * @return the converted term, or {@code null} when {@code searchTerm} is {@code null}
 */
public static final String forSearch(CharSequence searchTerm) {
    if (searchTerm == null) {
        return null;
    }
    final String decomposed = java.text.Normalizer.normalize(searchTerm, java.text.Normalizer.Form.NFD);
    final String withoutDiacritics = PATTERN_DIACRITICS.matcher(decomposed).replaceAll("");
    final String spaced = PATTERN_NON_LETTER_DIGIT_TO_SPACES.matcher(withoutDiacritics).replaceAll(" ");
    return spaced.toLowerCase(Locale.ROOT);
}
Example 3
Project: osm-address-extractor-master  File: MapUtils.java View source code
/**
 * Converts free text into a lower-case ASCII identifier.
 *
 * <p>The text is NFKD-decomposed, combining diacritical marks, modifier letters
 * and modifier symbols are removed, every character that still is not ASCII is
 * replaced by {@code '?'}, the result is lower-cased and each whitespace
 * character becomes {@code '-'}.
 *
 * @param val the text to convert; must not be {@code null}
 * @return the identifier derived from {@code val}
 * @throws NullPointerException if {@code val} is {@code null}
 */
public static String stringToId(String val) {
    // Fail with an ordinary exception instead of terminating the whole JVM.
    java.util.Objects.requireNonNull(val, "val");
    final String regex = "[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+";
    final String stripped = Normalizer.normalize(val, Normalizer.Form.NFKD).replaceAll(regex, "");
    // The Charset overloads cannot throw UnsupportedEncodingException.
    final java.nio.charset.Charset ascii = java.nio.charset.StandardCharsets.US_ASCII;
    final String asciiOnly = new String(stripped.getBytes(ascii), ascii);
    // Locale.ROOT: identifiers must not depend on the default locale.
    return asciiOnly.toLowerCase(java.util.Locale.ROOT).replaceAll("\\s", "-");
}
Example 4
Project: lombok-pg-master  File: IParameterSanitizer.java View source code
/**
 * Builds the declaration of a final local named {@code newArgumentName} whose
 * initializer is a call to {@code java.text.Normalizer.normalize} with
 * {@code argumentName} and the {@code java.text.Normalizer.Form} constant read
 * from the {@code Sanitize.Normalize} annotation.
 */
@Override
public Statement<?> getStatementFor(final Object argumentType, final String argumentName, final String newArgumentName, final java.lang.annotation.Annotation annotation) {
    final Sanitize.Normalize normalizeAnnotation = (Sanitize.Normalize) annotation;
    final String formReference = String.format("java.text.Normalizer.Form.%s", normalizeAnnotation.value().name());
    return LocalDecl(Type(argumentType), newArgumentName)
            .makeFinal()
            .withInitialization(Call(Name("java.text.Normalizer"), "normalize")
                    .withArgument(Name(argumentName))
                    .withArgument(Name(formReference)));
}
Example 5
Project: longneck-core-master  File: UnicodeNormalize.java View source code
/**
 * Replaces the value of each field named in {@code applyTo} with its Unicode
 * normalization in the configured {@code form}. Fields whose value is
 * {@code null} or empty are left untouched.
 */
@Override
public void apply(Record record, VariableSpace parentScope) {
    for (String fieldName : applyTo) {
        final String current = BlockUtils.getValue(fieldName, record, parentScope);
        if (current != null && !"".equals(current)) {
            BlockUtils.setValue(fieldName, Normalizer.normalize(current, form), record, parentScope);
        }
    }
}
Example 6
Project: memoryfilesystem-master  File: WindowsFileSystemComptiblityTest.java View source code
/**
 * Creates a file under the NFD form of a name and asserts that its file name,
 * absolute path and real path all report a two-character name, that only the
 * NFD path exists, and that the two paths are not equal. The created file is
 * always deleted afterwards.
 */
@Test
@Ignore
public void windowsNormalization() throws IOException {
    FileSystem fs = this.getFileSystem();
    String originalName = "Ä";
    String decomposedName = Normalizer.normalize(originalName, Form.NFD);
    Path originalPath = fs.getPath(originalName);
    Path decomposedPath = fs.getPath(decomposedName);
    Path created = null;
    try {
        created = Files.createFile(decomposedPath);
        assertEquals(2, created.getFileName().toString().length());
        assertEquals(2, created.toAbsolutePath().getFileName().toString().length());
        // REVIEW ??
        assertEquals(2, created.toRealPath().getFileName().toString().length());
        assertThat(originalPath, not(exists()));
        assertThat(decomposedPath, exists());
        // Disabled checks kept from the original test:
        //assertTrue(Files.isSameFile(aPath, nPath));
        //assertTrue(Files.isSameFile(nPath, aPath));
        assertThat(originalPath, not(equalTo(decomposedPath)));
    } finally {
        if (created != null) {
            Files.delete(created);
        }
    }
}
Example 7
Project: voj-master  File: SlugifyUtils.java View source code
/**
 * Gets the slug of a string.
 *
 * <p>The string is trimmed, every match of {@code WHITESPACE} is replaced by
 * {@code "-"}, the result is URL-encoded as UTF-8, NFD-normalized and finally
 * lower-cased.
 *
 * @param str the string to build the slug for
 * @return the slug of the string, or an empty string when {@code str} is {@code null}
 */
public static String getSlug(String str) {
    if (str == null) {
        return "";
    }
    // Rid of White Spaces
    String noWhiteSpace = WHITESPACE.matcher(str.trim()).replaceAll("-");
    // Processing Non-ASCII Characters
    try {
        noWhiteSpace = URLEncoder.encode(noWhiteSpace, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // UTF-8 is a charset every Java platform must provide, so this cannot
        // happen in practice; fail loudly instead of returning an unencoded slug.
        throw new IllegalStateException("UTF-8 encoding is not available", e);
    }
    // Slugify String
    String normalized = Normalizer.normalize(noWhiteSpace, Form.NFD);
    // Locale.ROOT keeps the slug independent of the JVM default locale.
    return normalized.toLowerCase(java.util.Locale.ROOT);
}
Example 8
Project: GT-FHIR-master  File: QueryUtilities.java View source code
public static String normalizeString(String theString) {
    char[] out = new char[theString.length()];
    theString = Normalizer.normalize(theString, Normalizer.Form.NFD);
    int j = 0;
    for (int i = 0, n = theString.length(); i < n; ++i) {
        char c = theString.charAt(i);
        if (c <= '') {
            out[j++] = c;
        }
    }
    //		return new String(out).toUpperCase();
    return new String(out);
}
Example 9
Project: invoicexpress-android-master  File: Test.java View source code
/**
 * Prints two strings after NFD normalization with all non-ASCII characters
 * removed, followed by three {@link String#compareTo} results.
 */
public static void compareStrings() {
    final String nonAscii = "[^\\p{ASCII}]";
    final String label = "Resultado da comparação ";

    final String first = Normalizer.normalize("É", Normalizer.Form.NFD);
    System.out.println(first.replaceAll(nonAscii, ""));
    final String second = Normalizer.normalize("E", Normalizer.Form.NFD);
    System.out.println(second.replaceAll(nonAscii, ""));

    System.out.println(label + "E".compareTo("É"));
    System.out.println(label + "T".compareTo("É"));
    System.out.println(label + "É".compareTo("Z"));
}
Example 10
Project: smile-master  File: SimpleNormalizer.java View source code
/**
 * Trims the text, brings it into NFKC form if it is not already, then applies
 * four pattern replacements in order: {@code WHITESPACE} matches become a
 * space, {@code CONTROL_FORMAT_CHARS} matches are removed,
 * {@code DOUBLE_QUOTES} matches become {@code "} and {@code SINGLE_QUOTES}
 * matches become {@code '}.
 */
@Override
public String normalize(String text) {
    final java.text.Normalizer.Form nfkc = java.text.Normalizer.Form.NFKC;
    String result = text.trim();
    // Skip the normalization call when the text is already in NFKC form.
    if (!java.text.Normalizer.isNormalized(result, nfkc)) {
        result = java.text.Normalizer.normalize(result, nfkc);
    }
    result = WHITESPACE.matcher(result).replaceAll(" ");
    result = CONTROL_FORMAT_CHARS.matcher(result).replaceAll("");
    result = DOUBLE_QUOTES.matcher(result).replaceAll("\"");
    return SINGLE_QUOTES.matcher(result).replaceAll("'");
}
Example 11
Project: zamiaDroid-master  File: StringASCIIFormat.java View source code
/**
 * Converts text to plain ASCII.
 *
 * <p>The input is NFKD-decomposed, combining diacritical marks, modifier
 * letters and modifier symbols are removed, and every character that still is
 * not ASCII is replaced by {@code '?'}.
 *
 * @param input the text to convert; must not be {@code null}
 * @return the ASCII rendering of {@code input}
 */
public static String toASCII(String input) {
    final String regex = "[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+";
    final String stripped = Normalizer.normalize(input, Normalizer.Form.NFKD).replaceAll(regex, "");
    // The Charset overloads cannot throw, so no failure path that would
    // silently return an empty string is needed.
    final java.nio.charset.Charset ascii = java.nio.charset.StandardCharsets.US_ASCII;
    return new String(stripped.getBytes(ascii), ascii);
}
Example 12
Project: Dolomite-master  File: BaseController.java View source code
/**
 * Turns free text into a key made only of {@code a-z}, {@code 0-9} and
 * {@code _}.
 *
 * @param original the text to convert; must not be {@code null}
 * @return the normalized key
 */
protected static String normalize(String original) {
    // to lower case (Locale.ROOT so the key does not depend on the default locale)
    String str = original.toLowerCase(java.util.Locale.ROOT);
    // replace multiple spaces with one space
    str = str.replaceAll(" +", " ");
    // drop initial or final spaces
    str = str.trim();
    // normalize and remove accents (diacritics)
    str = java.text.Normalizer.normalize(str, java.text.Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    // replace some separators with underscore
    str = str.replaceAll("[- .']", "_");
    // keep only alphanumeric characters and underscores; inside a character
    // class '(', '|' and ')' are literals, so they must not be listed here
    str = str.replaceAll("[^_a-z0-9]", "");
    return str;
}
Example 13
Project: completely-master  File: DiacriticsTransformer.java View source code
/**
 * Returns, for every input text, its NFD decomposition with all characters of
 * type {@link Character#NON_SPACING_MARK} removed. Both the collection and each
 * element are passed through {@code checkPointer} before use.
 */
@Override
public Collection<String> apply(Collection<String> input) {
    checkPointer(input != null);
    List<String> stripped = new LinkedList<>();
    for (String text : input) {
        checkPointer(text != null);
        StringBuilder kept = new StringBuilder();
        for (char ch : Normalizer.normalize(text, Normalizer.Form.NFD).toCharArray()) {
            if (Character.getType(ch) == Character.NON_SPACING_MARK) {
                continue;
            }
            kept.append(ch);
        }
        stripped.add(kept.toString());
    }
    return stripped;
}
Example 14
Project: android-sdk-sources-for-api-level-23-master  File: NormalizerTest.java View source code
/**
 * @tests java.text.Normalizer#isNormalized(CharSequence, Form)
 *
 * The inputs are written as Unicode escapes: the composed and decomposed
 * spellings (and the ligature versus its three plain letters) look identical
 * as literal characters and are easily altered by editors or re-encoding.
 */
public void test_isNormalized() throws Exception {
    // Precomposed A with acute (U+00C1): composed forms only.
    String src = "\u00c1";
    assertTrue(Normalizer.isNormalized(src, Form.NFC));
    assertFalse(Normalizer.isNormalized(src, Form.NFD));
    assertTrue(Normalizer.isNormalized(src, Form.NFKC));
    assertFalse(Normalizer.isNormalized(src, Form.NFKD));
    // A followed by combining acute (U+0041 U+0301): decomposed forms only.
    src = "\u0041\u0301";
    assertFalse(Normalizer.isNormalized(src, Form.NFC));
    assertTrue(Normalizer.isNormalized(src, Form.NFD));
    assertFalse(Normalizer.isNormalized(src, Form.NFKC));
    assertTrue(Normalizer.isNormalized(src, Form.NFKD));
    // The ffi ligature (U+FB03): canonical forms only.
    src = "\ufb03";
    assertTrue(Normalizer.isNormalized(src, Form.NFC));
    assertTrue(Normalizer.isNormalized(src, Form.NFD));
    assertFalse(Normalizer.isNormalized(src, Form.NFKC));
    assertFalse(Normalizer.isNormalized(src, Form.NFKD));
    // The three plain letters f, f, i: normalized in every form.
    src = "\u0066\u0066\u0069";
    assertTrue(Normalizer.isNormalized(src, Form.NFC));
    assertTrue(Normalizer.isNormalized(src, Form.NFD));
    assertTrue(Normalizer.isNormalized(src, Form.NFKC));
    assertTrue(Normalizer.isNormalized(src, Form.NFKD));
    // The empty string is normalized in every form.
    src = "";
    assertTrue(Normalizer.isNormalized(src, Form.NFC));
    assertTrue(Normalizer.isNormalized(src, Form.NFD));
    assertTrue(Normalizer.isNormalized(src, Form.NFKC));
    assertTrue(Normalizer.isNormalized(src, Form.NFKD));
}
Example 15
Project: android_libcore-master  File: NativeNormalizer.java View source code
/**
 * Maps a {@code Form} to the matching integer mode of ICU's
 * UNormalizationMode (see "unicode/unorm.h", stable API since ICU 2.0).
 *
 * @param form the normalization form
 * @return 2 for NFD, 3 for NFKD, 4 for NFC, 5 for NFKC
 */
private static int toUNormalizationMode(Form form) {
    final int mode;
    switch (form) {
        case NFD:
            mode = 2;
            break;
        case NFKD:
            mode = 3;
            break;
        case NFC:
            mode = 4;
            break;
        case NFKC:
            mode = 5;
            break;
        default:
            throw new AssertionError("unknown Normalizer.Form " + form);
    }
    return mode;
}
Example 16
Project: ARTPart-master  File: Main.java View source code
/**
 * Checks that NFD turns the composed sample into the decomposed one and that
 * NFC does the reverse. Mismatches are reported on {@code System.err}; a final
 * status line is always printed on {@code System.out}.
 */
static void testNormalizer() {
    // Written as Unicode escapes: the two spellings render identically, so
    // literal characters can silently end up in the same form.
    String composed = "Bl\u00c1ah";
    String decomposed = "Bl\u0041\u0301ah";
    String res;
    res = Normalizer.normalize(composed, Normalizer.Form.NFD);
    if (!decomposed.equals(res)) {
        System.err.println("Bad decompose: '" + composed + "' --> '" + res + "'");
    }
    res = Normalizer.normalize(decomposed, Normalizer.Form.NFC);
    if (!composed.equals(res)) {
        System.err.println("Bad compose: '" + decomposed + "' --> '" + res + "'");
    }
    System.out.println("Normalizer passed");
}
Example 17
Project: greenhouse-master  File: SlugUtils.java View source code
/**
 * Converts the input to a slug: {@code WHITESPACE} matches become {@code "-"},
 * the text is NFD-normalized, {@code NONLATIN} matches are removed and the
 * result is lower-cased with {@link Locale#ENGLISH}.
 *
 * @throws IllegalArgumentException if {@code input} is {@code null}
 */
public static String toSlug(String input) {
    if (input == null) {
        throw new IllegalArgumentException("Input cannot be null");
    }
    final String dashed = WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Form.NFD);
    return NONLATIN.matcher(decomposed).replaceAll("").toLowerCase(Locale.ENGLISH);
}
Example 18
Project: midpoint-master  File: PrismDefaultPolyStringNormalizer.java View source code
/* (non-Javadoc)
 * @see com.evolveum.midpoint.prism.polystring.PolyStringNormalizer#normalize(java.lang.String)
 *
 * Trims the input, applies NFKD, removes every character that is not a word
 * character, whitespace or digit, collapses whitespace runs to one space and
 * lower-cases the result. A blank result becomes the empty string.
 */
@Override
public String normalize(String orig) {
    if (orig == null) {
        return null;
    }
    final String decomposed = Normalizer.normalize(StringUtils.trim(orig), Normalizer.Form.NFKD);
    final String collapsed = decomposed.replaceAll("[^\\w\\s\\d]", "").replaceAll("\\s+", " ");
    final String result = StringUtils.isBlank(collapsed) ? "" : collapsed;
    return StringUtils.lowerCase(result);
}
Example 19
Project: openrocket-master  File: L10NGenerator.java View source code
/**
 * Derives an ASCII rendering of a character and, when one exists, hands it to
 * {@code print}. The character is NFKD-decomposed; characters below 128 are
 * kept, {@code Chars.FRACTION} becomes '/', everything else is dropped. Nothing
 * is printed when the trimmed rendering is empty.
 */
private static void output(char ch) {
    final String decomposed = Normalizer.normalize(String.valueOf(ch), Normalizer.Form.NFKD);
    final StringBuilder ascii = new StringBuilder(1);
    for (int i = 0; i < decomposed.length(); i++) {
        final char c = decomposed.charAt(i);
        if (c < 128) {
            ascii.append(c);
        } else if (c == Chars.FRACTION) {
            ascii.append('/');
        }
    }
    final String rendering = ascii.toString().trim();
    if (!rendering.isEmpty()) {
        print(ch, rendering);
    }
}
Example 20
Project: package-drone-master  File: Tokens.java View source code
/**
 * Hashes {@code data} with {@code salt}.
 *
 * <p>The data is NFC-normalized and UTF-8 encoded. The first digest is
 * SHA-256 over salt followed by data; it is then re-hashed 1000 times, each
 * round being SHA-256 over salt followed by the previous digest.
 *
 * @param salt the salt, UTF-8 encoded before use
 * @param data the text to hash
 * @return the Base64 encoding of the final digest
 * @throws IllegalStateException if SHA-256 is not available
 */
public static String hashIt(final String salt, String data) {
    final byte[] dataBytes = Normalizer.normalize(data, Form.NFC).getBytes(StandardCharsets.UTF_8);
    final byte[] saltBytes = salt.getBytes(StandardCharsets.UTF_8);

    final MessageDigest sha256;
    try {
        sha256 = MessageDigest.getInstance("SHA-256");
    } catch (final NoSuchAlgorithmException e) {
        throw new IllegalStateException(e);
    }

    // Feeding the parts one after another hashes the same bytes as hashing
    // their concatenation; digest() resets the instance for the next round.
    sha256.update(saltBytes);
    sha256.update(dataBytes);
    byte[] digest = sha256.digest();
    for (int round = 0; round < 1000; round++) {
        sha256.update(saltBytes);
        sha256.update(digest);
        digest = sha256.digest();
    }
    return Base64.getEncoder().encodeToString(digest);
}
Example 21
Project: spring-greenhouse-clickstart-master  File: SlugUtils.java View source code
/**
 * Converts the input to a slug: {@code WHITESPACE} matches become {@code "-"},
 * the text is NFD-normalized, {@code NONLATIN} matches are removed and the
 * result is lower-cased with {@link Locale#ENGLISH}.
 *
 * @throws IllegalArgumentException if {@code input} is {@code null}
 */
public static String toSlug(String input) {
    if (input == null) {
        throw new IllegalArgumentException("Input cannot be null");
    }
    final String dashed = WHITESPACE.matcher(input).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Form.NFD);
    return NONLATIN.matcher(decomposed).replaceAll("").toLowerCase(Locale.ENGLISH);
}
Example 22
Project: spring-mvc-movies-master  File: Slug.java View source code
/**
 * Builds a slug from the input: {@code WHITESPACE} matches become {@code "-"},
 * the text is NFD-normalized, {@code NONLATIN} matches are removed and the
 * result is lower-cased with {@link Locale#ENGLISH}.
 *
 * @throws IllegalArgumentException if {@code input} is {@code null}
 */
public static String makeSlug(String input) {
    if (input == null) {
        throw new IllegalArgumentException();
    }
    final String dashed = WHITESPACE.matcher(input).replaceAll("-");
    final String stripped = NONLATIN.matcher(Normalizer.normalize(dashed, Form.NFD)).replaceAll("");
    return stripped.toLowerCase(Locale.ENGLISH);
}
Example 23
Project: DrawBridge-master  File: Normalizer.java View source code
/**
 * A conservative heuristic as to whether s is normalized according to Unicode
 * Normal Form C.  It is heuristic, because Caja needs to run with versions
 * of the Java standard libraries that do not include normalization.
 * When {@code IS_NORMALIZED} is available it is invoked reflectively with
 * {@code NORMAL_FORM_C}; otherwise only strings whose characters are all below
 * 256 are reported as normalized.
 * @return false if s is not normalized.
 */
public static boolean isNormalized(CharSequence s) {
    if (IS_NORMALIZED != null) {
        try {
            Object verdict = IS_NORMALIZED.invoke(null, s, NORMAL_FORM_C);
            return ((Boolean) verdict).booleanValue();
        } catch (IllegalAccessException ex) {
            throw new SomethingWidgyHappenedError("Normalizer unexpectedly uninvokable", ex);
        } catch (InvocationTargetException ex) {
            throw new SomethingWidgyHappenedError("Normalizer unexpectedly uninvokable", ex.getTargetException());
        }
    }
    // Fallback used only when the reflective lookup is unavailable.
    for (int i = 0, n = s.length(); i < n; i++) {
        if (s.charAt(i) >= 256) {
            return false;
        }
    }
    return true;
}
Example 24
Project: solrmarc-master  File: FieldFormatterBase.java View source code
/**
 * Cleans a field value according to the flags returned by {@code getCleanVal()}.
 *
 * <p>Steps, each applied only when its flag is set: drop the number of leading
 * characters given by indicator 2 (subfield a of a {@code DataField} only),
 * take {@code getSubstring} and trim unless UNTRIMMED, run
 * {@code DataUtil.cleanData} for CLEAN_EACH, strip accents, collapse
 * punctuation and spaces, and change the case.
 *
 * @param vf          the field the data comes from
 * @param isSubfieldA whether the data is the field's subfield a
 * @param data        the raw value
 * @return the cleaned value
 */
public String cleanData(VariableField vf, boolean isSubfieldA, String data) {
    final EnumSet<eCleanVal> cleanVal = getCleanVal();
    int numToDel = 0;
    String trimmed = data;
    if (cleanVal.contains(eCleanVal.STRIP_INDICATOR_2) && isSubfieldA && vf instanceof DataField) {
        DataField df = (DataField) vf;
        char ind2Val = df.getIndicator2();
        numToDel = (ind2Val >= '0' && ind2Val <= '9') ? ind2Val - '0' : 0;
        if (numToDel > 0) {
            // Clamp: an indicator larger than the data length would otherwise
            // make substring throw StringIndexOutOfBoundsException.
            trimmed = trimmed.substring(Math.min(numToDel, trimmed.length()));
        }
    }
    trimmed = cleanVal.contains(eCleanVal.UNTRIMMED) ? getSubstring(trimmed) : getSubstring(trimmed).trim();
    String str = (cleanVal.contains(eCleanVal.CLEAN_EACH)) ? DataUtil.cleanData(trimmed) : trimmed;
    // Nothing further requested: return early.
    if (!cleanVal.contains(eCleanVal.STRIP_ACCCENTS) && !cleanVal.contains(eCleanVal.STRIP_ALL_PUNCT) && !cleanVal.contains(eCleanVal.TO_LOWER) && !cleanVal.contains(eCleanVal.TO_UPPER) && !cleanVal.contains(eCleanVal.TO_TITLECASE) && !cleanVal.contains(eCleanVal.STRIP_INDICATOR_2)) {
        return (str);
    }
    // Do more extensive cleaning of data.
    if (cleanVal.contains(eCleanVal.STRIP_ACCCENTS)) {
        str = ACCENTS.matcher(Normalizer.normalize(str, Form.NFD)).replaceAll("");
        StringBuilder folded = new StringBuilder();
        boolean replaced = false;
        for (char c : str.toCharArray()) {
            // foldDiacriticLatinChar signals "no replacement" with 0x00.
            char newc = Utils.foldDiacriticLatinChar(c);
            if (newc != 0x00) {
                folded.append(newc);
                replaced = true;
            } else {
                folded.append(c);
            }
        }
        if (replaced)
            str = folded.toString();
    }
    if (cleanVal.contains(eCleanVal.STRIP_ALL_PUNCT))
        str = str.replaceAll("( |\\p{Punct})+", " ");
    if (!cleanVal.contains(eCleanVal.UNTRIMMED))
        str = str.trim();
    if (cleanVal.contains(eCleanVal.TO_LOWER)) {
        str = str.toLowerCase();
    } else if (cleanVal.contains(eCleanVal.TO_UPPER)) {
        str = str.toUpperCase();
    } else if (cleanVal.contains(eCleanVal.TO_TITLECASE)) {
        str = DataUtil.toTitleCase(str);
    }
    return str;
}
Example 25
Project: agile-itsm-master  File: LookupProcessContrato.java View source code
@SuppressWarnings({ "rawtypes", "unchecked" })
public List processLookup(LookupDTO lookupObject) throws LogicException, Exception {
    String sql = "";
    // Collection colRetorno = new ArrayList();
    LookupFieldUtil lookUpField = new LookupFieldUtil();
    Collection colCamposRet = lookUpField.getCamposRetorno(lookupObject.getNomeLookup());
    Iterator itRet = colCamposRet.iterator();
    Campo cp;
    while (itRet.hasNext()) {
        cp = (Campo) itRet.next();
        if (!sql.equalsIgnoreCase("")) {
            sql = sql + ",";
        }
        sql = sql + cp.getNomeFisico();
    }
    sql = "SELECT " + sql;
    sql += "  FROM CONTRATOS PRJ INNER JOIN CLIENTES CLI on CLI.idCliente = PRJ.idCliente INNER JOIN Fornecedor FORN on FORN.idFornecedor = PRJ.idFornecedor ";
    String where = " (PRJ.deleted IS NULL or PRJ.deleted = 'N')";
    ContratosGruposService contratosGruposService = (ContratosGruposService) ServiceLocator.getInstance().getService(ContratosGruposService.class, null);
    String COLABORADORES_VINC_CONTRATOS = ParametroUtil.getValorParametroCitSmartHashMap(br.com.centralit.citcorpore.util.Enumerados.ParametroSistema.COLABORADORES_VINC_CONTRATOS, "N");
    if (COLABORADORES_VINC_CONTRATOS == null)
        COLABORADORES_VINC_CONTRATOS = "N";
    if (COLABORADORES_VINC_CONTRATOS.equalsIgnoreCase("S")) {
        where += " AND PRJ.idContrato in (-1";
        UsuarioDTO usuarioDto = (UsuarioDTO) lookupObject.getUser();
        if (usuarioDto != null) {
            Collection<ContratosGruposDTO> colContratosColab = contratosGruposService.findByIdEmpregado(usuarioDto.getIdEmpregado());
            if (colContratosColab != null) {
                for (ContratosGruposDTO contratosGruposDto : colContratosColab) {
                    where += "," + contratosGruposDto.getIdContrato();
                }
            }
        }
        where += ")";
    }
    Collection colCamposPesq = lookUpField.getCamposPesquisa(lookupObject.getNomeLookup());
    Iterator itPesq = colCamposPesq.iterator();
    String obj = null;
    int count = 1;
    while (itPesq.hasNext()) {
        cp = (Campo) itPesq.next();
        obj = null;
        obj = this.getValueParmLookup(lookupObject, count);
        if (obj != null) {
            String[] trataGetNomeFisico = cp.getNomeFisico().split("\\.");
            String nomeFisico = cp.getNomeFisico();
            if (trataGetNomeFisico.length > 1) {
                cp.setNomeFisico(trataGetNomeFisico[1]);
                nomeFisico = trataGetNomeFisico[0] + "." + trataGetNomeFisico[1];
            }
            if (!obj.equalsIgnoreCase("")) {
                if (!where.equalsIgnoreCase("")) {
                    where = where + " AND ";
                }
                if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXT").trim()) || cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXTAREA").trim())) {
                    String func = Constantes.getValue("FUNCAO_CONVERTE_MAIUSCULO");
                    if (func != null && !func.trim().equalsIgnoreCase("")) {
                        where = where + func + "(" + nomeFisico + ")";
                    } else {
                        where = where + cp.getNomeFisico();
                    }
                    where = where + " LIKE '%";
                } else {
                    if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_COMBO").trim())) {
                        where = where + cp.getNomeFisico();
                        where = where + " IN (";
                    } else if (cp.getType().equalsIgnoreCase("DATE")) {
                        where = where + cp.getNomeFisico();
                        where = where + " = '";
                    } else {
                        where = where + nomeFisico;
                        where = where + " = ";
                    }
                }
                if (cp.isSomenteBusca()) {
                    obj = obj.trim();
                    obj = obj.toUpperCase();
                    obj = Normalizer.normalize(obj, Normalizer.Form.NFD);
                    obj = obj.replaceAll("[^\\p{ASCII}]", "");
                }
                if (StringUtils.contains(obj, "'") && !cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_COMBO").trim())) {
                    obj = StringEscapeUtils.escapeSql(obj);
                }
                where = where + obj;
                if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXT").trim()) || cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXTAREA").trim())) {
                    where = where + "%'";
                } else if (cp.getType().equalsIgnoreCase("DATE")) {
                    where = where + "'";
                } else if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_COMBO").trim())) {
                    where = where + ")";
                }
            }
        }
        count++;
    }
    String strAux;
    if (!where.equalsIgnoreCase("")) {
        sql = sql + " WHERE " + where;
        strAux = lookUpField.getWhere(lookupObject.getNomeLookup());
        if (!strAux.equalsIgnoreCase("")) {
            sql = sql + " AND ";
            sql = sql + strAux;
        }
    } else {
        strAux = lookUpField.getWhere(lookupObject.getNomeLookup());
        if (!strAux.equalsIgnoreCase("")) {
            sql = sql + " WHERE " + strAux;
        }
    }
    Collection colCamposOrd = lookUpField.getCamposOrdenacao(lookupObject.getNomeLookup());
    Iterator itOrd = colCamposOrd.iterator();
    String ordem = "";
    while (itOrd.hasNext()) {
        cp = (Campo) itOrd.next();
        if (!ordem.equalsIgnoreCase("")) {
            ordem = ordem + ",";
        }
        ordem = ordem + cp.getNomeFisico();
    }
    if (!ordem.equalsIgnoreCase("")) {
        sql = sql + " ORDER BY " + ordem;
    }
    sql = sql.toUpperCase();
    List lista = execSQL(sql, null);
    if (lista == null || lista.size() == 0) {
        TransactionControler tc = this.getTransactionControler();
        if (tc != null) {
            tc.close();
        }
        return null;
    }
    // Processa o resultado.
    List result = new ArrayList();
    if (lista == null || lista.size() == 0) {
        TransactionControler tc = this.getTransactionControler();
        if (tc != null) {
            tc.close();
        }
        return result;
    }
    if (lista.size() > 400) {
        TransactionControler tc = this.getTransactionControler();
        if (tc != null) {
            tc.close();
        }
        throw new LogicException("citcorpore.comum.consultaEstourouLimite");
    }
    Iterator it = lista.iterator();
    Campo campoAux;
    int i;
    Collection colAux;
    Object auxObj;
    while (it.hasNext()) {
        Object[] row = (Object[]) it.next();
        itRet = colCamposRet.iterator();
        i = 0;
        campoAux = null;
        colAux = new ArrayList();
        while (itRet.hasNext()) {
            cp = (Campo) itRet.next();
            campoAux = new Campo(cp.getNomeFisico(), cp.getDescricao(), cp.isObrigatorio(), cp.getType(), cp.getTamanho());
            if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXT").trim()) || cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXTAREA").trim())) {
                if (row[i] == null) {
                    auxObj = new String("");
                } else {
                    String str = new String(row[i].toString());
                    auxObj = str.replaceAll("\"", """).replaceAll("'", "´");
                    /*
                         * alteracao feita por Cleon, pois ao tentar restaurar através de uma lookup um elemento textarea com quebra de linha, o setretorno nao estava comportando de forma correta
                         * disparando um erro
                         */
                    auxObj = str.replaceAll("\n", " ");
                }
                campoAux.setObjValue(auxObj);
            } else if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_DATE").trim())) {
                if (row[i] == null) {
                    campoAux.setObjValue(null);
                } else {
                    auxObj = row[i];
                    if ((auxObj instanceof java.sql.Date)) {
                        campoAux.setObjValue(UtilDatas.dateToSTR((java.sql.Date) auxObj));
                    } else if ((auxObj instanceof java.sql.Timestamp)) {
                        campoAux.setObjValue(UtilDatas.dateToSTR((java.sql.Timestamp) auxObj));
                    } else {
                        campoAux.setObjValue(auxObj.toString());
                    }
                }
            } else if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_MOEDA").trim())) {
                if (row[i] == null) {
                    campoAux.setObjValue(null);
                } else {
                    auxObj = row[i];
                    String valorTransf = null;
                    if ((auxObj instanceof Double)) {
                        valorTransf = UtilFormatacao.formatBigDecimal(new BigDecimal(((Double) auxObj).doubleValue()), 2);
                    } else if ((auxObj instanceof BigDecimal)) {
                        valorTransf = UtilFormatacao.formatBigDecimal(((BigDecimal) auxObj), 2);
                    } else {
                        valorTransf = auxObj.toString();
                    }
                    campoAux.setObjValue(valorTransf);
                }
            }
            colAux.add(campoAux);
            i++;
        }
        result.add(colAux);
    }
    TransactionControler tc = this.getTransactionControler();
    if (tc != null) {
        tc.close();
    }
    return result;
}
Example 26
Project: bagit-java-master  File: CheckIfFileExistsTask.java View source code
/**
 * If a file is partially normalized, or normalized differently from what the manifest specifies, it fails the
 * plain existence test. This method compares the NFD form of the expected file name with the NFD form of every
 * entry in the file's parent directory.
 *
 * @return true if the normalized filename matches one on disk in the parent folder; false when there is no
 *         parent, no match, or the directory cannot be read (the read error is logged)
 */
private boolean existsNormalized() {
    final String wanted = Normalizer.normalize(file.toString(), Normalizer.Form.NFD);
    final Path parent = file.getParent();
    if (parent == null) {
        return false;
    }
    try (final DirectoryStream<Path> siblings = Files.newDirectoryStream(parent)) {
        for (final Path sibling : siblings) {
            final String candidate = Normalizer.normalize(sibling.toString(), Normalizer.Form.NFD);
            if (wanted.equals(candidate)) {
                return true;
            }
        }
    } catch (IOException e) {
        logger.error(messages.getString("error_reading_normalized_file"), parent, wanted, e);
    }
    return false;
}
Example 27
Project: basex-master  File: FnNormalizeUnicode.java View source code
/**
 * Normalizes the first argument. The form defaults to NFC; an optional second
 * argument names the form (trimmed and upper-cased first). An empty form name
 * returns the input unchanged, an unknown one raises {@code NORMUNI_X}. Input
 * for which {@code ascii} is true is returned without normalization.
 */
@Override
public Item item(final QueryContext qc, final InputInfo ii) throws QueryException {
    final byte[] input = toEmptyToken(exprs[0], qc);
    Form form = Form.NFC;
    if (exprs.length == 2) {
        final byte[] formName = uc(trim(toToken(exprs[1], qc)));
        if (formName.length == 0) {
            return Str.get(input);
        }
        try {
            form = Form.valueOf(string(formName));
        } catch (final IllegalArgumentException ex) {
            throw NORMUNI_X.get(info, formName);
        }
    }
    if (ascii(input)) {
        return Str.get(input);
    }
    return Str.get(Normalizer.normalize(string(input), form));
}
Example 28
Project: cognitionis-nlp-libraries-master  File: WikiHtml2PlainHandler.java View source code
/**
 * Handles the end of an element while converting HTML to plain text.
 *
 * <p>Depending on the closed tag this flushes the collected sentences into
 * {@code strBuilder} (html and root tag), stores the current paragraph or
 * heading as a sentence (p, h1-h4), or leaves table / sup mode. When
 * {@code encoding} is "ascii", stored text is NFD-normalized and stripped of
 * non-ASCII characters.
 */
@Override
public void endElement(final String uri, final String localName, final String tag) throws SAXException {
    // End of the html element: emit everything collected so far.
    if (tag.equalsIgnoreCase("html") && inText) {
        inText = false;
        if (!hasSentence) {
            System.out.println("no sentences");
            strBuilder = textStrb;
        } else {
            // Sentences are separated by a blank line; none after the last one.
            int n = sentences.size() - 1;
            for (int i = 0; i < n; i++) {
                strBuilder.append(sentences.get(i) + "\n\n");
            }
            strBuilder.append(sentences.get(n));
            sentences = null;
        }
        // For the garbage collector - free memory
        textStrb = null;
    }
    // End of a paragraph: clean up its text and store it as one sentence.
    if (tag.equalsIgnoreCase("p") && inSentence) {
        inSentence = false;
        if (sentenceStrb.length() > 0) {
            // Collapse whitespace, turn long dashes into " - " and the curly apostrophe into "'".
            String temp = sentenceStrb.toString().replaceAll("(\n|\r|\\p{javaSpaceChar})", " ").replaceAll("\\s+", " ").replaceAll("(—|–)", " - ").replaceAll("’", "'").trim();
            if (encoding.equals("ascii")) {
                temp = java.text.Normalizer.normalize(temp, java.text.Normalizer.Form.NFD);
                temp = temp.replaceAll("[^\\p{ASCII}]", "");
            }
            sentences.add(temp);
        }
        // For the garbage collector - free memory
        sentenceStrb = null;
    }
    // inTable is a counter, decremented once per closed table.
    if (tag.equalsIgnoreCase("table") && inTable > 0) {
        inTable--;
    }
    if (tag.equalsIgnoreCase("sup") && inSup) {
        inSup = false;
    }
    // End of a heading: store it as a sentence unless it is one of the skipped section titles.
    if (tag.matches("h[1234]")) {
        inH = false;
        if (H2Strb.length() > 0 && !H2Strb.toString().replaceAll("(\n|\r|\\s*\\[\\s*edit(ar)?\\s*\\]\\s*)", "").matches("(Media|Animated maps|See also|Notes|References|External links)")) {
            // Same clean-up as for paragraphs, additionally dropping "[edit]" / "[editar]" markers and tabs.
            String temp = H2Strb.toString().replaceAll("(\n|\r|\\s*\\[\\s*edit(ar)?\\s*\\]\\s*|\t)", " ").replaceAll("\\s+", " ").replaceAll("(—|–)", " - ").replaceAll("’", "'").trim();
            // NOTE: the NFD + ASCII stripping below does not always work
            if (encoding.equals("ascii")) {
                temp = java.text.Normalizer.normalize(temp, java.text.Normalizer.Form.NFD);
                temp = temp.replaceAll("[^\\p{ASCII}]", "");
            }
            // Headings get a trailing full stop.
            sentences.add(temp + ".");
        }
        // For the garbage collector - free memory
        H2Strb = null;
    }
    // This can be done when the document ends, if it had no text...
    if (tag.equalsIgnoreCase(root_tag)) {
        if (!hasText) {
            if (hasSentence) {
                // Here sentences are separated by a single line break.
                int n = sentences.size() - 1;
                for (int i = 0; i < n; i++) {
                    strBuilder.append(sentences.get(i) + "\n");
                }
                strBuilder.append(sentences.get(n));
                sentences = null;
            } else {
                strBuilder = textStrb;
                // For the garbage collector - free memory
                textStrb = null;
            }
        }
    }
}
Example 29
Project: facebook-hive-udfs-master  File: UDFNormalizeUnicode.java View source code
/**
 * Normalizes {@code s} into the Unicode normalization form named by {@code form}.
 *
 * @param s    text to normalize; may be null
 * @param form exactly one of "NFC", "NFD", "NFKC" or "NFKD" (case-sensitive)
 * @return the normalized text, or null when either argument is null or the
 *         form name is not one of the four recognized names
 */
public String evaluate(String s, String form) {
    if (s == null || form == null) {
        return null;
    }
    // Resolve the requested form first; an unrecognized name leaves it unset.
    Normalizer.Form target = null;
    if ("NFC".equals(form)) {
        target = Normalizer.Form.NFC;
    } else if ("NFD".equals(form)) {
        target = Normalizer.Form.NFD;
    } else if ("NFKC".equals(form)) {
        target = Normalizer.Form.NFKC;
    } else if ("NFKD".equals(form)) {
        target = Normalizer.Form.NFKD;
    }
    return target == null ? null : Normalizer.normalize(s, target);
}
Example 30
Project: iswc2012metadata-master  File: ToolText2Rdf.java View source code
/**
 * Removes diacritics by decomposing the input (NFD) and dropping every
 * code point whose general category is non-spacing mark (Mn).
 *
 * The scan works on code points rather than {@code char}s so that
 * non-spacing marks outside the Basic Multilingual Plane are removed too;
 * a per-char scan only ever sees their surrogate halves and keeps them.
 *
 * @param input text to strip; must not be null
 * @return the decomposed text without non-spacing marks
 * @throws NullPointerException if {@code input} is null
 */
public static String removeDiacritics(String input) {
    String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD);
    StringBuilder stripped = new StringBuilder(decomposed.length());
    for (int i = 0; i < decomposed.length(); ) {
        int codePoint = decomposed.codePointAt(i);
        if (Character.getType(codePoint) != Character.NON_SPACING_MARK) {
            stripped.appendCodePoint(codePoint);
        }
        // Advance by one or two chars depending on the code point's width.
        i += Character.charCount(codePoint);
    }
    return stripped.toString();
}
Example 31
Project: skalli-master  File: NormalizeUtil.java View source code
/**
 * Transliterates German umlauts and sharp s to their two-letter ASCII
 * spellings, then strips the remaining combining diacritical marks.
 *
 * @param s text to normalize; may be null
 * @return the normalized text, or null when {@code s} is null
 */
@SuppressWarnings("nls")
public static String normalize(String s) {
    if (s == null) {
        return null;
    }
    // Each pair is {character, replacement}; applied before decomposition so
    // the umlauts are spelled out instead of being reduced to the bare vowel.
    final String[][] transliterations = {
        { "ä", "ae" },
        { "ö", "oe" },
        { "ü", "ue" },
        { "Ä", "Ae" },
        { "Ö", "Oe" },
        { "Ü", "Ue" },
        { "ß", "ss" },
    };
    String result = s;
    for (String[] pair : transliterations) {
        result = result.replace(pair[0], pair[1]);
    }
    final String decomposed = Normalizer.normalize(result, Normalizer.Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}
Example 32
Project: TweetLanes-master  File: Validator.java View source code
/**
 * Computes the length of a tweet: the code point count of the NFC-normalized
 * text, with every extracted URL counted as a fixed short-URL length instead
 * of the span it occupies in the text.
 *
 * @param text the tweet text
 * @return the computed length
 */
public int getTweetLength(String text) {
    final String normalized = Normalizer.normalize(text, Normalizer.Form.NFC);
    int length = normalized.codePointCount(0, normalized.length());
    for (Extractor.Entity urlEntity : extractor.extractURLsWithIndices(normalized)) {
        // Remove the span covered by the URL entity ...
        length += urlEntity.start - urlEntity.end;
        // ... and count the scheme-dependent fixed length in its place.
        if (urlEntity.value.toLowerCase().startsWith("https://")) {
            length += shortUrlLengthHttps;
        } else {
            length += shortUrlLength;
        }
    }
    return length;
}
Example 33
Project: xtf-master  File: UnicodeNormalizingFilter.java View source code
/**
 * Returns the next token from the wrapped input with its term text replaced
 * by the normalized form when the two differ.
 *
 * @return the next token, or null when the input is exhausted
 * @throws IOException if reading from the input fails
 */
@Override
public Token next() throws IOException {
    final Token token = input.next();
    if (token == null) {
        return null;
    }
    final String original = token.termText();
    // Normalization can be lengthy, so it is done once per distinct term
    // text and looked up from the cache afterwards.
    if (!cache.contains(original)) {
        cache.put(original, Normalizer.normalize(original));
    }
    final String normalized = cache.get(original);
    if (!normalized.equals(original)) {
        token.setTermText(normalized);
    }
    return token;
}
Example 34
Project: andromda-master  File: NameMasker.java View source code
/**
 * Applies the named <code>mask</code> to <code>name</code>. Both arguments are
 * trimmed (null becomes empty) first. A mask that matches none of the known
 * mask names, compared case-insensitively, leaves the trimmed name unchanged.
 *
 * @param name the name to be masked
 * @param mask the mask to apply
 * @return the masked name.
 */
public String mask(String name, String mask) {
    final String maskName = StringUtils.trimToEmpty(mask);
    final String trimmedName = StringUtils.trimToEmpty(name);
    if (maskName.equalsIgnoreCase(NONE)) {
        return trimmedName;
    }
    if (maskName.equalsIgnoreCase(UPPERCASE)) {
        return trimmedName.toUpperCase();
    }
    if (maskName.equalsIgnoreCase(UNDERSCORE)) {
        return StringUtilsHelper.separate(trimmedName, "_");
    }
    if (maskName.equalsIgnoreCase(UPPERUNDERSCORE)) {
        return StringUtilsHelper.separate(trimmedName, "_").toUpperCase();
    }
    if (maskName.equalsIgnoreCase(LOWERCASE)) {
        return trimmedName.toLowerCase();
    }
    if (maskName.equalsIgnoreCase(LOWERUNDERSCORE)) {
        return StringUtilsHelper.separate(trimmedName, "_").toLowerCase();
    }
    if (maskName.equalsIgnoreCase(LOWERCAMELCASE)) {
        return StringUtilsHelper.lowerCamelCaseName(trimmedName);
    }
    if (maskName.equalsIgnoreCase(UPPERCAMELCASE)) {
        return StringUtilsHelper.upperCamelCaseName(trimmedName);
    }
    if (maskName.equalsIgnoreCase(NOSPACE)) {
        return StringUtils.deleteWhitespace(trimmedName);
    }
    if (maskName.equalsIgnoreCase(NOACCENT)) {
        // Decompose, then drop everything outside ASCII (accents included).
        final String decomposed = Normalizer.normalize(trimmedName, java.text.Normalizer.Form.NFD);
        return decomposed.replaceAll("[^\\p{ASCII}]", "");
    }
    return trimmedName;
}
Example 35
Project: jackrabbit-master  File: NodeNameNormalizer.java View source code
/**
 * Logs a debug message, with a synthetic exception carrying the call chain,
 * when the local part of {@code name} is not in Unicode NFC form. Does
 * nothing unless debug logging is enabled.
 *
 * @param name the node name to inspect
 */
public static void check(Name name) {
    if (!log.isDebugEnabled()) {
        return;
    }
    final String localName = name.getLocalName();
    final String nfc = Normalizer.normalize(localName, Form.NFC);
    if (localName.equals(nfc)) {
        return;
    }
    log.debug(
        "The new node name '" + dump(localName) + "' is not in Unicode NFC form ('" + dump(nfc) + "').",
        new Exception("Call chain"));
}
Example 36
Project: josm-older-master  File: SearchCompiler.java View source code
/**
 * Tests whether the primitive matches this key/value criterion.
 *
 * With a key pattern set, the primitive matches when any of its keys is
 * found by the key pattern and that key's value is found by the value
 * pattern. Without one, the value of the configured key (or the formatted
 * timestamp for the key "timestamp") must contain the configured value,
 * compared in lower case unless the match is case sensitive.
 *
 * @param osm the primitive to test
 * @return true if the primitive matches
 */
@Override
public boolean match(OsmPrimitive osm) {
    if (keyPattern == null) {
        final String actual;
        if (key.equals("timestamp")) {
            actual = DateUtils.fromDate(osm.getTimestamp());
        } else {
            actual = osm.get(key);
        }
        if (actual == null) {
            return false;
        }
        final String haystack = caseSensitive ? actual : actual.toLowerCase();
        final String needle = caseSensitive ? value : value.toLowerCase();
        return haystack.indexOf(needle) >= 0;
    }

    if (!osm.hasKeys()) {
        return false;
    }
    for (String candidateKey : osm.keySet()) {
        final String candidateValue = osm.get(candidateKey);
        if (keyPattern.matcher(candidateKey).find() && valuePattern.matcher(candidateValue).find()) {
            return true;
        }
    }
    return false;
}
Example 37
Project: Liferay-CIFS-master  File: UTF8Normalizer.java View source code
/**
 * Normalize a UTF-8 string by reflectively invoking whichever normalizer
 * method was resolved for the detected runtime type.
 *
 * @param utf8str String
 * @return the normalized string, or null if the reflective call failed or
 *         the type matched none of the handled cases
 * @throws RuntimeException if the normalizer type is Unknown
 */
public final String normalize(String utf8str) {
    try {
        switch(isType()) {
            case IBMICU:
                // compose(String, boolean)
                return (String) m_method.invoke(null, utf8str, false);
            case Java5:
                // compose(String, boolean, int)
                return (String) m_method.invoke(null, utf8str, false, 0);
            case Java6:
                // normalize(CharSequence, Normalizer.Form)
                return (String) m_method.invoke(null, utf8str, m_field.get(null));
            case Unknown:
                throw new RuntimeException("Normalizer is not initialized");
        }
    } catch (InvocationTargetException ignored) {
        // Deliberately swallowed: a failed call is reported as a null result.
    } catch (IllegalAccessException ignored) {
        // Deliberately swallowed: a failed call is reported as a null result.
    }
    return null;
}
Example 38
Project: deeplearning4j-master  File: InputHomogenization.java View source code
/**
 * Returns the normalized form of the input text held by this object.
 *
 * Per character: characters listed in {@code ignoreCharactersContaining}
 * are kept as-is, digits become the letter "d", upper-case letters are
 * lower-cased unless {@code preserveCase} is set. The result is then
 * NFD-normalized, a fixed set of punctuation characters is removed and runs
 * of '!' are collapsed to a single '!'.
 *
 * @return the normalized text
 */
public String transform() {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < input.length(); i++) {
        char c = input.charAt(i);
        if (ignoreCharactersContaining != null && ignoreCharactersContaining.contains(String.valueOf(c))) {
            sb.append(c);
        } else if (Character.isDigit(c)) {
            sb.append("d");
        } else if (Character.isUpperCase(c) && !preserveCase) {
            sb.append(Character.toLowerCase(c));
        } else {
            sb.append(c);
        }
    }
    String normalized = Normalizer.normalize(sb.toString(), Form.NFD);
    // Punctuation to strip. The closing curly double quote was previously a
    // mis-encoded U+FFFD literal, so only the opening quote was removed.
    final String[] stripped = {
        ".", ",", "\"", "'", "(", ")", "“", "”", "…", "|", "/", "\\", "[", "]", "‘", "’"
    };
    for (String literal : stripped) {
        normalized = normalized.replace(literal, "");
    }
    normalized = normalized.replaceAll("[!]+", "!");
    return normalized;
}
Example 39
Project: jnap-common-master  File: SeoStringUtil.java View source code
/**
 * Builds a URL-friendly slug from the given text: trims it, decomposes it
 * (NFD), optionally removes stop words for the given locale, collapses
 * whitespace into single hyphens, drops every character that is not a word
 * character or hyphen, and lower-cases the result with English rules.
 *
 * @param src the text to convert; must not be null
 * @param locale the locale used to pick a stop word cleaner, or null to
 *            skip stop word removal
 * @return the SEO-friendly text
 */
public static String makeSeoFriendly(String src, Locale locale) {
    String seoFriendlyText = src.trim();
    // normalize the trimmed text (normalizing src again would discard the
    // trim and turn leading/trailing whitespace into hyphens)
    seoFriendlyText = Normalizer.normalize(seoFriendlyText, Form.NFD);
    // try to remove stop words if locale is specified
    if (locale != null) {
        SeoStopWordCleaner wordCleaner = null;
        for (SeoStopWordCleaner cleaner : seoStopWordCleaners) {
            if (ArrayUtils.contains(cleaner.getSupportedLocales(), locale)) {
                wordCleaner = cleaner;
                break;
            }
        }
        if (wordCleaner == null) {
            logger.warn(MessageFormat.format("A locale was specified ({0}) but no " + "SeoStopWordCleaner was found for it", locale.toString()));
        } else {
            seoFriendlyText = wordCleaner.clean(seoFriendlyText);
        }
    }
    // replace duplicated spaces with a single one
    seoFriendlyText = seoFriendlyText.replaceAll("[\\s]{2,}", " ");
    // replace spaces with '-'
    seoFriendlyText = seoFriendlyText.replaceAll("[\\s]", "-");
    // remove remaining non-word characters (this also drops the combining
    // marks produced by the NFD step)
    seoFriendlyText = seoFriendlyText.replaceAll("[^\\w-]", StringUtils.EMPTY);
    // convert to lowercase (using english locale rules) and return
    return seoFriendlyText.toLowerCase(Locale.ENGLISH);
}
Example 40
Project: metafacture-core-master  File: StreamUnicodeNormalizerTest.java View source code
/**
 * With the normalization form set to NFD, a literal whose value is
 * KEY_WITH_PRECOMPOSED_CHARS must reach the receiver as KEY_WITH_DIACRITICS.
 */
@Test
public void shouldNormalizeToNFDIfConfigured() {
    // Configure the module under test before any events are sent.
    streamUnicodeNormalizer.setNormalizationForm(Normalizer.Form.NFD);
    // Send a single record containing one literal.
    streamUnicodeNormalizer.startRecord(RECORD_ID);
    streamUnicodeNormalizer.literal(LITERAL_NAME, KEY_WITH_PRECOMPOSED_CHARS);
    streamUnicodeNormalizer.endRecord();
    // The downstream receiver must have seen the literal with the expected value.
    verify(receiver).literal(LITERAL_NAME, KEY_WITH_DIACRITICS);
}
Example 41
Project: Aero-master  File: NormalizeUtf8TransformTest.java View source code
/**
 * Runs the "test_normalize_utf_8" transform built from a config without an
 * explicit normalization form or output, and checks that the string feature
 * "strFeature1" ends up holding the input normalized with
 * NormalizeUtf8Transform.DEFAULT_NORMALIZATION_FORM.
 */
@Test
public void testTransformDefaultNormalizationFormAndOverwriteInput() {
    Config config = ConfigFactory.parseString(makeConfigWithoutNormalizationFormAndOutput());
    Transform transform = TransformFactory.createTransform(config, "test_normalize_utf_8");
    FeatureVector featureVector = makeFeatureVector();
    // Fetched before the transform runs; the assertions below read this same
    // reference afterwards (presumably the live map held by featureVector — verify).
    Map<String, Set<String>> stringFeatures = featureVector.getStringFeatures();
    transform.doTransform(featureVector);
    assertNotNull(stringFeatures);
    // Still exactly one feature family with exactly one value after the transform.
    assertEquals(1, stringFeatures.size());
    Set<String> output = stringFeatures.get("strFeature1");
    assertNotNull(output);
    assertEquals(1, output.size());
    // The expected value is computed with the same default form the transform uses.
    assertTrue(output.contains(Normalizer.normalize("Funky string: ϓϔẛ", NormalizeUtf8Transform.DEFAULT_NORMALIZATION_FORM)));
}
Example 42
Project: aerosolve-master  File: NormalizeUtf8TransformTest.java View source code
/**
 * Runs the "test_normalize_utf_8" transform built from a config without an
 * explicit normalization form or output, and checks that the string feature
 * "strFeature1" ends up holding the input normalized with
 * NormalizeUtf8Transform.DEFAULT_NORMALIZATION_FORM.
 */
@Test
public void testTransformDefaultNormalizationFormAndOverwriteInput() {
    Config config = ConfigFactory.parseString(makeConfigWithoutNormalizationFormAndOutput());
    Transform transform = TransformFactory.createTransform(config, "test_normalize_utf_8");
    FeatureVector featureVector = makeFeatureVector();
    // Fetched before the transform runs; the assertions below read this same
    // reference afterwards (presumably the live map held by featureVector — verify).
    Map<String, Set<String>> stringFeatures = featureVector.getStringFeatures();
    transform.doTransform(featureVector);
    assertNotNull(stringFeatures);
    // Still exactly one feature family with exactly one value after the transform.
    assertEquals(1, stringFeatures.size());
    Set<String> output = stringFeatures.get("strFeature1");
    assertNotNull(output);
    assertEquals(1, output.size());
    // The expected value is computed with the same default form the transform uses.
    assertTrue(output.contains(Normalizer.normalize("Funky string: ϓϔẛ", NormalizeUtf8Transform.DEFAULT_NORMALIZATION_FORM)));
}
Example 43
Project: AnalyseSI-master  File: UnicodeUtils.java View source code
/**
 * Strips accents from an input String, and decomposes combined characters
 * into multiple basic ASCII characters.
 *
 * The method is based on the Unicode KD normalization form. It iterates
 * over the resulting characters: a character with an entry in
 * {@code decomposedChars} is replaced by that entry, any other character is
 * kept only if it is in the Basic Latin Unicode block.
 *
 * The characters are walked by index rather than with a
 * {@link java.text.CharacterIterator}, whose {@code DONE} sentinel (U+FFFF)
 * would end the loop early if the input contained that character.
 *
 * Based on http://www.codeproject.com/KB/cs/UnicodeNormalization.aspx
 * (found while Google-ing "stripping accents unicode string"), but with
 * legacy Java 1.6 classes. Also inspired by
 * http://www.nntp.perl.org/group/perl.i18n/2008/05/msg209.html
 *
 * @param accentedString
 *            A string that contains accents.
 * @return The same string, without accents.
 * @see java.text.Normalizer.Form#NFKD
 * @see Character.UnicodeBlock#BASIC_LATIN
 */
public static String decomposeToBasicLatin(String accentedString) {
    StringBuilder unaccentedString = new StringBuilder();
    String normalizedString = Normalizer.normalize(accentedString, Normalizer.Form.NFKD);
    for (int i = 0; i < normalizedString.length(); i++) {
        char c = normalizedString.charAt(i);
        if (decomposedChars.containsKey(c)) {
            unaccentedString.append(decomposedChars.get(c));
        } else if (Character.UnicodeBlock.BASIC_LATIN.equals(Character.UnicodeBlock.of(c))) {
            unaccentedString.append(c);
        }
    }
    return unaccentedString.toString();
}
Example 44
Project: android-libcore64-master  File: NormalizerTest.java View source code
/**
 * Checks Normalizer.normalize against fixed expectations for all four
 * normalization forms, and that a null source raises NullPointerException.
 * The string literals are significant down to their exact code point
 * sequence (composed vs. decomposed), so they must not be re-encoded.
 */
public void testNormalize() {
    final String src = "ϓϔẛ";
    // Should already be canonical composed
    assertEquals(src, Normalizer.normalize(src, Normalizer.Form.NFC));
    // Composed to canonical decomposed
    assertEquals("ϓϔẛ", Normalizer.normalize(src, Normalizer.Form.NFD));
    // Composed to compatibility composed
    assertEquals("ΎΫṡ", Normalizer.normalize(src, Normalizer.Form.NFKC));
    // Composed to compatibility decomposed
    assertEquals("ΎΫṡ", Normalizer.normalize(src, Normalizer.Form.NFKD));
    // Decomposed to canonical composed
    assertEquals("é", Normalizer.normalize("é", Normalizer.Form.NFC));
    // Decomposed to compatibility composed
    assertEquals("ṩ", Normalizer.normalize("ẛ̣", Normalizer.Form.NFKC));
    try {
        Normalizer.normalize(null, Normalizer.Form.NFC);
        fail("Did not throw error on null argument");
    } catch (NullPointerException e) {
        // Expected: a null source must be rejected.
    }
}
Example 45
Project: android_platform_libcore-master  File: NormalizerTest.java View source code
/**
 * Checks Normalizer.normalize against fixed expectations for all four
 * normalization forms, and that a null source raises NullPointerException.
 * The string literals are significant down to their exact code point
 * sequence (composed vs. decomposed), so they must not be re-encoded.
 */
public void testNormalize() {
    final String src = "ϓϔẛ";
    // Should already be canonical composed
    assertEquals(src, Normalizer.normalize(src, Normalizer.Form.NFC));
    // Composed to canonical decomposed
    assertEquals("ϓϔẛ", Normalizer.normalize(src, Normalizer.Form.NFD));
    // Composed to compatibility composed
    assertEquals("ΎΫṡ", Normalizer.normalize(src, Normalizer.Form.NFKC));
    // Composed to compatibility decomposed
    assertEquals("ΎΫṡ", Normalizer.normalize(src, Normalizer.Form.NFKD));
    // Decomposed to canonical composed
    assertEquals("é", Normalizer.normalize("é", Normalizer.Form.NFC));
    // Decomposed to compatibility composed
    assertEquals("ṩ", Normalizer.normalize("ẛ̣", Normalizer.Form.NFKC));
    try {
        Normalizer.normalize(null, Normalizer.Form.NFC);
        fail("Did not throw error on null argument");
    } catch (NullPointerException e) {
        // Expected: a null source must be rejected.
    }
}
Example 46
Project: bugvm-master  File: NativeNormalizer.java View source code
/**
 * Maps a normalization form to the integer value of the matching
 * UNormalizationMode constant.
 *
 * @param form the normalization form
 * @return 4 for NFC, 2 for NFD, 5 for NFKC, 3 for NFKD
 * @throws AssertionError if the form is none of the four known values
 */
private static int toUNormalizationMode(Form form) {
    // Values from UNormalizationMode in "unicode/unorm.h". Stable API since ICU 2.0.
    final int mode;
    switch(form) {
        case NFC:
            mode = 4;
            break;
        case NFD:
            mode = 2;
            break;
        case NFKC:
            mode = 5;
            break;
        case NFKD:
            mode = 3;
            break;
        default:
            throw new AssertionError("unknown Normalizer.Form " + form);
    }
    return mode;
}
Example 47
Project: fakecontacts-master  File: NativeNormalizer.java View source code
/**
 * Maps a normalization form to the integer value of the matching
 * UNormalizationMode constant.
 *
 * @param form the normalization form
 * @return 4 for NFC, 2 for NFD, 5 for NFKC, 3 for NFKD
 * @throws AssertionError if the form is none of the four known values
 */
private static int toUNormalizationMode(Form form) {
    // Values from UNormalizationMode in "unicode/unorm.h". Stable API since ICU 2.0.
    final int mode;
    switch(form) {
        case NFC:
            mode = 4;
            break;
        case NFD:
            mode = 2;
            break;
        case NFKC:
            mode = 5;
            break;
        case NFKD:
            mode = 3;
            break;
        default:
            throw new AssertionError("unknown Normalizer.Form " + form);
    }
    return mode;
}
Example 48
Project: gbif-api-master  File: UnicodeUtils.java View source code
/**
 * Replaces all diacritics with their ascii counterpart.
 *
 * The letters ø, Ø, ð and Ð have no canonical decomposition, so they are
 * mapped by hand to o, O, d and D before the text is decomposed (NFD) and
 * all combining marks are removed.
 *
 * @param x the text to convert; may be null
 * @return the text without diacritics, or null when {@code x} is null
 */
public static String ascii(String x) {
    if (x == null) {
        return null;
    }
    // manually normalize characters not dealt with by the java Normalizer
    // (the fourth source character is capital eth; it had been mis-encoded)
    x = StringUtils.replaceChars(x, "øØðÐ", "oOdD");
    // use java unicode normalizer to split accents off their base letters,
    // then drop every mark (Unicode category M)
    x = Normalizer.normalize(x, Normalizer.Form.NFD);
    x = x.replaceAll("\\p{M}", "");
    return x;
}
Example 49
Project: IMCKTG-master  File: Morse.java View source code
/**
 * Appends the code for each character of {@code s} that has an entry in
 * {@code IMCmap} to {@code sb}; characters without an entry are skipped.
 *
 * The text is NFKD-normalized (on GINGERBREAD and later), lower-cased and
 * has every whitespace run collapsed to a single space before the lookup.
 *
 * @param s  the text to encode
 * @param sb the list that receives the codes, in order
 */
private static void morse(String s, List<String> sb) {
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
        s = Normalizer.normalize(s, Normalizer.Form.NFKD);
    }
    s = s.toLowerCase(Locale.getDefault());
    // Strings are immutable: the result must be assigned back, otherwise the
    // whitespace collapse has no effect.
    s = s.replaceAll("\\s+", " ");
    for (char c : s.toCharArray()) {
        if (IMCmap.containsKey(c)) {
            sb.add(IMCmap.get(c));
        }
    }
}
Example 50
Project: jease-master  File: Filenames.java View source code
/**
 * Converts a filename to a restricted ASCII form.
 *
 * First applies the comma-separated "from:to" replacement pairs read from
 * the I18N key "ASCII_CODES", then strips combining diacritical marks and
 * replaces every character outside {@code [a-zA-Z0-9_/.-]} with '-'.
 *
 * @param filename the filename to convert; may be null
 * @return the converted filename, or null when {@code filename} is null
 */
public String convert(String filename) {
    if (filename == null) {
        return null;
    }
    for (String codePair : I18N.get("ASCII_CODES").split(",")) {
        String[] codePairArray = codePair.split(":");
        // Skip malformed pairs: an entry without a replacement ("x:" or no
        // colon at all) would index past the array, and an empty source
        // (":y") would insert the replacement between every character.
        if (codePairArray.length >= 2 && !codePairArray[0].isEmpty()) {
            filename = filename.replace(codePairArray[0], codePairArray[1]);
        }
    }
    return Normalizer.normalize(filename, Normalizer.Form.NFD).replaceAll("[\\p{InCombiningDiacriticalMarks}]+", "").replaceAll("[^a-zA-Z0-9_/.-]", "-");
}
Example 51
Project: orientdb-master  File: OSQLMethodNormalize.java View source code
/**
 * Normalizes the string form of {@code ioResult}.
 *
 * The optional first parameter names the normalization form (default NFD).
 * The optional second parameter is a regular expression whose matches are
 * removed from the normalized text; without it, diacritical marks are
 * removed.
 *
 * @return the normalized string, or {@code ioResult} unchanged when it is null
 * @throws IllegalArgumentException if the first parameter does not name a
 *             Normalizer.Form constant
 */
@Override
public Object execute(Object iThis, OIdentifiable iCurrentRecord, OCommandContext iContext, Object ioResult, Object[] iParams) {
    if (ioResult != null) {
        final Normalizer.Form form = iParams != null && iParams.length > 0 ? Normalizer.Form.valueOf(OIOUtils.getStringContent(iParams[0].toString())) : Normalizer.Form.NFD;
        String normalized = Normalizer.normalize(ioResult.toString(), form);
        if (iParams != null && iParams.length > 1) {
            // The removal pattern is the SECOND parameter; the first one is
            // the form name and was mistakenly reused here as the regex.
            normalized = normalized.replaceAll(OIOUtils.getStringContent(iParams[1].toString()), "");
        } else {
            normalized = OPatternConst.PATTERN_DIACRITICAL_MARKS.matcher(normalized).replaceAll("");
        }
        ioResult = normalized;
    }
    return ioResult;
}
Example 52
Project: robovm-master  File: Main.java View source code
/**
 * Round-trips a sample string through NFD and NFC and reports each mismatch
 * on stderr. "Normalizer passed" is printed to stdout unconditionally, even
 * after a mismatch was reported.
 * The two literals are meant to differ only in composed vs. decomposed
 * encoding, so they must not be re-encoded.
 */
static void testNormalizer() {
    String composed = "BlÁah";
    String decomposed = "BlÁah";
    String res;
    // composed -> decomposed
    res = Normalizer.normalize(composed, Normalizer.Form.NFD);
    if (!decomposed.equals(res)) {
        System.err.println("Bad decompose: '" + composed + "' --> '" + res + "'");
    }
    // decomposed -> composed
    res = Normalizer.normalize(decomposed, Normalizer.Form.NFC);
    if (!composed.equals(res)) {
        System.err.println("Bad compose: '" + decomposed + "' --> '" + res + "'");
    }
    System.out.println("Normalizer passed");
}
Example 53
Project: XobotOS-master  File: NativeNormalizer.java View source code
/**
 * Maps a normalization form to the integer value of the matching
 * UNormalizationMode constant.
 *
 * @param form the normalization form
 * @return 4 for NFC, 2 for NFD, 5 for NFKC, 3 for NFKD
 * @throws AssertionError if the form is none of the four known values
 */
private static int toUNormalizationMode(Form form) {
    // Values from UNormalizationMode in "unicode/unorm.h". Stable API since ICU 2.0.
    final int mode;
    switch(form) {
        case NFC:
            mode = 4;
            break;
        case NFD:
            mode = 2;
            break;
        case NFKC:
            mode = 5;
            break;
        case NFKD:
            mode = 3;
            break;
        default:
            throw new AssertionError("unknown Normalizer.Form " + form);
    }
    return mode;
}
Example 54
Project: Ivory-master  File: LuceneArabicAnalyzer.java View source code
/**
 * Tokenizes the text: pre-normalization, Lucene standard tokenization with
 * lower-casing, post-normalization, NFKC normalization, optional stopword
 * removal and optional stemming.
 *
 * @param text the raw text
 * @return the resulting tokens, split on single spaces
 */
@Override
public String[] processContent(String text) {
    text = preNormalize(text);
    tokenizer = new StandardTokenizer(Version.LUCENE_35, new StringReader(text));
    TokenStream lowerCased = new LowerCaseFilter(Version.LUCENE_35, tokenizer);
    String joined = postNormalize(streamToString(lowerCased));
    joined = Normalizer.normalize(joined, Form.NFKC);

    StringBuilder kept = new StringBuilder();
    for (String token : joined.split(" ")) {
        boolean discard = isStopwordRemoval() && isDiscard(false, token);
        if (!discard) {
            kept.append(token).append(" ");
        }
    }

    String result = kept.toString().trim();
    if (isStemming()) {
        // Stemming runs on the text that already had its stopwords removed.
        result = stem(result);
    }
    return result.split(" ");
}
Example 55
Project: java-utils-text-master  File: SEOEncoder.java View source code
/**
 * Replaces all characters unsuitable for URLs with logical alternatives using
 * <code>java.text.Normalizer</code>: diacritics are stripped, '&amp;' becomes
 * '+', '/' becomes '-', every other run of characters that are not ASCII
 * alphanumerics, '-' or '+' becomes a single '_', and one trailing '_' is
 * removed.<br>
 *
 * TODO take care of tapestries url-encoding & -> + -> $002b
 *
 * @param text the text to encode; must not be null
 * @return the encoded text
 */
public static String encodeUnicode(final String text) {
    String result = Normalizer.normalize(text, Normalizer.Form.NFD);
    result = result.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    result = result.replace('&', '+');
    result = result.replace('/', '-');
    result = result.replaceAll("[^\\p{Alnum}-\\+]+", "_");
    if (result.endsWith("_")) {
        result = result.substring(0, result.length() - 1);
    }
    return result;
}
Example 56
Project: talismane-master  File: TextPerLineCorpusReader.java View source code
/**
 * Reports whether another sentence is available, reading ahead as needed.
 *
 * Lines are pulled from the reader of each locale in turn; each line is
 * trimmed, lower-cased (English rules) and stripped of combining
 * diacritical marks. Empty lines and lines excluded by the cross-validation
 * or start-sentence settings are skipped. The accepted line is left in the
 * {@code sentence} field.
 *
 * @return true if {@code sentence} holds a line after this call
 */
@Override
public boolean hasNextText() {
    if (this.getMaxSentenceCount() > 0 && sentenceCount >= this.getMaxSentenceCount()) {
    // we've reached the end, do nothing
    } else {
        // Loop until a line is accepted or every reader is exhausted.
        while (sentence == null) {
            // Drop the current scanner once it has no more lines.
            if (scanner != null && !scanner.hasNextLine()) {
                scanner.close();
                scanner = null;
            }
            // Advance through the locales until one yields a scanner with input.
            while (scanner == null) {
                if (localeIterator.hasNext()) {
                    currentLocale = localeIterator.next();
                    Reader reader = readerMap.get(currentLocale);
                    scanner = new Scanner(reader);
                    if (scanner.hasNextLine()) {
                        break;
                    }
                    scanner.close();
                    scanner = null;
                } else {
                    break;
                }
            }
            // No locale left with input: give up, sentence stays null.
            if (scanner == null)
                break;
            sentence = scanner.nextLine().trim();
            sentence = sentence.toLowerCase(Locale.ENGLISH);
            // Decompose, then remove the combining marks to strip accents.
            sentence = Normalizer.normalize(sentence, Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
            if (sentence.length() == 0) {
                // Empty lines are skipped without being counted.
                sentence = null;
                continue;
            }
            boolean includeMe = true;
            // check cross-validation
            if (this.getCrossValidationSize() > 0) {
                if (this.getIncludeIndex() >= 0) {
                    // Include mode: keep only sentences whose index falls in the include slot.
                    if (sentenceCount % this.getCrossValidationSize() != this.getIncludeIndex()) {
                        includeMe = false;
                    }
                } else if (this.getExcludeIndex() >= 0) {
                    // Exclude mode: drop sentences whose index falls in the exclude slot.
                    if (sentenceCount % this.getCrossValidationSize() == this.getExcludeIndex()) {
                        includeMe = false;
                    }
                }
            }
            // Sentences before the configured start index are skipped.
            if (this.getStartSentence() > sentenceCount) {
                includeMe = false;
            }
            // Every non-empty line is counted, whether or not it is included.
            sentenceCount++;
            if (!includeMe) {
                sentence = null;
                continue;
            }
        }
    }
    return sentence != null;
}
Example 57
Project: Duke-master  File: LowerCaseNormalizeCleaner.java View source code
/**
 * Cleans a value: optionally decomposes accents (NFD), drops combining
 * diacritical marks, lower-cases every character, trims leading and
 * trailing whitespace and collapses inner whitespace runs to one space.
 * An 'a' or 'A' followed by a combining ring above (U+030A) is kept as the
 * single letter 'å' instead of being reduced to 'a'.
 *
 * @param value the value to clean
 * @return the cleaned value
 */
public String clean(String value) {
    if (strip_accents) {
        // after this, accents will be represented as separate combining
        // accent characters trailing the character they belong with. the
        // next step will strip them out.
        value = Normalizer.normalize(value, Normalizer.Form.NFD);
    }
    char[] tmp = new char[value.length()];
    int pos = 0;
    boolean prevws = false;
    for (int ix = 0; ix < tmp.length; ix++) {
        char ch = value.charAt(ix);
        // 'a' + combining ring above is the letter å and should *not* be
        // reduced to plain 'a'. The ix > 0 guard keeps a leading U+030A from
        // reading value.charAt(-1); such a stray mark is then dropped by the
        // combining-mark check below.
        if (ch == 0x030A && ix > 0 && (value.charAt(ix - 1) == 'a' || value.charAt(ix - 1) == 'A')) {
            prevws = false;
            // the preceding 'a' was written at pos - 1 in the previous
            // iteration; overwrite it with the precomposed 'å'
            tmp[pos - 1] = 'å';
            continue;
        }
        // if character is combining diacritical mark, skip it.
        if ((ch >= 0x0300 && ch <= 0x036F) || (ch >= 0x1DC0 && ch <= 0x1DFF) || (ch >= 0x20D0 && ch <= 0x20FF) || (ch >= 0xFE20 && ch <= 0xFE2F)) {
            continue;
        }
        // whitespace processing (0xA0 is NBSP)
        if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' && ch != 0xA0) {
            // emit one space for a preceding whitespace run, except at the start
            if (prevws && pos != 0) {
                tmp[pos++] = ' ';
            }
            tmp[pos++] = Character.toLowerCase(ch);
            prevws = false;
        } else {
            prevws = true;
        }
    }
    return new String(tmp, 0, pos);
}
Example 58
Project: hsearch-master  File: NormalizeAccents.java View source code
/**
 * NFD-normalizes the title and cached text of the document's teaser, when
 * present.
 *
 * @param docObj the document, expected to be a {@code Doc}
 * @return false when {@code docObj} is null, true otherwise
 */
public boolean visit(Object docObj) throws ApplicationFault, SystemFault {
    if (docObj == null) {
        return false;
    }
    final DocTeaser teaser = ((Doc) docObj).teaser;
    if (teaser == null) {
        return true;
    }
    final String title = teaser.getTitle();
    if (title != null) {
        teaser.setTitle(Normalizer.normalize(title, Normalizer.Form.NFD));
    }
    final String cached = teaser.getCachedText();
    if (cached != null) {
        teaser.setCacheText(Normalizer.normalize(cached, Normalizer.Form.NFD));
    }
    return true;
}
Example 59
Project: hsearch-obsolete-master  File: NormalizeAccents.java View source code
/**
 * NFD-normalizes the title and cached text of the document's teaser, when
 * present.
 *
 * @param docObj the document, expected to be a {@code Doc}
 * @param multiWriter not used by this method
 * @throws ApplicationFault if {@code docObj} is null
 */
public void visit(Object docObj, boolean multiWriter) throws ApplicationFault, SystemFault {
    if (docObj == null) {
        throw new ApplicationFault("No document");
    }
    final DocTeaser teaser = ((Doc) docObj).teaser;
    if (teaser == null) {
        return;
    }
    final String title = teaser.getTitle();
    if (title != null) {
        teaser.setTitle(Normalizer.normalize(title, Normalizer.Form.NFD));
    }
    final String cached = teaser.getCachedText();
    if (cached != null) {
        teaser.setCacheText(Normalizer.normalize(cached, Normalizer.Form.NFD));
    }
}
Example 60
Project: incubator-hivemall-master  File: NormalizeUnicodeUDF.java View source code
/**
 * Normalizes {@code str} into the Unicode normalization form named by
 * {@code form}. A null or unrecognized form name falls back to NFC.
 *
 * @param str  text to normalize; may be null
 * @param form "NFC", "NFD", "NFKC" or "NFKD" (case-sensitive); may be null
 * @return the normalized text, or null when {@code str} is null
 */
@Nullable
public String evaluate(@Nullable String str, @Nullable String form) {
    if (str == null) {
        return null;
    }
    // Only three names select something other than the NFC default.
    final Normalizer.Form target;
    if ("NFD".equals(form)) {
        target = Normalizer.Form.NFD;
    } else if ("NFKC".equals(form)) {
        target = Normalizer.Form.NFKC;
    } else if ("NFKD".equals(form)) {
        target = Normalizer.Form.NFKD;
    } else {
        target = Normalizer.Form.NFC;
    }
    return Normalizer.normalize(str, target);
}
Example 61
Project: lodmill-master  File: PicaXmlHandler.java View source code
/**
 * Forwards the end of an XML element to the receiver: a subfield emits its
 * trimmed, NFC-normalized text as a literal under the current tag, a
 * datafield ends the entity, and a record element in the expected namespace
 * ends the record. Other elements are ignored.
 */
@Override
public void endElement(final String uri, final String localName, final String qName) throws SAXException {
    if (SUBFIELD.equals(localName)) {
        final String value = builder.toString().trim();
        getReceiver().literal(currentTag, Normalizer.normalize(value, Normalizer.Form.NFC));
        return;
    }
    if (DATAFIELD.equals(localName)) {
        getReceiver().endEntity();
        return;
    }
    if (RECORD.equals(localName) && NAMESPACE.equals(uri)) {
        getReceiver().endRecord();
    }
}
Example 62
Project: MusicDNA-master  File: Genius.java View source code
/**
 * Searches Genius for the query (with diacritics stripped) and converts
 * each hit into a {@code Lyrics} search item.
 *
 * @param query the search text
 * @return the hits in response order; empty when the request failed or the
 *         response status is not 200
 */
public static ArrayList<Lyrics> search(String query) {
    ArrayList<Lyrics> results = new ArrayList<>();
    query = Normalizer.normalize(query, Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");

    JsonObject response = null;
    try {
        String encodedQuery = URLEncoder.encode(query, "UTF-8");
        URL queryURL = new URL(String.format("http://api.genius.com/search?q=%s", encodedQuery));
        Connection connection = Jsoup.connect(queryURL.toExternalForm())
                .header("Authorization", "Bearer " + Config.GENIUS)
                .timeout(0)
                .ignoreContentType(true);
        Document document = connection.userAgent(Net.USER_AGENT).get();
        response = new JsonParser().parse(document.text()).getAsJsonObject();
    } catch (Exception e) {
        e.printStackTrace();
    }
    if (response == null || response.getAsJsonObject("meta").get("status").getAsInt() != 200) {
        return results;
    }

    JsonArray hits = response.getAsJsonObject("response").getAsJsonArray("hits");
    for (int index = 0; index < hits.size(); index++) {
        JsonObject song = hits.get(index).getAsJsonObject().getAsJsonObject("result");
        String artist = song.getAsJsonObject("primary_artist").get("name").getAsString();
        String title = song.get("title").getAsString();
        String url = "http://genius.com/songs/" + song.get("id").getAsString();

        Lyrics item = new Lyrics(Lyrics.SEARCH_ITEM);
        item.setArtist(artist);
        item.setTitle(title);
        item.setURL(url);
        item.setSource("Genius");
        results.add(item);
    }
    return results;
}
Example 63
Project: ramais-pti-android-master  File: UsefulSearchFragment.java View source code
/**
 * Normalizes a search string: removes accents, drops the remaining
 * non-ASCII characters, encodes spaces as "%20" and lower-cases the result.
 *
 * On GINGERBREAD and later the accents are removed with
 * java.text.Normalizer; older versions fall back to a hand-written table of
 * accented letters.
 *
 * @param s the text to normalize
 * @return the normalized text
 */
@SuppressLint("NewApi")
public String normalizar(String s) {
    String str;
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
        /* Use Normalizer normally */
        str = Normalizer.normalize(s, Normalizer.Form.NFD);
        str = str.replaceAll("[^\\p{ASCII}]", "");
        str = str.replaceAll(" ", "%20");
        return str.toLowerCase();
    } else {
        // Fallback table. The capital Á and Í entries had been mis-encoded
        // (U+FFFD), so those letters were deleted instead of transliterated.
        str = s;
        str = str.replaceAll("[çÇ]+", "c");
        str = str.replaceAll("[ãÃáÁ]+", "a");
        str = str.replaceAll("[éÉ]+", "e");
        str = str.replaceAll("[íÍ]+", "i");
        str = str.replaceAll("[õÕóÓ]+", "o");
        str = str.replaceAll("[úÚ]+", "u");
        str = str.replaceAll("[^\\p{ASCII}]", "");
        str = str.replaceAll(" ", "%20");
        Log.d("Teste", str);
        return str.toLowerCase();
    }
}
Example 64
Project: universal-java-matrix-package-master  File: HtmlUtil.java View source code
/**
 * Converts text to a URL slug: accents are stripped, dots, colons and
 * whitespace runs become hyphens, every character outside
 * {@code [a-zA-Z0-9- ]} is removed, the result is lower-cased and runs of
 * hyphens are collapsed to a single hyphen.
 *
 * @param s the text to convert; must not be null
 * @return the slug
 */
public static final String toSlug(String s) {
    s = Normalizer.normalize(s, Normalizer.Form.NFD);
    s = s.replaceAll("\\.", "-");
    s = s.replaceAll(":", "-");
    s = s.replaceAll("\\s+", "-");
    s = s.replaceAll("[^\\p{ASCII}]", "");
    s = s.replaceAll("[^a-zA-Z0-9- ]", "");
    // Locale-independent: the default locale could map 'I' to a non-ASCII
    // dotless i (Turkish) after the ASCII filtering above.
    s = s.toLowerCase(java.util.Locale.ROOT);
    // Collapse hyphen runs of any length; three passes of "--" -> "-" left
    // double hyphens behind for runs of nine or more.
    s = s.replaceAll("-{2,}", "-");
    return s;
}
Example 65
Project: androidbible-master  File: ReverseIndexer.java View source code
/**
 * Builds a word -> record-number reverse index for the given text database
 * and writes it to {@code <prefix>_revindex_bt.bt} inside {@code outDir}.
 *
 * <p>Record numbers ("lid") are 1-based positions in
 * {@code teksDb.toRecList()}. Words are extracted after removing combining
 * diacritical marks and lower-casing the text. Progress and statistics are
 * printed to standard output.
 *
 * @param outDir directory that receives the index file
 * @param prefix file name prefix of the index file
 * @param teksDb source of the records to index
 * @throws RuntimeException wrapping any exception raised while writing
 */
public static void createReverseIndex(File outDir, String prefix, TextDb teksDb) {
    Pattern p_word = Pattern.compile("[A-Za-z]+(?:[-'][A-Za-z]+)*");
    // Words are ordered by length first, then alphabetically.
    Map<String, Set<Integer>> map = new TreeMap<>(new Comparator<String>() {

        @Override
        public int compare(String o1, String o2) {
            int lenc = o1.length() - o2.length();
            if (lenc == 0) {
                return o1.compareTo(o2);
            } else {
                return lenc;
            }
        }
    });
    {
        int lid = 0;
        for (Rec rec : teksDb.toRecList()) {
            lid++;
            String text = Normalizer.normalize(rec.text, Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
            text = text.toLowerCase();
            Matcher m = p_word.matcher(text);
            while (m.find()) {
                String word = m.group();
                Set<Integer> locations = map.get(word);
                if (locations == null) {
                    locations = new TreeSet<>();
                    map.put(word, locations);
                }
                locations.add(lid);
            }
        }
        System.out.println("Last lid = " + lid);
    }
    int maxwordlen = 0;
    for (Map.Entry<String, Set<Integer>> e : map.entrySet()) {
        String word = e.getKey();
        System.out.println("word " + word + " lids=" + e.getValue());
        if (word.length() > maxwordlen)
            maxwordlen = word.length();
    }
    System.out.println("Number of words: " + map.size());
    System.out.println("Longest word: " + maxwordlen);
    int stat_lid_absolute = 0;
    int stat_lid_delta = 0;
    try {
        BintexWriter bw = new BintexWriter(new FileOutputStream(new File(outDir, String.format("%s_revindex_bt.bt", prefix))));
        // Close the writer even when a write fails, so the underlying file
        // handle is not leaked.
        try {
            // :: int word_count
            bw.writeInt(map.size());
            // split based on word length
            for (int i = 1; i <= maxwordlen; i++) {
                Map<String, Set<Integer>> lenmap = new TreeMap<>();
                for (Map.Entry<String, Set<Integer>> e : map.entrySet()) {
                    String word = e.getKey();
                    if (i == word.length()) {
                        lenmap.put(word, e.getValue());
                    }
                }
                int cnt = lenmap.size();
                System.out.println("Words with length " + i + ": " + cnt);
                if (cnt != 0) {
                    // :: uint8 word_len
                    // :: int word_by_len_count
                    bw.writeUint8(i);
                    bw.writeInt(cnt);
                    for (Map.Entry<String, Set<Integer>> e : lenmap.entrySet()) {
                        String word = e.getKey();
                        Set<Integer> lids = e.getValue();
                        // :: byte[word_len] word
                        // :: uint16 lid_count
                        bw.writeRaw(word.getBytes(ascii));
                        bw.writeUint16(lids.size());
                        int last_lid = 0;
                        for (int lid : lids) {
                            // Small gaps are stored as a one-byte delta,
                            // larger ones as the absolute lid with the
                            // 0x8000 bit set.
                            int delta = lid - last_lid;
                            if (delta <= 0x7f) {
                                bw.writeUint8(delta);
                                stat_lid_delta++;
                            } else {
                                // NOTE(review): a lid >= 0x8000 does not fit
                                // next to the marker bit in one char --
                                // confirm the record count stays below that.
                                bw.writeChar((char) (0x8000 | lid));
                                stat_lid_absolute++;
                            }
                            last_lid = lid;
                        }
                    }
                }
            }
        } finally {
            bw.close();
        }
        System.out.println("Lid written using delta = " + stat_lid_delta);
        System.out.println("Lid written using absolute = " + stat_lid_absolute);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Example 66
Project: cloudhopper-commons-charset-master  File: MobileTextUtil.java View source code
/**
 * Replace accented characters with their ascii equivalents.  For example,
 * convert é to e.<br><br>
 * Accents are removed both from precomposed characters (a single "é") and
 * from text that is already decomposed (an "e" followed by a combining
 * acute accent).<br><br>
 * NOTE: This method is not very efficient.  The String will be copied
 * twice during conversion, so you'll likely only want to run this against
 * small strings.
 *
 * @param buffer The buffer containing the characters to analyze and replace
 *      if necessary. It is left untouched when it contains no accents.
 * @return The number of combining accent characters that were removed;
 *      0 when the buffer was not modified
 */
public static int replaceAccentedChars(StringBuilder buffer) {
    // each accented char is converted into the ascii version followed by
    // the combining accent character(s)
    String decomposed = Normalizer.normalize(buffer, Normalizer.Form.NFD);
    // replace the accents with nothing
    String stripped = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    // Count what was actually removed. Comparing the decomposed length with
    // the original length missed input that was already decomposed, leaving
    // its accents in place.
    int replaced = decomposed.length() - stripped.length();
    if (replaced == 0) {
        return 0;
    }
    buffer.setLength(0);
    buffer.append(stripped);
    return replaced;
}
Example 67
Project: commcare-master  File: StringUtils.java View source code
/**
 * Returns a canonical form of the given string: lower-cased and with
 * combining diacritical marks (accents and the like) removed. Results are
 * memoized in an LRU cache keyed by the original input.
 *
 * @param input A non-null string
 * @return the lower-cased input without diacritical marks
 */
@SuppressLint("NewApi")
public static synchronized String normalize(String input) {
    // The cache and the diacritics pattern are created together on first use.
    if (normalizationCache == null) {
        normalizationCache = new LruCache<>(cacheSize);
        diacritics = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    }

    final String hit = normalizationCache.get(input);
    if (hit != null) {
        return hit;
    }

    // Use the Normalizer API where the platform offers it. Without it the
    // input is used as-is, so only marks that are already stored as separate
    // combining characters can be eliminated below.
    // TODO: I doubt it's worth it, but in theory we could run some other
    // normalization for the minority of pre-API9 devices.
    final boolean normalizerAvailable = Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD;
    final String decomposed = normalizerAvailable
            ? Normalizer.normalize(input, Normalizer.Form.NFD)
            : input;

    final String canonical = diacritics.matcher(decomposed).replaceAll("").toLowerCase();
    normalizationCache.put(input, canonical);
    return canonical;
}
Example 68
Project: idea-php-typo3-plugin-master  File: Slugify.java View source code
/**
 * Applies the slug normalization pipeline to {@code input}: NFKD
 * decomposition followed by three pattern-based replacements. Matches of the
 * separator pattern are replaced by "_" or "-" depending on
 * {@code underscoreSeparator}; matches of the other two patterns are replaced
 * by {@code EMPTY}.
 *
 * @param input the text to normalize
 * @return the normalized text
 */
private String normalize(final String input) {
    final String separator = underscoreSeparator ? "_" : "-";
    final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFKD);
    final String withoutNonAscii = PATTERN_NORMALIZE_NON_ASCII.matcher(decomposed).replaceAll(EMPTY);
    final String separated = PATTERN_NORMALIZE_SEPARATOR.matcher(withoutNonAscii).replaceAll(separator);
    return PATTERN_NORMALIZE_TRIM_DASH.matcher(separated).replaceAll(EMPTY);
}
Example 69
Project: ios-driver-master  File: LanguageDictionary.java View source code
/**
 * Tells whether {@code content} matches the regular expression derived from
 * {@code originalText}. Both strings are normalized with the configured
 * normalization form before being compared.
 *
 * @param content      the text to test
 * @param originalText the text the regular expression is built from
 * @return {@code true} if the normalized content matches entirely;
 *         {@code false} if it does not or if the derived expression is not
 *         a valid regular expression
 */
public boolean match(String content, String originalText) {
    final String candidate = Normalizer.normalize(content, norme);
    final String regex = getRegexPattern(Normalizer.normalize(originalText, norme));
    try {
        return candidate.matches(regex);
    } catch (PatternSyntaxException invalidPattern) {
        // An unusable pattern simply counts as "no match".
        return false;
    }
}
Example 70
Project: j2objc-master  File: NormalizerTest.java View source code
/**
 * Checks {@code Normalizer.normalize} for all four normalization forms and
 * for a {@code null} source.
 *
 * <p>All test data is written with Unicode escapes: composed and decomposed
 * sequences render identically, so raw literals are unreadable in review and
 * silently break if the source file is re-encoded or normalized.
 */
public void testNormalize() {
    // U+03D3, U+03D4, U+1E9B: precomposed characters that have both
    // canonical and compatibility decompositions.
    final String src = "\u03d3\u03d4\u1e9b";
    // Should already be canonical composed
    assertEquals(src, Normalizer.normalize(src, Normalizer.Form.NFC));
    // Composed to canonical decomposed
    assertEquals("\u03d2\u0301\u03d2\u0308\u017f\u0307", Normalizer.normalize(src, Normalizer.Form.NFD));
    // Composed to compatibility composed
    assertEquals("\u038e\u03ab\u1e61", Normalizer.normalize(src, Normalizer.Form.NFKC));
    // Composed to compatibility decomposed
    assertEquals("\u03a5\u0301\u03a5\u0308\u0073\u0307", Normalizer.normalize(src, Normalizer.Form.NFKD));
    // Decomposed to canonical composed
    assertEquals("\u00e9", Normalizer.normalize("\u0065\u0301", Normalizer.Form.NFC));
    // Decomposed to compatibility composed
    assertEquals("\u1e69", Normalizer.normalize("\u1e9b\u0323", Normalizer.Form.NFKC));
    try {
        Normalizer.normalize(null, Normalizer.Form.NFC);
        fail("Did not throw error on null argument");
    } catch (NullPointerException expected) {
        // A null source must be rejected.
    }
}
Example 71
Project: KISS-master  File: StringNormalizer.java View source code
/**
 * Simplifies the given string so that it is easier to compare, and records
 * where each kept character came from.
 * <p/>
 * 1. Each input code point is decomposed on its own (compatibility
 * decomposition, NFKD)
 * 2. Combining marks produced by the decomposition are dropped
 * 3. Dash punctuation is dropped as well
 * 4. Everything that is kept is lower-cased
 * <p/>
 * Compatibility decomposition replaces characters that (essentially) mean the
 * same as one or more other, more common, characters. Examples:
 * Roman numerals (`Ⅱ` → `II`) and half-width katakana (`ﾐ` → `ミ`)
 * <p/>
 * Combining marks are diacritics and other extra strokes found on many
 * characters in non-English roman scripts. Examples:
 * Diaereses (`ë` → `e`), acutes (`á` → `a`) and macrons (`ō` → `o`)
 *
 * @param input string input, with accents and anything else you can think of
 * @return the normalized string together with the source offsets: one input
 * offset is recorded for every code point appended to the result
 */
public static Pair<String, int[]> normalizeWithMap(String input) {
    final int inputLength = input.length();
    StringBuilder normalized = new StringBuilder();
    IntSequenceBuilder sourceOffsets = new IntSequenceBuilder(inputLength * 3 / 2);

    for (int sourcePos = 0; sourcePos < inputLength; ) {
        final int sourceCodePoint = input.codePointAt(sourcePos);

        // A single code point may decompose into several code points.
        final String decomposed = Normalizer.normalize(
                new String(Character.toChars(sourceCodePoint)), Normalizer.Form.NFKD);

        for (int pos = 0; pos < decomposed.length(); ) {
            final int codePoint = decomposed.codePointAt(pos);
            final int category = Character.getType(codePoint);

            final boolean isCombiningMark = category == Character.NON_SPACING_MARK
                    || category == Character.COMBINING_SPACING_MARK;
            final boolean isDash = category == Character.DASH_PUNCTUATION;

            if (!isCombiningMark && !isDash) {
                normalized.appendCodePoint(Character.toLowerCase(codePoint));
                sourceOffsets.add(sourcePos);
            }
            pos += Character.charCount(codePoint);
        }
        sourcePos += Character.charCount(sourceCodePoint);
    }

    return new Pair<>(normalized.toString(), sourceOffsets.toArray());
}
Example 72
Project: lyrics-master  File: Genius.java View source code
/**
 * Searches the Genius API for songs matching {@code query}.
 *
 * <p>Combining diacritical marks are stripped from the query before it is
 * sent. Each hit is turned into a {@code Lyrics} search item carrying artist,
 * title, song URL and the source name "Genius".
 *
 * @param query free-text search query
 * @return the search items found; empty when the request fails, the reply
 *         cannot be parsed or the reported status is not 200
 */
public static ArrayList<Lyrics> search(String query) {
    ArrayList<Lyrics> found = new ArrayList<>();
    String plainQuery = Normalizer.normalize(query, Normalizer.Form.NFD);
    plainQuery = plainQuery.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");

    JsonObject reply = null;
    try {
        String encodedQuery = URLEncoder.encode(plainQuery, "UTF-8");
        URL endpoint = new URL(String.format("http://api.genius.com/search?q=%s", encodedQuery));
        Connection request = Jsoup.connect(endpoint.toExternalForm())
                .header("Authorization", "Bearer " + Keys.GENIUS)
                .timeout(0)
                .ignoreContentType(true);
        Document body = request.userAgent(Net.USER_AGENT).get();
        reply = new JsonParser().parse(body.text()).getAsJsonObject();
    } catch (JsonSyntaxException | IOException e) {
        e.printStackTrace();
    }

    boolean usable = reply != null && reply.getAsJsonObject("meta").get("status").getAsInt() == 200;
    if (!usable) {
        return found;
    }

    JsonArray hits = reply.getAsJsonObject("response").getAsJsonArray("hits");
    for (int index = 0; index < hits.size(); index++) {
        JsonObject song = hits.get(index).getAsJsonObject().getAsJsonObject("result");
        Lyrics item = new Lyrics(Lyrics.SEARCH_ITEM);
        item.setArtist(song.getAsJsonObject("primary_artist").get("name").getAsString());
        item.setTitle(song.get("title").getAsString());
        item.setURL("http://genius.com/songs/" + song.get("id").getAsString());
        item.setSource("Genius");
        found.add(item);
    }
    return found;
}
Example 73
Project: musicmount-master  File: SimpleAssetLocatorTest.java View source code
/**
 * Verifies the asset paths produced by {@code SimpleAssetLocator} for
 * different path prefixes and normalization forms: percent-encoding of
 * spaces and non-ASCII characters, and clean-up of the prefix.
 */
@Test
public void testGetAssetPath() throws IOException, URISyntaxException {
    ResourceProvider provider = new FileResourceProvider();
    Resource home = provider.newResource(System.getProperty("user.home"));

    // No explicit normalization form.
    SimpleAssetLocator plainLocator = new SimpleAssetLocator(home, "music", null);
    Assert.assertEquals("music/sample-aac.m4a", plainLocator.getAssetPath(home.resolve("sample-aac.m4a")));
    Assert.assertEquals("music/sample%20aac.m4a", plainLocator.getAssetPath(home.resolve("sample aac.m4a")));
    Assert.assertEquals("music/Bj%C3%B6rk/Vespertine/07%20Aurora.m4a", plainLocator.getAssetPath(home.resolve("Björk/Vespertine/07 Aurora.m4a")));
    // Behavior changed from jdk7u25 to jdk7u45, seems to normalize to NFC by
    // default, so this check with a combining diaeresis is disabled:
    //   Assert.assertEquals("music/Bjo%CC%88rk/Vespertine/07%20Aurora.m4a", locator.getAssetPath(baseFolder.resolve("Bjo\u0308rk/Vespertine/07 Aurora.m4a")));

    // Character composition (NFC).
    SimpleAssetLocator composingLocator = new SimpleAssetLocator(home, "music", Normalizer.Form.NFC);
    Assert.assertEquals("music/Bj%C3%B6rk/Vespertine/07%20Aurora.m4a", composingLocator.getAssetPath(home.resolve("Björk/Vespertine/07 Aurora.m4a")));

    // Character decomposition (NFD).
    SimpleAssetLocator decomposingLocator = new SimpleAssetLocator(home, "music", Normalizer.Form.NFD);
    Assert.assertEquals("music/Bjo%CC%88rk/Vespertine/07%20Aurora.m4a", decomposingLocator.getAssetPath(home.resolve("Björk/Vespertine/07 Aurora.m4a")));

    // Prefix surrounded by redundant slashes.
    SimpleAssetLocator slashedLocator = new SimpleAssetLocator(home, "///music///", null);
    Assert.assertEquals("/music/sample-aac.m4a", slashedLocator.getAssetPath(home.resolve("sample-aac.m4a")));

    // Empty prefix.
    SimpleAssetLocator rootLocator = new SimpleAssetLocator(home, "", null);
    Assert.assertEquals("sample-aac.m4a", rootLocator.getAssetPath(home.resolve("sample-aac.m4a")));

    // Nested prefix.
    SimpleAssetLocator nestedLocator = new SimpleAssetLocator(home, "music/sample-album", null);
    Assert.assertEquals("music/sample-album/sample-aac.m4a", nestedLocator.getAssetPath(home.resolve("sample-aac.m4a")));

    // Prefix containing a space.
    SimpleAssetLocator spacedLocator = new SimpleAssetLocator(home, "my music", null);
    Assert.assertEquals("my%20music/sample-aac.m4a", spacedLocator.getAssetPath(home.resolve("sample-aac.m4a")));
}
Example 74
Project: QuickLyric-master  File: Genius.java View source code
/**
 * Queries the Genius search API and converts every hit into a
 * {@code Lyrics} search item (artist, title, song URL, source "Genius").
 *
 * <p>The query is sent without combining diacritical marks.
 *
 * @param query free-text search query
 * @return the items built from the hits; an empty list when the request or
 *         the parsing fails, or when the reply status differs from 200
 */
public static ArrayList<Lyrics> search(String query) {
    final ArrayList<Lyrics> items = new ArrayList<>();
    final String strippedQuery = Normalizer.normalize(query, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");

    JsonObject json = null;
    try {
        final URL searchUrl = new URL(String.format("http://api.genius.com/search?q=%s", URLEncoder.encode(strippedQuery, "UTF-8")));
        final Connection http = Jsoup.connect(searchUrl.toExternalForm())
                .header("Authorization", "Bearer " + Keys.GENIUS)
                .timeout(0)
                .ignoreContentType(true);
        final Document page = http.userAgent(Net.USER_AGENT).get();
        json = new JsonParser().parse(page.text()).getAsJsonObject();
    } catch (JsonSyntaxException | IOException failure) {
        failure.printStackTrace();
    }

    if (json == null) {
        return items;
    }
    if (json.getAsJsonObject("meta").get("status").getAsInt() != 200) {
        return items;
    }

    final JsonArray hits = json.getAsJsonObject("response").getAsJsonArray("hits");
    for (int n = 0; n < hits.size(); n++) {
        final JsonObject result = hits.get(n).getAsJsonObject().getAsJsonObject("result");
        final String artistName = result.getAsJsonObject("primary_artist").get("name").getAsString();
        final String songTitle = result.get("title").getAsString();
        final String songUrl = "http://genius.com/songs/" + result.get("id").getAsString();

        final Lyrics entry = new Lyrics(Lyrics.SEARCH_ITEM);
        entry.setArtist(artistName);
        entry.setTitle(songTitle);
        entry.setURL(songUrl);
        entry.setSource("Genius");
        items.add(entry);
    }
    return items;
}
Example 75
Project: Raildelays-master  File: StationBasedExcelRowComparator.java View source code
/**
 * Builds a function that yields a comparison-friendly station name for a row.
 *
 * <p>The name is taken in the configured language, NFD-decomposed, stripped
 * of every non-ASCII character and upper-cased with the English locale.
 *
 * @param keyExtractor extracts the station from a row
 * @return a function returning the normalized station name, or {@code null}
 *         when the row has no station or the station name is empty
 */
protected Function<T, String> getStationName(Function<T, Station> keyExtractor) {
    return row -> {
        final Station station = keyExtractor.apply(row);
        if (station == null) {
            return null;
        }
        final String rawName = station.getName(language);
        if ("".equals(rawName)) {
            return null;
        }
        final String decomposed = Normalizer.normalize(rawName, Normalizer.Form.NFD);
        return decomposed.replaceAll("[^\\p{ASCII}]", "").toUpperCase(Locale.ENGLISH);
    };
}
Example 76
Project: roboconf-platform-master  File: AbstractApplication.java View source code
/**
 * Sets both the display name and the name.
 *
 * <p>The display name is the trimmed argument, accents included. The name is
 * the same text with combining diacritical marks removed, i.e. accented
 * characters are replaced by their equivalent without accent.
 *
 * @param name the name to set; {@code null} clears both values
 */
public final void setName(String name) {
    if (name == null) {
        this.displayName = null;
        this.name = null;
        return;
    }

    final boolean blank = Utils.isEmptyOrWhitespaces(name);
    this.displayName = name.trim();
    if (blank) {
        // Nothing to strip from an empty or whitespace-only value.
        this.name = name.trim();
        return;
    }

    final String decomposed = Normalizer.normalize(name, Normalizer.Form.NFD);
    final Pattern diacritics = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    this.name = diacritics.matcher(decomposed).replaceAll("").trim();
}
Example 77
Project: sigio.jar-master  File: JSONStringAdapter.java View source code
/**
 * Converts content of a java.lang.String to a format suitable for
 * JSON.
 *
 * <p>Backspace, tab, newline, form feed, carriage return, solidus,
 * backslash and the quote character are replaced by their configured
 * sequences; any other character below U+0020 is dropped. The result is
 * surrounded by quote characters and returned in canonical decomposed form
 * (NFD).
 *
 * @param str the text to convert; must not be {@code null}
 * @return the quoted, escaped and NFD-normalized text
 */
static String toJSONString(String str) {
    StringBuilder sb = new StringBuilder();
    // Walk the string by code point. Advancing one char at a time while
    // reading code points, and casting them to char, mangled characters
    // outside the Basic Multilingual Plane.
    for (int i = 0; i < str.length(); ) {
        int c = str.codePointAt(i);
        i += Character.charCount(c);
        switch(c) {
            case 0x0008:
                sb.append(JSONStringAdapter.backspace_seq);
                break;
            case 0x0009:
                sb.append(JSONStringAdapter.tab_seq);
                break;
            case 0x000A:
                sb.append(JSONStringAdapter.nl_seq);
                break;
            case 0x000C:
                sb.append(JSONStringAdapter.ff_seq);
                break;
            case 0x000D:
                sb.append(JSONStringAdapter.cr_seq);
                break;
            case 0x002F:
                sb.append(JSONStringAdapter.solidus_seq);
                break;
            case 0x005C:
                sb.append(JSON.ESCAPE_CHAR);
                break;
            case JSON.QUOTE_CHAR:
                sb.append(JSONStringAdapter.quote_seq);
                break;
            default:
                // Remaining control characters are silently discarded.
                if (c >= 0x0020)
                    sb.appendCodePoint(c);
                break;
        }
    }
    // Surround the string with quotes:
    sb.insert(0, JSON.QUOTE_CHAR);
    sb.append(JSON.QUOTE_CHAR);
    // Canonical Decomposition of Unicode (NFD).
    String string = Normalizer.normalize(sb.toString(), Normalizer.Form.NFD);
    return string;
}
Example 78
Project: songbook-master  File: SongDatabase.java View source code
/**
 * Turns an identifier into its URL-encoded, accent-free, lower-case form.
 *
 * <p>Single and double quotes become spaces, surrounding whitespace is
 * trimmed, combining marks are removed after NFD decomposition, and the
 * result is lower-cased and URL-encoded as UTF-8.
 *
 * @param id the identifier to encode
 * @return the encoded identifier; the cleaned but unencoded identifier if
 *         UTF-8 is reported as unsupported
 */
private static String encodeId(String id) {
    String cleaned = id.replace("'", " ").replace("\"", " ").trim();
    String decomposed = Normalizer.normalize(cleaned, Normalizer.Form.NFD);
    String simplified = decomposed.replaceAll("\\p{M}", "").toLowerCase();
    try {
        return URLEncoder.encode(simplified, "UTF-8");
    } catch (UnsupportedEncodingException unsupported) {
        return simplified;
    }
}
Example 79
Project: uberfire-master  File: TextUtil.java View source code
/**
 * Normalizes a repository name.
 *
 * <p>A leading or trailing '.' or '-' is removed by normalizing the remainder
 * recursively. The text is then NFD-decomposed and every match of the three
 * repository patterns is replaced by {@code EMPTY_STRING}, in order.
 *
 * @param input the repository name; must not be {@code null}
 * @return the normalized repository name
 */
public static String normalizeRepositoryName(String input) {
    // Remove leading and/or trailing '.' and '-'
    final boolean badStart = input.startsWith(".") || input.startsWith("-");
    if (badStart) {
        input = normalizeRepositoryName(input.substring(1));
    }
    final boolean badEnd = input.endsWith(".") || input.endsWith("-");
    if (badEnd) {
        input = normalizeRepositoryName(input.substring(0, input.length() - 1));
    }

    // Repository operations are not too frequent, so the matchers are
    // instantiated on demand.
    String name = Normalizer.normalize(input, Normalizer.Form.NFD);
    name = repoP1.matcher(name).replaceAll(EMPTY_STRING);
    name = repoP2.matcher(name).replaceAll(EMPTY_STRING);
    name = repoP3.matcher(name).replaceAll(EMPTY_STRING);
    return name;
}
Example 80
Project: wildfly-elytron-master  File: ExternalSaslServer.java View source code
/**
 * Evaluates the single client message of this mechanism.
 *
 * <p>A non-empty response is decoded as UTF-8 and NFKC-normalized to obtain
 * the authorization ID, which must not contain a NUL character; an empty
 * response means no authorization ID. The ID is then submitted to the
 * callback handler through an {@code AuthorizeCallback}.
 *
 * @param response the client message
 * @return always {@code null}
 * @throws SaslException if a message arrives after completion, the
 *         authorization ID contains a NUL character, the callback handler
 *         fails, or authorization is denied
 */
public byte[] evaluateResponse(final byte[] response) throws SaslException {
    if (complete) {
        throw log.mechMessageAfterComplete(getMechanismName()).toSaslException();
    }
    // Only one message is accepted, whatever its outcome.
    complete = true;

    String requestedId = null;
    if (response.length != 0) {
        requestedId = Normalizer.normalize(new String(response, StandardCharsets.UTF_8), Normalizer.Form.NFKC);
        final boolean containsNul = requestedId.indexOf(0) != -1;
        if (containsNul) {
            throw log.mechUserNameContainsInvalidCharacter(getMechanismName()).toSaslException();
        }
    }

    final AuthorizeCallback callback = new AuthorizeCallback(null, requestedId);
    try {
        cbh.handle(Arrays2.of(callback));
    } catch (SaslException sasl) {
        // Already the right exception type: pass it on unchanged.
        throw sasl;
    } catch (IOException io) {
        throw log.mechAuthorizationFailed(getMechanismName(), io).toSaslException();
    } catch (UnsupportedCallbackException unsupported) {
        throw log.mechAuthorizationFailed(getMechanismName(), unsupported).toSaslException();
    }

    if (callback.isAuthorized()) {
        this.authorizationID = callback.getAuthorizedID();
        return null;
    }
    throw log.mechAuthorizationFailed(getMechanismName(), null, requestedId).toSaslException();
}
Example 81
Project: wildfly-security-master  File: ExternalSaslServer.java View source code
/**
 * Processes the client's one and only message.
 *
 * <p>An empty message carries no authorization ID. Otherwise the bytes are
 * read as UTF-8, normalized to NFKC and rejected if they contain a NUL
 * character. The resulting ID is handed to the callback handler inside an
 * {@code AuthorizeCallback}; on success the authorized ID is stored.
 *
 * @param response the client message
 * @return always {@code null}
 * @throws SaslException if the exchange was already complete, the ID is
 *         invalid, the callback handler fails, or authorization is refused
 */
public byte[] evaluateResponse(final byte[] response) throws SaslException {
    if (complete) {
        throw log.mechMessageAfterComplete(getMechanismName()).toSaslException();
    }
    complete = true;

    final String authzId;
    if (response.length > 0) {
        final String decoded = new String(response, StandardCharsets.UTF_8);
        authzId = Normalizer.normalize(decoded, Normalizer.Form.NFKC);
        if (authzId.indexOf(0) != -1) {
            throw log.mechUserNameContainsInvalidCharacter(getMechanismName()).toSaslException();
        }
    } else {
        authzId = null;
    }

    final AuthorizeCallback authorization = new AuthorizeCallback(null, authzId);
    try {
        cbh.handle(Arrays2.of(authorization));
    } catch (SaslException alreadySasl) {
        throw alreadySasl;
    } catch (IOException ioFailure) {
        throw log.mechAuthorizationFailed(getMechanismName(), ioFailure).toSaslException();
    } catch (UnsupportedCallbackException callbackFailure) {
        throw log.mechAuthorizationFailed(getMechanismName(), callbackFailure).toSaslException();
    }

    final boolean denied = !authorization.isAuthorized();
    if (denied) {
        throw log.mechAuthorizationFailed(getMechanismName(), null, authzId).toSaslException();
    }
    this.authorizationID = authorization.getAuthorizedID();
    return null;
}
Example 82
Project: openjdk-master  File: CDataTransferer.java View source code
/**
 * Translates native clipboard / drag-and-drop bytes into a Java object,
 * pre-processing three cases before delegating to the superclass:
 * <ul>
 * <li>{@code CF_URL} requested as {@code URL}: the bytes are decoded to text
 * and the URL extracted from it is returned directly;</li>
 * <li>a URI-list flavor backed by {@code CF_FILE}: the entries returned by
 * {@code dragQueryFile} are joined with the line separator and handled as
 * {@code CF_STRING};</li>
 * <li>{@code CF_STRING}: the UTF-8 text is normalized to NFC.</li>
 * </ul>
 *
 * @return the translated object, or {@code null} when {@code dragQueryFile}
 *         yields nothing
 * @throws IOException if decoding or the superclass translation fails
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor, long format, Transferable transferable) throws IOException {
    final boolean urlRequested = format == CF_URL && URL.class.equals(flavor.getRepresentationClass());
    if (urlRequested) {
        String charsetName = Charset.defaultCharset().name();
        final boolean encodingProvided = transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
        if (encodingProvided) {
            try {
                final byte[] encodingBytes = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                charsetName = new String(encodingBytes, StandardCharsets.UTF_8);
            } catch (UnsupportedFlavorException cannotHappen) {
                // The flavor was reported as supported just above.
            }
        }
        // The URL is embedded in the decoded text, let's extract it.
        final String xml = new String(bytes, charsetName);
        return new URL(extractURL(xml));
    }

    byte[] payload = bytes;
    long payloadFormat = format;
    if (isUriListFlavor(flavor) && format == CF_FILE) {
        // dragQueryFile works fine with files and url,
        // it parses and extracts values from property list.
        // macosx always returns property list for
        // CF_URL and CF_FILE
        final String[] entries = dragQueryFile(bytes);
        if (entries == null) {
            return null;
        }
        // The URIs are now plain text: treat them as a regular string so the
        // base method can translate them to the target representation class.
        payload = String.join(System.getProperty("line.separator"), entries).getBytes();
        payloadFormat = CF_STRING;
    } else if (format == CF_STRING) {
        final String text = new String(bytes, "UTF8");
        payload = Normalizer.normalize(text, Form.NFC).getBytes("UTF8");
    }
    return super.translateBytes(payload, flavor, payloadFormat, transferable);
}
Example 83
Project: behave-master  File: ScreenShootingMaker.java View source code
/**
 * Saves a screenshot after a scenario failure.
 *
 * <p>Nothing is captured for pending steps or when no runner is available.
 * The screenshot is always stored under a path derived from the failure
 * UUID and, when the current scenario is known, a second time under a path
 * that also contains the sanitized scenario name.
 *
 * @param uuidWrappedFailure the failure that triggered the capture
 */
public void afterScenarioFailure(UUIDExceptionWrapper uuidWrappedFailure) throws Exception {
    // Pending steps do not get a screenshot.
    if (uuidWrappedFailure instanceof PendingStepFound) {
        return;
    }

    // A missing runner is not an error: there is simply nothing to capture.
    Runner browser = null;
    try {
        browser = (Runner) InjectionManager.getInstance().getInstanceDependecy(Runner.class);
    } catch (BehaveException noRunner) {
    }
    if (browser == null) {
        return;
    }

    String scenarioName = BehaveContext.getInstance().getCurrentScenario();
    String scenarioShotPath = "";
    if (scenarioName != null) {
        // Turn the scenario title into something usable inside a path:
        // no spaces, no accents, no '-', ':', '.' or '#'.
        String plain = Normalizer.normalize(scenarioName, Normalizer.Form.NFD);
        plain = plain.replace(" ", "");
        plain = plain.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
        plain = plain.replaceAll("[-]", "");
        plain = plain.replaceAll("[:]", "");
        plain = plain.replaceAll("[.]", "");
        plain = plain.replaceAll("[#]", "");
        scenarioName = plain;
        scenarioShotPath = screenshotPathWithScenario(scenarioName, uuidWrappedFailure.getUUID());
    }
    String shotPath = screenshotPath(uuidWrappedFailure.getUUID());

    // The URL is only used in log messages; failing to read it is harmless.
    String pageUrl = "";
    try {
        pageUrl = browser.getCurrentUrl();
    } catch (Exception urlUnavailable) {
    }

    try {
        browser.saveScreenshotTo(shotPath);
        if (!scenarioShotPath.equals("")) {
            browser.saveScreenshotTo(scenarioShotPath);
        }
    } catch (Exception saveFailure) {
        logger.error(message.getString("exception-screen-save", pageUrl, shotPath, saveFailure.getMessage()));
        logger.error(saveFailure);
        return;
    }
    logger.info(message.getString("message-screen-save", pageUrl, shotPath, new File(shotPath).length()));
}
Example 84
Project: Biblivre-3-master  File: Z3950Client.java View source code
/**
 * Runs a Z39.50 search against the given server and converts the returned
 * ISO 2709 records.
 *
 * <p>Each raw record is decoded with the server charset (MARC-8 data is
 * converted through {@code AnselToUnicode} and normalized to NFC) and parsed
 * with up to four strategies, the last two re-reading the bytes as
 * ISO-8859-1. Records that cannot be converted by any strategy are counted
 * and reported in a warning; they are not returned.
 *
 * @param server connection settings of the remote server
 * @param search search type and value
 * @return the converted records; empty if nothing was found or the search
 *         failed
 */
public List<Record> doSearch(final Z3950ServerDTO server, final Z3950SearchDTO search) {
    List<Record> listRecords = new ArrayList<Record>();
    factory.setHost(server.getUrl());
    factory.setPort(server.getPort());
    factory.setCharsetEncoding("UTF-8");
    factory.setApplicationContext(z3950Context);
    factory.setDefaultRecordSyntax("usmarc");
    factory.setDefaultElementSetName("F");
    factory.setDoCharsetNeg(true);
    factory.getRecordArchetypes().put("Default", "usmarc::F");
    factory.getRecordArchetypes().put("FullDisplay", "usmarc::F");
    factory.getRecordArchetypes().put("BriefDisplay", "usmarc::B");
    final String qry = QUERY_PREFIX + search.getType() + " \"" + TextUtils.removeDiacriticals(search.getValue()) + "\"";
    IRQuery query = new IRQuery();
    query.collections = new Vector();
    query.collections.add(server.getCollection());
    query.query = new org.jzkit.search.util.QueryModel.PrefixString.PrefixString(qry);
    try {
        Searchable s = factory.newSearchable();
        s.setApplicationContext(z3950Context);
        IRResultSet result = s.evaluate(query);
        // Wait without timeout until result set is complete or failure
        result.waitForStatus(IRResultSetStatus.COMPLETE | IRResultSetStatus.FAILURE, 0);
        if (result.getStatus() == IRResultSetStatus.FAILURE) {
            log.error("IRResultSetStatus == FAILURE");
        }
        if (result.getFragmentCount() == 0) {
            return listRecords;
        }
        String encoding = server.getCharset();
        AnselToUnicode atu = new AnselToUnicode();
        Enumeration e = new ReadAheadEnumeration(result, new ArchetypeRecordFormatSpecification("Default"));
        int errorRecords = 0;
        while (e.hasMoreElements()) {
            iso2709 o = (iso2709) e.nextElement();
            // Must start as null for every element: with a variable shared
            // across iterations, a record that failed to convert re-added
            // the previous record and was never counted as an error.
            Record record = null;
            try {
                String iso = "";
                if (encoding.equals("MARC-8")) {
                    iso = Normalizer.normalize(atu.convert(new String((byte[]) o.getOriginalObject(), "ISO-8859-1")), Normalizer.Form.NFC);
                } else {
                    iso = new String((byte[]) o.getOriginalObject(), encoding);
                }
                // Each attempt is best effort; a failure just moves on to
                // the next parsing strategy.
                try {
                    record = MarcUtils.iso2709ToRecordAsIso(iso, false);
                } catch (Exception encodeE) {
                }
                if (record == null) {
                    try {
                        record = MarcUtils.iso2709ToRecord(iso, false);
                    } catch (Exception encodeE) {
                    }
                }
                if (record == null) {
                    try {
                        record = MarcUtils.iso2709ToRecordAsIso(new String((byte[]) o.getOriginalObject(), "ISO-8859-1"), false);
                    } catch (Exception encodeE) {
                    }
                }
                if (record == null) {
                    try {
                        record = MarcUtils.iso2709ToRecord(new String((byte[]) o.getOriginalObject(), "ISO-8859-1"), false);
                    } catch (Exception encodeE) {
                    }
                }
            } catch (Exception ex) {
                // Decoding failed; the record is counted as an error below.
            }
            if (record != null) {
                listRecords.add(record);
            } else {
                ++errorRecords;
            }
        }
        if (errorRecords > 0) {
            log.warn("Total number of records that failed the conversion: " + errorRecords);
        }
        try {
            result.close();
            s.close();
        } catch (Exception closingException) {
            log.error(closingException.getMessage());
        }
    } catch (Exception e) {
        log.error(e.getMessage(), e);
    }
    log.info("returning results");
    return listRecords;
}
Example 85
Project: cloudhopper-commons-master  File: MobileTextUtil.java View source code
/**
 * Replace accented characters with their ascii equivalents.  For example,
 * convert é to e.<br><br>
 * Both precomposed characters and text that already consists of a base
 * letter followed by a combining accent are handled.<br><br>
 * NOTE: This method is not very efficient.  The String will be copied
 * twice during conversion, so you'll likely only want to run this against
 * small strings.
 *
 * @param buffer The buffer containing the characters to analyze and replace
 *      if necessary. It is not modified when no accent is found.
 * @return The number of combining accent characters removed from the
 *      buffer, or 0 if there were none
 */
public static int replaceAccentedChars(StringBuilder buffer) {
    // NFD turns every accented char into the ascii version followed by its
    // combining accent character(s)
    final String nfd = Normalizer.normalize(buffer, Normalizer.Form.NFD);
    // replace the accents with nothing
    final String withoutAccents = nfd.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    // Measure the accents that were really stripped. The earlier
    // "did normalization make the text longer?" test returned early for
    // already-decomposed input and left its accents untouched.
    final int removed = nfd.length() - withoutAccents.length();
    if (removed == 0) {
        return 0;
    }
    buffer.setLength(0);
    buffer.append(withoutAccents);
    return removed;
}
Example 86
Project: commcare-odk-master  File: StringUtils.java View source code
/**
 * Returns a canonical version of the passed in string: diacritical marks
 * (like accents) are removed and the result is lower cased. Results are
 * kept in a shared cache that, together with the diacritics pattern, is
 * created on the first call.
 *
 * @param input A non-null string
 * @return a canonical version of the passed in string that is lower cased
 * and has removed diacritical marks like accents.
 */
public static synchronized String normalize(String input) {
    // lazy one-time setup of the cache and the compiled pattern
    if (normalizationCache == null) {
        normalizationCache = new LruCache<String, String>(cacheSize);
        diacritics = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    }
    String cached = normalizationCache.get(input);
    if (cached != null) {
        // hand back the value already fetched; a second lookup is redundant
        return cached;
    }
    // decompose so accents become separate combining marks, strip them, lower case
    String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD);
    String normalized = diacritics.matcher(decomposed).replaceAll("").toLowerCase();
    normalizationCache.put(input, normalized);
    return normalized;
}
Example 87
Project: epublib-master  File: SearchIndex.java View source code
/**
 * Reduces text to a plain, lower-cased form without accents.
 * <p>
 * Steps, in order: the text is passed through {@code unicodeTrim}, every
 * match of {@code WHITESPACE_PATTERN} becomes a single space, the text is
 * decomposed (NFD), every match of {@code REMOVE_ACCENT_PATTERN} is removed
 * and the result is lower cased.
 * <p>
 * NOTE(review): earlier documentation also mentioned converting HTML
 * entities such as &amp;ouml;; no such step is visible in this method, so
 * confirm whether a helper takes care of it.
 *
 * @param text the text to clean
 * @return the cleaned, lower-cased text
 */
public static String cleanText(String text) {
    // trim first, then squeeze whitespace runs into one space
    String collapsed = WHITESPACE_PATTERN.matcher(unicodeTrim(text)).replaceAll(" ");
    // NFD separates a base letter from its accent, e.g. ö becomes o followed by a combining mark
    String decomposed = Normalizer.normalize(collapsed, Normalizer.Form.NFD);
    // drop the marks split off by the decomposition
    String withoutAccents = REMOVE_ACCENT_PATTERN.matcher(decomposed).replaceAll("");
    return withoutAccents.toLowerCase();
}
Example 88
Project: FilmTit-master  File: SubtitleDownloadServlet.java View source code
/**
 * Serves the subtitles of a document as a file download.
 * <p>
 * Expects the request parameters {@code docId}, {@code sessionId},
 * {@code type} ({@code srt}, {@code sub} or {@code txt}) and {@code way}
 * ({@code source}, {@code target} or {@code targetthrowback}). Any missing
 * or invalid parameter, a failed access check, or a backend failure is
 * reported through {@code writeError}.
 * <p>
 * The download file name is derived from the document title: accents,
 * non-ASCII characters, control characters and a set of reserved characters
 * are removed, spaces become underscores, and a language code plus the
 * requested type are appended as extensions.
 *
 * @param request the HTTP request carrying the parameters described above
 * @param response the HTTP response the file (or the error) is written to
 * @throws ServletException declared by the servlet contract
 * @throws IOException if writing the error response fails
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    // reads the parameters from the http request
    String docId = request.getParameter("docId");
    String sessionId = request.getParameter("sessionId");
    String typeString = request.getParameter("type");
    String wayString = request.getParameter("way");
    if (docId == null || sessionId == null || typeString == null || wayString == null) {
        writeError(response, "no parameter");
        return;
    }
    Long docIdLong;
    try {
        // Long.valueOf replaces the deprecated Long(String) constructor
        docIdLong = Long.valueOf(docId);
    } catch (NumberFormatException e) {
        writeError(response, "wrong documentId");
        return;
    }
    if (!backend.canReadDocument(sessionId, docIdLong)) {
        writeError(response, "no rights to read document");
        return;
    }
    // map the requested type onto the file type and the response MIME type
    TimedChunk.FileType type;
    String responseType;
    if (typeString.equals("srt")) {
        type = TimedChunk.FileType.SRT;
        responseType = "application/x-subrip";
    } else if (typeString.equals("sub")) {
        type = TimedChunk.FileType.SUB;
        responseType = "text/plain";
    } else if (typeString.equals("txt")) {
        type = TimedChunk.FileType.TXT;
        responseType = "text/plain";
    } else {
        writeError(response, "wrong format " + typeString);
        return;
    }
    // map the requested way onto the converter used to produce the content
    ChunkStringGenerator.ResultToChunkConverter way;
    if (wayString.equals("source")) {
        way = ChunkStringGenerator.SOURCE_SIDE;
    } else if (wayString.equals("target")) {
        way = ChunkStringGenerator.TARGET_SIDE;
    } else if (wayString.equals("targetthrowback")) {
        way = ChunkStringGenerator.TARGET_SIDE_WITH_THROWBACK;
    } else {
        writeError(response, "no such way as " + wayString);
        return;
    }
    try {
        // generate the file name
        USDocument document = backend.getActiveDocument(sessionId, docIdLong);
        // split chars and accents
        String fileName = Normalizer.normalize(document.getTitle(), Normalizer.Form.NFD);
        // removes accents
        fileName = fileName.replaceAll("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+", "");
        // removes non ASCII characters
        fileName = fileName.replaceAll("[^\\x00-\\x7f]", "");
        // removes control characters (CR, LF, tab, ...): the name ends up in a
        // response header, where a line break would allow header injection
        fileName = fileName.replaceAll("\\p{Cntrl}", "");
        // removes not allowed characters
        fileName = fileName.replaceAll("[|\\?\\*\\\\<>+/\\[\\]]+", "");
        // replace spaces by underscores
        fileName = fileName.replaceAll(" ", "_");
        // solve the language code: the document's own language for the source
        // side, otherwise the other one of the two configured languages
        String languageToFileName;
        String language1 = ConfigurationSingleton.getConf().l1().getCode();
        String language2 = ConfigurationSingleton.getConf().l2().getCode();
        if (way == ChunkStringGenerator.SOURCE_SIDE) {
            languageToFileName = document.getLanguage().getCode();
        } else {
            if (document.getLanguage().getCode().equals(language1)) {
                languageToFileName = language2;
            } else {
                languageToFileName = language1;
            }
        }
        // adds the ending
        fileName += "." + languageToFileName + "." + typeString;
        // generate the actual content of the file
        String fileContent = backend.getSourceSubtitles(sessionId, docIdLong, 25L, type, way);
        response.setContentType(responseType);
        response.setHeader("Content-Disposition", "attachment; filename=" + fileName + "; charset=UTF-8");
        response.setCharacterEncoding("UTF-8");
        ServletOutputStream out = response.getOutputStream();
        out.write(fileContent.getBytes("UTF-8"));
    } catch (InvalidSessionIdException e) {
        writeError(response, "Invalid session id exception");
        return;
    } catch (InvalidDocumentIdException e) {
        writeError(response, "Invalid document ID exception");
        return;
    } catch (IOException e) {
        writeError(response, "IOexception");
        return;
    }
}
Example 89
Project: gdl-tools-master  File: NodeConversor.java View source code
/**
 * Recursively sets the visibility of a node and its descendants according
 * to a filter text.
 * <p>
 * A node with a non-null name is visible when the filter is blank or when
 * its name, after NFD decomposition and
 * {@code FormatConverter.textWithoutPunctuation}, contains the filter text
 * processed the same way. Once a node matches, its descendants are filtered
 * with an empty text. A non-leaf node that reports at least one child is
 * made visible after its children were processed, regardless of its own
 * match. In single selection mode the node is also deselected.
 *
 * @param rootNode the node to start filtering at
 * @param filtro the filter text; a blank text matches every named node
 */
public static void filterByText(SelectableNode<?> rootNode, String filtro) {
    boolean matches;
    if (rootNode.getName() == null) {
        matches = false;
    } else if (filtro.trim().length() > 0) {
        String nodeText = FormatConverter.textWithoutPunctuation(
                Normalizer.normalize(rootNode.getName(), Normalizer.Form.NFD));
        String filterText = FormatConverter.textWithoutPunctuation(
                Normalizer.normalize(filtro, Normalizer.Form.NFD));
        matches = nodeText.contains(filterText);
    } else {
        matches = true;
    }

    if (rootNode.isSingleSelectionMode()) {
        rootNode.setSelected(false);
    }
    rootNode.setVisible(matches);

    // below a matching node the filter is cleared for the whole subtree
    String childFilter = matches ? "" : filtro;

    if (rootNode.isLeaf()) {
        return;
    }
    for (Enumeration<?> children = rootNode.getAllchildren(); children.hasMoreElements();) {
        NodeConversor.filterByText((SelectableNode<?>) children.nextElement(), childFilter);
    }
    if (rootNode.getChildCount() != 0) {
        rootNode.setVisible(true);
    }
}
Example 90
Project: jeboorker-master  File: SearchIndex.java View source code
/**
 * Converts text into a simplified, lower-cased form without accents.
 * <p>
 * The text goes through {@code unicodeTrim}, matches of
 * {@code WHITESPACE_PATTERN} are replaced by one space, the text is
 * NFD-decomposed, matches of {@code REMOVE_ACCENT_PATTERN} are deleted and
 * everything is lower cased.
 * <p>
 * NOTE(review): the previous description also listed replacing HTML
 * entities (&amp;ouml; and the like); that is not done by any statement in
 * this method - verify whether it happens elsewhere.
 *
 * @param text the text to simplify
 * @return the simplified text
 */
public static String cleanText(String text) {
    final String trimmed = unicodeTrim(text);
    // whitespace runs collapse into exactly one space
    final String singleSpaced = WHITESPACE_PATTERN.matcher(trimmed).replaceAll(" ");
    // split accented letters into base letter + mark, then discard the marks
    final String decomposed = Normalizer.normalize(singleSpaced, Normalizer.Form.NFD);
    final Matcher accents = REMOVE_ACCENT_PATTERN.matcher(decomposed);
    final String unaccented = accents.replaceAll("");
    return unaccented.toLowerCase();
}
Example 91
Project: jw-community-master  File: LogUtil.java View source code
/**
 * Looks up a log file by name inside the {@code logs} directory below the
 * location named by the {@code catalina.base} system property.
 * <p>
 * Only the last path element of the URL-decoded name is used for the
 * lookup.
 *
 * @param filename name of the requested log file
 * @return the existing regular file, or {@code null} when the property is
 *         not set, the file does not exist, it is a directory, or the
 *         lookup fails for any reason
 * @throws SecurityException if the NFKC-normalized name contains
 *         {@code ../} or {@code ..\}
 */
public static File getTomcatLogFile(String filename) {
    // refuse names that contain a parent-directory step once normalized
    String checkedName = Normalizer.normalize(filename, Normalizer.Form.NFKC);
    boolean hasTraversal = checkedName.contains("../") || checkedName.contains("..\\");
    if (hasTraversal) {
        throw new SecurityException("Invalid filename " + checkedName);
    }

    String catalinaBase = System.getProperty("catalina.base");
    if (catalinaBase == null) {
        return null;
    }

    try {
        // strip any directory part from the decoded name
        File decoded = new File(URLDecoder.decode(filename, "UTF-8"));
        File candidate = new File(catalinaBase + File.separator + "logs", decoded.getName());
        if (candidate.exists() && !candidate.isDirectory()) {
            return candidate;
        }
    } catch (Exception ignored) {
        // best effort: every failure is reported as "no such log file"
    }
    return null;
}
Example 92
Project: MoCA-master  File: TokenInfoDictionaryBuilder.java View source code
/**
 * Builds a token info dictionary from CSV files.
 * <p>
 * Every line is parsed with {@code CSVUtil.parse}; lines with fewer than 13
 * fields, and lines for which {@code dictionary.put} returns the unchanged
 * offset, are reported on standard output and skipped. For each accepted
 * line the first field is recorded in {@code dictionaryEntries} under the
 * current offset. When {@code normalizeEntries} is set and the first field
 * differs from its NFKC form, an additional entry with all fields NFKC
 * normalized is stored as well.
 *
 * @param csvFiles the CSV files to read, decoded using {@code encoding}
 * @return the populated dictionary
 * @throws IOException if a file cannot be opened or read
 */
public TokenInfoDictionary buildDictionary(List<File> csvFiles) throws IOException {
    TokenInfoDictionary dictionary = new TokenInfoDictionary(10 * 1024 * 1024);
    for (File file : csvFiles) {
        FileInputStream inputStream = new FileInputStream(file);
        try {
            InputStreamReader streamReader = new InputStreamReader(inputStream, encoding);
            BufferedReader reader = new BufferedReader(streamReader);
            String line = null;
            while ((line = reader.readLine()) != null) {
                String[] entry = CSVUtil.parse(line);
                if (entry.length < 13) {
                    System.out.println("Entry in CSV is not valid: " + line);
                    continue;
                }
                int next = dictionary.put(formatEntry(entry));
                if (next == offset) {
                    System.out.println("Failed to process line: " + line);
                    continue;
                }
                dictionaryEntries.put(offset, entry[0]);
                offset = next;
                // NFKC normalize dictionary entry
                if (normalizeEntries) {
                    if (entry[0].equals(Normalizer.normalize(entry[0], Normalizer.Form.NFKC))) {
                        continue;
                    }
                    String[] normalizedEntry = new String[entry.length];
                    for (int i = 0; i < entry.length; i++) {
                        normalizedEntry[i] = Normalizer.normalize(entry[i], Normalizer.Form.NFKC);
                    }
                    next = dictionary.put(formatEntry(normalizedEntry));
                    dictionaryEntries.put(offset, normalizedEntry[0]);
                    offset = next;
                }
            }
        } finally {
            // release the file handle even when reading or parsing fails;
            // closing the underlying stream is enough for the read-only wrappers
            inputStream.close();
        }
    }
    return dictionary;
}
Example 93
Project: nuxeo-master  File: FileManagerUtils.java View source code
/**
 * Finds a child document of the given path whose property equals the given
 * value and whose life cycle state is not the deleted state.
 * <p>
 * The value is NFC-normalized before it is compared.
 * <p>
 * NOTE(review): {@code propertyName} is inserted into the query verbatim,
 * so callers must only pass trusted property names.
 *
 * @param documentManager the session used to resolve the parent and run the query
 * @param path path of the parent document
 * @param value the property value to look for
 * @param propertyName name of the property to compare
 * @return the first matching document, or {@code null} when there is none
 */
public static DocumentModel getExistingDocByPropertyName(CoreSession documentManager, String path, String value, String propertyName) {
    value = Normalizer.normalize(value, Normalizer.Form.NFC);
    DocumentModel existing = null;
    String parentId = documentManager.getDocument(new PathRef(path)).getId();
    // escape backslashes before quotes: otherwise a backslash in the value
    // would swallow the escape of a following quote (or the closing quote)
    // and break out of the string literal
    String escapedValue = value.replace("\\", "\\\\").replace("'", "\\'");
    String query = "SELECT * FROM Document WHERE ecm:parentId = '" + parentId + "' AND " + propertyName + " = '" + escapedValue + "' AND ecm:currentLifeCycleState != '" + LifeCycleConstants.DELETED_STATE + "'";
    DocumentModelList docs = documentManager.query(query, 1);
    if (docs.size() > 0) {
        existing = docs.get(0);
    }
    return existing;
}
Example 94
Project: sisob-academic-data-extractor-master  File: CandidateTypeURL.java View source code
/**
 * Builds the canonical form of a string: no accents, lower case, single
 * spaces.
 * <p>
 * The input is NFD-decomposed and every non-ASCII character is dropped
 * (which removes the detached accents). A period followed by a space loses
 * the space, the characters {@code _ ( ) . , | < > -} become spaces, the
 * text is lower cased and trimmed, line breaks and tabs become spaces, runs
 * of spaces collapse into one and the result is trimmed again.
 *
 * @param input the text to canonicalize
 * @return the canonical form
 */
public static String getCanonicalName(String input) {
    // canonical decomposition, then keep ASCII only
    String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD);
    String canonical = decomposed.replaceAll("\\P{ASCII}", "");

    canonical = canonical.replace(". ", ".");

    // punctuation that acts as a word separator
    String separators = "_().,|<>-";
    for (int i = 0; i < separators.length(); i++) {
        canonical = canonical.replace(separators.charAt(i), ' ');
    }

    canonical = canonical.toLowerCase().trim();
    canonical = canonical.replace("\r\n", " ").replace("\r", " ").replace("\n", " ").replace("\t", " ");

    // squeeze runs of spaces down to a single space
    while (canonical.indexOf("  ") >= 0) {
        canonical = canonical.replace("  ", " ");
    }
    return canonical.trim();
}
Example 95
Project: trombone-master  File: Keywords.java View source code
/**
 * Sorts the keywords and stores them back in {@code keywords}.
 * <p>
 * Two keywords are compared by their NFD-decomposed forms, ignoring case.
 */
public void sort() {
    Comparator<String> byDecomposedForm = new Comparator<String>() {

        @Override
        public int compare(String s1, String s2) {
            String left = Normalizer.normalize(s1, Normalizer.Form.NFD);
            String right = Normalizer.normalize(s2, Normalizer.Form.NFD);
            return left.compareToIgnoreCase(right);
        }
    };
    // sort a copy, then replace the content of the original collection
    List<String> ordered = new ArrayList<String>(keywords);
    Collections.sort(ordered, byDecomposedForm);
    keywords.clear();
    keywords.addAll(ordered);
}
Example 96
Project: wattdepot-master  File: UnitsHelper.java View source code
/**
 * Builds a display name of the form {@code type (unit)}.
 * <p>
 * The unit's string form is NFKD-decomposed; combining diacritical marks,
 * modifier letters and modifier symbols are removed, and whatever cannot be
 * represented in ASCII is dropped. Note that the modifier-symbol category
 * also contains the ASCII characters {@code ^} and {@code `}, and that
 * literal question marks are removed together with the replacement
 * characters produced by the ASCII conversion.
 *
 * @param type
 *          The type of the unit, Energy, Power, Mass, etc.
 * @param unit
 *          The Unit<?>.
 * @return The name.
 */
public static String buildName(String type, Unit<?> unit) {
    String decomposed = Normalizer.normalize(unit.toString(), Normalizer.Form.NFKD);
    // must be applied as a regular expression: wrapping it in Pattern.quote()
    // would turn it into a literal that never matches anything
    String regex = "[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+";
    try {
        // encoding to ASCII turns every remaining non-ASCII character into '?'
        String ascii = new String(decomposed.replaceAll(regex, "").getBytes("ascii"), "ascii");
        ascii = ascii.replace("?", "");
        return type + " (" + ascii + ")";
    } catch (UnsupportedEncodingException e) { // NOPMD
        // fall through to the unfiltered name below
    }
    return type + " (" + decomposed + ")";
}
Example 97
Project: youtestit-master  File: AbstractCreateDocument.java View source code
// =========================================================================
// METHODS
// =========================================================================
/**
 * Generates the path of a document from {@code parentPath} and a title.
 * <p>
 * The title is NFD-decomposed and stripped of every non-ASCII character,
 * spaces become underscores and each item of {@code TO_REPLACE} becomes a
 * dash. The result is appended to {@code parentPath}, separated by
 * {@code PATH_SPLIT} when the parent path does not already end with it, and
 * the whole path is trimmed.
 *
 * @param title the title to turn into the last path segment
 * @return path String representation
 */
protected String generatePath(final String title) {
    final StrBuilder path = new StrBuilder();
    path.append(parentPath);
    final boolean needsSeparator = !parentPath.endsWith(PATH_SPLIT);
    if (needsSeparator) {
        path.append(PATH_SPLIT);
    }

    // ASCII-only version of the title
    final String asciiTitle = Normalizer.normalize(title, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "");
    final StrBuilder segment = new StrBuilder();
    segment.append(asciiTitle);
    segment.replaceAll(" ", "_");
    for (String unwanted : TO_REPLACE) {
        segment.replaceAll(unwanted, "-");
    }

    path.append(segment.toString());
    return path.toString().trim();
}
Example 98
Project: archiv-editor-master  File: FNStr.java View source code
/**
 * Returns the first argument as a Unicode-normalized string.
 * <p>
 * The normalization form defaults to NFC. When a second argument is given,
 * its trimmed, upper-cased value names the form; an empty name returns the
 * input unchanged and an unknown name is reported through {@code NORMUNI}.
 * Input for which {@code ascii} returns true is returned as is.
 *
 * @param ctx query context
 * @return string
 * @throws QueryException query exception
 */
private Item normuni(final QueryContext ctx) throws QueryException {
    final byte[] str = checkEStr(expr[0], ctx);
    Form form = Form.NFC;
    if (expr.length == 2) {
        final byte[] name = uc(trim(checkStr(expr[1], ctx)));
        if (name.length == 0) {
            return Str.get(str);
        }
        try {
            form = Form.valueOf(string(name));
        } catch (final IllegalArgumentException ex) {
            NORMUNI.thrw(input, name);
        }
    }
    if (ascii(str)) {
        return Str.get(str);
    }
    return Str.get(Normalizer.normalize(string(str), form));
}
Example 99
Project: latke-master  File: URICoder.java View source code
/**
 * Encodes a string for use in a URI: the string is brought into NFKC form,
 * encoded with {@code UTF8} and every resulting byte that is neither
 * accepted by {@code isUnreserved} nor equal to {@code e} is written
 * through {@code appendEscape}.
 *
 * @param s The string to encode
 * @param e A character that does not require encoding if found in the string.
 * @return the encoded string
 */
private static String encode_UTF8(String s, char e) {
    // TODO: Normalizer requires Java 6!
    String normalized = s;
    if (!Normalizer.isNormalized(s, Form.NFKC)) {
        normalized = Normalizer.normalize(s, Form.NFKC);
    }
    // convert String to bytes
    ByteBuffer bytes = UTF8.encode(normalized);
    // URI encode byte by byte
    StringBuffer encoded = new StringBuffer();
    while (bytes.hasRemaining()) {
        // mask to get the unsigned byte value
        int unsigned = bytes.get() & 0xff;
        if (!isUnreserved(unsigned) && unsigned != e) {
            appendEscape(encoded, (byte) unsigned);
        } else {
            encoded.append((char) unsigned);
        }
    }
    return encoded.toString();
}
Example 100
Project: rascal-master  File: RandomValueTypeVisitor.java View source code
/**
 * Produces a random string value.
 * <p>
 * Returns the empty string when the random source says so or when
 * {@code maxDepth} is not positive; otherwise asks {@code RandomUtil} for a
 * string and NFC-normalizes it.
 *
 * @param type the type being visited (not used by this method)
 * @return the generated string value
 */
@Override
public IValue visitString(Type type) {
    boolean wantEmpty = stRandom.nextBoolean() || maxDepth <= 0;
    if (wantEmpty) {
        return vf.string("");
    }
    int sizeArgument = 1 + stRandom.nextInt(maxDepth + 3);
    String raw = RandomUtil.string(stRandom, sizeArgument);
    // make sure we are not generating very strange sequences
    String composed = Normalizer.normalize(raw, Form.NFC);
    return vf.string(composed);
}
Example 101
Project: bitcoinj-master  File: BIP38PrivateKey.java View source code
/**
 * Decrypts this key with the given passphrase.
 * <p>
 * The passphrase is NFC-normalized first. The address of the decrypted key
 * is then hashed twice and the first four bytes of that hash are compared
 * with {@code addressHash} to verify the passphrase.
 *
 * @param passphrase the passphrase to decrypt with
 * @return the decrypted key
 * @throws BadPassphraseException if the address hash of the decrypted key
 *         does not match {@code addressHash}
 */
public ECKey decrypt(String passphrase) throws BadPassphraseException {
    final String nfcPassphrase = Normalizer.normalize(passphrase, Normalizer.Form.NFC);
    final ECKey key;
    if (ecMultiply) {
        key = decryptEC(nfcPassphrase);
    } else {
        key = decryptNoEC(nfcPassphrase);
    }
    // checksum: first four bytes of the double hash of the ASCII address
    byte[] addressBytes = key.toAddress(params).toString().getBytes(Charsets.US_ASCII);
    Sha256Hash hash = Sha256Hash.twiceOf(addressBytes);
    byte[] actualAddressHash = Arrays.copyOfRange(hash.getBytes(), 0, 4);
    if (Arrays.equals(actualAddressHash, addressHash)) {
        return key;
    }
    throw new BadPassphraseException();
}