Java Examples for java.text.Normalizer
The following Java examples will help you understand how to use java.text.Normalizer. These source code samples are taken from various open-source projects.
Example 1
| Project: QMAClone-master File: Normalizer.java View source code |
public static String normalize(String s) {
s = java.text.Normalizer.normalize(s, Form.NFKC);
final char[] charArray = s.toCharArray();
for (int i = 0; i < charArray.length; ++i) {
int c = charArray[i];
if (65281 <= c && c <= 65374) {
c -= 65248;
}
charArray[i] = (char) c;
}
s = new String(charArray);
s = s.toLowerCase();
return s;
}Example 2
| Project: QueryHighlighter-master File: Normalizer.java View source code |
public static final String forSearch(CharSequence searchTerm) {
if (searchTerm == null) {
return null;
}
String result = java.text.Normalizer.normalize(searchTerm, java.text.Normalizer.Form.NFD);
result = PATTERN_DIACRITICS.matcher(result).replaceAll("");
result = PATTERN_NON_LETTER_DIGIT_TO_SPACES.matcher(result).replaceAll(" ");
return result.toLowerCase(Locale.ROOT);
}Example 3
| Project: osm-address-extractor-master File: MapUtils.java View source code |
public static String stringToId(String val) {
try {
String regex = "[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+";
String normalized = Normalizer.normalize(val, Normalizer.Form.NFKD);
normalized = new String(normalized.replaceAll(regex, "").getBytes("ascii"), "ascii");
return normalized.toLowerCase().replaceAll("\\s", "-");
} catch (Exception e) {
e.printStackTrace();
System.exit(-1);
return null;
}
}Example 4
| Project: lombok-pg-master File: IParameterSanitizer.java View source code |
@Override
public Statement<?> getStatementFor(final Object argumentType, final String argumentName, final String newArgumentName, final java.lang.annotation.Annotation annotation) {
final Normalizer.Form normalizerForm = ((Sanitize.Normalize) annotation).value();
return LocalDecl(Type(argumentType), newArgumentName).makeFinal().withInitialization(Call(//
Name("java.text.Normalizer"), //
"normalize").withArgument(Name(argumentName)).withArgument(Name(String.format("java.text.Normalizer.Form.%s", normalizerForm.name()))));
}Example 5
| Project: longneck-core-master File: UnicodeNormalize.java View source code |
@Override
public void apply(Record record, VariableSpace parentScope) {
for (String fieldName : applyTo) {
String value = BlockUtils.getValue(fieldName, record, parentScope);
if (value == null || "".equals(value)) {
continue;
}
value = Normalizer.normalize(value, form);
BlockUtils.setValue(fieldName, value, record, parentScope);
}
}Example 6
| Project: memoryfilesystem-master File: WindowsFileSystemComptiblityTest.java View source code |
@Test
@Ignore
public void windowsNormalization() throws IOException {
FileSystem fileSystem = this.getFileSystem();
String aUmlaut = "Ä";
Path aPath = fileSystem.getPath(aUmlaut);
String normalized = Normalizer.normalize(aUmlaut, Form.NFD);
Path nPath = fileSystem.getPath(normalized);
Path createdFile = null;
try {
createdFile = Files.createFile(nPath);
assertEquals(2, createdFile.getFileName().toString().length());
assertEquals(2, createdFile.toAbsolutePath().getFileName().toString().length());
// REVIEW ??
assertEquals(2, createdFile.toRealPath().getFileName().toString().length());
assertThat(aPath, not(exists()));
assertThat(nPath, exists());
//assertTrue(Files.isSameFile(aPath, nPath));
//assertTrue(Files.isSameFile(nPath, aPath));
assertThat(aPath, not(equalTo(nPath)));
} finally {
if (createdFile != null) {
Files.delete(createdFile);
}
}
}Example 7
| Project: voj-master File: SlugifyUtils.java View source code |
/**
* 获å?–å—符串的Slug.
* @param str - 待获å?–Slugçš„å—符串
* @return å—符串对应的Slug
*/
public static String getSlug(String str) {
if (str == null) {
return "";
}
// Rid of White Spaces
String noWhiteSpace = WHITESPACE.matcher(str.trim()).replaceAll("-");
// Processing Non-ASCII Characters
try {
noWhiteSpace = URLEncoder.encode(noWhiteSpace, "UTF-8");
} catch (UnsupportedEncodingException e) {
}
// Slugify String
String normalized = Normalizer.normalize(noWhiteSpace, Form.NFD);
return normalized.toLowerCase();
}Example 8
| Project: GT-FHIR-master File: QueryUtilities.java View source code |
public static String normalizeString(String theString) {
char[] out = new char[theString.length()];
theString = Normalizer.normalize(theString, Normalizer.Form.NFD);
int j = 0;
for (int i = 0, n = theString.length(); i < n; ++i) {
char c = theString.charAt(i);
if (c <= '') {
out[j++] = c;
}
}
// return new String(out).toUpperCase();
return new String(out);
}Example 9
| Project: invoicexpress-android-master File: Test.java View source code |
public static void compareStrings() {
System.out.println(Normalizer.normalize("É", Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", ""));
System.out.println(Normalizer.normalize("E", Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", ""));
System.out.println("Resultado da comparação " + "E".compareTo("É"));
System.out.println("Resultado da comparação " + "T".compareTo("É"));
System.out.println("Resultado da comparação " + "É".compareTo("Z"));
}Example 10
| Project: smile-master File: SimpleNormalizer.java View source code |
@Override
public String normalize(String text) {
text = text.trim();
if (!java.text.Normalizer.isNormalized(text, java.text.Normalizer.Form.NFKC)) {
text = java.text.Normalizer.normalize(text, java.text.Normalizer.Form.NFKC);
}
text = WHITESPACE.matcher(text).replaceAll(" ");
text = CONTROL_FORMAT_CHARS.matcher(text).replaceAll("");
text = DOUBLE_QUOTES.matcher(text).replaceAll("\"");
text = SINGLE_QUOTES.matcher(text).replaceAll("'");
return text;
}Example 11
| Project: zamiaDroid-master File: StringASCIIFormat.java View source code |
public static String toASCII(String input) {
String s1 = Normalizer.normalize(input, Normalizer.Form.NFKD);
String regex = "[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+";
String s2 = "";
try {
s2 = new String(s1.replaceAll(regex, "").getBytes("ascii"), "ascii");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return s2;
}Example 12
| Project: Dolomite-master File: BaseController.java View source code |
protected static String normalize(String original) {
// to lower case
String str = original.toLowerCase();
// replace multiple spaces with one space
str = str.replaceAll(" +", " ");
// drop initial or final spaces
str = str.trim();
// normalize and remove accents (diacritics)
str = java.text.Normalizer.normalize(str, java.text.Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
// replace some separators with underscore
str = str.replaceAll("[- .']", "_");
// keep only alphanumeric characters and underscores
str = str.replaceAll("[^(_|a-z|0-9)]", "");
return str;
}Example 13
| Project: completely-master File: DiacriticsTransformer.java View source code |
@Override
public Collection<String> apply(Collection<String> input) {
checkPointer(input != null);
List<String> result = new LinkedList<>();
for (String text : input) {
checkPointer(text != null);
StringBuilder builder = new StringBuilder();
String canonical = Normalizer.normalize(text, Normalizer.Form.NFD);
for (int i = 0; i < canonical.length(); ++i) {
if (Character.getType(canonical.charAt(i)) != Character.NON_SPACING_MARK) {
builder.append(canonical.charAt(i));
}
}
result.add(builder.toString());
}
return result;
}Example 14
| Project: android-sdk-sources-for-api-level-23-master File: NormalizerTest.java View source code |
/**
* @tests java.text.Normalizer#isNormalized(CharSequence, Form)
*/
public void test_isNormalized() throws Exception {
String src = "Á";
assertTrue(Normalizer.isNormalized(src, Form.NFC));
assertFalse(Normalizer.isNormalized(src, Form.NFD));
assertTrue(Normalizer.isNormalized(src, Form.NFKC));
assertFalse(Normalizer.isNormalized(src, Form.NFKD));
src = "Á";
assertFalse(Normalizer.isNormalized(src, Form.NFC));
assertTrue(Normalizer.isNormalized(src, Form.NFD));
assertFalse(Normalizer.isNormalized(src, Form.NFKC));
assertTrue(Normalizer.isNormalized(src, Form.NFKD));
src = "ffi";
assertTrue(Normalizer.isNormalized(src, Form.NFC));
assertTrue(Normalizer.isNormalized(src, Form.NFD));
assertFalse(Normalizer.isNormalized(src, Form.NFKC));
assertFalse(Normalizer.isNormalized(src, Form.NFKD));
src = "ffi";
assertTrue(Normalizer.isNormalized(src, Form.NFC));
assertTrue(Normalizer.isNormalized(src, Form.NFD));
assertTrue(Normalizer.isNormalized(src, Form.NFKC));
assertTrue(Normalizer.isNormalized(src, Form.NFKD));
src = "";
assertTrue(Normalizer.isNormalized(src, Form.NFC));
assertTrue(Normalizer.isNormalized(src, Form.NFD));
assertTrue(Normalizer.isNormalized(src, Form.NFKC));
assertTrue(Normalizer.isNormalized(src, Form.NFKD));
}Example 15
| Project: android_libcore-master File: NativeNormalizer.java View source code |
private static int toUNormalizationMode(Form form) {
// See UNormalizationMode in "unicode/unorm.h". Stable API since ICU 2.0.
switch(form) {
case NFC:
return 4;
case NFD:
return 2;
case NFKC:
return 5;
case NFKD:
return 3;
}
throw new AssertionError("unknown Normalizer.Form " + form);
}Example 16
| Project: ARTPart-master File: Main.java View source code |
static void testNormalizer() {
String composed = "BlÁah";
String decomposed = "BlÁah";
String res;
res = Normalizer.normalize(composed, Normalizer.Form.NFD);
if (!decomposed.equals(res)) {
System.err.println("Bad decompose: '" + composed + "' --> '" + res + "'");
}
res = Normalizer.normalize(decomposed, Normalizer.Form.NFC);
if (!composed.equals(res)) {
System.err.println("Bad compose: '" + decomposed + "' --> '" + res + "'");
}
System.out.println("Normalizer passed");
}Example 17
| Project: greenhouse-master File: SlugUtils.java View source code |
/**
* Convert the String input to a slug.
*/
public static String toSlug(String input) {
if (input == null) {
throw new IllegalArgumentException("Input cannot be null");
}
String nowhitespace = WHITESPACE.matcher(input).replaceAll("-");
String normalized = Normalizer.normalize(nowhitespace, Form.NFD);
String slug = NONLATIN.matcher(normalized).replaceAll("");
return slug.toLowerCase(Locale.ENGLISH);
}Example 18
| Project: midpoint-master File: PrismDefaultPolyStringNormalizer.java View source code |
/* (non-Javadoc)
* @see com.evolveum.midpoint.prism.polystring.PolyStringNormalizer#normalize(java.lang.String)
*/
@Override
public String normalize(String orig) {
if (orig == null) {
return null;
}
String s = StringUtils.trim(orig);
s = Normalizer.normalize(s, Normalizer.Form.NFKD);
s = s.replaceAll("[^\\w\\s\\d]", "");
s = s.replaceAll("\\s+", " ");
if (StringUtils.isBlank(s)) {
s = "";
}
return StringUtils.lowerCase(s);
}Example 19
| Project: openrocket-master File: L10NGenerator.java View source code |
private static void output(char ch) {
String text = "" + ch;
StringBuilder sb = new StringBuilder(text.length());
// s = normalize(s);
text = Normalizer.normalize(text, Normalizer.Form.NFKD);
for (char c : text.toCharArray()) {
if (c < 128) {
sb.append(c);
} else if (c == Chars.FRACTION) {
sb.append('/');
}
}
text = sb.toString().trim();
if (text.length() > 0) {
print(ch, text);
}
}Example 20
| Project: package-drone-master File: Tokens.java View source code |
public static String hashIt(final String salt, String data) {
data = Normalizer.normalize(data, Form.NFC);
final byte[] strData = data.getBytes(StandardCharsets.UTF_8);
final byte[] saltData = salt.getBytes(StandardCharsets.UTF_8);
final byte[] first = new byte[saltData.length + strData.length];
System.arraycopy(saltData, 0, first, 0, saltData.length);
System.arraycopy(strData, 0, first, saltData.length, strData.length);
MessageDigest md;
try {
md = MessageDigest.getInstance("SHA-256");
} catch (final NoSuchAlgorithmException e) {
throw new IllegalStateException(e);
}
byte[] digest = md.digest(first);
final byte[] current = new byte[saltData.length + digest.length];
for (int i = 0; i < 1000; i++) {
System.arraycopy(saltData, 0, current, 0, saltData.length);
System.arraycopy(digest, 0, current, saltData.length, digest.length);
digest = md.digest(current);
}
return Base64.getEncoder().encodeToString(digest);
}Example 21
| Project: spring-greenhouse-clickstart-master File: SlugUtils.java View source code |
/**
* Convert the String input to a slug.
*/
public static String toSlug(String input) {
if (input == null) {
throw new IllegalArgumentException("Input cannot be null");
}
String nowhitespace = WHITESPACE.matcher(input).replaceAll("-");
String normalized = Normalizer.normalize(nowhitespace, Form.NFD);
String slug = NONLATIN.matcher(normalized).replaceAll("");
return slug.toLowerCase(Locale.ENGLISH);
}Example 22
| Project: spring-mvc-movies-master File: Slug.java View source code |
public static String makeSlug(String input) {
if (input == null)
throw new IllegalArgumentException();
String nowhitespace = WHITESPACE.matcher(input).replaceAll("-");
String normalized = Normalizer.normalize(nowhitespace, Form.NFD);
String slug = NONLATIN.matcher(normalized).replaceAll("");
return slug.toLowerCase(Locale.ENGLISH);
}Example 23
| Project: DrawBridge-master File: Normalizer.java View source code |
/**
* A conservative heuristic as to whether s is normalized according to Unicode
* Normal Form C. It is heuristic, because Caja needs to run with versions
* of the Java standard libraries that do not include normalization.
* @return false if s is not normalized.
*/
public static boolean isNormalized(CharSequence s) {
if (IS_NORMALIZED != null) {
try {
return ((Boolean) IS_NORMALIZED.invoke(null, s, NORMAL_FORM_C)).booleanValue();
} catch (IllegalAccessException ex) {
throw new SomethingWidgyHappenedError("Normalizer unexpectedly uninvokable", ex);
} catch (InvocationTargetException ex) {
Throwable th = ex.getTargetException();
throw new SomethingWidgyHappenedError("Normalizer unexpectedly uninvokable", th);
}
}
// ...
for (int i = s.length(); --i >= 0; ) {
char ch = s.charAt(i);
// will never reach here.
if (ch >= 256) {
return false;
}
}
return true;
}Example 24
| Project: solrmarc-master File: FieldFormatterBase.java View source code |
public String cleanData(VariableField vf, boolean isSubfieldA, String data) {
final EnumSet<eCleanVal> cleanVal = getCleanVal();
int numToDel = 0;
String trimmed = data;
if (cleanVal.contains(eCleanVal.STRIP_INDICATOR_2) && isSubfieldA && vf instanceof DataField) {
DataField df = (DataField) vf;
char ind2Val = df.getIndicator2();
numToDel = (ind2Val >= '0' && ind2Val <= '9') ? ind2Val - '0' : 0;
if (numToDel > 0)
trimmed = trimmed.substring(numToDel);
}
trimmed = cleanVal.contains(eCleanVal.UNTRIMMED) ? getSubstring(trimmed) : getSubstring(trimmed).trim();
String str = (cleanVal.contains(eCleanVal.CLEAN_EACH)) ? DataUtil.cleanData(trimmed) : trimmed;
if (!cleanVal.contains(eCleanVal.STRIP_ACCCENTS) && !cleanVal.contains(eCleanVal.STRIP_ALL_PUNCT) && !cleanVal.contains(eCleanVal.TO_LOWER) && !cleanVal.contains(eCleanVal.TO_UPPER) && !cleanVal.contains(eCleanVal.TO_TITLECASE) && !cleanVal.contains(eCleanVal.STRIP_INDICATOR_2)) {
return (str);
}
// Do more extensive cleaning of data.
if (cleanVal.contains(eCleanVal.STRIP_ACCCENTS)) {
str = ACCENTS.matcher(Normalizer.normalize(str, Form.NFD)).replaceAll("");
StringBuilder folded = new StringBuilder();
boolean replaced = false;
for (char c : str.toCharArray()) {
char newc = Utils.foldDiacriticLatinChar(c);
if (newc != 0x00) {
folded.append(newc);
replaced = true;
} else {
folded.append(c);
}
}
if (replaced)
str = folded.toString();
}
if (cleanVal.contains(eCleanVal.STRIP_ALL_PUNCT))
str = str.replaceAll("( |\\p{Punct})+", " ");
if (!cleanVal.contains(eCleanVal.UNTRIMMED))
str = str.trim();
if (cleanVal.contains(eCleanVal.TO_LOWER)) {
str = str.toLowerCase();
} else if (cleanVal.contains(eCleanVal.TO_UPPER)) {
str = str.toUpperCase();
} else if (cleanVal.contains(eCleanVal.TO_TITLECASE)) {
str = DataUtil.toTitleCase(str);
}
return str;
}Example 25
| Project: agile-itsm-master File: LookupProcessContrato.java View source code |
/**
 * Runs the lookup described by {@code lookupObject} against the CONTRATOS
 * table (joined with CLIENTES and Fornecedor) and returns the matching rows.
 *
 * <p>The SELECT list, the search conditions and the ORDER BY clause are all
 * assembled from the Campo definitions that LookupFieldUtil returns for the
 * lookup name; the finished statement is upper-cased and run through execSQL.
 * Each result row is converted into a collection of Campo objects whose
 * values are formatted according to the field type.
 *
 * @param lookupObject the lookup request (name, user and search values)
 * @return one collection of Campo per row, or null when the query returns no rows
 * @throws LogicException when more than 400 rows are returned
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public List processLookup(LookupDTO lookupObject) throws LogicException, Exception {
String sql = "";
// Collection colRetorno = new ArrayList();
LookupFieldUtil lookUpField = new LookupFieldUtil();
// Build the comma-separated SELECT column list from the lookup's return fields.
Collection colCamposRet = lookUpField.getCamposRetorno(lookupObject.getNomeLookup());
Iterator itRet = colCamposRet.iterator();
Campo cp;
while (itRet.hasNext()) {
cp = (Campo) itRet.next();
if (!sql.equalsIgnoreCase("")) {
sql = sql + ",";
}
sql = sql + cp.getNomeFisico();
}
sql = "SELECT " + sql;
sql += " FROM CONTRATOS PRJ INNER JOIN CLIENTES CLI on CLI.idCliente = PRJ.idCliente INNER JOIN Fornecedor FORN on FORN.idFornecedor = PRJ.idFornecedor ";
// Base filter: only contracts whose deleted column is NULL or 'N'.
String where = " (PRJ.deleted IS NULL or PRJ.deleted = 'N')";
ContratosGruposService contratosGruposService = (ContratosGruposService) ServiceLocator.getInstance().getService(ContratosGruposService.class, null);
String COLABORADORES_VINC_CONTRATOS = ParametroUtil.getValorParametroCitSmartHashMap(br.com.centralit.citcorpore.util.Enumerados.ParametroSistema.COLABORADORES_VINC_CONTRATOS, "N");
if (COLABORADORES_VINC_CONTRATOS == null)
COLABORADORES_VINC_CONTRATOS = "N";
// When the parameter is "S", restrict the result to the contract ids found for the
// requesting user; the leading -1 keeps the IN list non-empty.
if (COLABORADORES_VINC_CONTRATOS.equalsIgnoreCase("S")) {
where += " AND PRJ.idContrato in (-1";
UsuarioDTO usuarioDto = (UsuarioDTO) lookupObject.getUser();
if (usuarioDto != null) {
Collection<ContratosGruposDTO> colContratosColab = contratosGruposService.findByIdEmpregado(usuarioDto.getIdEmpregado());
if (colContratosColab != null) {
for (ContratosGruposDTO contratosGruposDto : colContratosColab) {
where += "," + contratosGruposDto.getIdContrato();
}
}
}
where += ")";
}
// Append one condition per search field that has a value; count is the 1-based
// position passed to getValueParmLookup.
// NOTE(review): search values are concatenated straight into the SQL text; only values
// containing an apostrophe go through escapeSql, and combo values never do. Consider
// bound parameters.
Collection colCamposPesq = lookUpField.getCamposPesquisa(lookupObject.getNomeLookup());
Iterator itPesq = colCamposPesq.iterator();
String obj = null;
int count = 1;
while (itPesq.hasNext()) {
cp = (Campo) itPesq.next();
obj = null;
obj = this.getValueParmLookup(lookupObject, count);
if (obj != null) {
// A physical name of the form "a.b" is split on the dot; the Campo itself is
// changed to hold only the second part, while nomeFisico keeps "a.b".
String[] trataGetNomeFisico = cp.getNomeFisico().split("\\.");
String nomeFisico = cp.getNomeFisico();
if (trataGetNomeFisico.length > 1) {
cp.setNomeFisico(trataGetNomeFisico[1]);
nomeFisico = trataGetNomeFisico[0] + "." + trataGetNomeFisico[1];
}
if (!obj.equalsIgnoreCase("")) {
if (!where.equalsIgnoreCase("")) {
where = where + " AND ";
}
// Left-hand side and operator depend on the field type:
// text/textarea -> LIKE '%...%', combo -> IN (...), DATE -> = '...', otherwise -> = ...
if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXT").trim()) || cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXTAREA").trim())) {
String func = Constantes.getValue("FUNCAO_CONVERTE_MAIUSCULO");
if (func != null && !func.trim().equalsIgnoreCase("")) {
where = where + func + "(" + nomeFisico + ")";
} else {
where = where + cp.getNomeFisico();
}
where = where + " LIKE '%";
} else {
if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_COMBO").trim())) {
where = where + cp.getNomeFisico();
where = where + " IN (";
} else if (cp.getType().equalsIgnoreCase("DATE")) {
where = where + cp.getNomeFisico();
where = where + " = '";
} else {
where = where + nomeFisico;
where = where + " = ";
}
}
// Search-only fields: trim, upper-case and strip every non-ASCII character
// left after NFD decomposition.
if (cp.isSomenteBusca()) {
obj = obj.trim();
obj = obj.toUpperCase();
obj = Normalizer.normalize(obj, Normalizer.Form.NFD);
obj = obj.replaceAll("[^\\p{ASCII}]", "");
}
if (StringUtils.contains(obj, "'") && !cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_COMBO").trim())) {
obj = StringEscapeUtils.escapeSql(obj);
}
where = where + obj;
// Close whatever the operator above opened.
if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXT").trim()) || cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXTAREA").trim())) {
where = where + "%'";
} else if (cp.getType().equalsIgnoreCase("DATE")) {
where = where + "'";
} else if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_COMBO").trim())) {
where = where + ")";
}
}
}
count++;
}
// Combine the generated conditions with the lookup's own fixed WHERE fragment.
String strAux;
if (!where.equalsIgnoreCase("")) {
sql = sql + " WHERE " + where;
strAux = lookUpField.getWhere(lookupObject.getNomeLookup());
if (!strAux.equalsIgnoreCase("")) {
sql = sql + " AND ";
sql = sql + strAux;
}
} else {
strAux = lookUpField.getWhere(lookupObject.getNomeLookup());
if (!strAux.equalsIgnoreCase("")) {
sql = sql + " WHERE " + strAux;
}
}
// Build the ORDER BY clause from the lookup's ordering fields.
Collection colCamposOrd = lookUpField.getCamposOrdenacao(lookupObject.getNomeLookup());
Iterator itOrd = colCamposOrd.iterator();
String ordem = "";
while (itOrd.hasNext()) {
cp = (Campo) itOrd.next();
if (!ordem.equalsIgnoreCase("")) {
ordem = ordem + ",";
}
ordem = ordem + cp.getNomeFisico();
}
if (!ordem.equalsIgnoreCase("")) {
sql = sql + " ORDER BY " + ordem;
}
// The whole statement is upper-cased, which includes any literal values embedded above.
sql = sql.toUpperCase();
List lista = execSQL(sql, null);
if (lista == null || lista.size() == 0) {
TransactionControler tc = this.getTransactionControler();
if (tc != null) {
tc.close();
}
return null;
}
// Process the result.
List result = new ArrayList();
// NOTE(review): this repeats the check a few lines above, which has already returned
// for an empty or null list, so this branch cannot be taken.
if (lista == null || lista.size() == 0) {
TransactionControler tc = this.getTransactionControler();
if (tc != null) {
tc.close();
}
return result;
}
// Hard cap on the result size.
if (lista.size() > 400) {
TransactionControler tc = this.getTransactionControler();
if (tc != null) {
tc.close();
}
throw new LogicException("citcorpore.comum.consultaEstourouLimite");
}
// Convert every row: column i of the row belongs to the i-th return field.
Iterator it = lista.iterator();
Campo campoAux;
int i;
Collection colAux;
Object auxObj;
while (it.hasNext()) {
Object[] row = (Object[]) it.next();
itRet = colCamposRet.iterator();
i = 0;
campoAux = null;
colAux = new ArrayList();
while (itRet.hasNext()) {
cp = (Campo) itRet.next();
campoAux = new Campo(cp.getNomeFisico(), cp.getDescricao(), cp.isObrigatorio(), cp.getType(), cp.getTamanho());
if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXT").trim()) || cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_TEXTAREA").trim())) {
if (row[i] == null) {
auxObj = new String("");
} else {
String str = new String(row[i].toString());
// NOTE(review): the second argument of the first replaceAll is not a valid string
// literal as written - presumably an HTML entity that was lost in extraction;
// confirm against the upstream file.
auxObj = str.replaceAll("\"", """).replaceAll("'", "´");
/*
 * Change made by Cleon: when restoring a textarea element containing line breaks
 * through a lookup, the set-return step did not behave correctly and raised an error.
 */
// NOTE(review): this assignment starts again from str, so the quote replacements
// made just above are discarded - confirm whether it should chain on auxObj.
auxObj = str.replaceAll("\n", " ");
}
campoAux.setObjValue(auxObj);
} else if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_DATE").trim())) {
if (row[i] == null) {
campoAux.setObjValue(null);
} else {
auxObj = row[i];
// SQL dates and timestamps are formatted by UtilDatas; anything else uses toString().
if ((auxObj instanceof java.sql.Date)) {
campoAux.setObjValue(UtilDatas.dateToSTR((java.sql.Date) auxObj));
} else if ((auxObj instanceof java.sql.Timestamp)) {
campoAux.setObjValue(UtilDatas.dateToSTR((java.sql.Timestamp) auxObj));
} else {
campoAux.setObjValue(auxObj.toString());
}
}
} else if (cp.getType().equalsIgnoreCase(Constantes.getValue("FIELDTYPE_MOEDA").trim())) {
if (row[i] == null) {
campoAux.setObjValue(null);
} else {
auxObj = row[i];
// Currency values: Double and BigDecimal are formatted with 2 decimal places.
String valorTransf = null;
if ((auxObj instanceof Double)) {
valorTransf = UtilFormatacao.formatBigDecimal(new BigDecimal(((Double) auxObj).doubleValue()), 2);
} else if ((auxObj instanceof BigDecimal)) {
valorTransf = UtilFormatacao.formatBigDecimal(((BigDecimal) auxObj), 2);
} else {
valorTransf = auxObj.toString();
}
campoAux.setObjValue(valorTransf);
}
}
// Fields of any other type are added without a value being set here.
colAux.add(campoAux);
i++;
}
result.add(colAux);
}
TransactionControler tc = this.getTransactionControler();
if (tc != null) {
tc.close();
}
return result;
}Example 26
| Project: bagit-java-master File: CheckIfFileExistsTask.java View source code |
/**
* if a file is parially normalized or of a different normalization then the manifest specifies it will fail the existence test.
* This method checks for that by normalizing what is on disk with the normalized filename and see if they match.
*
* @return true if the normalized filename matches one on disk in the specified folder
*/
private boolean existsNormalized() {
final String normalizedFile = Normalizer.normalize(file.toString(), Normalizer.Form.NFD);
final Path parent = file.getParent();
if (parent != null) {
try (final DirectoryStream<Path> files = Files.newDirectoryStream(parent)) {
for (final Path fileToCheck : files) {
final String normalizedFileToCheck = Normalizer.normalize(fileToCheck.toString(), Normalizer.Form.NFD);
if (normalizedFile.equals(normalizedFileToCheck)) {
return true;
}
}
} catch (IOException e) {
logger.error(messages.getString("error_reading_normalized_file"), parent, normalizedFile, e);
}
}
return false;
}Example 27
| Project: basex-master File: FnNormalizeUnicode.java View source code |
@Override
public Item item(final QueryContext qc, final InputInfo ii) throws QueryException {
final byte[] str = toEmptyToken(exprs[0], qc);
Form form = Form.NFC;
if (exprs.length == 2) {
final byte[] n = uc(trim(toToken(exprs[1], qc)));
if (n.length == 0)
return Str.get(str);
try {
form = Form.valueOf(string(n));
} catch (final IllegalArgumentException ex) {
throw NORMUNI_X.get(info, n);
}
}
return ascii(str) ? Str.get(str) : Str.get(Normalizer.normalize(string(str), form));
}Example 28
| Project: cognitionis-nlp-libraries-master File: WikiHtml2PlainHandler.java View source code |
@Override
public void endElement(final String uri, final String localName, final String tag) throws SAXException {
if (tag.equalsIgnoreCase("html") && inText) {
inText = false;
if (!hasSentence) {
System.out.println("no sentences");
strBuilder = textStrb;
} else {
int n = sentences.size() - 1;
for (int i = 0; i < n; i++) {
strBuilder.append(sentences.get(i) + "\n\n");
}
strBuilder.append(sentences.get(n));
sentences = null;
}
// For the garbage collector - free memory
textStrb = null;
}
if (tag.equalsIgnoreCase("p") && inSentence) {
inSentence = false;
if (sentenceStrb.length() > 0) {
String temp = sentenceStrb.toString().replaceAll("(\n|\r|\\p{javaSpaceChar})", " ").replaceAll("\\s+", " ").replaceAll("(—|–)", " - ").replaceAll("’", "'").trim();
if (encoding.equals("ascii")) {
temp = java.text.Normalizer.normalize(temp, java.text.Normalizer.Form.NFD);
temp = temp.replaceAll("[^\\p{ASCII}]", "");
}
sentences.add(temp);
}
// For the garbage collector - free memory
sentenceStrb = null;
}
if (tag.equalsIgnoreCase("table") && inTable > 0) {
inTable--;
}
if (tag.equalsIgnoreCase("sup") && inSup) {
inSup = false;
}
if (tag.matches("h[1234]")) {
inH = false;
if (H2Strb.length() > 0 && !H2Strb.toString().replaceAll("(\n|\r|\\s*\\[\\s*edit(ar)?\\s*\\]\\s*)", "").matches("(Media|Animated maps|See also|Notes|References|External links)")) {
String temp = H2Strb.toString().replaceAll("(\n|\r|\\s*\\[\\s*edit(ar)?\\s*\\]\\s*|\t)", " ").replaceAll("\\s+", " ").replaceAll("(—|–)", " - ").replaceAll("’", "'").trim();
// NOT ALWAYS WORK THAT BELOW NFD + ASCII
if (encoding.equals("ascii")) {
temp = java.text.Normalizer.normalize(temp, java.text.Normalizer.Form.NFD);
temp = temp.replaceAll("[^\\p{ASCII}]", "");
}
sentences.add(temp + ".");
}
// For the garbage collector - free memory
H2Strb = null;
}
// ho puc fer quan s'acaba el document si no tenia text...
if (tag.equalsIgnoreCase(root_tag)) {
if (!hasText) {
if (hasSentence) {
int n = sentences.size() - 1;
for (int i = 0; i < n; i++) {
strBuilder.append(sentences.get(i) + "\n");
}
strBuilder.append(sentences.get(n));
sentences = null;
} else {
strBuilder = textStrb;
// For the garbage collector - free memory
textStrb = null;
}
}
}
}Example 29
| Project: facebook-hive-udfs-master File: UDFNormalizeUnicode.java View source code |
public String evaluate(String s, String form) {
if (s == null || form == null) {
return null;
}
if (form.equals("NFC")) {
return Normalizer.normalize(s, Normalizer.Form.NFC);
} else if (form.equals("NFD")) {
return Normalizer.normalize(s, Normalizer.Form.NFD);
} else if (form.equals("NFKC")) {
return Normalizer.normalize(s, Normalizer.Form.NFKC);
} else if (form.equals("NFKD")) {
return Normalizer.normalize(s, Normalizer.Form.NFKD);
} else {
return null;
}
}Example 30
| Project: iswc2012metadata-master File: ToolText2Rdf.java View source code |
public static String removeDiacritics(String input) {
String nrml = Normalizer.normalize(input, Normalizer.Form.NFD);
StringBuilder stripped = new StringBuilder();
for (int i = 0; i < nrml.length(); ++i) {
if (Character.getType(nrml.charAt(i)) != Character.NON_SPACING_MARK) {
stripped.append(nrml.charAt(i));
}
}
return stripped.toString();
}Example 31
| Project: skalli-master File: NormalizeUtil.java View source code |
@SuppressWarnings("nls")
public static String normalize(String s) {
if (s == null) {
return null;
}
s = s.replaceAll("ä", "ae");
s = s.replaceAll("ö", "oe");
s = s.replaceAll("ü", "ue");
s = s.replaceAll("Ä", "Ae");
s = s.replaceAll("Ö", "Oe");
s = s.replaceAll("Ü", "Ue");
s = s.replaceAll("ß", "ss");
return Normalizer.normalize(s, Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}Example 32
| Project: TweetLanes-master File: Validator.java View source code |
public int getTweetLength(String text) {
text = Normalizer.normalize(text, Normalizer.Form.NFC);
int length = text.codePointCount(0, text.length());
for (Extractor.Entity urlEntity : extractor.extractURLsWithIndices(text)) {
length += urlEntity.start - urlEntity.end;
length += urlEntity.value.toLowerCase().startsWith("https://") ? shortUrlLengthHttps : shortUrlLength;
}
return length;
}Example 33
| Project: xtf-master File: UnicodeNormalizingFilter.java View source code |
@Override
public Token next() throws IOException {
Token t = input.next();
if (t == null) {
return null;
}
// Only do the (sometimes lengthy) normalization step if we haven't already
// looked up this token.
//
String text = t.termText();
if (!cache.contains(text)) {
String normalizedText = Normalizer.normalize(text);
cache.put(text, normalizedText);
}
String newText = cache.get(text);
if (!newText.equals(text))
t.setTermText(newText);
return t;
}Example 34
| Project: andromda-master File: NameMasker.java View source code |
/**
* Returns the name with the appropriate <code>mask</code> applied. The mask, must match one of the valid mask
* properties or will be ignored.
*
* @param name the name to be masked
* @param mask the mask to apply
* @return the masked name.
*/
public String mask(String name, String mask) {
mask = StringUtils.trimToEmpty(mask);
name = StringUtils.trimToEmpty(name);
if (!mask.equalsIgnoreCase(NONE)) {
if (mask.equalsIgnoreCase(UPPERCASE)) {
name = name.toUpperCase();
} else if (mask.equalsIgnoreCase(UNDERSCORE)) {
name = StringUtilsHelper.separate(name, "_");
} else if (mask.equalsIgnoreCase(UPPERUNDERSCORE)) {
name = StringUtilsHelper.separate(name, "_").toUpperCase();
} else if (mask.equalsIgnoreCase(LOWERCASE)) {
name = name.toLowerCase();
} else if (mask.equalsIgnoreCase(LOWERUNDERSCORE)) {
name = StringUtilsHelper.separate(name, "_").toLowerCase();
} else if (mask.equalsIgnoreCase(LOWERCAMELCASE)) {
name = StringUtilsHelper.lowerCamelCaseName(name);
} else if (mask.equalsIgnoreCase(UPPERCAMELCASE)) {
name = StringUtilsHelper.upperCamelCaseName(name);
} else if (mask.equalsIgnoreCase(NOSPACE)) {
name = StringUtils.deleteWhitespace(name);
} else if (mask.equalsIgnoreCase(NOACCENT)) {
name = Normalizer.normalize(name, java.text.Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "");
}
}
return name;
}Example 35
| Project: jackrabbit-master File: NodeNameNormalizer.java View source code |
public static void check(Name name) {
if (log.isDebugEnabled()) {
String lname = name.getLocalName();
String normalized = Normalizer.normalize(lname, Form.NFC);
if (!lname.equals(normalized)) {
String message = "The new node name '" + dump(lname) + "' is not in Unicode NFC form ('" + dump(normalized) + "').";
log.debug(message, new Exception("Call chain"));
}
}
}Example 36
| Project: josm-older-master File: SearchCompiler.java View source code |
/**
 * Tests a primitive against this key/value criterion.
 * <p>
 * When {@code keyPattern} is set, the primitive matches if any of its tags has a key
 * in which {@code keyPattern} is found and a value in which {@code valuePattern} is found.
 * Otherwise the value stored under {@code key} (or the formatted timestamp when the key
 * is {@code "timestamp"}) must contain {@code value} as a substring; both sides are
 * lower-cased first unless {@code caseSensitive} is set.
 *
 * @param osm the primitive to test
 * @return {@code true} if the primitive matches
 */
@Override
public boolean match(OsmPrimitive osm) {
    if (keyPattern == null) {
        // Plain mode: substring test on a single value.
        final String actual = key.equals("timestamp") ? DateUtils.fromDate(osm.getTimestamp()) : osm.get(key);
        if (actual == null) {
            return false;
        }
        final String haystack = caseSensitive ? actual : actual.toLowerCase();
        final String needle = caseSensitive ? value : value.toLowerCase();
        return haystack.indexOf(needle) != -1;
    }
    // Pattern mode: scan every tag of the primitive.
    if (!osm.hasKeys()) {
        return false;
    }
    for (String tagKey : osm.keySet()) {
        final String tagValue = osm.get(tagKey);
        if (keyPattern.matcher(tagKey).find() && valuePattern.matcher(tagValue).find()) {
            return true;
        }
    }
    return false;
}Example 37
| Project: Liferay-CIFS-master File: UTF8Normalizer.java View source code |
/**
 * Normalizes a UTF-8 string by reflectively invoking {@code m_method} with the
 * argument list that matches the normalizer type reported by {@code isType()}.
 *
 * @param utf8str the string to normalize
 * @return the result of the reflective call, or {@code null} when the call fails with an
 *         {@code InvocationTargetException} or {@code IllegalAccessException}
 * @throws RuntimeException if the normalizer type is {@code Unknown}
 */
public final String normalize(String utf8str) {
    try {
        switch (isType()) {
            case IBMICU:
                // compose(String, boolean)
                return (String) m_method.invoke(null, utf8str, false);
            case Java5:
                // compose(String, boolean, int)
                return (String) m_method.invoke(null, utf8str, false, 0);
            case Java6:
                // normalize(CharSequence, Normalizer.Form)
                return (String) m_method.invoke(null, utf8str, m_field.get(null));
            case Unknown:
                throw new RuntimeException("Normalizer is not initialized");
        }
    } catch (InvocationTargetException ex) {
        // Reflection failures are not reported; the caller receives null.
    } catch (IllegalAccessException ex) {
        // Reflection failures are not reported; the caller receives null.
    }
    return null;
}Example 38
| Project: deeplearning4j-master File: InputHomogenization.java View source code |
/**
 * Returns a homogenized copy of the {@code input} field.
 * <p>
 * Per character: characters contained in {@code ignoreCharactersContaining} are kept
 * unchanged, digits become the letter {@code d}, upper-case characters are lower-cased
 * unless {@code preserveCase} is set, and everything else is kept. The result is
 * normalized to NFD, a fixed set of punctuation characters is removed and every run of
 * {@code !} is collapsed to a single {@code !}.
 *
 * @return the normalized text
 */
public String transform() {
    final StringBuilder homogenized = new StringBuilder();
    for (int i = 0; i < input.length(); i++) {
        final char current = input.charAt(i);
        if (ignoreCharactersContaining != null && ignoreCharactersContaining.contains(String.valueOf(current))) {
            homogenized.append(current);
        } else if (Character.isDigit(current)) {
            homogenized.append("d");
        } else if (Character.isUpperCase(current) && !preserveCase) {
            homogenized.append(Character.toLowerCase(current));
        } else {
            homogenized.append(current);
        }
    }
    String normalized = Normalizer.normalize(homogenized.toString(), Form.NFD);
    // Typographic characters are written as Unicode escapes so the set does not depend on
    // the encoding of this source file. U+201D (right double quote) is included explicitly:
    // the literal that followed the left double quote had degraded to U+FFFD, which is still
    // stripped for backward compatibility.
    final String[] removed = {
        ".", ",", "\"", "'", "(", ")",
        "\u201C", "\u201D", "\uFFFD", "\u2026",
        "|", "/", "\\", "[", "]",
        "\u2018", "\u2019"
    };
    for (String token : removed) {
        normalized = normalized.replace(token, "");
    }
    // Collapse after the removals so that "!" characters that became adjacent are merged too.
    return normalized.replaceAll("[!]+", "!");
}Example 39
| Project: jnap-common-master File: SeoStringUtil.java View source code |
/**
 * Builds a lower-case, dash-separated, URL-friendly version of a text.
 * <p>
 * The text is trimmed, normalized to NFD, optionally cleaned of stop words by the first
 * registered {@code SeoStopWordCleaner} that supports {@code locale}, its whitespace is
 * turned into single dashes, and every remaining character that is neither a word
 * character nor a dash is removed.
 *
 * @param src    the text to convert; must not be {@code null}
 * @param locale the locale used to pick a stop-word cleaner, or {@code null} to skip
 *               stop-word removal
 * @return the SEO-friendly text, lower-cased with English locale rules
 */
public static String makeSeoFriendly(String src, Locale locale) {
    // Normalize the TRIMMED text. Previously the result of trim() was overwritten by
    // normalizing the untrimmed source, so surrounding whitespace ended up as dashes.
    String seoFriendlyText = Normalizer.normalize(src.trim(), Form.NFD);
    // try to remove stop words if locale is specified
    if (locale != null) {
        SeoStopWordCleaner wordCleaner = null;
        for (SeoStopWordCleaner cleaner : seoStopWordCleaners) {
            if (ArrayUtils.contains(cleaner.getSupportedLocales(), locale)) {
                wordCleaner = cleaner;
                break;
            }
        }
        if (wordCleaner == null) {
            logger.warn(MessageFormat.format("A locale was specified ({0}) but no " + "SeoStopWordCleaner was found for it", locale.toString()));
        } else {
            seoFriendlyText = wordCleaner.clean(seoFriendlyText);
        }
    }
    // replace duplicated spaces with a single one
    seoFriendlyText = seoFriendlyText.replaceAll("[\\s]{2,}", " ");
    // replace spaces with '-'
    seoFriendlyText = seoFriendlyText.replaceAll("[\\s]", "-");
    // remove everything that is neither a word character nor a dash
    seoFriendlyText = seoFriendlyText.replaceAll("[^\\w-]", StringUtils.EMPTY);
    // convert to lowercase (using english locale rules) and return
    return seoFriendlyText.toLowerCase(Locale.ENGLISH);
}Example 40
| Project: metafacture-core-master File: StreamUnicodeNormalizerTest.java View source code |
/**
 * Configures the normalizer for NFD, feeds it one record containing a single literal
 * and verifies the literal handed to the receiver.
 */
@Test
public void shouldNormalizeToNFDIfConfigured() {
// Select the normalization form before any event is sent.
streamUnicodeNormalizer.setNormalizationForm(Normalizer.Form.NFD);
streamUnicodeNormalizer.startRecord(RECORD_ID);
// Input literal value: KEY_WITH_PRECOMPOSED_CHARS.
streamUnicodeNormalizer.literal(LITERAL_NAME, KEY_WITH_PRECOMPOSED_CHARS);
streamUnicodeNormalizer.endRecord();
// The receiver must have been given KEY_WITH_DIACRITICS under the same literal name.
verify(receiver).literal(LITERAL_NAME, KEY_WITH_DIACRITICS);
}Example 41
| Project: Aero-master File: NormalizeUtf8TransformTest.java View source code |
/**
 * Runs the {@code test_normalize_utf_8} transform built from the config returned by
 * {@code makeConfigWithoutNormalizationFormAndOutput()} and checks that
 * {@code strFeature1} ends up holding exactly one value: the sample string normalized
 * with {@code NormalizeUtf8Transform.DEFAULT_NORMALIZATION_FORM}.
 */
@Test
public void testTransformDefaultNormalizationFormAndOverwriteInput() {
    final Config config = ConfigFactory.parseString(makeConfigWithoutNormalizationFormAndOutput());
    final Transform transform = TransformFactory.createTransform(config, "test_normalize_utf_8");
    final FeatureVector featureVector = makeFeatureVector();
    // The map reference is taken before the transform runs and inspected afterwards.
    final Map<String, Set<String>> stringFeatures = featureVector.getStringFeatures();

    transform.doTransform(featureVector);

    assertNotNull(stringFeatures);
    assertEquals(1, stringFeatures.size());
    final Set<String> output = stringFeatures.get("strFeature1");
    assertNotNull(output);
    assertEquals(1, output.size());
    final String expected = Normalizer.normalize("Funky string: ϓϔẛ", NormalizeUtf8Transform.DEFAULT_NORMALIZATION_FORM);
    assertTrue(output.contains(expected));
}Example 42
| Project: aerosolve-master File: NormalizeUtf8TransformTest.java View source code |
/**
 * Runs the {@code test_normalize_utf_8} transform built from the config returned by
 * {@code makeConfigWithoutNormalizationFormAndOutput()} and checks that
 * {@code strFeature1} ends up holding exactly one value: the sample string normalized
 * with {@code NormalizeUtf8Transform.DEFAULT_NORMALIZATION_FORM}.
 */
@Test
public void testTransformDefaultNormalizationFormAndOverwriteInput() {
    final Config config = ConfigFactory.parseString(makeConfigWithoutNormalizationFormAndOutput());
    final Transform transform = TransformFactory.createTransform(config, "test_normalize_utf_8");
    final FeatureVector featureVector = makeFeatureVector();
    // The map reference is taken before the transform runs and inspected afterwards.
    final Map<String, Set<String>> stringFeatures = featureVector.getStringFeatures();

    transform.doTransform(featureVector);

    assertNotNull(stringFeatures);
    assertEquals(1, stringFeatures.size());
    final Set<String> output = stringFeatures.get("strFeature1");
    assertNotNull(output);
    assertEquals(1, output.size());
    final String expected = Normalizer.normalize("Funky string: ϓϔẛ", NormalizeUtf8Transform.DEFAULT_NORMALIZATION_FORM);
    assertTrue(output.contains(expected));
}Example 43
| Project: AnalyseSI-master File: UnicodeUtils.java View source code |
/**
 * Strips accents from an input String, and decomposes combined characters
 * into multiple basic ASCII characters.
 *
 * The method is based on the Unicode KD normalization form. It iterates
 * over the resulting characters: a character that has an entry in
 * {@code decomposedChars} is replaced by that entry, and every other character
 * is kept only if it lies in the Basic Latin Unicode block.
 *
 * Based on http://www.codeproject.com/KB/cs/UnicodeNormalization.aspx
 * (found while Google-ing "stripping accents unicode string"), but with
 * legacy Java 1.6 classes. Also inspired by
 * http://www.nntp.perl.org/group/perl.i18n/2008/05/msg209.html
 *
 * @param accentedString
 *            A string that contains accents; must not be {@code null}.
 * @return The same string, without accents.
 * @see java.text.Normalizer.Form#NFKD
 * @see Character.UnicodeBlock#BASIC_LATIN
 */
public static String decomposeToBasicLatin(String accentedString) {
    final String normalizedString = Normalizer.normalize(accentedString, Normalizer.Form.NFKD);
    final StringBuilder unaccentedString = new StringBuilder(normalizedString.length());
    // Walk by index instead of with a CharacterIterator: the iterator's DONE sentinel is
    // the char U+FFFF, so a U+FFFF inside the text used to end the loop early and silently
    // drop the rest of the string.
    for (int i = 0; i < normalizedString.length(); i++) {
        final char c = normalizedString.charAt(i);
        if (decomposedChars.containsKey(c)) {
            unaccentedString.append(decomposedChars.get(c));
        } else if (Character.UnicodeBlock.BASIC_LATIN.equals(Character.UnicodeBlock.of(c))) {
            unaccentedString.append(c);
        }
    }
    return unaccentedString.toString();
}Example 44
| Project: android-libcore64-master File: NormalizerTest.java View source code |
/**
 * Checks {@code Normalizer.normalize} for all four normalization forms and for a
 * {@code null} input.
 * <p>
 * Every sample is written with Unicode escapes: the composed and decomposed spellings
 * render identically, so literal characters make it impossible to see (or to preserve
 * across editors and encodings) which form a constant is in.
 */
public void testNormalize() {
    // U+03D3, U+03D4, U+1E9B: already canonically composed.
    final String src = "\u03d3\u03d4\u1e9b";
    // Should already be canonical composed
    assertEquals(src, Normalizer.normalize(src, Normalizer.Form.NFC));
    // Composed to canonical decomposed
    assertEquals("\u03d2\u0301\u03d2\u0308\u017f\u0307", Normalizer.normalize(src, Normalizer.Form.NFD));
    // Composed to compatibility composed
    assertEquals("\u038e\u03ab\u1e61", Normalizer.normalize(src, Normalizer.Form.NFKC));
    // Composed to compatibility decomposed
    assertEquals("\u03a5\u0301\u03a5\u0308\u0073\u0307", Normalizer.normalize(src, Normalizer.Form.NFKD));
    // Decomposed to canonical composed
    assertEquals("\u00e9", Normalizer.normalize("\u0065\u0301", Normalizer.Form.NFC));
    // Decomposed to compatibility composed
    assertEquals("\u1e69", Normalizer.normalize("\u1e9b\u0323", Normalizer.Form.NFKC));
    try {
        Normalizer.normalize(null, Normalizer.Form.NFC);
        fail("Did not throw error on null argument");
    } catch (NullPointerException expected) {
        // Expected: a null source must be rejected.
    }
}Example 45
| Project: android_platform_libcore-master File: NormalizerTest.java View source code |
/**
 * Checks {@code Normalizer.normalize} for all four normalization forms and for a
 * {@code null} input.
 * <p>
 * Every sample is written with Unicode escapes: the composed and decomposed spellings
 * render identically, so literal characters make it impossible to see (or to preserve
 * across editors and encodings) which form a constant is in.
 */
public void testNormalize() {
    // U+03D3, U+03D4, U+1E9B: already canonically composed.
    final String src = "\u03d3\u03d4\u1e9b";
    // Should already be canonical composed
    assertEquals(src, Normalizer.normalize(src, Normalizer.Form.NFC));
    // Composed to canonical decomposed
    assertEquals("\u03d2\u0301\u03d2\u0308\u017f\u0307", Normalizer.normalize(src, Normalizer.Form.NFD));
    // Composed to compatibility composed
    assertEquals("\u038e\u03ab\u1e61", Normalizer.normalize(src, Normalizer.Form.NFKC));
    // Composed to compatibility decomposed
    assertEquals("\u03a5\u0301\u03a5\u0308\u0073\u0307", Normalizer.normalize(src, Normalizer.Form.NFKD));
    // Decomposed to canonical composed
    assertEquals("\u00e9", Normalizer.normalize("\u0065\u0301", Normalizer.Form.NFC));
    // Decomposed to compatibility composed
    assertEquals("\u1e69", Normalizer.normalize("\u1e9b\u0323", Normalizer.Form.NFKC));
    try {
        Normalizer.normalize(null, Normalizer.Form.NFC);
        fail("Did not throw error on null argument");
    } catch (NullPointerException expected) {
        // Expected: a null source must be rejected.
    }
}Example 46
| Project: bugvm-master File: NativeNormalizer.java View source code |
/**
 * Maps a normalization form to the integer code of the matching ICU
 * {@code UNormalizationMode}.
 *
 * @param form the normalization form
 * @return 4 for NFC, 2 for NFD, 5 for NFKC, 3 for NFKD
 * @throws AssertionError if the form is none of the four handled constants
 */
private static int toUNormalizationMode(Form form) {
    // See UNormalizationMode in "unicode/unorm.h". Stable API since ICU 2.0.
    final int mode;
    switch (form) {
        case NFC:
            mode = 4;
            break;
        case NFD:
            mode = 2;
            break;
        case NFKC:
            mode = 5;
            break;
        case NFKD:
            mode = 3;
            break;
        default:
            throw new AssertionError("unknown Normalizer.Form " + form);
    }
    return mode;
}Example 47
| Project: fakecontacts-master File: NativeNormalizer.java View source code |
/**
 * Maps a normalization form to the integer code of the matching ICU
 * {@code UNormalizationMode}.
 *
 * @param form the normalization form
 * @return 4 for NFC, 2 for NFD, 5 for NFKC, 3 for NFKD
 * @throws AssertionError if the form is none of the four handled constants
 */
private static int toUNormalizationMode(Form form) {
    // See UNormalizationMode in "unicode/unorm.h". Stable API since ICU 2.0.
    final int mode;
    switch (form) {
        case NFC:
            mode = 4;
            break;
        case NFD:
            mode = 2;
            break;
        case NFKC:
            mode = 5;
            break;
        case NFKD:
            mode = 3;
            break;
        default:
            throw new AssertionError("unknown Normalizer.Form " + form);
    }
    return mode;
}Example 48
| Project: gbif-api-master File: UnicodeUtils.java View source code |
/**
 * Replaces all diacritics with their ASCII counterpart.
 * <p>
 * The letters U+00F8, U+00D8, U+00F0 and U+00D0 (o with stroke and eth, both cases) have
 * no canonical decomposition, so they are mapped by hand to {@code o}, {@code O},
 * {@code d} and {@code D}. The text is then normalized to NFD and every combining mark
 * ({@code \p{M}}) is removed.
 *
 * @param x the text to convert, may be {@code null}
 * @return the text without diacritics, or {@code null} if {@code x} is {@code null}
 */
public static String ascii(String x) {
    if (x == null) {
        return null;
    }
    // Manually map the characters the Normalizer cannot decompose. Written as escapes so
    // the set survives re-encoding: the upper-case eth had degraded to U+FFFD, which meant
    // it was never mapped to 'D'.
    x = StringUtils.replaceChars(x, "\u00F8\u00D8\u00F0\u00D0", "oOdD");
    // Decompose, then drop the combining marks that carried the accents.
    x = Normalizer.normalize(x, Normalizer.Form.NFD);
    x = x.replaceAll("\\p{M}", "");
    return x;
}Example 49
| Project: IMCKTG-master File: Morse.java View source code |
/**
 * Appends to {@code sb} the {@code IMCmap} entry of every character of {@code s} that
 * has one.
 * <p>
 * Before the lookup the text is NFKD-normalized (only on API level GINGERBREAD and
 * later), lower-cased with the default locale, and each run of whitespace is collapsed
 * to a single space. Characters without an entry in {@code IMCmap} are skipped.
 *
 * @param s  the text to convert
 * @param sb the list that receives one entry per mapped character
 */
private static void morse(String s, List<String> sb) {
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
        s = Normalizer.normalize(s, Normalizer.Form.NFKD);
    }
    s = s.toLowerCase(Locale.getDefault());
    // Strings are immutable: the result has to be assigned back, otherwise the
    // whitespace collapsing is a no-op (as it was before).
    s = s.replaceAll("\\s+", " ");
    for (char c : s.toCharArray()) {
        if (IMCmap.containsKey(c)) {
            sb.add(IMCmap.get(c));
        }
    }
}Example 50
| Project: jease-master File: Filenames.java View source code |
/**
 * Converts a file name into a restricted ASCII form.
 * <p>
 * First every {@code from:to} pair listed (comma-separated) under the {@code ASCII_CODES}
 * entry of {@code I18N} is applied as a plain text replacement. The result is normalized
 * to NFD, combining diacritical marks are removed, and every character outside
 * {@code [a-zA-Z0-9_/.-]} is replaced by a dash.
 *
 * @param filename the file name to convert, may be {@code null}
 * @return the converted file name, or {@code null} if {@code filename} is {@code null}
 */
public String convert(String filename) {
    if (filename == null) {
        return null;
    }
    for (String codePair : I18N.get("ASCII_CODES").split(",")) {
        if (!codePair.contains(":")) {
            continue;
        }
        String[] codePairArray = codePair.split(":");
        // Skip malformed pairs: "x:" and ":" split into fewer than two parts (this used to
        // throw ArrayIndexOutOfBoundsException), and ":y" has an empty search text, which
        // String.replace would insert between every character.
        if (codePairArray.length < 2 || codePairArray[0].isEmpty()) {
            continue;
        }
        filename = filename.replace(codePairArray[0], codePairArray[1]);
    }
    return Normalizer.normalize(filename, Normalizer.Form.NFD).replaceAll("[\\p{InCombiningDiacriticalMarks}]+", "").replaceAll("[^a-zA-Z0-9_/.-]", "-");
}Example 51
| Project: orientdb-master File: OSQLMethodNormalize.java View source code |
/**
 * Normalizes the string form of {@code ioResult} and strips characters from it.
 * <p>
 * {@code iParams[0]}, when present, names the {@link Normalizer.Form} to use (default
 * NFD). {@code iParams[1]}, when present, is a regular expression whose matches are
 * removed from the normalized text; without it the matches of
 * {@code OPatternConst.PATTERN_DIACRITICAL_MARKS} are removed.
 *
 * @return the processed string, or {@code ioResult} unchanged when it is {@code null}
 */
@Override
public Object execute(Object iThis, OIdentifiable iCurrentRecord, OCommandContext iContext, Object ioResult, Object[] iParams) {
    if (ioResult != null) {
        final Normalizer.Form form = iParams != null && iParams.length > 0 ? Normalizer.Form.valueOf(OIOUtils.getStringContent(iParams[0].toString())) : Normalizer.Form.NFD;
        String normalized = Normalizer.normalize(ioResult.toString(), form);
        if (iParams != null && iParams.length > 1) {
            // The pattern is the SECOND argument. Index 0 holds the form name and was
            // previously (mis)used as the regular expression here.
            normalized = normalized.replaceAll(OIOUtils.getStringContent(iParams[1].toString()), "");
        } else {
            normalized = OPatternConst.PATTERN_DIACRITICAL_MARKS.matcher(normalized).replaceAll("");
        }
        ioResult = normalized;
    }
    return ioResult;
}Example 52
| Project: robovm-master File: Main.java View source code |
/**
 * Round-trips a sample word between its composed (NFC) and decomposed (NFD) spelling.
 * A mismatch is reported on {@code System.err}; the closing line on {@code System.out}
 * is printed in either case.
 */
static void testNormalizer() {
    // Written with escapes because the two spellings render identically:
    // composed uses U+00C1, decomposed uses 'A' followed by U+0301.
    String composed = "Bl\u00C1ah";
    String decomposed = "Bl\u0041\u0301ah";
    String res;
    res = Normalizer.normalize(composed, Normalizer.Form.NFD);
    if (!decomposed.equals(res)) {
        System.err.println("Bad decompose: '" + composed + "' --> '" + res + "'");
    }
    res = Normalizer.normalize(decomposed, Normalizer.Form.NFC);
    if (!composed.equals(res)) {
        System.err.println("Bad compose: '" + decomposed + "' --> '" + res + "'");
    }
    System.out.println("Normalizer passed");
}Example 53
| Project: XobotOS-master File: NativeNormalizer.java View source code |
/**
 * Maps a normalization form to the integer code of the matching ICU
 * {@code UNormalizationMode}.
 *
 * @param form the normalization form
 * @return 4 for NFC, 2 for NFD, 5 for NFKC, 3 for NFKD
 * @throws AssertionError if the form is none of the four handled constants
 */
private static int toUNormalizationMode(Form form) {
    // See UNormalizationMode in "unicode/unorm.h". Stable API since ICU 2.0.
    final int mode;
    switch (form) {
        case NFC:
            mode = 4;
            break;
        case NFD:
            mode = 2;
            break;
        case NFKC:
            mode = 5;
            break;
        case NFKD:
            mode = 3;
            break;
        default:
            throw new AssertionError("unknown Normalizer.Form " + form);
    }
    return mode;
}Example 54
| Project: Ivory-master File: LuceneArabicAnalyzer.java View source code |
/**
 * Tokenizes a text and returns the resulting tokens.
 * <p>
 * Steps: {@code preNormalize}, Lucene standard tokenization with lower-casing,
 * {@code postNormalize}, NFKC normalization, optional stop-word removal (when
 * {@code isStopwordRemoval()} is true) and optional stemming (when {@code isStemming()}
 * is true).
 *
 * @param text the raw text
 * @return the processed text split on single spaces
 */
@Override
public String[] processContent(String text) {
    text = preNormalize(text);
    tokenizer = new StandardTokenizer(Version.LUCENE_35, new StringReader(text));
    final TokenStream lowerCased = new LowerCaseFilter(Version.LUCENE_35, tokenizer);
    final String normalizedTokens = Normalizer.normalize(postNormalize(streamToString(lowerCased)), Form.NFKC);

    final StringBuilder kept = new StringBuilder();
    for (String token : normalizedTokens.split(" ")) {
        final boolean discard = isStopwordRemoval() && isDiscard(false, token);
        if (!discard) {
            kept.append(token).append(" ");
        }
    }

    String result = kept.toString().trim();
    if (isStemming()) {
        // run the Lucene normalization and stemming on the stopword-removed text
        result = stem(result);
    }
    return result.split(" ");
}Example 55
| Project: java-utils-text-master File: SEOEncoder.java View source code |
/**
 * Replaces all characters unsuitable for URLs with logical alternatives using
 * <code>java.text.Normalizer</code>.
 * <p>
 * The text is decomposed (NFD) and its combining diacritical marks are dropped;
 * {@code &} becomes {@code +}, {@code /} becomes {@code -}, and every run of characters
 * other than ASCII letters, digits, {@code -} and {@code +} becomes a single {@code _}.
 * One trailing {@code _} is cut off.
 *
 * TODO take care of tapestries url-encoding & -> + -> $002b
 *
 * @param text the text to encode
 * @return the encoded text
 */
public static String encodeUnicode(final String text) {
    String result = Normalizer.normalize(text, Normalizer.Form.NFD);
    result = result.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    result = result.replace('&', '+');
    result = result.replace('/', '-');
    result = result.replaceAll("[^\\p{Alnum}-\\+]+", "_");
    if (result.endsWith("_")) {
        return result.substring(0, result.length() - 1);
    }
    return result;
}Example 56
| Project: talismane-master File: TextPerLineCorpusReader.java View source code |
/**
 * Reports whether another sentence is available, loading it into the {@code sentence}
 * field if necessary.
 * <p>
 * Lines are read from one reader per locale (taken from {@code readerMap} in the order
 * given by {@code localeIterator}). Each line is trimmed, lower-cased with English
 * locale rules and stripped of combining diacritical marks; empty lines are skipped.
 * Lines excluded by the cross-validation settings or lying before the start sentence
 * are counted but skipped.
 *
 * @return {@code true} if the {@code sentence} field holds a sentence after the call
 */
@Override
public boolean hasNextText() {
if (this.getMaxSentenceCount() > 0 && sentenceCount >= this.getMaxSentenceCount()) {
// we've reached the end, do nothing
} else {
// Keep reading until a usable sentence is found or the input is exhausted.
while (sentence == null) {
// Release the current scanner once it has no more lines.
if (scanner != null && !scanner.hasNextLine()) {
scanner.close();
scanner = null;
}
// Advance to the next locale whose reader has at least one line.
while (scanner == null) {
if (localeIterator.hasNext()) {
currentLocale = localeIterator.next();
Reader reader = readerMap.get(currentLocale);
scanner = new Scanner(reader);
if (scanner.hasNextLine()) {
break;
}
scanner.close();
scanner = null;
} else {
break;
}
}
// No scanner left: every locale has been consumed.
if (scanner == null)
break;
sentence = scanner.nextLine().trim();
sentence = sentence.toLowerCase(Locale.ENGLISH);
// Decompose, then drop the combining marks to remove accents.
sentence = Normalizer.normalize(sentence, Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
// Empty lines are skipped without being counted.
if (sentence.length() == 0) {
sentence = null;
continue;
}
boolean includeMe = true;
// check cross-validation
if (this.getCrossValidationSize() > 0) {
if (this.getIncludeIndex() >= 0) {
// Include mode: keep only sentences whose count modulo the fold size equals the include index.
if (sentenceCount % this.getCrossValidationSize() != this.getIncludeIndex()) {
includeMe = false;
}
} else if (this.getExcludeIndex() >= 0) {
// Exclude mode: drop sentences whose count modulo the fold size equals the exclude index.
if (sentenceCount % this.getCrossValidationSize() == this.getExcludeIndex()) {
includeMe = false;
}
}
}
// Sentences before the configured start position are skipped.
if (this.getStartSentence() > sentenceCount) {
includeMe = false;
}
// The counter advances for every non-empty line, whether it is included or not.
sentenceCount++;
if (!includeMe) {
sentence = null;
continue;
}
}
}
return sentence != null;
}Example 57
| Project: Duke-master File: LowerCaseNormalizeCleaner.java View source code |
/**
 * Lower-cases a value, trims it and collapses each internal run of whitespace (space,
 * tab, newline, carriage return, NBSP) into a single space.
 * <p>
 * When {@code strip_accents} is set the value is first decomposed (NFD) so that accents
 * become separate combining marks, which are then skipped. The one exception is a
 * combining ring (U+030A) directly after {@code a} or {@code A}: it is recombined into
 * U+00E5 (a with ring above) instead of being stripped.
 *
 * @param value the value to clean; must not be {@code null}
 * @return the cleaned value
 */
public String clean(String value) {
    if (strip_accents) {
        // after this, accents will be represented as separate combining
        // accent characters trailing the character they belong with. the
        // next step will strip them out.
        value = Normalizer.normalize(value, Normalizer.Form.NFD);
    }
    final int length = value.length();
    char[] tmp = new char[length];
    int pos = 0;
    boolean prevws = false;
    for (int ix = 0; ix < length; ix++) {
        char ch = value.charAt(ix);
        // a-with-ring should *not* be normalized. The ix > 0 guard keeps a value that
        // STARTS with a combining ring from reading index -1 (this used to throw
        // StringIndexOutOfBoundsException); such a ring is stripped like any other mark.
        if (ch == 0x030A && ix > 0 && (value.charAt(ix - 1) == 'a' || value.charAt(ix - 1) == 'A')) {
            prevws = false;
            // this overwrites the previously written 'a' with U+00E5
            tmp[pos - 1] = '\u00E5';
            continue;
        }
        // if character is combining diacritical mark, skip it.
        if ((ch >= 0x0300 && ch <= 0x036F) || (ch >= 0x1DC0 && ch <= 0x1DFF) || (ch >= 0x20D0 && ch <= 0x20FF) || (ch >= 0xFE20 && ch <= 0xFE2F)) {
            continue;
        }
        // whitespace processing (0xA0 is NBSP)
        if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' && ch != 0xA0) {
            // emit one separating space for a preceding whitespace run, but never at the start
            if (prevws && pos != 0) {
                tmp[pos++] = ' ';
            }
            tmp[pos++] = Character.toLowerCase(ch);
            prevws = false;
        } else {
            prevws = true;
        }
    }
    return new String(tmp, 0, pos);
}Example 58
| Project: hsearch-master File: NormalizeAccents.java View source code |
/**
 * Rewrites the teaser title and cached text of a document in Unicode NFD form.
 *
 * @param docObj the document, cast to {@code Doc}; may be {@code null}
 * @return {@code false} if {@code docObj} is {@code null}, {@code true} otherwise
 */
public boolean visit(Object docObj) throws ApplicationFault, SystemFault {
    if (docObj == null) {
        return false;
    }
    final DocTeaser teaser = ((Doc) docObj).teaser;
    if (teaser == null) {
        return true;
    }
    final String title = teaser.getTitle();
    if (title != null) {
        teaser.setTitle(Normalizer.normalize(title, Normalizer.Form.NFD));
    }
    final String cached = teaser.getCachedText();
    if (cached != null) {
        teaser.setCacheText(Normalizer.normalize(cached, Normalizer.Form.NFD));
    }
    return true;
}Example 59
| Project: hsearch-obsolete-master File: NormalizeAccents.java View source code |
/**
 * Rewrites the teaser title and cached text of a document in Unicode NFD form.
 *
 * @param docObj      the document, cast to {@code Doc}
 * @param multiWriter not used by this method
 * @throws ApplicationFault if {@code docObj} is {@code null}
 */
public void visit(Object docObj, boolean multiWriter) throws ApplicationFault, SystemFault {
    if (docObj == null) {
        throw new ApplicationFault("No document");
    }
    final DocTeaser teaser = ((Doc) docObj).teaser;
    if (teaser == null) {
        return;
    }
    final String title = teaser.getTitle();
    if (title != null) {
        teaser.setTitle(Normalizer.normalize(title, Normalizer.Form.NFD));
    }
    final String cached = teaser.getCachedText();
    if (cached != null) {
        teaser.setCacheText(Normalizer.normalize(cached, Normalizer.Form.NFD));
    }
}Example 60
| Project: incubator-hivemall-master File: NormalizeUnicodeUDF.java View source code |
/**
 * Normalizes a string to the requested Unicode normalization form.
 *
 * @param str  the string to normalize, may be {@code null}
 * @param form one of {@code "NFC"}, {@code "NFD"}, {@code "NFKC"}, {@code "NFKD"};
 *             {@code null} or any other value selects NFC
 * @return the normalized string, or {@code null} if {@code str} is {@code null}
 */
@Nullable
public String evaluate(@Nullable String str, @Nullable String form) {
    if (str == null) {
        return null;
    }
    final Normalizer.Form target;
    if ("NFD".equals(form)) {
        target = Normalizer.Form.NFD;
    } else if ("NFKC".equals(form)) {
        target = Normalizer.Form.NFKC;
    } else if ("NFKD".equals(form)) {
        target = Normalizer.Form.NFKD;
    } else {
        // "NFC", null and unrecognised names all select NFC
        target = Normalizer.Form.NFC;
    }
    return Normalizer.normalize(str, target);
}Example 61
| Project: lodmill-master File: PicaXmlHandler.java View source code |
/**
 * Forwards the end of an XML element to the receiver: a subfield emits the trimmed,
 * NFC-normalized builder content as a literal under {@code currentTag}, a datafield ends
 * the current entity, and a record element in the expected namespace ends the record.
 * Any other element is ignored.
 */
@Override
public void endElement(final String uri, final String localName, final String qName) throws SAXException {
    if (SUBFIELD.equals(localName)) {
        getReceiver().literal(currentTag, Normalizer.normalize(builder.toString().trim(), Normalizer.Form.NFC));
        return;
    }
    if (DATAFIELD.equals(localName)) {
        getReceiver().endEntity();
        return;
    }
    if (RECORD.equals(localName) && NAMESPACE.equals(uri)) {
        getReceiver().endRecord();
    }
}Example 62
| Project: MusicDNA-master File: Genius.java View source code |
/**
 * Searches the Genius API for a query and returns one {@code Lyrics} search item per hit.
 * <p>
 * Combining diacritical marks are removed from the query before it is URL-encoded. Any
 * exception raised while fetching or parsing is printed, and an empty list is returned;
 * the same happens when the reported status is not 200.
 *
 * @param query the search text
 * @return the search results, possibly empty
 */
public static ArrayList<Lyrics> search(String query) {
    final ArrayList<Lyrics> results = new ArrayList<>();
    query = Normalizer.normalize(query, Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");

    JsonObject response = null;
    try {
        final URL queryURL = new URL(String.format("http://api.genius.com/search?q=%s", URLEncoder.encode(query, "UTF-8")));
        final Connection connection = Jsoup.connect(queryURL.toExternalForm()).header("Authorization", "Bearer " + Config.GENIUS).timeout(0).ignoreContentType(true);
        final Document document = connection.userAgent(Net.USER_AGENT).get();
        response = new JsonParser().parse(document.text()).getAsJsonObject();
    } catch (Exception e) {
        e.printStackTrace();
    }
    if (response == null || response.getAsJsonObject("meta").get("status").getAsInt() != 200) {
        return results;
    }

    final JsonArray hits = response.getAsJsonObject("response").getAsJsonArray("hits");
    for (int index = 0; index < hits.size(); index++) {
        final JsonObject song = hits.get(index).getAsJsonObject().getAsJsonObject("result");
        final String artist = song.getAsJsonObject("primary_artist").get("name").getAsString();
        final String title = song.get("title").getAsString();
        final String url = "http://genius.com/songs/" + song.get("id").getAsString();
        final Lyrics item = new Lyrics(Lyrics.SEARCH_ITEM);
        item.setArtist(artist);
        item.setTitle(title);
        item.setURL(url);
        item.setSource("Genius");
        results.add(item);
    }
    return results;
}Example 63
| Project: ramais-pti-android-master File: UsefulSearchFragment.java View source code |
/**
 * Produces a lower-case, ASCII-only version of a text with spaces encoded as
 * {@code %20}.
 * <p>
 * On API level GINGERBREAD and later the text is decomposed with
 * {@code java.text.Normalizer} before the non-ASCII characters are dropped. On older
 * levels a fixed list of accented Portuguese letters is mapped by hand; each run of such
 * letters collapses into ONE base letter, as before.
 *
 * @param s the text to normalize
 * @return the normalized text
 */
@SuppressLint("NewApi")
public String normalizar(String s) {
    String str;
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD) {
        /* Use Normalizer normally */
        str = Normalizer.normalize(s, Normalizer.Form.NFD);
        str = str.replaceAll("[^\\p{ASCII}]", "");
        str = str.replaceAll(" ", "%20");
        return str.toLowerCase();
    } else {
        // The character classes are written as Unicode escapes. The literal versions had
        // been damaged by re-encoding: the 'a' class had lost A-acute, and the 'i' class had
        // turned into "A-tilde plus '?'", which rewrote question marks to 'i' and never
        // matched i-acute at all.
        str = s;
        str = str.replaceAll("[\u00E7\u00C7]+", "c");
        str = str.replaceAll("[\u00E3\u00C3\u00E1\u00C1]+", "a");
        str = str.replaceAll("[\u00E9\u00C9]+", "e");
        str = str.replaceAll("[\u00ED\u00CD]+", "i");
        str = str.replaceAll("[\u00F5\u00D5\u00F3\u00D3]+", "o");
        str = str.replaceAll("[\u00FA\u00DA]+", "u");
        str = str.replaceAll("[^\\p{ASCII}]", "");
        str = str.replaceAll(" ", "%20");
        Log.d("Teste", str);
        return str.toLowerCase();
    }
}Example 64
| Project: universal-java-matrix-package-master File: HtmlUtil.java View source code |
/**
 * Converts a text into a lower-case ASCII slug.
 * <p>
 * Dots, colons and whitespace runs become dashes, accents are removed (NFD decomposition
 * followed by dropping every non-ASCII character), all characters other than ASCII
 * letters, digits and dashes are removed, and consecutive dashes are merged into one.
 *
 * @param s the text to convert; must not be {@code null}
 * @return the slug
 */
public static final String toSlug(String s) {
    s = Normalizer.normalize(s, Normalizer.Form.NFD);
    s = s.replaceAll("\\.", "-");
    s = s.replaceAll(":", "-");
    s = s.replaceAll("\\s+", "-");
    s = s.replaceAll("[^\\p{ASCII}]", "");
    s = s.replaceAll("[^a-zA-Z0-9- ]", "");
    // Locale.ROOT keeps the slug ASCII under every default locale (a Turkish default
    // locale would lower-case 'I' to a dotless, non-ASCII i).
    s = s.toLowerCase(java.util.Locale.ROOT);
    // Merge dash runs of ANY length. Three passes of "--" -> "-" only handled runs of up
    // to eight dashes and left "--" behind for longer ones.
    s = s.replaceAll("-{2,}", "-");
    return s;
}Example 65
| Project: androidbible-master File: ReverseIndexer.java View source code |
/**
 * Builds a reverse index (word to record numbers) over all records of a text database
 * and writes it to {@code <prefix>_revindex_bt.bt} inside {@code outDir}.
 * <p>
 * Records are numbered from 1 in iteration order. Their text is stripped of combining
 * diacritical marks and lower-cased; words are runs of ASCII letters that may contain
 * single inner {@code -} or {@code '} characters. Progress and statistics are printed
 * to {@code System.out}.
 * <p>
 * File layout, as written below: the total word count, then for every word length that
 * occurs a group made of the length, the number of words of that length and, per word,
 * its bytes, its record count and the record numbers. A record number is written as a
 * one-byte delta to the previous one when the delta is at most 0x7f, otherwise as a
 * 16-bit value with the top bit set.
 *
 * @param outDir directory that receives the index file
 * @param prefix file name prefix
 * @param teksDb the text database to index
 * @throws RuntimeException wrapping any exception raised while writing
 */
public static void createReverseIndex(File outDir, String prefix, TextDb teksDb) {
    Pattern p_word = Pattern.compile("[A-Za-z]+(?:[-'][A-Za-z]+)*");
    // Words are ordered by length first and alphabetically within one length.
    Map<String, Set<Integer>> map = new TreeMap<>(new Comparator<String>() {
        @Override
        public int compare(String o1, String o2) {
            int lenc = o1.length() - o2.length();
            if (lenc == 0) {
                return o1.compareTo(o2);
            } else {
                return lenc;
            }
        }
    });
    {
        int lid = 0;
        for (Rec rec : teksDb.toRecList()) {
            lid++;
            String text = Normalizer.normalize(rec.text, Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
            text = text.toLowerCase();
            Matcher m = p_word.matcher(text);
            while (m.find()) {
                String word = m.group();
                Set<Integer> locations = map.get(word);
                if (locations == null) {
                    locations = new TreeSet<>();
                    map.put(word, locations);
                }
                locations.add(lid);
            }
        }
        System.out.println("Last lid = " + lid);
    }
    int maxwordlen = 0;
    for (Map.Entry<String, Set<Integer>> e : map.entrySet()) {
        String word = e.getKey();
        System.out.println("word " + word + " lids=" + e.getValue());
        if (word.length() > maxwordlen)
            maxwordlen = word.length();
    }
    System.out.println("Number of words: " + map.size());
    System.out.println("Longest word: " + maxwordlen);
    int stat_lid_absolute = 0;
    int stat_lid_delta = 0;
    try {
        BintexWriter bw = new BintexWriter(new FileOutputStream(new File(outDir, String.format("%s_revindex_bt.bt", prefix))));
        // The writer is closed in a finally block so the underlying file handle is
        // released even when one of the writes fails.
        try {
            // :: int word_count
            bw.writeInt(map.size());
            // split based on word length
            for (int i = 1; i <= maxwordlen; i++) {
                Map<String, Set<Integer>> lenmap = new TreeMap<>();
                for (Map.Entry<String, Set<Integer>> e : map.entrySet()) {
                    String word = e.getKey();
                    if (i == word.length()) {
                        lenmap.put(word, e.getValue());
                    }
                }
                int cnt = lenmap.size();
                System.out.println("Words with length " + i + ": " + cnt);
                if (cnt != 0) {
                    // :: uint8 word_len
                    // :: int word_by_len_count
                    bw.writeUint8(i);
                    bw.writeInt(cnt);
                    for (Map.Entry<String, Set<Integer>> e : lenmap.entrySet()) {
                        String word = e.getKey();
                        Set<Integer> lids = e.getValue();
                        // :: byte[word_len] word
                        // :: uint16 lid_count
                        bw.writeRaw(word.getBytes(ascii));
                        bw.writeUint16(lids.size());
                        int last_lid = 0;
                        for (int lid : lids) {
                            int delta = lid - last_lid;
                            if (delta <= 0x7f) {
                                bw.writeUint8(delta);
                                stat_lid_delta++;
                            } else {
                                // NOTE(review): the cast keeps only the low 16 bits, so an
                                // absolute lid above 0x7fff would not survive - confirm
                                // that record numbers stay below that limit.
                                bw.writeChar((char) (0x8000 | lid));
                                stat_lid_absolute++;
                            }
                            last_lid = lid;
                        }
                    }
                }
            }
        } finally {
            bw.close();
        }
        System.out.println("Lid written using delta = " + stat_lid_delta);
        System.out.println("Lid written using absolute = " + stat_lid_absolute);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}Example 66
| Project: cloudhopper-commons-charset-master File: MobileTextUtil.java View source code |
/**
 * Replaces accented characters in a buffer with their unaccented base characters, for
 * example U+00E9 (e with acute) with {@code e}.
 * <p>
 * The buffer is decomposed (NFD). If that makes the text longer, the combining
 * diacritical marks are removed and the buffer content is replaced by the result. If the
 * length does not grow the buffer is left untouched, which also applies to text whose
 * accents were already decomposed on entry.
 * <p>
 * NOTE: The text is copied twice during conversion, so this is best used on small strings.
 *
 * @param buffer the buffer to analyze and, if necessary, rewrite in place
 * @return the number of characters by which NFD decomposition lengthened the text, or 0
 *         if the buffer was left unchanged
 */
public static int replaceAccentedChars(StringBuilder buffer) {
    // each accented char decomposes into its base char followed by the accent mark(s)
    final String decomposed = Normalizer.normalize(buffer, Normalizer.Form.NFD);
    final int growth = decomposed.length() - buffer.length();
    // efficiency check - if decomposition did not lengthen the text, do nothing
    if (growth <= 0) {
        return 0;
    }
    final String stripped = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    buffer.setLength(0);
    buffer.append(stripped);
    return growth;
}Example 67
| Project: commcare-master File: StringUtils.java View source code |
/**
 * Returns a canonical, lower-cased version of a string with its combining diacritical
 * marks (accents and the like) removed. Results are memoized in an LRU cache that is
 * created, together with the diacritics pattern, on first use.
 *
 * @param input A non-null string
 * @return the lower-cased string without diacritical marks
 */
@SuppressLint("NewApi")
public static synchronized String normalize(String input) {
    if (normalizationCache == null) {
        normalizationCache = new LruCache<>(cacheSize);
        diacritics = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    }
    final String cached = normalizationCache.get(input);
    if (cached != null) {
        return cached;
    }
    // The Normalizer API is only used from GINGERBREAD on; below that the input is taken
    // as is, so only marks that are already separate characters can be removed.
    final String decomposed = Build.VERSION.SDK_INT >= Build.VERSION_CODES.GINGERBREAD ? Normalizer.normalize(input, Normalizer.Form.NFD) : input;
    final String result = diacritics.matcher(decomposed).replaceAll("").toLowerCase();
    normalizationCache.put(input, result);
    return result;
}Example 68
| Project: idea-php-typo3-plugin-master File: Slugify.java View source code |
/**
 * Applies the slug normalization pipeline: NFKD decomposition, removal of the matches of
 * {@code PATTERN_NORMALIZE_NON_ASCII}, replacement of the matches of
 * {@code PATTERN_NORMALIZE_SEPARATOR} by {@code _} or {@code -} (depending on
 * {@code underscoreSeparator}) and removal of the matches of
 * {@code PATTERN_NORMALIZE_TRIM_DASH}.
 *
 * @param input the text to normalize
 * @return the normalized text
 */
private String normalize(final String input) {
    final String separator = underscoreSeparator ? "_" : "-";
    final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFKD);
    final String asciiOnly = PATTERN_NORMALIZE_NON_ASCII.matcher(decomposed).replaceAll(EMPTY);
    final String separated = PATTERN_NORMALIZE_SEPARATOR.matcher(asciiOnly).replaceAll(separator);
    return PATTERN_NORMALIZE_TRIM_DASH.matcher(separated).replaceAll(EMPTY);
}Example 69
| Project: ios-driver-master File: LanguageDictionary.java View source code |
/**
 * Tests whether a content string matches the regular expression derived from an original
 * text. Both strings are normalized with the form held in {@code norme} first.
 *
 * @param content      the text to test
 * @param originalText the text from which {@code getRegexPattern} builds the pattern
 * @return {@code true} if the whole normalized content matches; {@code false} if it does
 *         not or if the derived pattern is not a valid regular expression
 */
public boolean match(String content, String originalText) {
    final String normalizedContent = Normalizer.normalize(content, norme);
    final String pattern = getRegexPattern(Normalizer.normalize(originalText, norme));
    try {
        return normalizedContent.matches(pattern);
    } catch (PatternSyntaxException e) {
        // an invalid pattern counts as "no match"
        return false;
    }
}Example 70
| Project: j2objc-master File: NormalizerTest.java View source code |
/**
 * Checks {@code Normalizer.normalize} for all four normalization forms and for a
 * {@code null} input.
 * <p>
 * Every sample is written with Unicode escapes: the composed and decomposed spellings
 * render identically, so literal characters make it impossible to see (or to preserve
 * across editors and encodings) which form a constant is in.
 */
public void testNormalize() {
    // U+03D3, U+03D4, U+1E9B: already canonically composed.
    final String src = "\u03d3\u03d4\u1e9b";
    // Should already be canonical composed
    assertEquals(src, Normalizer.normalize(src, Normalizer.Form.NFC));
    // Composed to canonical decomposed
    assertEquals("\u03d2\u0301\u03d2\u0308\u017f\u0307", Normalizer.normalize(src, Normalizer.Form.NFD));
    // Composed to compatibility composed
    assertEquals("\u038e\u03ab\u1e61", Normalizer.normalize(src, Normalizer.Form.NFKC));
    // Composed to compatibility decomposed
    assertEquals("\u03a5\u0301\u03a5\u0308\u0073\u0307", Normalizer.normalize(src, Normalizer.Form.NFKD));
    // Decomposed to canonical composed
    assertEquals("\u00e9", Normalizer.normalize("\u0065\u0301", Normalizer.Form.NFC));
    // Decomposed to compatibility composed
    assertEquals("\u1e69", Normalizer.normalize("\u1e9b\u0323", Normalizer.Form.NFKC));
    try {
        Normalizer.normalize(null, Normalizer.Form.NFC);
        fail("Did not throw error on null argument");
    } catch (NullPointerException expected) {
        // Expected: a null source must be rejected.
    }
}Example 71
| Project: KISS-master File: StringNormalizer.java View source code |
/**
 * Make the given string easier to compare by performing a number of simplifications on it
 * <p/>
 * 1. Decompose combination characters into their respective parts (see below)
 * 2. Strip all combining character marks (see below)
 * 3. Strip some other common-but-not-very-useful characters (such as dashes)
 * 4. Lower-case the string
 * <p/>
 * Combination characters are characters that (essentially) have the same meaning as one or
 * more other, more common, characters. Examples for these include:
 * Roman numerals (`Ⅱ` → `II`) and half-width katakana (`ﾐ` → `ミ`)
 * <p/>
 * Combining character marks are diacritics and other extra strokes that are often found as
 * part of many characters in non-English roman scripts. Examples for these include:
 * Diaereses (`ë` → `e`), acutes (`á` → `a`) and macrons (`ō` → `o`)
 *
 * @param input string input, with accents and anything else you can think of
 * @return normalized string and list that maps each result string position to its source
 * string position
 */
public static Pair<String, int[]> normalizeWithMap(String input) {
    final StringBuilder normalized = new StringBuilder();
    final IntSequenceBuilder sourceOffsets = new IntSequenceBuilder(input.length() * 3 / 2);
    final int inputLength = input.length();

    for (int offset = 0; offset < inputLength; ) {
        final int sourceCodePoint = input.codePointAt(offset);
        // Decompose this single code point; the result may span several code points.
        final String decomposed = Normalizer.normalize(new String(Character.toChars(sourceCodePoint)), Normalizer.Form.NFKD);

        for (int i = 0; i < decomposed.length(); ) {
            final int codePoint = decomposed.codePointAt(i);
            final int type = Character.getType(codePoint);
            // Combining marks and dash punctuation are dropped from the result.
            final boolean dropped = type == Character.NON_SPACING_MARK
                    || type == Character.COMBINING_SPACING_MARK
                    || type == Character.DASH_PUNCTUATION;
            if (!dropped) {
                normalized.appendCodePoint(Character.toLowerCase(codePoint));
                // one map entry per kept code point, pointing at the source offset
                sourceOffsets.add(offset);
            }
            i += Character.charCount(codePoint);
        }
        offset += Character.charCount(sourceCodePoint);
    }
    return new Pair<>(normalized.toString(), sourceOffsets.toArray());
}Example 72
| Project: lyrics-master File: Genius.java View source code |
/**
 * Queries the Genius search endpoint and converts every hit into a {@code Lyrics} search item.
 * Combining diacritical marks are stripped from the query before it is sent.
 *
 * @param query free-text search terms
 * @return the hits as search items; empty when the request fails, the reply cannot be parsed,
 *         or the reported status is not 200
 */
public static ArrayList<Lyrics> search(String query) {
    final ArrayList<Lyrics> found = new ArrayList<>();
    final String plainQuery = Normalizer.normalize(query, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    JsonObject reply = null;
    try {
        final URL queryURL = new URL(
                String.format("http://api.genius.com/search?q=%s", URLEncoder.encode(plainQuery, "UTF-8")));
        final Document page = Jsoup.connect(queryURL.toExternalForm())
                .header("Authorization", "Bearer " + Keys.GENIUS)
                .timeout(0)
                .ignoreContentType(true)
                .userAgent(Net.USER_AGENT)
                .get();
        reply = new JsonParser().parse(page.text()).getAsJsonObject();
    } catch (JsonSyntaxException | IOException e) {
        // Best effort: a failed lookup is reported and yields an empty result list.
        e.printStackTrace();
    }
    if (reply == null || reply.getAsJsonObject("meta").get("status").getAsInt() != 200) {
        return found;
    }
    final JsonArray hits = reply.getAsJsonObject("response").getAsJsonArray("hits");
    for (int index = 0; index < hits.size(); index++) {
        final JsonObject song = hits.get(index).getAsJsonObject().getAsJsonObject("result");
        final Lyrics item = new Lyrics(Lyrics.SEARCH_ITEM);
        item.setArtist(song.getAsJsonObject("primary_artist").get("name").getAsString());
        item.setTitle(song.get("title").getAsString());
        item.setURL("http://genius.com/songs/" + song.get("id").getAsString());
        item.setSource("Genius");
        found.add(item);
    }
    return found;
}Example 73
| Project: musicmount-master File: SimpleAssetLocatorTest.java View source code |
/**
 * Checks the asset paths produced by SimpleAssetLocator for plain names, names with spaces,
 * names with non-ASCII characters, each normalization form, and several base-path spellings.
 */
@Test
public void testGetAssetPath() throws IOException, URISyntaxException {
ResourceProvider resourceProvider = new FileResourceProvider();
Resource baseFolder = resourceProvider.newResource(System.getProperty("user.home"));
// no normalization form configured
SimpleAssetLocator locator = new SimpleAssetLocator(baseFolder, "music", null);
Assert.assertEquals("music/sample-aac.m4a", locator.getAssetPath(baseFolder.resolve("sample-aac.m4a")));
// spaces are expected to be percent-encoded
Assert.assertEquals("music/sample%20aac.m4a", locator.getAssetPath(baseFolder.resolve("sample aac.m4a")));
// non-ASCII characters are expected as percent-encoded UTF-8 bytes
Assert.assertEquals("music/Bj%C3%B6rk/Vespertine/07%20Aurora.m4a", locator.getAssetPath(baseFolder.resolve("Björk/Vespertine/07 Aurora.m4a")));
// behavior changed from jdk7u25 to jdk7u45, seems to normalize to NFC by default!!!
// Assert.assertEquals("music/Bjo%CC%88rk/Vespertine/07%20Aurora.m4a", locator.getAssetPath(baseFolder.resolve("Bjo\u0308rk/Vespertine/07 Aurora.m4a"))); // combining diaeresis
// perform character composition
locator = new SimpleAssetLocator(baseFolder, "music", Normalizer.Form.NFC);
Assert.assertEquals("music/Bj%C3%B6rk/Vespertine/07%20Aurora.m4a", locator.getAssetPath(baseFolder.resolve("Björk/Vespertine/07 Aurora.m4a")));
// perform character decomposition
locator = new SimpleAssetLocator(baseFolder, "music", Normalizer.Form.NFD);
Assert.assertEquals("music/Bjo%CC%88rk/Vespertine/07%20Aurora.m4a", locator.getAssetPath(baseFolder.resolve("Björk/Vespertine/07 Aurora.m4a")));
// repeated slashes around the base path are expected to collapse to a single leading slash
locator = new SimpleAssetLocator(baseFolder, "///music///", null);
Assert.assertEquals("/music/sample-aac.m4a", locator.getAssetPath(baseFolder.resolve("sample-aac.m4a")));
// an empty base path yields a path relative to the base folder
locator = new SimpleAssetLocator(baseFolder, "", null);
Assert.assertEquals("sample-aac.m4a", locator.getAssetPath(baseFolder.resolve("sample-aac.m4a")));
// a multi-segment base path is kept as is
locator = new SimpleAssetLocator(baseFolder, "music/sample-album", null);
Assert.assertEquals("music/sample-album/sample-aac.m4a", locator.getAssetPath(baseFolder.resolve("sample-aac.m4a")));
// spaces in the base path are percent-encoded as well
locator = new SimpleAssetLocator(baseFolder, "my music", null);
Assert.assertEquals("my%20music/sample-aac.m4a", locator.getAssetPath(baseFolder.resolve("sample-aac.m4a")));
}Example 74
| Project: QuickLyric-master File: Genius.java View source code |
/**
 * Looks up songs on Genius and wraps each hit in a {@code Lyrics} search item. The query is
 * decomposed (NFD) and its combining diacritical marks removed before the request is made.
 *
 * @param query free-text search terms
 * @return one item per hit; empty when the request or parsing fails or the status is not 200
 */
public static ArrayList<Lyrics> search(String query) {
    final ArrayList<Lyrics> items = new ArrayList<>();
    final String unaccented = Normalizer.normalize(query, Normalizer.Form.NFD)
            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    JsonObject json = null;
    try {
        final String address =
                String.format("http://api.genius.com/search?q=%s", URLEncoder.encode(unaccented, "UTF-8"));
        final URL queryURL = new URL(address);
        final Connection request = Jsoup.connect(queryURL.toExternalForm())
                .header("Authorization", "Bearer " + Keys.GENIUS)
                .timeout(0)
                .ignoreContentType(true);
        final Document body = request.userAgent(Net.USER_AGENT).get();
        json = new JsonParser().parse(body.text()).getAsJsonObject();
    } catch (JsonSyntaxException | IOException e) {
        // Failures are only reported; the caller receives an empty list.
        e.printStackTrace();
    }
    final boolean usable =
            json != null && json.getAsJsonObject("meta").get("status").getAsInt() == 200;
    if (!usable) {
        return items;
    }
    final JsonArray hits = json.getAsJsonObject("response").getAsJsonArray("hits");
    for (int i = 0; i < hits.size(); i++) {
        final JsonObject song = hits.get(i).getAsJsonObject().getAsJsonObject("result");
        final String artist = song.getAsJsonObject("primary_artist").get("name").getAsString();
        final String title = song.get("title").getAsString();
        final String url = "http://genius.com/songs/" + song.get("id").getAsString();
        final Lyrics entry = new Lyrics(Lyrics.SEARCH_ITEM);
        entry.setArtist(artist);
        entry.setTitle(title);
        entry.setURL(url);
        entry.setSource("Genius");
        items.add(entry);
    }
    return items;
}Example 75
| Project: Raildelays-master File: StationBasedExcelRowComparator.java View source code |
/**
 * Builds a function that maps a row to a comparison-friendly station name: the name is
 * decomposed (NFD), every non-ASCII character is removed, and the rest is upper-cased.
 *
 * @param keyExtractor extracts the station from a row
 * @return a function yielding the simplified name, or {@code null} when the row has no station
 *         or the station name is the empty string
 */
protected Function<T, String> getStationName(Function<T, Station> keyExtractor) {
    return excelRow -> {
        final Station station = keyExtractor.apply(excelRow);
        if (station == null) {
            return null;
        }
        final String stationName = station.getName(language);
        if ("".equals(stationName)) {
            return null;
        }
        final String decomposed = Normalizer.normalize(stationName, Normalizer.Form.NFD);
        return decomposed.replaceAll("[^\\p{ASCII}]", "").toUpperCase(Locale.ENGLISH);
    };
}Example 76
| Project: roboconf-platform-master File: AbstractApplication.java View source code |
/**
 * Sets both the display name and the internal name. The display name is the trimmed input;
 * the internal name is the same text with combining diacritical marks removed.
 *
 * @param name the name to set (may be {@code null}, which clears both names)
 */
public final void setName(String name) {
    if (name == null) {
        this.name = null;
        this.displayName = null;
        return;
    }
    final boolean blank = Utils.isEmptyOrWhitespaces(name);
    this.displayName = name.trim();
    if (blank) {
        this.name = name.trim();
        return;
    }
    // Decompose accented letters, then drop the accents so only base letters remain.
    final String decomposed = Normalizer.normalize(name, Normalizer.Form.NFD);
    final Pattern accents = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    this.name = accents.matcher(decomposed).replaceAll("").trim();
}Example 77
| Project: sigio.jar-master File: JSONStringAdapter.java View source code |
/**
 * Converts the content of a {@code java.lang.String} to a quoted JSON string literal.
 * <p>
 * Backspace, tab, newline, form feed, carriage return, solidus, reverse solidus and the quote
 * character are replaced by the escape sequences defined on {@code JSONStringAdapter} and
 * {@code JSON}. Any other character below U+0020 is dropped. The input is walked by code
 * point, so characters outside the Basic Multilingual Plane are copied intact. The quoted
 * result is returned in canonical decomposition (NFD).
 *
 * @param str the text to convert
 * @return the quoted, escaped and NFD-normalized text
 */
static String toJSONString(String str) {
    StringBuilder sb = new StringBuilder();
    // Advance by the width of each code point; stepping one char at a time would visit the
    // low surrogate of a pair a second time.
    for (int i = 0; i < str.length(); ) {
        final int c = str.codePointAt(i);
        i += Character.charCount(c);
        switch (c) {
            case 0x0008:
                sb.append(JSONStringAdapter.backspace_seq);
                break;
            case 0x0009:
                sb.append(JSONStringAdapter.tab_seq);
                break;
            case 0x000A:
                sb.append(JSONStringAdapter.nl_seq);
                break;
            case 0x000C:
                sb.append(JSONStringAdapter.ff_seq);
                break;
            case 0x000D:
                sb.append(JSONStringAdapter.cr_seq);
                break;
            case 0x002F:
                sb.append(JSONStringAdapter.solidus_seq);
                break;
            case 0x005C:
                // NOTE(review): only JSON.ESCAPE_CHAR is appended for a backslash; confirm that
                // it yields a valid escaped reverse solidus.
                sb.append(JSON.ESCAPE_CHAR);
                break;
            case JSON.QUOTE_CHAR:
                sb.append(JSONStringAdapter.quote_seq);
                break;
            default:
                if (c >= 0x0020) {
                    // appendCodePoint keeps supplementary characters whole; a (char) cast
                    // would truncate them to an unrelated BMP character.
                    sb.appendCodePoint(c);
                }
                break;
        }
    }
    // Surround the string with quotes:
    sb.insert(0, JSON.QUOTE_CHAR);
    sb.append(JSON.QUOTE_CHAR);
    // Canonical Decomposition of Unicode (NFD).
    return Normalizer.normalize(sb.toString(), Normalizer.Form.NFD);
}Example 78
| Project: songbook-master File: SongDatabase.java View source code |
/**
 * Turns a free-form id into a URL-safe token: quotes become spaces, the text is trimmed,
 * Unicode marks are removed after NFD decomposition, the result is lower-cased and finally
 * URL-encoded as UTF-8.
 *
 * @param id the raw identifier
 * @return the encoded identifier, or the cleaned but unencoded text if UTF-8 is unavailable
 */
private static String encodeId(String id) {
    final String unquoted = id.replace("'", " ").replace("\"", " ").trim();
    final String cleaned = Normalizer.normalize(unquoted, Normalizer.Form.NFD)
            .replaceAll("\\p{M}", "")
            .toLowerCase();
    try {
        return URLEncoder.encode(cleaned, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        return cleaned;
    }
}Example 79
| Project: uberfire-master File: TextUtil.java View source code |
/**
 * Normalizes a repository name: leading and trailing '.' and '-' characters are stripped,
 * the text is decomposed (NFD), and every match of the patterns repoP1, repoP2 and repoP3 is
 * removed in that order.
 *
 * @param input the raw repository name
 * @return the normalized name
 */
public static String normalizeRepositoryName(String input) {
// Remove leading and/or trailing '.' and '-'
// Each strip recurses, so the remainder has already been fully normalized when it comes back.
if (input.startsWith(".") || input.startsWith("-")) {
input = normalizeRepositoryName(input.substring(1));
}
if (input.endsWith(".") || input.endsWith("-")) {
input = normalizeRepositoryName(input.substring(0, input.length() - 1));
}
// Repository operations are not too frequent so instantiate corresponding matchers on demand
// NOTE(review): after a recursive strip the three patterns are applied again to text that was
// already processed; this is only harmless if the patterns are idempotent - confirm.
return repoP3.matcher(repoP2.matcher(repoP1.matcher(Normalizer.normalize(input, Normalizer.Form.NFD)).replaceAll(EMPTY_STRING)).replaceAll(EMPTY_STRING)).replaceAll(EMPTY_STRING);
}Example 80
| Project: wildfly-elytron-master File: ExternalSaslServer.java View source code |
/**
 * Handles the single client message of the exchange. A non-empty response is decoded as
 * UTF-8, normalized to NFKC and used as the authorization identity, which is then submitted
 * to the callback handler for approval.
 *
 * @param response the client message; an empty array means no authorization identity was sent
 * @return always {@code null}
 * @throws SaslException if a message was already processed, the identity contains a NUL
 *         character, the callback handler fails, or authorization is denied
 */
public byte[] evaluateResponse(final byte[] response) throws SaslException {
    if (complete) {
        throw log.mechMessageAfterComplete(getMechanismName()).toSaslException();
    }
    complete = true;
    String authorizationId = null;
    if (response.length != 0) {
        final String decoded = new String(response, StandardCharsets.UTF_8);
        authorizationId = Normalizer.normalize(decoded, Normalizer.Form.NFKC);
        if (authorizationId.indexOf(0) >= 0) {
            throw log.mechUserNameContainsInvalidCharacter(getMechanismName()).toSaslException();
        }
    }
    final AuthorizeCallback authorizeCallback = new AuthorizeCallback(null, authorizationId);
    try {
        cbh.handle(Arrays2.of(authorizeCallback));
    } catch (SaslException e) {
        // Already the right exception type: pass it through untouched.
        throw e;
    } catch (IOException e) {
        throw log.mechAuthorizationFailed(getMechanismName(), e).toSaslException();
    } catch (UnsupportedCallbackException e) {
        throw log.mechAuthorizationFailed(getMechanismName(), e).toSaslException();
    }
    if (authorizeCallback.isAuthorized()) {
        this.authorizationID = authorizeCallback.getAuthorizedID();
        return null;
    }
    throw log.mechAuthorizationFailed(getMechanismName(), null, authorizationId).toSaslException();
}Example 81
| Project: wildfly-security-master File: ExternalSaslServer.java View source code |
/**
 * Processes the one and only client message. When the message is non-empty it is read as
 * UTF-8, NFKC-normalized and taken as the requested authorization identity; the callback
 * handler then decides whether that identity is authorized.
 *
 * @param response the client message; zero length means no identity was supplied
 * @return always {@code null}
 * @throws SaslException if called after completion, if the identity contains a NUL
 *         character, if the callback handler fails, or if authorization is refused
 */
public byte[] evaluateResponse(final byte[] response) throws SaslException {
    if (complete) {
        throw log.mechMessageAfterComplete(getMechanismName()).toSaslException();
    }
    complete = true;
    final boolean identitySupplied = response.length != 0;
    String authorizationId = null;
    if (identitySupplied) {
        authorizationId = Normalizer.normalize(new String(response, StandardCharsets.UTF_8), Normalizer.Form.NFKC);
        final boolean containsNul = authorizationId.indexOf(0) != -1;
        if (containsNul) {
            throw log.mechUserNameContainsInvalidCharacter(getMechanismName()).toSaslException();
        }
    }
    final AuthorizeCallback authorizeCallback = new AuthorizeCallback(null, authorizationId);
    try {
        cbh.handle(Arrays2.of(authorizeCallback));
    } catch (SaslException e) {
        throw e;
    } catch (IOException e) {
        throw log.mechAuthorizationFailed(getMechanismName(), e).toSaslException();
    } catch (UnsupportedCallbackException e) {
        throw log.mechAuthorizationFailed(getMechanismName(), e).toSaslException();
    }
    if (!authorizeCallback.isAuthorized()) {
        throw log.mechAuthorizationFailed(getMechanismName(), null, authorizationId).toSaslException();
    }
    this.authorizationID = authorizeCallback.getAuthorizedID();
    return null;
}Example 82
| Project: openjdk-master File: CDataTransferer.java View source code |
/**
 * Pre-processes native clipboard / drag-and-drop bytes before delegating to the superclass:
 * URL data is turned into a {@code URL} directly, file lists are rewritten as line-separated
 * text, and plain string data is normalized to NFC.
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor, long format, Transferable transferable) throws IOException {
if (format == CF_URL && URL.class.equals(flavor.getRepresentationClass())) {
// Default to the platform charset unless the transferable announces its own text encoding.
String charset = Charset.defaultCharset().name();
if (transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor)) {
try {
charset = new String((byte[]) transferable.getTransferData(javaTextEncodingFlavor), StandardCharsets.UTF_8);
} catch (UnsupportedFlavorException cannotHappen) {
}
}
String xml = new String(bytes, charset);
// The payload is decoded to text and the URL is extracted from it.
return new URL(extractURL(xml));
}
if (isUriListFlavor(flavor) && format == CF_FILE) {
// dragQueryFile works fine with files and url,
// it parses and extracts values from property list.
// macOS always returns a property list for
// CF_URL and CF_FILE
String[] strings = dragQueryFile(bytes);
if (strings == null) {
return null;
}
// NOTE(review): getBytes() uses the platform default charset here - confirm this is intended.
bytes = String.join(System.getProperty("line.separator"), strings).getBytes();
// now we extracted uri from xml, now we should treat it as
// regular string that allows to translate data to target representation
// class by base method
format = CF_STRING;
} else if (format == CF_STRING) {
// Re-encode string data in composed (NFC) form.
bytes = Normalizer.normalize(new String(bytes, "UTF8"), Form.NFC).getBytes("UTF8");
}
return super.translateBytes(bytes, flavor, format, transferable);
}Example 83
| Project: behave-master File: ScreenShootingMaker.java View source code |
/**
 * Saves a screenshot after a scenario failure. One screenshot is stored under a path derived
 * from the failure UUID and, when a current scenario name is available, a second one under a
 * path that also contains the sanitized scenario name.
 *
 * @param uuidWrappedFailure the wrapped failure whose UUID names the screenshot
 */
public void afterScenarioFailure(UUIDExceptionWrapper uuidWrappedFailure) throws Exception {
// Do not capture the screen for pending steps
if (uuidWrappedFailure instanceof PendingStepFound) {
return;
}
// Ignore the error when no runner exists
Runner runner = null;
try {
runner = (Runner) InjectionManager.getInstance().getInstanceDependecy(Runner.class);
} catch (BehaveException e) {
}
if (runner == null) {
return;
}
String scenario = BehaveContext.getInstance().getCurrentScenario();
String screenshotPathWithScenario = "";
// Convert string to path
if (scenario != null) {
// Decompose accents, then drop spaces and combining marks
String ret = Normalizer.normalize(scenario, Normalizer.Form.NFD).replace(" ", "").replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
// Remove '-', ':', '.' and '#' from the scenario name
scenario = ret.replaceAll("[-]", "").replaceAll("[:]", "").replaceAll("[.]", "").replaceAll("[#]", "");
screenshotPathWithScenario = screenshotPathWithScenario(scenario, uuidWrappedFailure.getUUID());
}
String screenshotPath = screenshotPath(uuidWrappedFailure.getUUID());
// The URL is only used in log messages; failing to read it leaves it empty.
String currentUrl = "";
try {
currentUrl = runner.getCurrentUrl();
} catch (Exception e) {
}
try {
runner.saveScreenshotTo(screenshotPath);
if (!screenshotPathWithScenario.equals("")) {
runner.saveScreenshotTo(screenshotPathWithScenario);
}
} catch (Exception ex) {
logger.error(message.getString("exception-screen-save", currentUrl, screenshotPath, ex.getMessage()));
logger.error(ex);
return;
}
logger.info(message.getString("message-screen-save", currentUrl, screenshotPath, new File(screenshotPath).length()));
}Example 84
| Project: Biblivre-3-master File: Z3950Client.java View source code |
/**
 * Runs a Z39.50 search against the given server and converts the returned ISO 2709 records
 * into {@code Record} objects.
 *
 * @param server connection details (URL, port, collection, charset) of the target server
 * @param search the search type and value; diacritics are removed from the value
 * @return the records that could be converted; empty when nothing was found or the search failed
 */
public List<Record> doSearch(final Z3950ServerDTO server, final Z3950SearchDTO search) {
List<Record> listRecords = new ArrayList<Record>();
// Configure the shared factory for this server
factory.setHost(server.getUrl());
factory.setPort(server.getPort());
factory.setCharsetEncoding("UTF-8");
factory.setApplicationContext(z3950Context);
factory.setDefaultRecordSyntax("usmarc");
factory.setDefaultElementSetName("F");
factory.setDoCharsetNeg(true);
factory.getRecordArchetypes().put("Default", "usmarc::F");
factory.getRecordArchetypes().put("FullDisplay", "usmarc::F");
factory.getRecordArchetypes().put("BriefDisplay", "usmarc::B");
// Build the prefix query: <prefix><type> "<value without diacritics>"
final String qry = QUERY_PREFIX + search.getType() + " \"" + TextUtils.removeDiacriticals(search.getValue()) + "\"";
IRQuery query = new IRQuery();
query.collections = new Vector();
query.collections.add(server.getCollection());
query.query = new org.jzkit.search.util.QueryModel.PrefixString.PrefixString(qry);
try {
Searchable s = factory.newSearchable();
s.setApplicationContext(z3950Context);
IRResultSet result = s.evaluate(query);
// Wait without timeout until result set is complete or failure
result.waitForStatus(IRResultSetStatus.COMPLETE | IRResultSetStatus.FAILURE, 0);
if (result.getStatus() == IRResultSetStatus.FAILURE) {
log.error("IRResultSetStatus == FAILURE");
}
// NOTE(review): this early return skips the result.close() / s.close() calls below - confirm
if (result.getFragmentCount() == 0) {
return listRecords;
}
String encoding = server.getCharset();
AnselToUnicode atu = new AnselToUnicode();
Enumeration e = new ReadAheadEnumeration(result, new ArchetypeRecordFormatSpecification("Default"));
int errorRecords = 0;
// NOTE(review): 'record' is declared outside the loop and never reset, so a record that fails
// every conversion appears to re-add the previous successful record - confirm
Record record = null;
for (int i = 0; e.hasMoreElements(); i++) {
iso2709 o = (iso2709) e.nextElement();
try {
String iso = "";
if (encoding.equals("MARC-8")) {
// MARC-8 data: read the bytes as ISO-8859-1, convert through AnselToUnicode, compose to NFC
iso = Normalizer.normalize(atu.convert(new String((byte[]) o.getOriginalObject(), "ISO-8859-1")), Normalizer.Form.NFC);
} else {
iso = new String((byte[]) o.getOriginalObject(), encoding);
}
// Conversion attempts, in order; each one is tried only while 'record' is still null
// and a failed attempt is deliberately ignored.
try {
record = MarcUtils.iso2709ToRecordAsIso(iso, false);
} catch (Exception encodeE) {
}
if (record == null) {
try {
record = MarcUtils.iso2709ToRecord(iso, false);
} catch (Exception encodeE) {
}
}
// Last resort: retry both converters on the raw bytes read as ISO-8859-1
if (record == null) {
try {
record = MarcUtils.iso2709ToRecordAsIso(new String((byte[]) o.getOriginalObject(), "ISO-8859-1"), false);
} catch (Exception encodeE) {
}
}
if (record == null) {
try {
record = MarcUtils.iso2709ToRecord(new String((byte[]) o.getOriginalObject(), "ISO-8859-1"), false);
} catch (Exception encodeE) {
}
}
} catch (Exception ex) {
}
if (record != null) {
listRecords.add(record);
} else {
++errorRecords;
}
}
if (errorRecords > 0) {
log.warn("Total number of records that failed the conversion: " + errorRecords);
}
try {
result.close();
s.close();
} catch (Exception closingException) {
log.error(closingException.getMessage());
}
} catch (Exception e) {
log.error(e.getMessage(), e);
}
log.info("returning results");
return listRecords;
}Example 85
| Project: cloudhopper-commons-master File: MobileTextUtil.java View source code |
/**
 * Strips accents in place: the buffer is decomposed (NFD) and the combining diacritical
 * marks produced by the decomposition are removed. For example, &eacute; becomes e.<br><br>
 * NOTE: The content is copied more than once during the conversion, so this is best kept
 * to small strings.
 *
 * @param buffer The buffer whose accented characters are replaced; it is left untouched
 *      when decomposition does not make the text longer.
 * @return The number of characters the text grew by when decomposed, or 0 if it did not grow
 */
public static int replaceAccentedChars(StringBuilder buffer) {
    final int originalLength = buffer.length();
    // After NFD each accented character is its base character followed by the accent mark(s).
    final String decomposed = Normalizer.normalize(buffer, Normalizer.Form.NFD);
    final int growth = decomposed.length() - originalLength;
    // No growth means there was nothing to decompose, hence nothing to strip.
    if (growth <= 0) {
        return 0;
    }
    final String stripped = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    buffer.setLength(0);
    buffer.append(stripped);
    return growth;
}Example 86
| Project: commcare-odk-master File: StringUtils.java View source code |
/**
 * Returns a canonical version of the input: decomposed (NFD), stripped of combining
 * diacritical marks and lower-cased. Results are memoized in an LRU cache that is created on
 * first use.
 *
 * @param input A non-null string
 * @return a canonical version of the passed in string that is lower cased and has removed
 * diacritical marks like accents.
 */
public static synchronized String normalize(String input) {
    if (normalizationCache == null) {
        // Lazy initialisation; safe because the whole method is synchronized.
        normalizationCache = new LruCache<String, String>(cacheSize);
        diacritics = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    }
    final String cached = normalizationCache.get(input);
    if (cached != null) {
        // Return the value already fetched instead of querying the cache a second time.
        return cached;
    }
    final String normalized =
            diacritics.matcher(Normalizer.normalize(input, Normalizer.Form.NFD)).replaceAll("").toLowerCase();
    normalizationCache.put(input, normalized);
    return normalized;
}Example 87
| Project: epublib-master File: SearchIndex.java View source code |
/**
 * Reduces text to a plain, comparable form.
 *
 * Trims the text via unicodeTrim<br/>
 * Replaces every WHITESPACE_PATTERN match with a single space<br/>
 * Decomposes accented characters (NFD) and removes REMOVE_ACCENT_PATTERN matches<br/>
 * Lower-cases the result<br/>
 *
 * @param text the text to clean
 * @return the trimmed, whitespace-collapsed, accent-free, lower-cased text
 */
public static String cleanText(String text) {
    final String trimmed = unicodeTrim(text);
    final String singleSpaced = WHITESPACE_PATTERN.matcher(trimmed).replaceAll(" ");
    // NFD splits an accented letter into the base letter plus a separate mark ...
    final String decomposed = Normalizer.normalize(singleSpaced, Normalizer.Form.NFD);
    // ... which the accent pattern then removes.
    final String unaccented = REMOVE_ACCENT_PATTERN.matcher(decomposed).replaceAll("");
    return unaccented.toLowerCase();
}Example 88
| Project: FilmTit-master File: SubtitleDownloadServlet.java View source code |
/**
 * Serves a document's subtitles as a downloadable file. Expects the request parameters
 * "docId", "sessionId", "type" (srt, sub or txt) and "way" (source, target or
 * targetthrowback); any missing or invalid value is reported through writeError.
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
// reads the parameters from the http request
String docId = request.getParameter("docId");
String sessionId = request.getParameter("sessionId");
String typeString = request.getParameter("type");
String wayString = request.getParameter("way");
if (docId == null || sessionId == null || typeString == null || wayString == null) {
writeError(response, "no parameter");
return;
}
// the document id must be numeric
Long docIdLong;
try {
docIdLong = new Long(docId);
} catch (NumberFormatException e) {
writeError(response, "wrong documentId");
return;
}
// access check before anything is generated
if (!backend.canReadDocument(sessionId, docIdLong)) {
writeError(response, "no rights to read document");
return;
}
// map the requested file type to the chunk file type and the response content type
TimedChunk.FileType type;
String responseType;
if (typeString.equals("srt")) {
type = TimedChunk.FileType.SRT;
responseType = "application/x-subrip";
} else if (typeString.equals("sub")) {
type = TimedChunk.FileType.SUB;
responseType = "text/plain";
} else if (typeString.equals("txt")) {
type = TimedChunk.FileType.TXT;
responseType = "text/plain";
} else {
writeError(response, "wrong format " + typeString);
return;
}
// map the requested side to the matching converter
ChunkStringGenerator.ResultToChunkConverter way;
if (wayString.equals("source")) {
way = ChunkStringGenerator.SOURCE_SIDE;
} else if (wayString.equals("target")) {
way = ChunkStringGenerator.TARGET_SIDE;
} else if (wayString.equals("targetthrowback")) {
way = ChunkStringGenerator.TARGET_SIDE_WITH_THROWBACK;
} else {
writeError(response, "no such way as " + wayString);
return;
}
try {
// generate the file name
USDocument document = backend.getActiveDocument(sessionId, docIdLong);
// split chars and accents
String fileName = Normalizer.normalize(document.getTitle(), Normalizer.Form.NFD);
// removes accents
fileName = fileName.replaceAll("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+", "");
// removes non ASCII characters
fileName = fileName.replaceAll("[^\\x00-\\x7f]", "");
// removes not allowed characters
fileName = fileName.replaceAll("[|\\?\\*\\\\<>+/\\[\\]]+", "");
// replace spaces by underscores
fileName = fileName.replaceAll(" ", "_");
// solve the language code: the document's own language for the source side, otherwise
// whichever of the two configured languages is not the document's language
String languageToFileName = null;
String language1 = ConfigurationSingleton.getConf().l1().getCode();
String language2 = ConfigurationSingleton.getConf().l2().getCode();
if (way == ChunkStringGenerator.SOURCE_SIDE) {
languageToFileName = document.getLanguage().getCode();
} else {
if (document.getLanguage().getCode().equals(language1)) {
languageToFileName = language2;
} else {
languageToFileName = language1;
}
}
// adds the ending
fileName += "." + languageToFileName + "." + typeString;
// generate the actual content of the file
String fileContent = backend.getSourceSubtitles(sessionId, docIdLong, 25L, type, way);
response.setContentType(responseType);
// NOTE(review): the file name is placed in the header unquoted - confirm that the
// sanitizing above covers every character that matters in a header value
response.setHeader("Content-Disposition", "attachment; filename=" + fileName + "; charset=UTF-8");
response.setCharacterEncoding("UTF-8");
ServletOutputStream out = response.getOutputStream();
out.write(fileContent.getBytes("UTF-8"));
} catch (InvalidSessionIdException e) {
writeError(response, "Invalid session id exception");
return;
} catch (InvalidDocumentIdException e) {
writeError(response, "Invalid document ID exception");
return;
} catch (IOException e) {
writeError(response, "IOexception");
return;
}
}Example 89
| Project: gdl-tools-master File: NodeConversor.java View source code |
/**
 * Recursively shows or hides the nodes of a tree according to a text filter. A named node is
 * visible when the filter is blank or when its NFD-decomposed, punctuation-free name contains
 * the filter treated the same way. Below a visible node the filter is cleared, and a
 * non-leaf node that has children is always made visible.
 *
 * @param rootNode the node to filter, together with all of its children
 * @param filtro the filter text
 */
public static void filterByText(SelectableNode<?> rootNode, String filtro) {
    boolean matches = false;
    final String nodeName = rootNode.getName();
    if (nodeName != null) {
        if (filtro.trim().length() == 0) {
            matches = true;
        } else {
            final String haystack =
                    FormatConverter.textWithoutPunctuation(Normalizer.normalize(nodeName, Normalizer.Form.NFD));
            final String needle =
                    FormatConverter.textWithoutPunctuation(Normalizer.normalize(filtro, Normalizer.Form.NFD));
            matches = haystack.contains(needle);
        }
    }
    if (rootNode.isSingleSelectionMode()) {
        rootNode.setSelected(false);
    }
    rootNode.setVisible(matches);
    // Once a node matches, its whole subtree is filtered with an empty filter.
    final String childFilter = matches ? "" : filtro;
    if (rootNode.isLeaf()) {
        return;
    }
    for (Enumeration<?> children = rootNode.getAllchildren(); children.hasMoreElements(); ) {
        NodeConversor.filterByText((SelectableNode<?>) children.nextElement(), childFilter);
    }
    if (rootNode.getChildCount() != 0) {
        rootNode.setVisible(true);
    }
}Example 90
| Project: jeboorker-master File: SearchIndex.java View source code |
/**
 * Simplifies text for indexing and comparison.
 *
 * Applies unicodeTrim to the text<br/>
 * Substitutes a single space for each WHITESPACE_PATTERN match<br/>
 * Removes accents (NFD decomposition followed by REMOVE_ACCENT_PATTERN removal)<br/>
 * Converts everything to lower case<br/>
 *
 * @param text the text to clean
 * @return the cleaned, lower-cased text
 */
public static String cleanText(String text) {
    String cleaned = WHITESPACE_PATTERN.matcher(unicodeTrim(text)).replaceAll(" ");
    // Decompose first so that the accents become separate characters, then drop them.
    cleaned = REMOVE_ACCENT_PATTERN
            .matcher(Normalizer.normalize(cleaned, Normalizer.Form.NFD))
            .replaceAll("");
    return cleaned.toLowerCase();
}Example 91
| Project: jw-community-master File: LogUtil.java View source code |
/**
 * Looks up a Tomcat log file by name inside the {@code logs} directory below the
 * {@code catalina.base} system property.
 *
 * @param filename the (possibly URL-encoded) file name; only its last path element is used
 * @return the log file, or {@code null} when {@code catalina.base} is not set, the file does
 *         not exist, it is a directory, or the lookup fails
 * @throws SecurityException if the NFKC-normalized name contains {@code ../} or {@code ..\}
 */
public static File getTomcatLogFile(String filename) {
    // validate input
    final String normalizedFileName = Normalizer.normalize(filename, Normalizer.Form.NFKC);
    final boolean traversal = normalizedFileName.contains("../") || normalizedFileName.contains("..\\");
    if (traversal) {
        throw new SecurityException("Invalid filename " + normalizedFileName);
    }
    final String tomcatBase = System.getProperty("catalina.base");
    if (tomcatBase == null) {
        return null;
    }
    try {
        // getName() keeps only the last path element of the decoded name.
        final String pureFilename = new File(URLDecoder.decode(filename, "UTF-8")).getName();
        final File candidate = new File(tomcatBase + File.separator + "logs", pureFilename);
        if (candidate.exists() && !candidate.isDirectory()) {
            return candidate;
        }
    } catch (Exception e) {
        // Any failure is treated as "file not available".
    }
    return null;
}Example 92
| Project: MoCA-master File: TokenInfoDictionaryBuilder.java View source code |
/**
 * Builds a token info dictionary from CSV files. Every valid line is added to the dictionary
 * and its first column is recorded in {@code dictionaryEntries} under the entry's offset.
 * When {@code normalizeEntries} is set and the first column changes under NFKC, an extra,
 * fully NFKC-normalized copy of the entry is added as well.
 *
 * @param csvFiles the CSV files to read, decoded with the configured {@code encoding}
 * @return the populated dictionary
 * @throws IOException if a file cannot be opened or read
 */
public TokenInfoDictionary buildDictionary(List<File> csvFiles) throws IOException {
    TokenInfoDictionary dictionary = new TokenInfoDictionary(10 * 1024 * 1024);
    for (File file : csvFiles) {
        FileInputStream inputStream = new FileInputStream(file);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, encoding));
            String line = null;
            while ((line = reader.readLine()) != null) {
                String[] entry = CSVUtil.parse(line);
                if (entry.length < 13) {
                    System.out.println("Entry in CSV is not valid: " + line);
                    continue;
                }
                int next = dictionary.put(formatEntry(entry));
                // An unchanged offset means the dictionary did not accept the entry.
                if (next == offset) {
                    System.out.println("Failed to process line: " + line);
                    continue;
                }
                dictionaryEntries.put(offset, entry[0]);
                offset = next;
                // NFKC normalize dictionary entry
                if (normalizeEntries) {
                    // Nothing to add when the surface form is already normalized.
                    if (entry[0].equals(Normalizer.normalize(entry[0], Normalizer.Form.NFKC))) {
                        continue;
                    }
                    String[] normalizedEntry = new String[entry.length];
                    for (int i = 0; i < entry.length; i++) {
                        normalizedEntry[i] = Normalizer.normalize(entry[i], Normalizer.Form.NFKC);
                    }
                    next = dictionary.put(formatEntry(normalizedEntry));
                    dictionaryEntries.put(offset, normalizedEntry[0]);
                    offset = next;
                }
            }
        } finally {
            // Release the file handle even when reading or parsing fails.
            inputStream.close();
        }
    }
    return dictionary;
}Example 93
| Project: nuxeo-master File: FileManagerUtils.java View source code |
/**
 * Finds a non-deleted child of the document at {@code path} whose property
 * {@code propertyName} equals {@code value} after NFC normalization.
 *
 * @param documentManager the session used to resolve the parent and run the query
 * @param path path of the parent document
 * @param value the property value to match; single quotes are backslash-escaped in the query
 * @param propertyName the property to compare; inserted into the query text unchanged
 * @return the first matching document, or {@code null} when there is none
 */
public static DocumentModel getExistingDocByPropertyName(CoreSession documentManager, String path, String value, String propertyName) {
    final String composedValue = Normalizer.normalize(value, Normalizer.Form.NFC);
    final String parentId = documentManager.getDocument(new PathRef(path)).getId();
    final String escapedValue = composedValue.replace("'", "\\\'");
    final String query = "SELECT * FROM Document WHERE ecm:parentId = '" + parentId + "'"
            + " AND " + propertyName + " = '" + escapedValue + "'"
            + " AND ecm:currentLifeCycleState != '" + LifeCycleConstants.DELETED_STATE + "'";
    // Only the first match is needed.
    final DocumentModelList docs = documentManager.query(query, 1);
    return docs.size() > 0 ? docs.get(0) : null;
}Example 94
| Project: sisob-academic-data-extractor-master File: CandidateTypeURL.java View source code |
/**
 * Produces a canonical form of a name: no accents, lower case, and single spaces where
 * punctuation or whitespace used to be.
 * <p>
 * Steps: decompose (NFD) and drop every non-ASCII character, join ". " to ".", replace the
 * symbols {@code _().,|<>-} with spaces, lower-case, turn line breaks and tabs into spaces,
 * collapse runs of spaces into one, and trim.
 *
 * @param input the text to canonicalize
 * @return the canonical form of the text
 */
public static String getCanonicalName(String input) {
    // Canonical decomposition separates base letters from their accents; removing every
    // non-ASCII character then discards the accents.
    String normalized = Normalizer.normalize(input, Normalizer.Form.NFD);
    String output = Pattern.compile("\\P{ASCII}").matcher(normalized).replaceAll("");
    output = output.replace(". ", ".");
    String symbols1 = "_().,|<>-";
    for (char c : symbols1.toCharArray()) {
        output = output.replace(c, ' ');
    }
    output = output.toLowerCase();
    // Line breaks and tabs become plain spaces.
    output = output.trim().replace("\r\n", " ").replace("\r", " ").replace("\n", " ").replace("\t", " ");
    // Collapse runs of spaces in one pass. The former contains/replace loops replaced a
    // single space with a single space, so they never terminated once a space was present.
    output = output.replaceAll(" {2,}", " ");
    return output.trim();
}Example 95
| Project: trombone-master File: Keywords.java View source code |
/**
 * Re-orders {@code keywords} by comparing the NFD-decomposed form of each keyword,
 * ignoring case.
 */
public void sort() {
    final Comparator<String> byDecomposedForm = new Comparator<String>() {
        @Override
        public int compare(String left, String right) {
            final String leftForm = Normalizer.normalize(left, Normalizer.Form.NFD);
            final String rightForm = Normalizer.normalize(right, Normalizer.Form.NFD);
            return leftForm.compareToIgnoreCase(rightForm);
        }
    };
    // Sort a copy, then swap the ordered content back into the collection.
    final List<String> ordered = new ArrayList<String>(keywords);
    Collections.sort(ordered, byDecomposedForm);
    keywords.clear();
    keywords.addAll(ordered);
}Example 96
| Project: wattdepot-master File: UnitsHelper.java View source code |
/**
 * Builds a display name of the form {@code type (unit)}, where the unit text is
 * NFKD-decomposed and reduced to ASCII.
 *
 * @param type
 *          The type of the unit, Energy, Power, Mass, etc.
 * @param unit
 *          The Unit<?>.
 * @return The name.
 */
public static String buildName(String type, Unit<?> unit) {
    final String decomposed = Normalizer.normalize(unit.toString(), Normalizer.Form.NFKD);
    // NOTE(review): Pattern.quote turns the character class into literal text, so the
    // replaceAll below only removes that exact text; non-ASCII characters are actually
    // removed by the ascii round trip plus the '?' removal.
    final String regex = Pattern.quote("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
    String label = decomposed;
    try {
        final byte[] asciiBytes = decomposed.replaceAll(regex, "").getBytes("ascii");
        // Characters that cannot be encoded come back as '?', which is then dropped.
        label = new String(asciiBytes, "ascii").replace("?", "");
    } catch (UnsupportedEncodingException e) { // NOPMD
        // Fall back to the decomposed, unfiltered text.
    }
    return type + " (" + label + ")";
}Example 97
| Project: youtestit-master File: AbstractCreateDocument.java View source code |
// =========================================================================
// METHODS
// =========================================================================
/**
 * Generates the path of a document: the parent path, a separator if the parent path does
 * not already end with one, and the title with non-ASCII characters removed, spaces turned
 * into underscores and every TO_REPLACE item turned into a dash.
 *
 * @param title the document title
 * @return path String representation
 */
protected String generatePath(final String title) {
    final StrBuilder path = new StrBuilder();
    path.append(parentPath);
    final boolean needsSeparator = !parentPath.endsWith(PATH_SPLIT);
    if (needsSeparator) {
        path.append(PATH_SPLIT);
    }
    // Decompose, then keep ASCII only: accents fall away, base letters stay.
    final String asciiTitle = Normalizer.normalize(title, Normalizer.Form.NFD).replaceAll("[^\\p{ASCII}]", "");
    final StrBuilder segment = new StrBuilder();
    segment.append(asciiTitle);
    segment.replaceAll(" ", "_");
    for (String forbidden : TO_REPLACE) {
        segment.replaceAll(forbidden, "-");
    }
    path.append(segment.toString());
    return path.toString().trim();
}Example 98
| Project: archiv-editor-master File: FNStr.java View source code |
/**
 * Returns the first argument in normalized Unicode form. The form defaults to NFC; an
 * optional second argument names another form, and an empty form name returns the string
 * unchanged. ASCII-only input is returned without normalization.
 * @param ctx query context
 * @return string
 * @throws QueryException query exception
 */
private Item normuni(final QueryContext ctx) throws QueryException {
    final byte[] text = checkEStr(expr[0], ctx);
    Form form = Form.NFC;
    if (expr.length == 2) {
        final byte[] formName = uc(trim(checkStr(expr[1], ctx)));
        if (formName.length == 0) {
            return Str.get(text);
        }
        try {
            form = Form.valueOf(string(formName));
        } catch (final IllegalArgumentException ex) {
            NORMUNI.thrw(input, formName);
        }
    }
    if (ascii(text)) {
        return Str.get(text);
    }
    return Str.get(Normalizer.normalize(string(text), form));
}Example 99
| Project: latke-master File: URICoder.java View source code |
/**
 * Encodes a string as UTF-8 and escapes its bytes for use in a URI. The string is brought to
 * NFKC first; bytes that are unreserved, or equal to {@code e}, are copied as characters and
 * every other byte is written through {@code appendEscape}.
 *
 * @param s The string to encode
 * @param e A character that does not require encoding if found in the string.
 * @return the escaped string
 */
private static String encode_UTF8(String s, char e) {
    // TODO: Normalizer requires Java 6!
    final String normalized;
    if (Normalizer.isNormalized(s, Form.NFKC)) {
        normalized = s;
    } else {
        normalized = Normalizer.normalize(s, Form.NFKC);
    }
    // convert String to UTF-8
    final ByteBuffer utf8Bytes = UTF8.encode(normalized);
    // URI encode
    final StringBuffer escaped = new StringBuffer();
    while (utf8Bytes.hasRemaining()) {
        final int unsigned = utf8Bytes.get() & 0xff;
        final boolean literal = isUnreserved(unsigned) || unsigned == e;
        if (literal) {
            escaped.append((char) unsigned);
        } else {
            appendEscape(escaped, (byte) unsigned);
        }
    }
    return escaped.toString();
}Example 100
| Project: rascal-master File: RandomValueTypeVisitor.java View source code |
/**
 * Generates a random string value: the empty string when the coin flip says so or the depth
 * budget is used up, otherwise a random string normalized to NFC.
 */
@Override
public IValue visitString(Type type) {
    final boolean wantEmpty = stRandom.nextBoolean() || maxDepth <= 0;
    if (wantEmpty) {
        return vf.string("");
    }
    final int length = 1 + stRandom.nextInt(maxDepth + 3);
    final String raw = RandomUtil.string(stRandom, length);
    // make sure we are not generating very strange sequences
    return vf.string(Normalizer.normalize(raw, Form.NFC));
}Example 101
| Project: bitcoinj-master File: BIP38PrivateKey.java View source code |
/**
 * Decrypts the key with the given passphrase. The passphrase is normalized to NFC before
 * use, and the result is checked by comparing the first four bytes of the double SHA-256
 * hash of the key's address with the stored address hash.
 *
 * @param passphrase the passphrase to try
 * @return the decrypted key
 * @throws BadPassphraseException if the address hash check fails
 */
public ECKey decrypt(String passphrase) throws BadPassphraseException {
    final String normalizedPassphrase = Normalizer.normalize(passphrase, Normalizer.Form.NFC);
    final ECKey key;
    if (ecMultiply) {
        key = decryptEC(normalizedPassphrase);
    } else {
        key = decryptNoEC(normalizedPassphrase);
    }
    final byte[] addressBytes = key.toAddress(params).toString().getBytes(Charsets.US_ASCII);
    final byte[] actualAddressHash = Arrays.copyOfRange(Sha256Hash.twiceOf(addressBytes).getBytes(), 0, 4);
    if (Arrays.equals(actualAddressHash, addressHash)) {
        return key;
    }
    throw new BadPassphraseException();
}