/**
 * OrbisGIS is a java GIS application dedicated to research in GIScience.
 * OrbisGIS is developed by the GIS group of the DECIDE team of the
 * Lab-STICC CNRS laboratory, see <http://www.lab-sticc.fr/>.
 *
 * The GIS group of the DECIDE team is located at :
 *
 * Laboratoire Lab-STICC – CNRS UMR 6285
 * Equipe DECIDE
 * UNIVERSITÉ DE BRETAGNE-SUD
 * Institut Universitaire de Technologie de Vannes
 * 8, Rue Montaigne - BP 561 56017 Vannes Cedex
 *
 * OrbisGIS is distributed under GPL 3 license.
 *
 * Copyright (C) 2007-2014 CNRS (IRSTV FR CNRS 2488)
 * Copyright (C) 2015-2017 CNRS (Lab-STICC UMR CNRS 6285)
 *
 * This file is part of OrbisGIS.
 *
 * OrbisGIS is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * OrbisGIS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * OrbisGIS. If not, see <http://www.gnu.org/licenses/>.
 *
 * For more information, please consult: <http://www.orbisgis.org/>
 * or contact directly:
 * info_at_ orbisgis.org
 */
package org.orbisgis.commons.utils;

import java.util.regex.Pattern;

/**
 * Utility class that provides useful functions for dealing with text.
 *
 * @author Erwan Bocher
 * @author Antoine Gourlay &lt;antoine@gourlay.fr&gt;
 */
public final class TextUtils {

    /** Splits a LIKE / SIMILAR TO pattern on its two wildcard characters. */
    private static final Pattern SPLITLIKE = Pattern.compile("%|_");

    /** Matches an escaped backslash, i.e. two consecutive backslash characters. */
    private static final Pattern ESCAPE = Pattern.compile("\\\\\\\\");

    /** Matches a literal dot, which has to be escaped when it is copied into a regular expression. */
    private static final Pattern DOT = Pattern.compile("\\.");

    /**
     * Returns the platform-specific line separator, or "\n" if it is not defined for some reason.
     *
     * @return the platform-specific line separator.
     */
    public static String getEolStr() {
        return System.getProperty("line.separator", "\n");
    }

    /**
     * Converts a plain string to an HTML fragment. From S. Bayer.
     *
     * <ul>
     * <li>{@code "}, {@code &}, {@code <} and {@code >} are replaced by their named entities;</li>
     * <li>a line feed is replaced by {@code <br/>};</li>
     * <li>in a run of blanks every second blank becomes a non-breaking space entity, so that the run keeps its
     * width while the text can still be word-wrapped;</li>
     * <li>every code point greater than or equal to 160 is written as a decimal numeric character reference;</li>
     * <li>everything else is copied unchanged.</li>
     * </ul>
     *
     * @param string the text to convert, must not be {@code null}
     * @return the HTML-escaped text
     * @throws NullPointerException if {@code string} is {@code null}
     */
    public static String stringToHTMLString(String string) {
        int len = string.length();
        StringBuilder sb = new StringBuilder(len);
        // true if the previous character was a blank that was emitted as a plain space
        boolean lastWasBlankChar = false;
        // Iterate over code points rather than chars so that a supplementary character yields a single
        // numeric reference instead of two (invalid) surrogate references.
        int i = 0;
        while (i < len) {
            int cp = string.codePointAt(i);
            i += Character.charCount(cp);
            if (cp == ' ') {
                // Blanks get extra work: replacing all of them with non-breaking spaces would lose word
                // breaking, so plain and non-breaking spaces alternate.
                if (lastWasBlankChar) {
                    sb.append("&nbsp;");
                } else {
                    sb.append(' ');
                }
                lastWasBlankChar = !lastWasBlankChar;
                continue;
            }
            lastWasBlankChar = false;
            switch (cp) {
                case '"':
                    sb.append("&quot;");
                    break;
                case '&':
                    sb.append("&amp;");
                    break;
                case '<':
                    sb.append("&lt;");
                    break;
                case '>':
                    sb.append("&gt;");
                    break;
                case '\n':
                    sb.append("<br/>");
                    break;
                default:
                    if (cp < 160) {
                        // below the non-breaking space (U+00A0): copied as is
                        sb.append((char) cp);
                    } else {
                        // anything else is written as a decimal numeric character reference
                        sb.append("&#").append(cp).append(';');
                    }
                    break;
            }
        }
        return sb.toString();
    }

    /**
     * Return the absolute start and end position of the specified positions in a text composed by the specified
     * lines.
     *
     * Each line that precedes a position contributes its length plus one extra character, i.e. the lines are
     * considered to be joined by a single-character separator.
     *
     * @param bl start position line
     * @param bc start position column
     * @param el end position line
     * @param ec end position column
     * @param lines lines representing the multi-line text
     * @return An array of two elements. The first one is the start position (inclusive) and the second is the end
     * position (exclusive)
     */
    public static int[] getLocation(int bl, int bc, int el, int ec, String[] lines) {
        // "+ bl" / "+ el": one separator character for every line that precedes the position
        int start = getPosition(bl, bc, lines) + bl;
        int end = getPosition(el, ec, lines) + el;
        // the end column is inclusive, the returned end offset is exclusive
        return new int[]{start, end + 1};
    }

    /**
     * Computes the offset of a line/column position, not counting any line separator.
     *
     * @param line zero-based index of the line
     * @param column column inside that line
     * @param lines lines representing the multi-line text
     * @return the sum of the lengths of the lines before {@code line}, plus {@code column}
     */
    private static int getPosition(int line, int column, String[] lines) {
        int start = 0;
        for (int i = 0; i < line; i++) {
            start += lines[i].length();
        }
        start += column;
        return start;
    }

    /**
     * Creates a {@link Pattern } instance for a case-sensitive LIKE pattern.
     *
     * A LIKE pattern matches always the complete string, and allows for the following special characters: - '%'
     * matches any sequence of zero or more characters - '_' matches any single character - '\' escapes the meaning
     * of a special character (any of '\', '%', '_').
     *
     * This implementation is not strict about the escape character: while the only way to match a '%' or a '_'
     * character is to escape it, a '\' character followed by anything else than '%', '_' or '\' will not report any
     * error and will just match a regular '\' character. This means that {@code str\\str} and {@code str\str} both
     * only match the String literal {@code str\str}.
     *
     * Note: this implementation is not entirely identical to the SQL standard's definition of the LIKE operator.
     * The non-strict behavior described in the paragraph above is not in the standard. Most implementations will
     * reject non-properly-escaped pattern strings, so always escaping the backslash is the preferred option.
     *
     * @param pattern a LIKE pattern string
     * @return a standard pattern
     */
    public static Pattern buildLikePattern(String pattern) {
        return buildLikePattern(pattern, false);
    }

    /**
     * Creates a {@link Pattern } instance for a LIKE pattern.
     *
     * A LIKE pattern matches always the complete string, and allows for the following special characters: - '%'
     * matches any sequence of zero or more characters - '_' matches any single character - '\' escapes the meaning
     * of a special character (any of '\', '%', '_').
     *
     * This implementation is not strict about the escape character: while the only way to match a '%' or a '_'
     * character is to escape it, a '\' character followed by anything else than '%', '_' or '\' will not report any
     * error and will just match a regular '\' character. This means that {@code str\\str} and {@code str\str} both
     * only match the String literal {@code str\str}.
     *
     * Note: this implementation is not entirely identical to the SQL standard's definition of the LIKE operator.
     * The non-strict behavior described in the paragraph above is not in the standard. Most implementations will
     * reject non-properly-escaped pattern strings, so always escaping the backslash is the preferred option.
     *
     * @param pattern a LIKE pattern string
     * @param caseInsensitive true if the match has to be case insensitive
     * @return a standard pattern
     */
    public static Pattern buildLikePattern(String pattern, boolean caseInsensitive) {
        String regex = toRegex(pattern, true);
        if (caseInsensitive) {
            return Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
        }
        return Pattern.compile(regex);
    }

    /**
     * Creates a {@link Pattern } instance for a SIMILAR TO pattern.
     *
     * A SIMILAR TO pattern matches always the complete string, and allows any POSIX special character EXCEPT that:
     * - '%' matches any sequence of zero or more characters - '_' matches any single character (instead of '.' in
     * POSIX) - '\' escapes the meaning of any character (POSIX and above).
     *
     * This implementation is not strict about the escape character: while the only way to match a special character
     * is to escape it, a '\' character followed by a regular character will not report any error and will just
     * match a regular '\' character. This means that {@code str\\str} and {@code str\str} both only match the
     * String literal {@code str\str}.
     *
     * See {@link Pattern } for the list of supported POSIX special characters and features.
     *
     * Note: this implementation is not entirely identical to the SQL standard's definition of a regular expression.
     * It allows some POSIX special character, like '\r' (carriage-return) or '\s' (whitespace character), that are
     * not allowed in the SQL definition. Furthermore, most implementations will reject non-properly-escaped pattern
     * strings, so always escaping the backslash is the preferred option.
     *
     * @param pattern a SIMILAR TO pattern string
     * @return a standard pattern
     */
    public static Pattern buildSimilarToPattern(String pattern) {
        return Pattern.compile(toRegex(pattern, false));
    }

    /**
     * Translates a LIKE or SIMILAR TO pattern into the source of an anchored regular expression.
     *
     * The pattern is split on its wildcards; the text segments and the wildcards between them are then emitted
     * in order. A segment that ends with a backslash escapes the wildcard that follows it: that wildcard is
     * glued, as a literal character, to the next segment.
     *
     * NOTE(review): a backslash that is the very last character of the pattern is dropped, and an escaped
     * backslash placed right before a wildcard is read as "backslash + escaped wildcard". Both look unintended
     * but are kept as they are - confirm before changing.
     *
     * @param pattern the LIKE / SIMILAR TO pattern
     * @param quoteLiterals true to emit text segments through {@link Pattern#quote(String)} (LIKE), false to
     * copy them as regular expression source with only the dots escaped (SIMILAR TO)
     * @return the regular expression source, enclosed in '^' and '$'
     */
    private static String toRegex(String pattern, boolean quoteLiterals) {
        String[] s = SPLITLIKE.split(pattern);
        StringBuilder b = new StringBuilder();
        b.append("^");
        if (s.length == 0) {
            // the pattern contains nothing but wildcards
            for (int i = 0; i < pattern.length(); i++) {
                switch (pattern.charAt(i)) {
                    case '%':
                        b.append(".*");
                        break;
                    case '_':
                        b.append(".");
                        break;
                    default:
                        break;
                }
            }
        } else {
            // index in 'pattern' of the next character to consume
            int pos = 0;
            for (int i = 0; i < s.length; i++) {
                boolean esc = false;
                if (s[i].endsWith("\\")) {
                    // account for the backslash, which is not copied to the output
                    pos++;
                    if (i + 1 < s.length) {
                        // merge this segment, the escaped wildcard and the next segment into one literal
                        s[i + 1] = s[i].substring(0, s[i].length() - 1)
                                + pattern.charAt(pos + s[i].length() - 1) + s[i + 1];
                        continue;
                    } else {
                        s[i] = s[i].substring(0, s[i].length() - 1);
                        esc = true;
                    }
                }
                if (!s[i].isEmpty()) {
                    // an escaped backslash stands for a single backslash
                    String text = ESCAPE.matcher(s[i]).replaceAll("\\\\");
                    if (quoteLiterals) {
                        b.append(Pattern.quote(text));
                    } else {
                        b.append(DOT.matcher(text).replaceAll("\\\\."));
                    }
                    pos += s[i].length();
                }
                if (pos < pattern.length()) {
                    // the wildcard that follows the segment, literal if it was escaped
                    switch (pattern.charAt(pos)) {
                        case '%':
                            b.append(esc ? "%" : ".*");
                            pos++;
                            break;
                        case '_':
                            b.append(esc ? "_" : ".");
                            pos++;
                            break;
                        default:
                            break;
                    }
                }
            }
            // trailing wildcards: String.split drops the empty segments they would produce
            for (; pos < pattern.length(); pos++) {
                switch (pattern.charAt(pos)) {
                    case '%':
                        b.append(".*");
                        break;
                    case '_':
                        b.append(".");
                        break;
                    default:
                        break;
                }
            }
        }
        b.append("$");
        return b.toString();
    }

    private TextUtils() {
    }
}