// License: GPL. For details, see LICENSE file. package org.openstreetmap.josm.plugins.czechaddress; import java.text.Normalizer; import java.util.ArrayList; import java.util.List; import org.openstreetmap.josm.data.coor.LatLon; /** * Collection of utilities for manipulating strings. * * <p>This set of state-less utilities, which can be handy in all parts of * the plugin. Therefore all methods are {@code static} and the class is * {@code abstract}.</p> * * @author Radomír Černoch, radomir.cernoch@gmail.com */ public abstract class StringUtils { /** * Returns a substring equivalent to <tt>^[0-9]*</tt> regexp match. * * @param s the input string * @return <tt>^[0-9]*</tt> substring match */ public static String extractNumber(String s) { String result = ""; for (int i = 0; i < s.length(); i++) { char ch = s.charAt(i); if (ch >= '0' && ch <= '9') result += ch; else break; } return result; } public static String coordinateToString(double coor) { double degrees = Math.floor(coor); double minutes = Math.floor(60*(coor-degrees)); double seconds = 60*60*(coor-degrees-minutes/60); return String.valueOf(Math.round(degrees)) + "°" + String.valueOf(Math.round(minutes)) + "'" + String.valueOf(Math.round(100*seconds)/100.0) + "\""; } public static String latLonToString(LatLon position) { if (position == null) return ""; return "(lat: " + coordinateToString(position.lat()) + " lon: " + coordinateToString(position.lon()) + ")"; } /** * String matcher with abbreviations and regardless of diacritics. * * <p>Returns {@code true} even if s1="Nam. Svobody" and * s2="Náměstí Svobody".</p> */ public static boolean matchAbbrev(String s1, String s2) { s1 = anglicize(s1); s2 = anglicize(s2); List<Integer> beg1 = new ArrayList<>(4); List<Integer> beg2 = new ArrayList<>(4); char lastChar = ' '; for (int i = 0; i < s1.length(); i++) { if (s1.charAt(i) != ' ' && lastChar == ' ') beg1.add(i); lastChar = s1.charAt(i); } lastChar = ' '; for (int i = 0; i < s2.length(); i++) { if (s2.charAt(i) != ' ' && lastChar == ' ') beg2.add(i); lastChar = s2.charAt(i); } if (beg1.size() != beg2.size()) return false; for (int i = 0; i < beg1.size(); i++) { int pos1 = beg1.get(i); int pos2 = beg2.get(i); boolean doContinue = false; while (pos1 < s1.length() && pos2 < s2.length()) { if (s1.charAt(pos1) == '.' || s2.charAt(pos2) == '.') { doContinue = true; break; } if (s1.charAt(pos1) == ' ' && s2.charAt(pos2) == ' ') { doContinue = true; break; } if (Character.toUpperCase(s1.charAt(pos1)) != Character.toUpperCase(s2.charAt(pos2))) return false; pos1++; pos2++; } if (doContinue) continue; if (pos1 >= s1.length() ^ pos2 >= s2.length()) return false; } return true; } public static String tryTrim(String s) { if (s == null) return null; return s.trim(); } /** * Capitalizes the given string (first letter of every word upper-case, * others lower-case). Czech grammar rules are more or less obeyed. * * @param s string to be capitalized * @return capitaized string */ public static String capitalize(String s) { if (s == null) return null; char[] charr = s.toCharArray(); char last = ' '; char ch = last; for (int i = 0; i < charr.length; i++) { ch = charr[i]; if ((last >= 'a' && last <= 'ž') || (last >= 'A' && last <= 'Ž')) ch = Character.toLowerCase(ch); else ch = Character.toTitleCase(ch); last = charr[i] = ch; } String result = String.valueOf(charr); result = result.replaceAll("Nábř. ", "nábřeží "); result = result.replaceAll("Ul. ", "ulice "); result = result.replaceAll("Nám. ", "náměstí "); result = result.replaceAll("Kpt. ", "kapitána "); result = result.replaceAll("Bří. ", "bratří "); String[] noCapitalize = {"Nad", "Pod", "U", "Na", "Z"}; for (String noc : noCapitalize) { result = result.replaceAll(" "+noc+" ", " "+noc.toLowerCase()+" "); } String[] mesice = {"Ledna", "Února", "Března", "Dubna", "Května", "Máje", "Června", "Července", "Srpna", "Září", "Října", "Listopadu", "Prosince"}; for (String mesic : mesice) { result = result.replaceAll("."+mesic, ". " + mesic.toLowerCase()); } String[] noBegCap = {"Třída", "Ulice", "Náměstí", "Nábřeží"}; for (String noc : noBegCap) { result = result.replaceAll(noc, noc.toLowerCase()); } return result.replaceAll(" ", " "); } /** * Remove diacritics from the string. * * <p>This method was posted on the * SUN forum by * <i>Alan Moore</i>.</p> */ public static String anglicize(String str) { String strNFD = Normalizer.normalize(str, Normalizer.Form.NFD); StringBuilder sb = new StringBuilder(str.length()); for (char ch : strNFD.toCharArray()) { if (Character.getType(ch) != Character.NON_SPACING_MARK) { sb.append(ch); } } return sb.toString(); } }