package biz.karms.sinkit.ejb.gsb.util;

import org.apache.commons.validator.routines.InetAddressValidator;

import java.util.ArrayList;
import java.util.List;

/**
 * Static helper methods used by the GSB lookup code.
 * <p>
 * Created by tom on 12/6/15.
 *
 * @author Tomas Kozel
 */
public class GSBUtils {

    /**
     * Upper bound on the number of trailing host name parts (subdomains) that are
     * combined into a lookup variant. The value follows the Google specification.
     */
    private static final int MAX_HOST_PARTS = 5;

    /**
     * Builds the list of lookup expressions for the given IP address or host name.
     * <p>
     * The first element is always the input with a trailing {@code '/'} appended; no
     * other canonicalization is applied. If the input is accepted as an IP address by
     * {@link InetAddressValidator}, or if it has two dot-separated parts or fewer
     * (e.g. {@code whalebone.io}), that single element is the whole result.
     * <p>
     * Otherwise the trailing suffixes of the host name are appended, shortest first:
     * the two-part suffix, then the three-part suffix, and so on. The longest suffix
     * has {@code min(parts - 1, MAX_HOST_PARTS)} parts, because the complete host name
     * is already the first element. For {@code a.b.c.d.e} the result is
     * {@code [a.b.c.d.e/, d.e/, c.d.e/, b.c.d.e/]}.
     *
     * @param ipOrFQDN IP address or fully qualified domain name to look up
     * @return mutable list of lookup variants, full name first, each ending with {@code '/'}
     */
    public static List<String> getLookupVariants(final String ipOrFQDN) {
        // Appending the slash is the only canonicalization performed here.
        final String fullLookup = ipOrFQDN + '/';

        final List<String> variants = new ArrayList<>();
        // The unmodified address/host name is always looked up.
        variants.add(fullLookup);

        // An IP address has no parent domains, so there is nothing more to add.
        if (InetAddressValidator.getInstance().isValid(ipOrFQDN)) {
            return variants;
        }

        // The slash-terminated string is split on purpose: the last label keeps its
        // trailing '/', hence every generated suffix ends with '/' as well.
        final String[] labels = fullLookup.split("\\.");
        final int labelCount = labels.length;
        if (labelCount <= 2) {
            // Two labels or fewer: the full name is the only possible variant.
            return variants;
        }

        // Longest suffix to emit: one label short of the full name (which is already
        // in the list), capped at MAX_HOST_PARTS for very long host names.
        final int longestSuffix = Math.min(labelCount - 1, MAX_HOST_PARTS);

        // Grow the suffix one label at a time from the right-hand side, starting with
        // the last two labels.
        final StringBuilder suffix = new StringBuilder(labels[labelCount - 1]);
        for (int taken = 2; taken <= longestSuffix; taken++) {
            suffix.insert(0, '.').insert(0, labels[labelCount - taken]);
            variants.add(suffix.toString());
        }
        return variants;
    }

    // Disabled legacy helpers (hashing, URL canonicalization, escaping) kept for reference.
    /*
    private static final String HASH_ALGORITHM = "SHA-256";
    private static final String ENCODING = "UTF-8";
    public static final String PROTO_GROUP = "proto";
    public static final String HOST_GROUP = "host";
    public static final String PORT_GROUP = "port";
    public static final String PATH_GROUP = "path";
    public static final String QUERY_GROUP = "query";
    public static final String URL_REGEXP = "^((?<" + PROTO_GROUP + ">[a-zA-Z]*):\\/\\/)?" +
            "(?<" + HOST_GROUP + ">[^/]+(?<!:[0-9]{0,5}))" +
            "(:(?<" + PORT_GROUP + ">[0-9]+)?)?" +
            "((?<" + PATH_GROUP + ">\\/[^?]*)" +
            "(?<" + QUERY_GROUP + ">\\?.*)?)?$";
    public static final Map<String, String> DEFUALT_PORTS = new HashMap<>();

    static {
        DEFUALT_PORTS.put("http", "80");
        DEFUALT_PORTS.put("https", "443");
        DEFUALT_PORTS.put("ftp", "20");
    }

    public static byte[] computeHash(String message) {
        try {
            MessageDigest md = MessageDigest.getInstance(HASH_ALGORITHM);
            md.update(message.getBytes(ENCODING));
            return md.digest();
        } catch (NoSuchAlgorithmException ex) {
            throw new IllegalStateException("What!? " + HASH_ALGORITHM + " is unknown algorithm!? Cannot compute hash.");
        } catch (UnsupportedEncodingException ex) {
            throw new IllegalStateException("What!? " + ENCODING + " is unsupported encoding!? Cannot compute hash.");
        }
    }

    public static String hashToString(byte[] hash) {
        return (new HexBinaryAdapter()).marshal(hash).toLowerCase();
    }

    public static String canonicalizeUrl(String url) throws URISyntaxException {
        String proto;
        String host;
        String port;
        String path;
        String query;
        StringBuilder sb = new StringBuilder();
        try {
            String toBeCanonicalized = url.trim()
                    .replaceAll("[\t\r\n]", "") // remove tabulator and new line
                    .replaceFirst("#.*$", ""); // remove segment
            toBeCanonicalized = unescape(toBeCanonicalized);
            Pattern urlPattern = Pattern.compile(URL_REGEXP);
            Matcher matcher = urlPattern.matcher(toBeCanonicalized);
            if (!matcher.find()) {
                throw new URISyntaxException(toBeCanonicalized, "Provided URL is not valid URL");
            }
            // proto to lower case
            if (StringUtils.isNotBlank(matcher.group(PROTO_GROUP))) {
                proto = matcher.group(PROTO_GROUP).toLowerCase();
            } else {
                proto = "http";
            }
            sb.append(proto).append("://");
            // canonicalize host
            if (StringUtils.isBlank(matcher.group(HOST_GROUP))) {
                throw new NullPointerException("URL host is not specified");
            } else {
                host = matcher.group(HOST_GROUP)
                        .replaceAll("\\.+", ".") // replace consecutive dots with single one
                        .replaceFirst("^\\.", "") // remove leading dot
                        .replaceFirst("\\.$", "") // remove trailing dot
                        .toLowerCase();
                if (host.matches("\\d+")) {
                    BigInteger ip = new BigInteger(host);
                    if (ip.compareTo(BigInteger.valueOf(4294967296l)) < 0 && ip.compareTo(BigInteger.valueOf(255l)) > 0) {
                        host = longToIPv4(ip.longValue());
                    }
                }
            }
            sb.append(host);
            // canonicalize port
            if (StringUtils.isNotBlank(matcher.group(PORT_GROUP))) {
                port = matcher.group(PORT_GROUP);
                if (!port.equals(DEFUALT_PORTS.get(proto))) {
                    sb.append(":").append(port);
                }
            }
            //canonicalize path
            if (StringUtils.isNotBlank(matcher.group(PATH_GROUP))) {
                path = matcher.group(PATH_GROUP)
                        .replaceAll("[^/]+\\/\\.\\.\\/?", "/")
                        .replaceAll("[^/]+\\.\\/", "/")
                        .replaceAll("\\/{2,}", "/");
            } else {
                path = "/";
            }
            sb.append(path);
            //canonicalize query
            if (StringUtils.isNotBlank(matcher.group(QUERY_GROUP))) {
                query = matcher.group(QUERY_GROUP);
            } else {
                query = "";
            }
            sb.append(query);
            return escape(sb.toString());
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("What!? " + ENCODING + " is unsupported encoding!? Cannot unescape URL.");
        }
    }

    private static String unescape(String toBeUnescaped) throws UnsupportedEncodingException {
        Pattern escapePattern = Pattern.compile("(%[a-fA-F0-9]{2})");
        Matcher matcher;
        String escapeSequence;
        String decodedSequence;
        StringBuilder sb = new StringBuilder(toBeUnescaped);
        boolean keepWorking;
        int lastMatchEnd;
        char[] buff;
        do {
            keepWorking = false;
            matcher = escapePattern.matcher(sb.toString());
            buff = sb.toString().toCharArray();
            sb = new StringBuilder();
            lastMatchEnd = 0;
            while (matcher.find()) {
                escapeSequence = matcher.group(0);
                decodedSequence = URLDecoder.decode(escapeSequence, ENCODING);
                sb.append(buff, lastMatchEnd, matcher.start() - lastMatchEnd);
                sb.append(decodedSequence);
                lastMatchEnd = matcher.end();
                keepWorking = true;
            }
            if (lastMatchEnd < buff.length) {
                sb.append(buff, lastMatchEnd, buff.length - lastMatchEnd);
            }
        } while (keepWorking);
        return new String(buff);
    }

    private static String escape(String toBeEscaped) throws UnsupportedEncodingException {
        StringBuilder sb = new StringBuilder();
        String escapedSequence;
        for (int i = 0; i < toBeEscaped.length(); i++) {
            char c = toBeEscaped.charAt(i);
            if (c == '%' || c == '#' || (int) c <= 32 || (int) c >= 127) {
                if (c == 32) {
                    // URLEncoder encodes space as '+' but we do want '%20'
                    escapedSequence = "%20";
                } else {
                    escapedSequence = URLEncoder.encode(String.valueOf(c), ENCODING);
                }
                sb.append(escapedSequence);
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    public static String longToIPv4(long ip) {
        return String.format("%d.%d.%d.%d", (ip >> 24 & 0xff), (ip >> 16 & 0xff), (ip >> 8 & 0xff), (ip & 0xff)).replaceAll("^(0\\.)+", "");
    }
    */
}