/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.hortonworks.amuise.cdrstorm.storm.utils;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.Proxy;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers for finding URLs in free text, reducing a URL to its bare
 * host name, and looking up the redirect target of a URL.
 *
 * @author adammuise
 */
public class Toolbox {

    /**
     * Pattern for recognizing a URL, based off RFC 3986. Compiled once because
     * {@link Pattern} instances are immutable and safe to share, whereas
     * compiling on every call is wasted work.
     *
     * Group 1 is the leading {@code http://}, {@code https://}, {@code ftp://},
     * {@code ftps://} or {@code www.} marker; the non-capturing prefix before
     * it consumes the single non-word character (if any) preceding the URL.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
            "(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)"
            + "(([\\w\\-]+\\.){1,}?([\\w\\-.~]+\\/?)*"
            + "[\\p{Alnum}.,%_=?\\-+()\\[\\]\\*$~@!:/{};']*)",
            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);

    /**
     * Extracts every URL-looking substring from the given text.
     *
     * @param input text to scan; may be {@code null}
     * @return the matches in order of appearance, exactly as they occur in
     *         {@code input}; an empty list if there are none or if
     *         {@code input} is {@code null} (never {@code null})
     */
    public static ArrayList<String> extractURLfromString(String input) {
        ArrayList<String> urls = new ArrayList<String>();
        if (input == null) {
            return urls;
        }
        Matcher matcher = URL_PATTERN.matcher(input);
        while (matcher.find()) {
            // Start at group 1 so the separator character consumed by the
            // leading (?:^|\W) is not included in the extracted URL.
            urls.add(input.substring(matcher.start(1), matcher.end()));
        }
        return urls;
    }

    /**
     * Reduces a URL to its lower-cased host name, dropping the scheme, any
     * leading {@code www.}, and everything after the host (port, path, query).
     *
     * @param input the URL to reduce; surrounding whitespace is ignored
     * @return the host without a leading {@code www.}, or {@code null} if
     *         {@code input} is {@code null}, is not a syntactically valid URI,
     *         or has no host component (for example when the scheme is missing)
     */
    public static String stripProtocolPrefix(String input) {
        if (input == null) {
            return null;
        }
        try {
            // Locale.ROOT keeps the lower-casing independent of the JVM's
            // default locale (e.g. Turkish maps 'I' to a dotless 'ı', which
            // would corrupt the host name).
            URI uri = new URI(input.trim().toLowerCase(Locale.ROOT));
            String domain = uri.getHost();
            if (domain == null) {
                return null;
            }
            return domain.startsWith("www.") ? domain.substring(4) : domain;
        } catch (URISyntaxException ignored) {
            // Unparseable input is an expected outcome here; it is reported to
            // the caller through the null return value.
            return null;
        }
    }

    /**
     * Asks the server behind {@code anyURL} where it redirects to, without
     * following the redirect and without going through a proxy.
     *
     * Reading the header implicitly opens the connection and sends the request.
     *
     * @param anyURL an absolute {@code http} or {@code https} URL
     * @return the value of the response's {@code Location} header, or
     *         {@code null} if the response carries no such header
     * @throws IOException if {@code anyURL} is malformed, is not an HTTP(S)
     *         URL, or the request fails
     */
    public static String fullURL(String anyURL) throws IOException {
        URL url = new URL(anyURL);
        String protocol = url.getProtocol();
        // Only HTTP(S) connections can be cast to HttpURLConnection; reject
        // anything else with the declared exception instead of a
        // ClassCastException.
        if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
            throw new IOException("Cannot look up redirect for non-HTTP URL: " + anyURL);
        }
        HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection(Proxy.NO_PROXY);
        try {
            httpURLConnection.setInstanceFollowRedirects(false);
            return httpURLConnection.getHeaderField("Location");
        } finally {
            // Release the connection even when reading the header fails.
            httpURLConnection.disconnect();
        }
    }
}