/* ********************************************************************** ** ** Copyright notice ** ** ** ** (c) 2005-2009 RSSOwl Development Team ** ** http://www.rssowl.org/ ** ** ** ** All rights reserved ** ** ** ** This program and the accompanying materials are made available under ** ** the terms of the Eclipse Public License v1.0 which accompanies this ** ** distribution, and is available at: ** ** http://www.rssowl.org/legal/epl-v10.html ** ** ** ** A copy is found in the file epl-v10.html and important notices to the ** ** license from the team is found in the textfile LICENSE.txt distributed ** ** in this package. ** ** ** ** This copyright notice MUST APPEAR in all copies of the file! ** ** ** ** Contributors: ** ** RSSOwl Development Team - initial API and implementation ** ** ** ** ********************************************************************** */ package org.rssowl.core.util; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Utility Class for working with <code>Regular Expressions</code>. * * @author bpasero */ public class RegExUtils { /* The reg. expression for an URL */ private static final String URL_REGEX = "(www([\\wv\\-\\.,@?^=%&:/~\\+#]*[\\w\\-\\@?^=%&/~\\+#])?)|(http|ftp|https|feed):\\/\\/[\\w]+(.[\\w]+)([\\wv\\-\\.,@?^=%&:/~\\+#]*[\\w\\-\\@?^=%&/~\\+#])?"; //$NON-NLS-1$ /* The compiled pattern to match an URL */ private static final Pattern URL_REGEX_PATTERN = Pattern.compile(URL_REGEX); /* The reg. expression for a strict URL (requires protocol) */ private static final String STRICT_URL_REGEX = "(http|ftp|https|feed):\\/\\/[\\w]+(.[\\w]+)([\\wv\\-\\.,@?^=%&:/~\\+#]*[\\w\\-\\@?^=%&/~\\+#])?"; //$NON-NLS-1$ /* The compiled pattern to match a strict URL (requires protocol) */ private static final Pattern STRICT_URL_REGEX_PATTERN = Pattern.compile(STRICT_URL_REGEX); /* This utility class constructor is hidden */ private RegExUtils() { // Protect default constructor } /** * Check if the given URL is valid * * @param url The URL to check * @return boolean TRUE if the link is valid */ public static boolean isValidURL(String url) { return URL_REGEX_PATTERN.matcher(url).matches(); } /** * Extract all links from the given String and returns it. This method will * NOT consider relative links. Only use this method when you are searching * for absolute links in a text (which may also be HTML). * * @param text The String to search for links * @param strict If <code>TRUE</code>, require a protocol for any URL in * the Text * @return A List of Strings matching the criteria for absolute URLs, or an * empty List if none. */ public static List<String> extractLinksFromText(String text, boolean strict) { List<String> urls = new ArrayList<String>(); Matcher match = strict ? STRICT_URL_REGEX_PATTERN.matcher(text) : URL_REGEX_PATTERN.matcher(text); while (match.find()) { String str = match.group(0); if (StringUtils.isSet(str) && !urls.contains(str)) urls.add(str); } return urls; } }