/* * Copyright 2011. Pablo Palazon (pablo.palazon@gmail.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.serinus.parser; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class SerinusParser { public List<String> parserLinks(String text) { List<String> links = new ArrayList<String>(); Pattern urlPattern = Pattern .compile("((https?|ftp|gopher|telnet|file|notes|ms-help):((//)|(\\\\))+[\\w\\d:#@%/;$\\(\\)~_?\\+-=\\.&]*)"); Matcher matcher = urlPattern.matcher(text); while (matcher.find()) { links.add(matcher.group()); } return links; } public List<String> parserUsers(String text) { List<String> users = new ArrayList<String>(); Pattern userPattern = Pattern.compile("(@\\S*)"); Matcher matcher = userPattern.matcher(text); while (matcher.find()) { users.add(matcher.group().replaceFirst("@", "")); } return users; } public List<String> parserTopics(String text) { List<String> topics = new ArrayList<String>(); Pattern topicPattern = Pattern.compile("(#\\S*)"); Matcher matcher = topicPattern.matcher(text); while (matcher.find()) { topics.add(matcher.group().replaceFirst("#", "")); } return topics; } }