/******************************************************************************* * gMix open source project - https://svs.informatik.uni-hamburg.de/gmix/ * Copyright (C) 2014 SVS * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. *******************************************************************************/ /** * */ package userGeneratedContent.testbedPlugIns.layerPlugIns.layer5application.httpPush_v0_001.helper; import java.net.URI; import java.net.URISyntaxException; import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * @author bash * * This class contains all neccesary methods to parse a html file or a css file * */ public class HtmlParser { /** * Parses all subressiurces from a html file * @param startRequest * @param message * @return List of sub resources */ public static List<String> getAllRessourcesHtml(String startRequest, String message){ LinkedList<String> returnvalue = new LinkedList<String>(); Pattern pattern = Pattern.compile("<(?:(?:img|script)\\s[^>]*\\bsrc\\s*=\\s*[\"']([^\"']*)[\"']|link\\s[^>]*\\bhref\\s*=\\s*[\"']([^\"']*)[\"'])[^>]*>"); Matcher matcher = pattern.matcher(message); while (matcher.find()){ String tester; tester = matcher.group(1) != null ? matcher.group(1) : matcher.group(2); if(!tester.contains("mailto")){ try { returnvalue.add((new URI(startRequest).resolve(tester).getPath())+"?"+(new URI(startRequest).resolve(tester).getQuery()) ); // returnvalue.add((new URI(startRequest).resolve(tester).getPath()) ); //returnvalue.add((new URI(startRequest).resolve(tester).getPath())//+"?"+(new URI(startRequest).resolve(tester).getQuery()) // ); } catch (URISyntaxException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } return returnvalue; } /** * Parses all sub resources from a css file * @param startRequest * @param message * @return List of sub resources */ public static List<String> getAllRessourcesCss(String message){ LinkedList<String> returnvalue = new LinkedList<String>(); Pattern pattern = Pattern.compile("background: [^;]*?url\\([\"']([^\"']*?)[\"']\\)[^;]*?;"); Matcher matcher = pattern.matcher(message); while (matcher.find()){ returnvalue.add(matcher.group(1)); } return returnvalue; } }