/*
* Copyright 2013, The Sporting Exchange Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.betfair.testing.utils.cougar.helpers;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Helper for parsing HTML documents and extracting the URLs of links and
 * other referenced resources (stylesheets, anchors, scripts, images).
 */
public class HttpHelpers {

    /**
     * Parses the given stream into a DOM document using JTidy, with quiet mode
     * enabled and warnings suppressed.
     *
     * @param is stream to read the markup from; this method does not close it explicitly
     * @return the DOM document produced by Tidy
     */
    public Document parseInputStream(InputStream is) {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        return tidy.parseDOM(is, null);
    }

    /**
     * Collects the {@code href} values of all {@code link} elements.
     *
     * @param d     document to search
     * @param regex pattern the whole value must match, or {@code null} to accept every value
     * @return matching values in document order; never {@code null}
     */
    public List<String> getLinks(Document d, String regex) {
        return getUrlsFromTag("link", "href", regex, d);
    }

    /**
     * Collects the {@code href} values of all {@code a} elements.
     *
     * @param d     document to search
     * @param regex pattern the whole value must match, or {@code null} to accept every value
     * @return matching values in document order; never {@code null}
     */
    public List<String> getAnchoredLinks(Document d, String regex) {
        return getUrlsFromTag("a", "href", regex, d);
    }

    /**
     * Collects the {@code src} values of all {@code script} elements.
     * Elements without a {@code src} attribute (inline scripts) are skipped.
     *
     * @param d     document to search
     * @param regex pattern the whole value must match, or {@code null} to accept every value
     * @return matching values in document order; never {@code null}
     */
    public List<String> getScripts(Document d, String regex) {
        return getUrlsFromTag("script", "src", regex, d);
    }

    /**
     * Collects the {@code src} values of all {@code img} elements.
     *
     * @param d     document to search
     * @param regex pattern the whole value must match, or {@code null} to accept every value
     * @return matching values in document order; never {@code null}
     */
    public List<String> getImages(Document d, String regex) {
        return getUrlsFromTag("img", "src", regex, d);
    }

    /**
     * Gathers the value of {@code attribute} from every element named {@code name},
     * keeping only the values that fully match {@code regex}.
     *
     * @param name      tag name of the elements to inspect
     * @param attribute attribute whose value is collected
     * @param regex     pattern the whole value must match, or {@code null} for no filtering
     * @param d         document to search
     * @return matching attribute values in document order; empty if there are none
     * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid pattern
     */
    private List<String> getUrlsFromTag(String name, String attribute, String regex, Document d) {
        List<String> links = new ArrayList<String>();
        // Compile once up front instead of re-compiling for every element via String.matches.
        Pattern filter = (regex == null) ? null : Pattern.compile(regex);
        NodeList nodes = d.getElementsByTagName(name);
        for (int p = 0; p < nodes.getLength(); p++) {
            String url = readAttribute(nodes.item(p), attribute);
            if (url == null) {
                // Element has no such attribute (or no readable value): nothing to collect.
                continue;
            }
            if (filter == null || filter.matcher(url).matches()) {
                links.add(url);
            }
        }
        return links;
    }

    /**
     * Reads a single attribute value from a node.
     *
     * @param node      node to read from
     * @param attribute name of the attribute
     * @return the attribute value, or {@code null} if the node has no attributes,
     *         lacks the attribute, or the DOM implementation refuses to provide the value
     */
    private static String readAttribute(Node node, String attribute) {
        try {
            NamedNodeMap attributes = node.getAttributes();
            if (attributes == null) {
                return null;
            }
            // getNamedItem returns null when the attribute is absent; guard before dereferencing.
            Node item = attributes.getNamedItem(attribute);
            return (item == null) ? null : item.getNodeValue();
        } catch (DOMException ignored) {
            // Best effort: a node whose value cannot be read is skipped rather than
            // failing the whole extraction.
            return null;
        }
    }
}