/* * Copyright 2012 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.gradle.internal.resource.transport.http; import org.cyberneko.html.parsers.SAXParser; import org.gradle.api.resources.ResourceException; import org.gradle.internal.resource.UriTextResource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.List; public class ApacheDirectoryListingParser { private static final Logger LOGGER = LoggerFactory.getLogger(ApacheDirectoryListingParser.class); public List<String> parse(URI baseURI, InputStream content, String contentType) throws Exception { baseURI = addTrailingSlashes(baseURI); if (contentType == null || !contentType.startsWith("text/html")) { throw new ResourceException(baseURI, String.format("Unsupported ContentType %s for directory listing '%s'", contentType, baseURI)); } String contentEncoding = UriTextResource.extractCharacterEncoding(contentType, "utf-8"); final Reader htmlText = new InputStreamReader(content, contentEncoding); final InputSource inputSource = new InputSource(htmlText); final SAXParser htmlParser = new SAXParser(); final AnchorListerHandler anchorListerHandler = new AnchorListerHandler(); htmlParser.setContentHandler(anchorListerHandler); htmlParser.parse(inputSource); List<String> hrefs = anchorListerHandler.getHrefs(); List<URI> uris = resolveURIs(baseURI, hrefs); return filterNonDirectChilds(baseURI, uris); } private URI addTrailingSlashes(URI uri) throws IOException, URISyntaxException { if(uri.getPath() == null){ uri = new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), uri.getPort(), "/", uri.getQuery(), uri.getFragment()); }else if (!uri.getPath().endsWith("/") && !uri.getPath().endsWith(".html")) { uri = new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), uri.getPort(), uri.getPath() + "/", uri.getQuery(), uri.getFragment()); } return uri; } private List<String> filterNonDirectChilds(URI baseURI, List<URI> inputURIs) throws MalformedURLException { final int baseURIPort = baseURI.getPort(); final String baseURIHost = baseURI.getHost(); final String baseURIScheme = baseURI.getScheme(); List<String> uris = new ArrayList<String>(); final String prefixPath = baseURI.getPath(); for (URI parsedURI : inputURIs) { if (parsedURI.getHost() != null && !parsedURI.getHost().equals(baseURIHost)) { continue; } if (parsedURI.getScheme() != null && !parsedURI.getScheme().equals(baseURIScheme)) { continue; } if (parsedURI.getPort() != baseURIPort) { continue; } if (parsedURI.getPath() != null && !parsedURI.getPath().startsWith(prefixPath)) { continue; } String childPathPart = parsedURI.getPath().substring(prefixPath.length(), parsedURI.getPath().length()); if (childPathPart.startsWith("../")) { continue; } if (childPathPart.equals("") || childPathPart.split("/").length > 1) { continue; } String path = parsedURI.getPath(); int pos = path.lastIndexOf('/'); if (pos < 0) { uris.add(path); } else if (pos == path.length() - 1) { int start = path.lastIndexOf('/', pos - 1); if (start < 0) { uris.add(path.substring(0, pos)); } else { uris.add(path.substring(start + 1, pos)); } } else { uris.add(path.substring(pos + 1)); } } return uris; } private List<URI> resolveURIs(URI baseURI, List<String> hrefs) { List<URI> uris = new ArrayList<URI>(); for (String href : hrefs) { try { uris.add(baseURI.resolve(href)); } catch (IllegalArgumentException ex) { LOGGER.debug("Cannot resolve anchor: {}", href); } } return uris; } private class AnchorListerHandler extends DefaultHandler { List<String> hrefs = new ArrayList<String>(); public List<String> getHrefs() { return hrefs; } public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { if (qName.equalsIgnoreCase("A")) { final String href = atts.getValue("href"); if (href != null) { hrefs.add(href); } } } } }