/* * Copyright (c) 2005 Henri Sivonen * Copyright (c) 2007-2017 Mozilla Foundation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ package nu.validator.xml; import java.io.IOException; import java.io.InputStream; import java.security.KeyManagementException; import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.util.zip.GZIPInputStream; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.SSLContext; import javax.servlet.http.HttpServletRequest; import org.relaxng.datatype.DatatypeException; import nu.validator.datatype.ContentSecurityPolicy; import nu.validator.datatype.Html5DatatypeException; import nu.validator.io.BoundedInputStream; import nu.validator.io.ObservableInputStream; import nu.validator.io.StreamBoundException; import nu.validator.io.StreamObserver; import nu.validator.io.SystemIdIOException; import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.HttpClient; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.HttpGet; import org.apache.http.config.Registry; import org.apache.http.config.RegistryBuilder; import org.apache.http.conn.socket.ConnectionSocketFactory; import org.apache.http.conn.socket.PlainConnectionSocketFactory; import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.SSLContextBuilder; import org.apache.http.conn.ssl.TrustStrategy; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.client.LaxRedirectStrategy; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.log4j.Logger; import org.xml.sax.EntityResolver; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import io.mola.galimatias.URL; import 
io.mola.galimatias.GalimatiasParseException;

/**
 * An {@link EntityResolver} that retrieves entities over HTTP or HTTPS using
 * a shared {@link HttpClient}.
 *
 * <p>
 * Each resolver instance enforces a budget on the number of requests it may
 * issue and an upper bound on the size of each retrieved resource. The
 * response <code>Content-Type</code> is interpreted by a
 * {@link ContentTypeParser}, and selected response headers
 * (<code>X-UA-Compatible</code>, <code>Content-Security-Policy</code>) are
 * checked and reported to the supplied {@link ErrorHandler}.
 *
 * <p>
 * {@link #setParams(int, int, int)} must be called before any instance is
 * asked to resolve an entity, because it creates the shared client.
 *
 * @version $Id: PrudentHttpEntityResolver.java,v 1.1 2005/01/08 08:11:26
 *          hsivonen Exp $
 * @author hsivonen
 */
@SuppressWarnings("deprecation")
public class PrudentHttpEntityResolver implements EntityResolver {

    private static final Logger log4j = Logger.getLogger(PrudentHttpEntityResolver.class);

    /**
     * Name of the servlet request attribute that, when set to
     * {@link Boolean#TRUE}, makes the resolver accept non-200 responses.
     */
    private static final String IGNORE_RESPONSE_STATUS_ATTRIBUTE = "http://validator.nu/properties/ignore-response-status";

    /** The HTTP client shared by all resolver instances. */
    private static HttpClient client;

    /** The request budget that each new resolver instance starts with. */
    private static int maxRequests;

    /** Maximum resource size in bytes; a negative value disables the limit. */
    private final long sizeLimit;

    /** Receiver for problems found while fetching; may be <code>null</code>. */
    private final ErrorHandler errorHandler;

    /** Remaining request budget; a negative value disables the limit. */
    private int requestsLeft;

    private boolean allowRnc = false;

    private boolean allowHtml = false;

    private boolean allowXhtml = false;

    private boolean acceptAllKnownXmlTypes = false;

    private boolean allowGenericXml = true;

    private final ContentTypeParser contentTypeParser;

    private String userAgent;

    /** The servlet request being served; may be <code>null</code>. */
    private final HttpServletRequest request;

    /**
     * Creates the shared HTTP client and sets its timeouts and connection
     * limits.
     *
     * <p>
     * The total connection limit is read from the system property
     * <code>nu.validator.servlet.max-total-connections</code> (default 200)
     * and the redirect limit from
     * <code>nu.validator.servlet.max-redirects</code> (default 20).
     *
     * @param connectionTimeout
     *            timeout until connection established in milliseconds. Zero
     *            means no timeout.
     * @param socketTimeout
     *            timeout for waiting for data in milliseconds. Zero means no
     *            timeout.
     * @param maxRequests
     *            maximum number of connections to a particular host; also
     *            used as the number of requests each resolver instance
     *            created afterwards may issue
     * @throws NumberFormatException
     *             if one of the numeric system properties is not an integer
     */
    public static void setParams(int connectionTimeout, int socketTimeout,
            int maxRequests) {
        PrudentHttpEntityResolver.maxRequests = maxRequests;

        // Parsed once so that the builder and the explicit connection manager
        // below agree. The manager is the one that is actually in effect.
        int maxTotalConnections = Integer.parseInt(System.getProperty(
                "nu.validator.servlet.max-total-connections", "200"));

        Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory> create()
                .register("http", PlainConnectionSocketFactory.getSocketFactory())
                .register("https", SSLConnectionSocketFactory.getSocketFactory())
                .build();

        HttpClientBuilder builder = HttpClients.custom();
        builder.setRedirectStrategy(new LaxRedirectStrategy());
        builder.setMaxConnPerRoute(maxRequests);
        builder.setMaxConnTotal(maxTotalConnections);

        // NOTE(review): promiscuous mode is on by default. It trusts every
        // certificate and skips hostname verification, so HTTPS gives no
        // authenticity guarantee for fetched resources unless the property
        // is set to something other than "true".
        if ("true".equals(System.getProperty(
                "nu.validator.xml.promiscuous-ssl", "true"))) {
            try {
                SSLContext promiscuousSSLContext = new SSLContextBuilder()
                        .loadTrustMaterial(null, new TrustStrategy() {
                            @Override
                            public boolean isTrusted(X509Certificate[] arg0,
                                    String arg1) throws CertificateException {
                                return true;
                            }
                        }).build();
                builder.setSslcontext(promiscuousSSLContext);
                HostnameVerifier verifier = SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER;
                SSLConnectionSocketFactory promiscuousSSLConnSocketFactory = new SSLConnectionSocketFactory(
                        promiscuousSSLContext, verifier);
                registry = RegistryBuilder.<ConnectionSocketFactory> create()
                        .register("https", promiscuousSSLConnSocketFactory)
                        .register("http", PlainConnectionSocketFactory.getSocketFactory())
                        .build();
            } catch (KeyManagementException | KeyStoreException
                    | NoSuchAlgorithmException e) {
                // The default registry created above stays in use.
                log4j.error("Failed to set up promiscuous SSL; using the default SSL socket factory.", e);
            }
        }

        PoolingHttpClientConnectionManager phcConnMgr = new PoolingHttpClientConnectionManager(
                registry);
        phcConnMgr.setDefaultMaxPerRoute(maxRequests);
        phcConnMgr.setMaxTotal(maxTotalConnections);
        builder.setConnectionManager(phcConnMgr);

        RequestConfig.Builder config = RequestConfig.custom();
        config.setCircularRedirectsAllowed(true);
        config.setMaxRedirects(Integer.parseInt(System.getProperty(
                "nu.validator.servlet.max-redirects", "20")));
        config.setConnectTimeout(connectionTimeout);
        config.setSocketTimeout(socketTimeout);
        config.setCookieSpec(CookieSpecs.IGNORE_COOKIES);
        client = builder.setDefaultRequestConfig(config.build()).build();
    }

    /**
     * Sets the value sent in the <code>User-Agent</code> request header.
     *
     * @param ua
     *            the user agent string
     */
    public void setUserAgent(String ua) {
        userAgent = ua;
    }

    /**
     * Creates a resolver whose request budget is the value most recently
     * passed to {@link #setParams(int, int, int)}.
     *
     * @param sizeLimit
     *            maximum size of a retrieved resource in bytes; a negative
     *            value disables the limit
     * @param laxContentType
     *            passed through to the {@link ContentTypeParser}
     * @param errorHandler
     *            receiver for problems found while fetching; may be
     *            <code>null</code>
     * @param request
     *            the servlet request being served, consulted for the
     *            ignore-response-status attribute; may be <code>null</code>
     */
    public PrudentHttpEntityResolver(long sizeLimit, boolean laxContentType,
            ErrorHandler errorHandler, HttpServletRequest request) {
        this.request = request;
        this.sizeLimit = sizeLimit;
        this.requestsLeft = maxRequests;
        this.errorHandler = errorHandler;
        this.contentTypeParser = new ContentTypeParser(errorHandler,
                laxContentType, this.allowRnc, this.allowHtml,
                this.allowXhtml, this.acceptAllKnownXmlTypes,
                this.allowGenericXml);
    }

    /**
     * Creates a resolver that is not associated with a servlet request.
     *
     * @param sizeLimit
     *            maximum size of a retrieved resource in bytes; a negative
     *            value disables the limit
     * @param laxContentType
     *            passed through to the {@link ContentTypeParser}
     * @param errorHandler
     *            receiver for problems found while fetching; may be
     *            <code>null</code>
     */
    public PrudentHttpEntityResolver(long sizeLimit, boolean laxContentType,
            ErrorHandler errorHandler) {
        this(sizeLimit, laxContentType, errorHandler, null);
    }

    /**
     * Fetches the resource identified by <code>systemId</code> with an HTTP
     * GET and returns it as a typed input source.
     *
     * <p>
     * The connection is released when the returned byte stream is closed,
     * fails, or is finalized. If this method throws, the request is aborted
     * and its connection released before the exception propagates.
     *
     * @param publicId
     *            the public identifier, used only for error reporting
     * @param systemId
     *            the URL to fetch; must use the http or https scheme
     * @return the input source for the fetched resource
     * @throws SAXException
     *             if the scheme is unsupported, the response status is not
     *             200 (unless the request asks to ignore it), the response
     *             has no body, or the declared length exceeds the size limit
     * @throws IOException
     *             if the request budget is used up, the URL cannot be
     *             parsed, or the transfer fails
     * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String,
     *      java.lang.String)
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId)
            throws SAXException, IOException {
        if (requestsLeft > -1) {
            if (requestsLeft == 0) {
                throw new IOException(
                        "Number of permitted HTTP requests exceeded.");
            }
            requestsLeft--;
        }
        HttpGet m = null;
        try {
            URL url;
            try {
                url = URL.parse(systemId);
            } catch (GalimatiasParseException e) {
                IOException ioe = new IOException(e.getMessage(), e);
                reportFatal(new SAXParseException(e.getMessage(), publicId,
                        systemId, -1, -1, ioe));
                throw ioe;
            }
            String scheme = url.scheme();
            if (!("http".equals(scheme) || "https".equals(scheme))) {
                String msg = "Unsupported URI scheme: \u201C" + scheme
                        + "\u201D.";
                SAXParseException spe = new SAXParseException(msg, publicId,
                        systemId, -1, -1, new IOException(msg));
                reportFatal(spe);
                throw spe;
            }
            // Continue with the normalized form of the URL.
            systemId = url.toString();
            try {
                m = new HttpGet(systemId);
            } catch (IllegalArgumentException e) {
                SAXParseException spe = new SAXParseException(
                        e.getMessage(), publicId, systemId, -1, -1,
                        new IOException(e.getMessage(), e));
                reportFatal(spe);
                throw spe;
            }
            m.setHeader("User-Agent", userAgent);
            m.setHeader("Accept", buildAccept());
            m.setHeader("Accept-Encoding", "gzip");
            log4j.info(systemId);

            HttpResponse response = client.execute(m);
            final String baseUri = m.getURI().toString();

            // A missing or non-Boolean attribute counts as "do not ignore".
            boolean ignoreResponseStatus = request != null
                    && Boolean.TRUE.equals(request.getAttribute(
                            IGNORE_RESPONSE_STATUS_ATTRIBUTE));
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != 200 && !ignoreResponseStatus) {
                String msg = "HTTP resource not retrievable."
                        + " The HTTP status from the remote server was: "
                        + statusCode + ".";
                reportFatal(new SAXParseException(msg, publicId, baseUri, -1,
                        -1, new SystemIdIOException(baseUri, msg)));
                throw new ResourceNotRetrievableException(
                        String.format("%s: %s", baseUri, msg));
            }

            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // A response may legitimately carry no body; without this
                // check the code below would fail with a
                // NullPointerException.
                String msg = "HTTP resource not retrievable."
                        + " The response from the remote server had no body.";
                reportFatal(new SAXParseException(msg, publicId, baseUri, -1,
                        -1, new SystemIdIOException(baseUri, msg)));
                throw new ResourceNotRetrievableException(
                        String.format("%s: %s", baseUri, msg));
            }
            long len = entity.getContentLength();
            if (sizeLimit > -1 && len > sizeLimit) {
                SAXParseException spe = new SAXParseException(
                        "Resource size exceeds limit.", publicId, baseUri,
                        -1, -1, new StreamBoundException(
                                "Resource size exceeds limit."));
                reportFatal(spe);
                throw spe;
            }

            Header ct = response.getFirstHeader("Content-Type");
            String contentType = (ct == null) ? null : ct.getValue();
            TypedInputSource is = contentTypeParser.buildTypedInputSource(
                    baseUri, publicId, contentType);

            Header cl = response.getFirstHeader("Content-Language");
            if (cl != null) {
                is.setLanguage(cl.getValue().trim());
            }

            Header xuac = response.getFirstHeader("X-UA-Compatible");
            if (xuac != null) {
                String val = xuac.getValue().trim();
                if (!"ie=edge".equalsIgnoreCase(val) && errorHandler != null) {
                    errorHandler.error(new SAXParseException(
                            "X-UA-Compatible HTTP header must have the value \u201CIE=edge\u201D,"
                                    + " was \u201C" + val + "\u201D.",
                            publicId, systemId, -1, -1));
                }
            }

            Header csp = response.getFirstHeader("Content-Security-Policy");
            if (csp != null) {
                try {
                    ContentSecurityPolicy.THE_INSTANCE.checkValid(
                            csp.getValue().trim());
                } catch (DatatypeException e) {
                    if (errorHandler != null) {
                        SAXParseException spe = new SAXParseException(
                                "Content-Security-Policy HTTP header: "
                                        + e.getMessage(),
                                publicId, systemId, -1, -1);
                        // Only an Html5DatatypeException can mark itself as
                        // a warning; anything else is reported as an error.
                        if (e instanceof Html5DatatypeException
                                && ((Html5DatatypeException) e).isWarning()) {
                            errorHandler.warning(spe);
                        } else {
                            errorHandler.error(spe);
                        }
                    }
                }
            }

            final HttpGet meth = m;
            InputStream stream = entity.getContent();
            if (sizeLimit > -1) {
                stream = new BoundedInputStream(stream, sizeLimit, baseUri);
            }
            Header ce = response.getFirstHeader("Content-Encoding");
            if (ce != null) {
                String val = ce.getValue().trim();
                if ("gzip".equalsIgnoreCase(val)
                        || "x-gzip".equalsIgnoreCase(val)) {
                    stream = new GZIPInputStream(stream);
                    if (sizeLimit > -1) {
                        // Bound the decompressed size as well as the
                        // compressed size.
                        stream = new BoundedInputStream(stream, sizeLimit,
                                baseUri);
                    }
                }
            }
            is.setByteStream(new ObservableInputStream(stream,
                    new StreamObserver() {
                        private final Logger observerLog = Logger.getLogger(
                                "nu.validator.xml.PrudentEntityResolver.StreamObserver");

                        // Ensures that the connection is given back only once.
                        private boolean released = false;

                        @Override
                        public void closeCalled() {
                            observerLog.debug("closeCalled");
                            if (!released) {
                                observerLog.debug("closeCalled, not yet released");
                                released = true;
                                try {
                                    meth.releaseConnection();
                                } catch (Exception e) {
                                    observerLog.debug(
                                            "closeCalled, releaseConnection",
                                            e);
                                }
                            }
                        }

                        @Override
                        public void exceptionOccurred(Exception ex)
                                throws IOException {
                            if (!released) {
                                released = true;
                                abortAndRelease(meth, observerLog,
                                        "exceptionOccurred, ");
                            }
                            if (ex instanceof SystemIdIOException) {
                                throw (SystemIdIOException) ex;
                            } else if (ex instanceof IOException) {
                                IOException ioe = (IOException) ex;
                                throw new SystemIdIOException(baseUri,
                                        ioe.getMessage(), ioe);
                            } else if (ex instanceof RuntimeException) {
                                throw (RuntimeException) ex;
                            } else {
                                throw new RuntimeException(
                                        "API contract violation. Wrong exception type.",
                                        ex);
                            }
                        }

                        @Override
                        public void finalizerCalled() {
                            if (!released) {
                                released = true;
                                abortAndRelease(meth, observerLog,
                                        "finalizerCalled, ");
                            }
                        }
                    }));
            return is;
        } catch (IOException | RuntimeException | SAXException e) {
            if (m != null) {
                abortAndRelease(m, log4j, "");
            }
            throw e;
        }
    }

    /**
     * Passes a fatal error to the error handler if one was supplied.
     */
    private void reportFatal(SAXParseException spe) throws SAXException {
        if (errorHandler != null) {
            errorHandler.fatalError(spe);
        }
    }

    /**
     * Aborts the request and then releases its connection, logging rather
     * than propagating any failure from either step.
     */
    private static void abortAndRelease(HttpGet method, Logger logger,
            String logPrefix) {
        try {
            method.abort();
        } catch (Exception e) {
            logger.debug(logPrefix + "abort", e);
        } finally {
            try {
                method.releaseConnection();
            } catch (Exception e) {
                logger.debug(logPrefix + "releaseConnection", e);
            }
        }
    }

    /**
     * @return Returns the allowRnc.
     */
    public boolean isAllowRnc() {
        return allowRnc;
    }

    /**
     * Sets the allowRnc flag here and on the content type parser.
     *
     * @param allowRnc
     *            The allowRnc to set.
     */
    public void setAllowRnc(boolean allowRnc) {
        this.allowRnc = allowRnc;
        this.contentTypeParser.setAllowRnc(allowRnc);
    }

    /**
     * Sets the allowHtml flag here and on the content type parser.
     *
     * @param allowHtml
     *            the allowHtml to set
     */
    public void setAllowHtml(boolean allowHtml) {
        this.allowHtml = allowHtml;
        this.contentTypeParser.setAllowHtml(allowHtml);
    }

    /**
     * Returns the acceptAllKnownXmlTypes.
     *
     * @return the acceptAllKnownXmlTypes
     */
    public boolean isAcceptAllKnownXmlTypes() {
        return acceptAllKnownXmlTypes;
    }

    /**
     * Sets the acceptAllKnownXmlTypes flag here and on the content type
     * parser.
     *
     * @param acceptAllKnownXmlTypes
     *            the acceptAllKnownXmlTypes to set
     */
    public void setAcceptAllKnownXmlTypes(boolean acceptAllKnownXmlTypes) {
        this.acceptAllKnownXmlTypes = acceptAllKnownXmlTypes;
        this.contentTypeParser.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes);
    }

    /**
     * Returns the allowGenericXml.
     *
     * @return the allowGenericXml
     */
    public boolean isAllowGenericXml() {
        return allowGenericXml;
    }

    /**
     * Sets the allowGenericXml flag here and on the content type parser.
     *
     * @param allowGenericXml
     *            the allowGenericXml to set
     */
    public void setAllowGenericXml(boolean allowGenericXml) {
        this.allowGenericXml = allowGenericXml;
        this.contentTypeParser.setAllowGenericXml(allowGenericXml);
    }

    /**
     * Returns the allowXhtml.
     *
     * @return the allowXhtml
     */
    public boolean isAllowXhtml() {
        return allowXhtml;
    }

    /**
     * Sets the allowXhtml flag here and on the content type parser.
     *
     * @param allowXhtml
     *            the allowXhtml to set
     */
    public void setAllowXhtml(boolean allowXhtml) {
        this.allowXhtml = allowXhtml;
        this.contentTypeParser.setAllowXhtml(allowXhtml);
    }

    /**
     * Returns the value sent in the <code>Accept</code> request header.
     */
    private String buildAccept() {
        return "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    }

    /**
     * Returns the allowHtml.
     *
     * @return the allowHtml
     */
    public boolean isAllowHtml() {
        return allowHtml;
    }

    /**
     * Tells whether generic XML, RNC and XHTML are all disallowed.
     *
     * @return <code>true</code> if none of generic XML, RNC and XHTML is
     *         allowed
     */
    public boolean isOnlyHtmlAllowed() {
        return !isAllowGenericXml() && !isAllowRnc() && !isAllowXhtml();
    }

    /**
     * Thrown when the remote server's response cannot be used as the
     * requested resource, for example because the HTTP status was not 200.
     */
    public class ResourceNotRetrievableException extends SAXException {
        public ResourceNotRetrievableException(String message) {
            super(message);
        }
    }
}