/*************************************************** * * cismet GmbH, Saarbruecken, Germany * * ... and it just works. * ****************************************************/ /* * Copyright (C) 2010 jweintraut * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package de.cismet.tools.gui.xhtmlrenderer; import org.xhtmlrenderer.resource.XMLResource; import org.xhtmlrenderer.swing.NaiveUserAgent; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; import de.cismet.security.WebAccessManager; import de.cismet.security.exceptions.AccessMethodIsNotSupportedException; import de.cismet.security.exceptions.MissingArgumentException; import de.cismet.security.exceptions.NoHandlerForURLException; import de.cismet.security.exceptions.RequestFailedException; /** * A subclass of Flying Saucer's NaiveUserAgent. It's intended to fetch all requested resources via WebAccessManager. * * @version $Revision$, $Date$ */ public class WebAccessManagerUserAgent extends NaiveUserAgent { //~ Static fields/initializers --------------------------------------------- private static final org.apache.log4j.Logger LOG = org.apache.log4j.Logger.getLogger( WebAccessManagerUserAgent.class); //~ Instance fields -------------------------------------------------------- private Pattern encodingPattern = Pattern.compile("(encoding|charset)=\"?(.*?)[;\" ]"); private Pattern windowsEncodingPattern = Pattern.compile("windows-(\\d{4})"); //~ Methods ---------------------------------------------------------------- @Override protected InputStream resolveAndOpenStream(final String uri) { InputStream result = null; if ((uri != null) && (uri.trim().length() > 0)) { if (uri.startsWith("jar") || uri.startsWith("file")) { try { result = new URL(uri).openStream(); } catch (MalformedURLException ex) { LOG.error("Can't load from URI '" + uri + "' since the resulting URL is malformed.", ex); // NOI18N } catch (IOException ex) { LOG.error("Can't load from URI '" + uri + "'.", ex); // NOI18N } } else { try { result = WebAccessManager.getInstance().doRequest(new URL(uri)); } catch (URISyntaxException ex) { LOG.error("Can't load from URI '" + uri + "' since its syntax is broken.", ex); // NOI18N } catch (MissingArgumentException ex) { LOG.error("Can't load from URI '" + uri + "' since it couldn't be converted to a URL.", ex); // NOI18N } catch (AccessMethodIsNotSupportedException ex) { LOG.error("Can't load from URI '" + uri + "' since the access method isn't supported.", ex); // NOI18N } catch (RequestFailedException ex) { LOG.error("The request to load URI '" + uri + "' failed.", ex); // NOI18N } catch (NoHandlerForURLException ex) { LOG.error("Can't load from URI '" + uri + "' since there is no matching handler.", ex); // NOI18N } catch (Exception ex) { LOG.error("Can't load from URI '" + uri + "' since an unexcpected exception occurred.", ex); // NOI18N } } } return result; } @Override public XMLResource getXMLResource(final String uri) { final Reader reader = resolveAndOpenEncodedStream(uri); final XMLResource xmlResource = XMLResource.load(reader); if (reader != null) { try { reader.close(); } catch (IOException e) { LOG.warn("Couldn't close reader.", e); } } return xmlResource; } /** * This method is intended to avoid broken umlauts when reading from text files. To do this it returns an * InputStream for the given URI which uses the correct encoding. Since this method should be used to open streams * on HTML/XHTML documents the correct encoding is determined by parsing the file referenced by the given URI. So * one should be sure to call this method on text files. * * @param uri The URI referencing the resource which is to be opened * * @return A reader which allows reading from the resource using the correct encoding. */ private Reader resolveAndOpenEncodedStream(final String uri) { Reader result = null; String encoding = null; final BufferedReader reader = new BufferedReader(new InputStreamReader(resolveAndOpenStream(uri))); Matcher matcher = null; String line = null; try { while ((line = reader.readLine()) != null) { matcher = encodingPattern.matcher(line); if (matcher.find()) { encoding = matcher.group(2); break; } } } catch (IOException ex) { LOG.warn("Couldn't determine encoding of resource: '" + uri + "'.", ex); // NOI18N } matcher = windowsEncodingPattern.matcher(encoding); if (matcher.find()) { encoding = "Cp" + matcher.group(1); // NOI18N } if (LOG.isDebugEnabled()) { LOG.debug("Encoding resource '" + uri + "' in '" + encoding + "'."); // NOI18N } try { result = new InputStreamReader(resolveAndOpenStream(uri), encoding); } catch (UnsupportedEncodingException ex) { LOG.error("Error opening a reader on URI '" + uri + "' with unsupported encoding '" + encoding + "'.", ex); } return result; } }