/**
* Copyright (C) 2010 Orbeon, Inc.
*
* This program is free software; you can redistribute it and/or modify it under the terms of the
* GNU Lesser General Public License as published by the Free Software Foundation; either version
* 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* The full text of the license is available at http://www.gnu.org/copyleft/lesser.html
*/
package org.orbeon.oxf.processor.generator;
import org.apache.log4j.Logger;
import org.orbeon.dom.Element;
import org.orbeon.oxf.cache.*;
import org.orbeon.oxf.common.OXFException;
import org.orbeon.oxf.common.OrbeonLocationException;
import org.orbeon.oxf.common.ValidationException;
import org.orbeon.oxf.http.Credentials;
import org.orbeon.oxf.http.HttpStatusCodeException;
import org.orbeon.oxf.json.Converter;
import org.orbeon.oxf.json.Symbols;
import org.orbeon.oxf.pipeline.api.PipelineContext;
import org.orbeon.oxf.processor.*;
import org.orbeon.oxf.resources.ResourceManagerWrapper;
import org.orbeon.oxf.resources.URLFactory;
import org.orbeon.oxf.resources.handler.OXFHandler;
import org.orbeon.oxf.resources.handler.SystemHandler;
import org.orbeon.oxf.util.*;
import org.orbeon.oxf.xml.*;
import org.orbeon.oxf.xml.dom4j.Dom4jUtils;
import org.orbeon.oxf.xml.dom4j.LocationData;
import org.w3c.tidy.Tidy;
import org.xml.sax.*;
import scala.Option;
import scala.Tuple2;
import javax.xml.transform.dom.DOMSource;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.*;
/**
* Generates SAX events from a document fetched from a URL.
*
* NOTE: For XML content-type and encoding related questions, check out the following draft
* document:
*
* http://www.faqs.org/rfcs/rfc3023.html
* http://www.ietf.org/internet-drafts/draft-murata-kohn-lilley-xml-00.txt
*/
public class URLGenerator extends ProcessorImpl {
// Log4j logger for this processor, and an IndentedLogger wrapping it.
private static Logger logger = Logger.getLogger(URLGenerator.class);
public static IndentedLogger indentedLogger = new IndentedLogger(logger);
// Hard-coded fallback values for the parser-related settings. For validating, handle-xinclude and
// handle-lexical, the processor first looks at the config element, then at the processor properties,
// and only then falls back to these constants.
public static final boolean DEFAULT_VALIDATING = false;
public static final boolean DEFAULT_HANDLE_XINCLUDE = false;
public static final boolean DEFAULT_EXTERNAL_ENTITIES = false;
public static final boolean DEFAULT_HANDLE_LEXICAL = true;
// Hard-coded fallback values for the remaining settings read from the config element.
private static final boolean DEFAULT_FORCE_CONTENT_TYPE = false;
private static final boolean DEFAULT_FORCE_ENCODING = false;
private static final boolean DEFAULT_IGNORE_CONNECTION_ENCODING = false;
private static final boolean DEFAULT_CACHE_USE_LOCAL_CACHE = true;
private static final boolean DEFAULT_ENABLE_CONDITIONAL_GET = false;
private static final boolean DEFAULT_PREEMPTIVE_AUTHENTICATION = true;
// Namespace URI passed as the schema URI of the config input in the no-argument constructor.
public static final String URL_NAMESPACE_URI = "http://www.orbeon.org/oxf/xml/url";
// Names of the processor properties looked up through getPropertySet().
public static final String VALIDATING_PROPERTY = "validating";
public static final String HANDLE_XINCLUDE_PROPERTY = "handle-xinclude";
public static final String HANDLE_LEXICAL_PROPERTY = "handle-lexical";
// Configuration supplied directly through a constructor. When non-null, the config input is not read.
private ConfigURIReferences localConfigURIReferences;
public URLGenerator() {
addInputInfo(new ProcessorInputOutputInfo(INPUT_CONFIG, URL_NAMESPACE_URI));
addOutputInfo(new ProcessorInputOutputInfo(OUTPUT_DATA));
}
/**
 * Creates a generator reading the given URL, with the default XInclude handling.
 *
 * @param url URL of the document to read, as a string
 */
public URLGenerator(String url) {
    this(url, DEFAULT_HANDLE_XINCLUDE);
}
public URLGenerator(String url, boolean handleXInclude) {
init(URLFactory.createURL(url), handleXInclude);
}
/**
 * Creates a generator reading the given URL, with the default XInclude handling.
 *
 * @param url URL of the document to read
 */
public URLGenerator(URL url) {
    this(url, DEFAULT_HANDLE_XINCLUDE);
}
/**
 * Creates a generator reading the given URL.
 *
 * @param url            URL of the document to read
 * @param handleXInclude whether XInclude processing is requested from the parser
 */
public URLGenerator(URL url, boolean handleXInclude) {
    // Same steps as init(): store the local configuration, then declare the data output
    this.localConfigURIReferences = new ConfigURIReferences(new Config(url, handleXInclude));
    addOutputInfo(new ProcessorInputOutputInfo(OUTPUT_DATA));
}
/**
 * Shared constructor logic: stores a local configuration built from the URL and declares
 * the OUTPUT_DATA output. No config input is declared in this case.
 *
 * @param url            URL of the document to read
 * @param handleXInclude whether XInclude processing is requested from the parser
 */
private void init(URL url, boolean handleXInclude) {
    final Config localConfig = new Config(url, handleXInclude);
    this.localConfigURIReferences = new ConfigURIReferences(localConfig);

    final ProcessorInputOutputInfo dataOutputInfo = new ProcessorInputOutputInfo(OUTPUT_DATA);
    addOutputInfo(dataOutputInfo);
}
public URLGenerator(URL url, String contentType, boolean forceContentType) {
this.localConfigURIReferences = new ConfigURIReferences(new Config(url, contentType, forceContentType));
addOutputInfo(new ProcessorInputOutputInfo(OUTPUT_DATA));
}
public URLGenerator(URL url,
String contentType,
boolean forceContentType,
String encoding,
boolean forceEncoding,
boolean ignoreConnectionEncoding,
XMLParsing.ParserConfiguration parserConfiguration,
boolean handleLexical,
String mode,
scala.collection.immutable.Map<String, String[]> headerNameValues,
String forwardHeaders,
List<String> readHeaders,
boolean cacheUseLocalCache,
boolean enableConditionalGET
) {
this.localConfigURIReferences =
new ConfigURIReferences(
new Config(
url,
contentType,
forceContentType,
encoding,
forceEncoding,
ignoreConnectionEncoding,
parserConfiguration,
handleLexical,
mode,
headerNameValues,
forwardHeaders,
readHeaders,
cacheUseLocalCache,
enableConditionalGET,
null,
null,
DEFAULT_PREEMPTIVE_AUTHENTICATION,
null,
new TidyConfig(null)
)
);
addOutputInfo(new ProcessorInputOutputInfo(OUTPUT_DATA));
}
/**
 * Holder for all the settings of one URL generator invocation: target URL, content type and
 * encoding handling, headers, parser configuration, output mode, cache control, credentials
 * and Tidy options. No setters are exposed; values are fixed by the constructors.
 */
private static class Config {

    private URL url;

    // Content type and encoding handling
    private String contentType = ProcessorUtils.DEFAULT_CONTENT_TYPE;
    private boolean forceContentType = DEFAULT_FORCE_CONTENT_TYPE;
    private String encoding;
    private boolean forceEncoding = DEFAULT_FORCE_ENCODING;
    private boolean ignoreConnectionEncoding = DEFAULT_IGNORE_CONNECTION_ENCODING;

    // Headers
    private scala.collection.immutable.Map<String, String[]> headerNameValues;
    private String forwardHeaders;
    private List<String> readHeaders;

    // Parsing and output
    private XMLParsing.ParserConfiguration parserConfiguration = null;
    private boolean handleLexical = DEFAULT_HANDLE_LEXICAL;
    private String mode;

    // Cache control
    private boolean cacheUseLocalCache = DEFAULT_CACHE_USE_LOCAL_CACHE;
    private boolean enableConditionalGET = DEFAULT_ENABLE_CONDITIONAL_GET;

    // Authentication
    private String username;
    private String password;
    private String domain;
    private boolean preemptiveAuth = DEFAULT_PREEMPTIVE_AUTHENTICATION;

    private TidyConfig tidyConfig;

    /**
     * Minimal configuration: plain parser configuration, all other settings at their defaults.
     *
     * @param url URL of the document to read
     */
    public Config(URL url) {
        this.url = url;
        this.parserConfiguration = XMLParsing.ParserConfiguration.PLAIN;
        this.tidyConfig = new TidyConfig(null);
    }

    /**
     * Configuration with explicit XInclude handling; validation and external entities use
     * their hard-coded defaults.
     *
     * @param url            URL of the document to read
     * @param handleXInclude whether XInclude processing is requested from the parser
     */
    public Config(URL url, boolean handleXInclude) {
        this.url = url;
        this.parserConfiguration = new XMLParsing.ParserConfiguration(DEFAULT_VALIDATING, handleXInclude, DEFAULT_EXTERNAL_ENTITIES);
        this.tidyConfig = new TidyConfig(null);
    }

    /**
     * Minimal configuration with a caller-provided content type.
     *
     * @param url              URL of the document to read
     * @param contentType      content type provided by the caller
     * @param forceContentType whether the provided content type overrides the resource's
     */
    public Config(URL url, String contentType, boolean forceContentType) {
        this(url);
        this.contentType = contentType;
        // The flag comes from the caller only. An earlier unconditional assignment to true was
        // immediately overwritten by this line and has been removed.
        this.forceContentType = forceContentType;
        this.tidyConfig = new TidyConfig(null);
    }

    /**
     * Fully-specified configuration.
     *
     * The local cache is forced on when conditional GET is requested, and conditional GET is
     * turned off when the parser configuration handles XInclude.
     *
     * @param parserConfiguration must not be null: it is dereferenced here and in toString()
     */
    public Config(URL url,
                  String contentType,
                  boolean forceContentType,
                  String encoding,
                  boolean forceEncoding,
                  boolean ignoreConnectionEncoding,
                  XMLParsing.ParserConfiguration parserConfiguration,
                  boolean handleLexical,
                  String mode,
                  scala.collection.immutable.Map<String, String[]> headerNameValues,
                  String forwardHeaders,
                  List<String> readHeaders,
                  boolean cacheUseLocalCache,
                  boolean enableConditionalGET,
                  String username,
                  String password,
                  boolean preemptiveAuth,
                  String domain,
                  TidyConfig tidyConfig
    ) {
        this.url = url;
        this.contentType = contentType;
        this.forceContentType = forceContentType;
        this.encoding = encoding;
        this.forceEncoding = forceEncoding;
        this.ignoreConnectionEncoding = ignoreConnectionEncoding;
        this.headerNameValues = headerNameValues;
        this.forwardHeaders = forwardHeaders;
        this.readHeaders = readHeaders;
        this.parserConfiguration = parserConfiguration;
        this.handleLexical = handleLexical;
        this.mode = mode;

        // Local cache required for conditional GET
        this.cacheUseLocalCache = cacheUseLocalCache || enableConditionalGET;

        // NOTE: Hard to handle this if XInclude is enabled as we would need to conditional-GET all dependencies,
        // and then cache all individually-included documents. Or, store the non-XInclude-processed document in
        // cache. Either way, it's complicated. So we disable conditional GET if XInclude is enabled for now. This
        // could be easier if we had a real HTTP client document cache.
        this.enableConditionalGET = enableConditionalGET && ! parserConfiguration.handleXInclude;

        // Authentication
        this.username = username;
        this.password = password;
        this.domain = domain;
        this.preemptiveAuth = preemptiveAuth;

        this.tidyConfig = tidyConfig;
    }

    public URL getURL() {
        return url;
    }

    public String getContentType() {
        return contentType;
    }

    public boolean isForceContentType() {
        return forceContentType;
    }

    public String getEncoding() {
        return encoding;
    }

    public boolean isForceEncoding() {
        return forceEncoding;
    }

    public boolean isIgnoreConnectionEncoding() {
        return ignoreConnectionEncoding;
    }

    public TidyConfig getTidyConfig() {
        return tidyConfig;
    }

    public XMLParsing.ParserConfiguration getParserConfiguration() {
        return parserConfiguration;
    }

    public boolean isHandleLexical() {
        return handleLexical;
    }

    public String getMode() {
        return mode;
    }

    public scala.collection.immutable.Map<String, String[]> getHeaderNameValues() {
        return headerNameValues;
    }

    public String getForwardHeaders() {
        return forwardHeaders;
    }

    public List<String> getReadHeaders() {
        return readHeaders;
    }

    public boolean isCacheUseLocalCache() {
        return cacheUseLocalCache;
    }

    public boolean isEnableConditionalGET() {
        return enableConditionalGET;
    }

    public String getUsername() {
        return username;
    }

    public String getPassword() {
        return password;
    }

    public String getDomain() {
        return domain;
    }

    public boolean isPreemptiveAuth() {
        return preemptiveAuth;
    }

    /**
     * Returns a string combining the settings listed below. The enclosing processor uses this
     * string to build cache keys, so its format must stay stable.
     *
     * The string contains the password in clear text: do not write it to logs.
     *
     * NOTE(review): mode, headers, forward-headers and read-headers are not part of the string,
     * so two configurations differing only in those share the same key. Confirm this is intended.
     */
    @Override
    public String toString() {
        return "[" + getURL().toExternalForm() + "|" + getContentType() + "|" + getEncoding() + "|" + parserConfiguration.getKey() + "|" + isHandleLexical() + "|" + isForceContentType()
                + "|" + isForceEncoding() + "|" + isIgnoreConnectionEncoding() + "|" + getUsername() + "|" + getPassword() + "|" + isPreemptiveAuth() + "|" + getDomain()
                + "|" + tidyConfig + "]";
    }
}
/**
 * Pairs a configuration with the URI references collected the last time the document was read
 * in XML mode. The list is null until a read has happened, and after a non-XML read.
 */
private static class ConfigURIReferences {

    public Config config;
    public List<URIProcessorOutputImpl.URIReference> uriReferences;

    public ConfigURIReferences(Config config) {
        this.config = config;
    }
}
@Override
public ProcessorOutput createOutput(final String name) {
final ProcessorOutput output = new ProcessorOutputImpl(URLGenerator.this, name) {
// Reads the document designated by the configuration and sends it to xmlReceiver.
//
// Steps:
// 1. obtain the configuration, either the local one set by a constructor or one parsed (and cached)
//    from the config input;
// 2. replay a document already stored in the request state or found valid in the local cache;
// 3. otherwise open a resource handler, pick content type and mode, read the resource, and store the
//    result in the local cache when enabled and the status code is not a failure.
//
// Exceptions are rethrown with location data pointing to the URL when none is available.
public void readImpl(PipelineContext pipelineContext, XMLReceiver xmlReceiver) {
makeSureStateIsSet(pipelineContext);
// Read config input into a URL, cache if possible
final ConfigURIReferences configURIReferences = URLGenerator.this.localConfigURIReferences != null ? localConfigURIReferences :
readCacheInputAsObject(pipelineContext, getInputByName(INPUT_CONFIG), new CacheableInputReader<ConfigURIReferences>() {
public ConfigURIReferences read(PipelineContext context, ProcessorInput input) {
final Element configElement = readInputAsOrbeonDom(context, input).getRootElement();
// Processor location data
final LocationData locationData = URLGenerator.this.getLocationData();
// Shortcut if the url is direct child of config
{
final String url = configElement.getTextTrim();
if (url != null && !url.equals("")) {
// Legacy, don't even care about handling relative URLs
return new ConfigURIReferences(new Config(URLFactory.createURL(url)));
}
}
// We have the /config/url syntax
final String url = XPathUtils.selectStringValueNormalize(configElement, "/config/url");
if (url == null) {
throw new ValidationException("URL generator found null URL for config:\n" + Dom4jUtils.domToString(configElement), locationData);
}
// Get content-type
final String contentType = XPathUtils.selectStringValueNormalize(configElement, "/config/content-type");
final boolean forceContentType = ProcessorUtils.selectBooleanValue(configElement, "/config/force-content-type", DEFAULT_FORCE_CONTENT_TYPE);
if (forceContentType && (contentType == null || contentType.equals("")))
throw new ValidationException("The force-content-type element requires a content-type element.", locationData);
// Get encoding
final String encoding = XPathUtils.selectStringValueNormalize(configElement, "/config/encoding");
final boolean forceEncoding = ProcessorUtils.selectBooleanValue(configElement, "/config/force-encoding", DEFAULT_FORCE_ENCODING);
final boolean ignoreConnectionEncoding = ProcessorUtils.selectBooleanValue(configElement, "/config/ignore-connection-encoding", DEFAULT_IGNORE_CONNECTION_ENCODING);
if (forceEncoding && (encoding == null || encoding.equals("")))
throw new ValidationException("The force-encoding element requires an encoding element.", locationData);
// Get headers
final scala.collection.immutable.Map<String, String[]> headerNameValues =
URLGeneratorBase.extractHeaders(configElement);
final String forwardHeaders; {
// Get from configuration first, otherwise use global default
final org.orbeon.dom.Node configForwardHeaders = XPathUtils.selectSingleNode(configElement, "/config/forward-headers");
forwardHeaders = configForwardHeaders != null ? XPathUtils.selectStringValue(configForwardHeaders, ".") : Connection.jHeadersToForward();
}
// Collect the string value of each read-header child element
final List<String> readHeaders = new LinkedList<String>();
for (Object o : configElement.elements("read-header")) {
final Element el = (Element) o;
readHeaders.add(el.getStringValue());
}
// Validation setting: local, then properties, then hard-coded default
final boolean defaultValidating = getPropertySet().getBoolean(VALIDATING_PROPERTY, DEFAULT_VALIDATING);
final boolean validating = ProcessorUtils.selectBooleanValue(configElement, "/config/validating", defaultValidating);
// XInclude handling
final boolean defaultHandleXInclude = getPropertySet().getBoolean(HANDLE_XINCLUDE_PROPERTY, DEFAULT_HANDLE_XINCLUDE);
final boolean handleXInclude = ProcessorUtils.selectBooleanValue(configElement, "/config/handle-xinclude", defaultHandleXInclude);
// External entities
final boolean externalEntities = ProcessorUtils.selectBooleanValue(configElement, "/config/external-entities", DEFAULT_EXTERNAL_ENTITIES);
// Lexical handling: local, then properties, then hard-coded default
final boolean defaultHandleLexical = getPropertySet().getBoolean(HANDLE_LEXICAL_PROPERTY, DEFAULT_HANDLE_LEXICAL);
final boolean handleLexical = ProcessorUtils.selectBooleanValue(configElement, "/config/handle-lexical", defaultHandleLexical);
// Output mode
final String mode = XPathUtils.selectStringValueNormalize(configElement, "/config/mode");
// Cache control
final boolean cacheUseLocalCache = ProcessorUtils.selectBooleanValue(configElement, "/config/cache-control/use-local-cache", DEFAULT_CACHE_USE_LOCAL_CACHE);
final boolean enableConditionalGET = ProcessorUtils.selectBooleanValue(configElement, "/config/cache-control/conditional-get", DEFAULT_ENABLE_CONDITIONAL_GET);
// Authentication
final org.orbeon.dom.Node configAuthentication = XPathUtils.selectSingleNode(configElement, "/config/authentication");
final String username = configAuthentication == null ? null : XPathUtils.selectStringValue(configAuthentication, "username");
final String password = configAuthentication == null ? null : XPathUtils.selectStringValue(configAuthentication, "password");
final boolean preemptiveAuth = ProcessorUtils.selectBooleanValue(configElement, "/config/authentication/preemptive", DEFAULT_PREEMPTIVE_AUTHENTICATION);
final String domain = configAuthentication == null ? null : XPathUtils.selectStringValue(configAuthentication, "domain");
// Get Tidy config (will only apply if content-type is text/html)
final TidyConfig tidyConfig = new TidyConfig(XPathUtils.selectSingleNode(configElement, "/config/tidy-options"));
// Create configuration object
// Use location data if present so that relative URLs can be supported
// NOTE: We check whether there is a protocol, because we have
// some Java location data which are NOT to be interpreted as
// base URIs
final URL fullURL = (locationData != null && locationData.file() != null && NetUtils.urlHasProtocol(locationData.file()))
? URLFactory.createURL(locationData.file(), url)
: URLFactory.createURL(url);
// Create configuration
final Config config = new Config(fullURL, contentType, forceContentType, encoding, forceEncoding,
ignoreConnectionEncoding, new XMLParsing.ParserConfiguration(validating, handleXInclude, externalEntities), handleLexical, mode,
headerNameValues, forwardHeaders, readHeaders,
cacheUseLocalCache, enableConditionalGET,
username, password, preemptiveAuth, domain,
tidyConfig);
return new ConfigURIReferences(config);
}
});
try {
// Never accept a null URL
if (configURIReferences.config.getURL() == null)
throw new OXFException("Missing configuration.");
// We use the same validity as for the output
final boolean isUseLocalCache = configURIReferences.config.isCacheUseLocalCache();
final CacheKey localCacheKey;
final Object localCacheValidity;
if (isUseLocalCache) {
// The key is derived from Config.toString(); getValidityImpl() may also populate the state document
localCacheKey = new InternalCacheKey(URLGenerator.this, "urlDocument", configURIReferences.config.toString());
localCacheValidity = getValidityImpl(pipelineContext);
} else {
localCacheKey = null;
localCacheValidity = null;
}
// Decide whether to use read from the special oxf: handler or the generic URL handler
final URLGeneratorState state = (URLGenerator.URLGeneratorState) URLGenerator.this.getState(pipelineContext);
if (state.getDocument() != null) {
// Document was found when retrieving validity in conditional get
// NOTE: This only happens if isCacheUseLocalCache() == true
// NOTE: Document was re-added to cache in getValidityImpl()
state.getDocument().replay(xmlReceiver);
} else {
final Object cachedResource = (localCacheKey == null) ? null : ObjectCache.instance().findValid(localCacheKey, localCacheValidity);
if (cachedResource != null) {
// Just replay the cached resource
((SAXStore) cachedResource).replay(xmlReceiver);
} else {
final ResourceHandler handler = state.ensureMainResourceHandler(pipelineContext, configURIReferences.config);
try {
// We need to read the resource
// Find content-type to use. If the config says to force the
// content-type, we use the content-type provided by the user.
// Otherwise, we give the priority to the content-type provided by
// the connection, then the content-type provided by the user, then
// we use the default content-type (XML). The user will have to
// provide a content-type for example to read HTML documents with
// the file: protocol.
String contentType;
if (configURIReferences.config.isForceContentType()) {
contentType = configURIReferences.config.getContentType();
} else {
contentType = handler.getResourceMediaType();
if (contentType == null)
contentType = configURIReferences.config.getContentType();
if (contentType == null)
contentType = ProcessorUtils.DEFAULT_CONTENT_TYPE;
}
// Get and cache validity as the handler is open, as validity is likely to be used later
// again for caching reasons
final Long validity = (Long) getHandlerValidity(pipelineContext, configURIReferences.config, configURIReferences.config.getURL(), handler);
// Create store for caching if necessary
final XMLReceiver output = isUseLocalCache ? new SAXStore(xmlReceiver) : xmlReceiver;
// Handle mode
String mode = configURIReferences.config.getMode();
if (mode == null) {
// Mode is inferred from content-type
if (ProcessorUtils.HTML_CONTENT_TYPE.equals(contentType))
mode = "html";
else if (ContentTypes.isXMLContentType(contentType))
mode = "xml";
else if (ContentTypes.isJSONContentType(contentType))
mode = "json";
else if (ContentTypes.isTextContentType(contentType))
mode = "text";
else
mode = "binary";
}
// Read resource
// Only XML mode records URI references; every other mode resets them to null
if (mode.equals("html")) {
// HTML mode
handler.readHTML(output);
configURIReferences.uriReferences = null;
} else if (mode.equals("xml")) {
// XML mode
final URIProcessorOutputImpl.URIReferences uriReferences = new URIProcessorOutputImpl.URIReferences();
handler.readXML(pipelineContext, output, uriReferences);
configURIReferences.uriReferences = uriReferences.getReferences();
} else if (mode.equals("text")) {
// Text mode
handler.readText(output, contentType, validity);
configURIReferences.uriReferences = null;
} else if (mode.equals("json")) {
// JSON mode
handler.readJSON(output, contentType, validity);
configURIReferences.uriReferences = null;
} else {
// Binary mode
handler.readBinary(output, contentType, validity);
configURIReferences.uriReferences = null;
}
// Cache the resource if requested but only if there is not a failure status code. It
// seems reasonable to follow the semantic of the web and to never cache unsuccessful
// responses.
if (isUseLocalCache && ! handler.isFailureStatusCode()) {
// Make sure SAXStore loses its reference on its output so that we don't clutter the cache
((SAXStore) output).setXMLReceiver(null);
// Add to cache
ObjectCache.instance().add(localCacheKey, localCacheValidity, output);
}
} finally {
// Always release the handler, whether or not the read succeeded
handler.destroy();
}
}
}
} catch (SAXParseException spe) {
// Parse errors carry their own location
throw new ValidationException(spe.getMessage(), new LocationData(spe));
} catch (ValidationException e) {
final LocationData locationData = e.firstLocationData();
// The system id may not be set
if (locationData == null || locationData.file() == null)
throw OrbeonLocationException.wrapException(e, new LocationData(configURIReferences.config.getURL().toExternalForm(), -1, -1));
else
throw e;
} catch (OXFException e) {
// Rethrown as is
throw e;
} catch (Exception e) {
// Anything else is wrapped with the URL as location
throw new ValidationException(e, new LocationData(configURIReferences.config.getURL().toExternalForm(), -1, -1));
}
}
/**
 * Builds the cache key of this output.
 *
 * The compound key is made of, in order: the key of the config input (only when the
 * configuration is read from that input), a key derived from the configuration string, and one
 * key per URI reference recorded by the last XML read.
 *
 * @return the compound key, or null when the configuration or the config input key is
 *         not available
 */
@Override
public OutputCacheKey getKeyImpl(PipelineContext pipelineContext) {
    makeSureStateIsSet(pipelineContext);

    final ConfigURIReferences configURIReferences = getConfigURIReferences(pipelineContext);
    if (configURIReferences == null) {
        return null;
    }

    final List<CacheKey> outputKeys = new ArrayList<CacheKey>();

    // Handle config if read as input
    if (localConfigURIReferences == null) {
        final KeyValidity configKeyValidity = getInputKeyValidity(pipelineContext, INPUT_CONFIG);
        if (configKeyValidity == null) {
            return null;
        }
        outputKeys.add(configKeyValidity.key);
    }

    // Handle main document and config
    outputKeys.add(new SimpleOutputCacheKey(getProcessorClass(), name, configURIReferences.config.toString()));

    // Handle dependencies if any
    // The field is reassigned by readImpl() on this same object, so read it exactly once: sizing
    // an array from one read and filling it from another could overflow or leave null entries.
    final List<URIProcessorOutputImpl.URIReference> uriReferences = configURIReferences.uriReferences;
    if (uriReferences != null) {
        for (final URIProcessorOutputImpl.URIReference uriReference : uriReferences) {
            final String referenceURL = URLFactory.createURL(uriReference.context, uriReference.spec).toExternalForm();
            outputKeys.add(new InternalCacheKey(URLGenerator.this, "urlReference", referenceURL));
        }
    }

    return new CompoundOutputCacheKey(getProcessorClass(), name, outputKeys.toArray(new CacheKey[0]));
}
/**
 * Computes the validity of this output.
 *
 * The result is a list made of, in order: the validity of the config input (only when the
 * configuration is read from that input), the validity of the main document, and one validity
 * per URI reference recorded by the last XML read.
 *
 * @return the list of validities, or null when the configuration or the config input validity
 *         is not available
 */
@Override
public Object getValidityImpl(PipelineContext pipelineContext) {
    makeSureStateIsSet(pipelineContext);

    final ConfigURIReferences configURIReferences = getConfigURIReferences(pipelineContext);
    if (configURIReferences == null)
        return null;

    final Config config = configURIReferences.config;
    final List<Object> validities = new ArrayList<Object>();

    // Handle config if read as input
    if (localConfigURIReferences == null) {
        final KeyValidity configKeyValidity = getInputKeyValidity(pipelineContext, INPUT_CONFIG);
        if (configKeyValidity == null)
            return null;
        validities.add(configKeyValidity.validity);
    }

    // Handle main document and config
    final URLGeneratorState state = (URLGenerator.URLGeneratorState) URLGenerator.this.getState(pipelineContext);
    final ResourceHandler resourceHandler = state.ensureMainResourceHandler(pipelineContext, config);
    validities.add(getHandlerValidity(pipelineContext, config, config.getURL(), resourceHandler));

    // Handle dependencies if any
    // The field is reassigned by readImpl() on this same object, so read it exactly once: a
    // second read after the null check could observe null.
    final List<URIProcessorOutputImpl.URIReference> uriReferences = configURIReferences.uriReferences;
    if (uriReferences != null) {
        for (final URIProcessorOutputImpl.URIReference uriReference : uriReferences) {
            final URL referenceURL = URLFactory.createURL(uriReference.context, uriReference.spec);
            // A null handler asks getHandlerValidity() to create a temporary one
            validities.add(getHandlerValidity(pipelineContext, config, referenceURL, null));
        }
    }

    return validities;
}
/**
 * Returns the validity of the given URL, remembering it in the request state so that the
 * resource is queried at most once per URL during a request.
 *
 * @param config  configuration of the main document
 * @param url     URL whose validity is requested
 * @param handler handler of the main document, or null for an included dependency, in which
 *                case a temporary handler is created and destroyed here
 * @return the validity, or null if it could not be obtained
 */
private Long getHandlerValidity(PipelineContext pipelineContext, Config config, URL url, ResourceHandler handler) {
    final URLGeneratorState state = (URLGenerator.URLGeneratorState) URLGenerator.this.getState(pipelineContext);
    final String urlString = url.toExternalForm();

    // Found value in state cache
    if (state.isLastModifiedSet(urlString)) {
        return state.getLastModified(urlString);
    }

    // Get value and cache it in state
    try {
        final Long validity;
        if (handler != null) {
            // Main handler
            // Try to see what we have in cache
            // NOTE: This *could* be transparently handled by HttpClient cache if configured properly:
            // http://hc.apache.org/httpcomponents-client-ga/httpclient-cache/apidocs/org/apache/http/impl/client/cache/CachingHttpClient.html
            // Although, this would only cache the bytes, and probably not provide us with a
            // readily-parsed SAXStore.
            CacheEntry cacheEntry = null;
            if (config.isEnableConditionalGET()) {
                final CacheKey localCacheKey = new InternalCacheKey(URLGenerator.this, "urlDocument", config.toString());
                cacheEntry = ObjectCache.instance().findAny(localCacheKey);
            }

            if (cacheEntry == null) {
                validity = handler.getValidity();
            } else {
                // Found some entry in cache for the key
                final long lastModified = findLastModified(cacheEntry.validity);
                // This returns the validity and, possibly, stores the document in the state
                validity = handler.getConditional(lastModified);
                if (handler.getConnectionStatusCode() == 304) {
                    // The server responded that the resource hasn't changed
                    // Update the entry in cache
                    ObjectCache.instance().add(cacheEntry.key, lastModified, cacheEntry.cacheable);
                    // Remember the document for the rest of this request
                    state.setDocument((SAXStore) cacheEntry.cacheable);
                }
            }
        } else {
            // Include dependency
            // Create handler right here
            final ResourceHandler dependencyHandler;
            if (OXFHandler.PROTOCOL.equals(url.getProtocol())) {
                dependencyHandler = new OXFResourceHandler(new Config(url)); // Should use full config so that headers are forwarded?
            } else {
                dependencyHandler = new URLResourceHandler(new Config(url)); // Should use full config so that headers are forwarded?
            }
            try {
                // FIXME: this can potentially be very slow with some URLs like HTTP URLs. We try to
                // optimize this by keeping the URLConnection for the main document, but dependencies may
                // cause multiple requests to the same URL.
                validity = dependencyHandler.getValidity();
            } finally {
                // Destroy handler
                dependencyHandler.destroy();
            }
        }
        state.setLastModified(urlString, validity);
        return validity;
    } catch (Exception e) {
        // If the file no longer exists, for example, we don't want to throw, just to invalidate
        // An exception will be thrown if necessary when the document is actually read
        return null;
    }
}
/**
 * Returns the configuration without reading the config input: either the local configuration
 * set by a constructor, or the one found in the object cache for the config input.
 *
 * @return the configuration, or null if the config input is not cacheable or nothing valid is
 *         in cache
 */
private ConfigURIReferences getConfigURIReferences(PipelineContext context) {
    // Check if config is external
    if (localConfigURIReferences != null)
        return localConfigURIReferences;

    // Make sure the config input is cacheable
    final KeyValidity configKeyValidity = getInputKeyValidity(context, INPUT_CONFIG);
    if (configKeyValidity == null) {
        return null;
    }

    // Try to find resource manager key in cache
    final Object cached = ObjectCache.instance().findValid(configKeyValidity.key, configKeyValidity.validity);
    final ConfigURIReferences cachedConfig = (ConfigURIReferences) cached;

    if (logger.isDebugEnabled()) {
        final String message = (cachedConfig != null)
            ? "Config found: " + cachedConfig.toString()
            : "Config not found";
        logger.debug(message);
    }
    return cachedConfig;
}
};
addOutput(name, output);
return output;
}
/**
 * Abstraction over the way a resource is accessed, so that the generator can query and read
 * a document without knowing which protocol handler is in use.
 */
private interface ResourceHandler {

    // ----- Information about the resource

    /** Returns the validity (last modification) of the resource, or null if it is not known. */
    Long getValidity() throws IOException;

    /** Returns the validity of the resource, given the last modification already known to the caller. */
    Long getConditional(Long lastModified) throws IOException;

    /** Returns the media type reported for the resource, or null if it is not known. */
    String getResourceMediaType() throws IOException;

    /** Returns the encoding reported by the connection, or null if it is not known. */
    String getConnectionEncoding() throws IOException;

    /** Returns the status code of the connection; implementations may return -1 when there is none. */
    int getConnectionStatusCode() throws IOException;

    /** Tells whether the status code denotes a failure. */
    boolean isFailureStatusCode() throws IOException;

    // ----- Reading the resource in the different modes

    void readXML(PipelineContext pipelineContext, XMLReceiver xmlReceiver, URIProcessorOutputImpl.URIReferences uriReferences) throws IOException;

    void readHTML(XMLReceiver xmlReceiver) throws IOException;

    void readJSON(XMLReceiver output, String contentType, Long lastModified) throws IOException;

    void readText(ContentHandler output, String contentType, Long lastModified) throws IOException;

    void readBinary(ContentHandler output, String contentType, Long lastModified) throws IOException;

    // ----- Lifecycle

    /** Releases what the handler holds open. */
    void destroy() throws IOException;
}
/**
 * Base class of the resource handlers: holds the configuration and provides the encoding
 * resolution as well as the HTML and JSON readers shared by the implementations.
 */
private static abstract class ResourceHandlerBase implements ResourceHandler {

    protected Config config;

    public ResourceHandlerBase(Config config) {
        this.config = config;
    }

    /**
     * Resolves the encoding known outside of the document itself. In order of priority: the
     * configured encoding when forced, the connection encoding unless ignored, then the
     * configured encoding.
     *
     * @return the encoding, or null if none is known
     */
    protected String getExternalEncoding() throws IOException {
        if (config.isForceEncoding())
            return config.getEncoding();

        // The connection is always asked, even when its answer ends up being ignored
        final String connectionEncoding = getConnectionEncoding();
        final boolean useConnectionEncoding = connectionEncoding != null && ! config.isIgnoreConnectionEncoding();
        if (useConnectionEncoding)
            return connectionEncoding;

        // May be null
        return config.getEncoding();
    }

    /**
     * Parses the stream as HTML with Tidy and sends the resulting DOM to the output as SAX events.
     */
    public static void readHTML(InputStream is, TidyConfig tidyConfig, String encoding, XMLReceiver output) {
        final Tidy htmlParser = new Tidy();
        htmlParser.setShowWarnings(tidyConfig.isShowWarnings());
        htmlParser.setQuiet(tidyConfig.isQuiet());
        htmlParser.setTrimEmptyElements(false);
        htmlParser.setDropEmptyParas(false);

        // Set encoding
        // If the encoding is null, we get a default
        final String tidyEncoding = TidyConfig.getTidyEncoding(encoding);
        htmlParser.setInputEncoding(tidyEncoding);

        // Parse and output to SAXResult
        final DOMSource parsedDocument = new DOMSource(htmlParser.parseDOM(is, null));
        TransformerUtils.sourceToSAX(parsedDocument, output);
    }

    /**
     * Reads the whole stream as UTF-8 text and converts the JSON it contains to XML events.
     */
    public void readJSON(InputStream is, XMLReceiver output) throws IOException {
        final String json = NetUtils.readStreamAsString(new InputStreamReader(is, "utf-8"));
        Converter.jsonStringToXmlStream(json, output, Symbols.JSON());
    }
}
private static class OXFResourceHandler extends ResourceHandlerBase {
private String resourceManagerKey;
private InputStream inputStream;
public OXFResourceHandler(Config config) {
super(config);
}
public String getResourceMediaType() throws IOException {
// We generally don't know the "connection" content-type
return null;
}
public String getConnectionEncoding() throws IOException {
// We generally don't know the "connection" encoding
// NOTE: We could know, if the underlying protocol was for example HTTP. But we may
// want to abstract that anyway, so that the behavior is consistent whatever the sandbox
// is.
return null;
}
public int getConnectionStatusCode() throws IOException {
return -1;
}
public boolean isFailureStatusCode() throws IOException {
return false;
}
public Long getValidity() throws IOException {
getKey();
if (logger.isDebugEnabled())
logger.debug("OXF Protocol: Using ResourceManager for key " + getKey());
long result = ResourceManagerWrapper.instance().lastModified(getKey(), false);
// Zero and negative values often have a special meaning, make sure to normalize here
return (result <= 0) ? null : result;
}
public Long getConditional(Long lastModified) throws IOException {
return getValidity();
}
public void destroy() throws IOException {
if (inputStream != null) {
inputStream.close();
}
}
public void readHTML(XMLReceiver xmlReceiver) throws IOException {
inputStream = ResourceManagerWrapper.instance().getContentAsStream(getKey());
ResourceHandlerBase.readHTML(inputStream, config.getTidyConfig(), getExternalEncoding(), xmlReceiver);
}
public void readText(ContentHandler output, String contentType, Long lastModified) throws IOException {
inputStream = ResourceManagerWrapper.instance().getContentAsStream(getKey());
output.setDocumentLocator(new URLLocator(config.getURL().toExternalForm()));
BinaryTextSupport.readText(inputStream, getExternalEncoding(), output, contentType, lastModified, getConnectionStatusCode());
}
/**
 * Reads the resource as XML and sends the SAX events to the given receiver.
 *
 * @param pipelineContext current pipeline context (not used by this implementation)
 * @param xmlReceiver     receiver of the SAX events
 * @param uriReferences   URI references handed to the parser configuration
 */
public void readXML(PipelineContext pipelineContext, XMLReceiver xmlReceiver, URIProcessorOutputImpl.URIReferences uriReferences) throws IOException {
    final XMLParsing.ParserConfiguration parserConfiguration =
        new XMLParsing.ParserConfiguration(config.getParserConfiguration(), uriReferences);

    if (getExternalEncoding() == null) {
        // Regular case, the resource manager does the job and autodetects the encoding
        ResourceManagerWrapper.instance().getContentAsSAX(
            getKey(), xmlReceiver, parserConfiguration, config.isHandleLexical());
        return;
    }

    // The encoding is set externally, either forced by the user, or set by the connection
    inputStream = ResourceManagerWrapper.instance().getContentAsStream(getKey());
    final Reader decodingReader = new InputStreamReader(inputStream, getExternalEncoding());
    XMLParsing.readerToSAX(
        decodingReader,
        config.getURL().toExternalForm(),
        xmlReceiver,
        parserConfiguration,
        config.isHandleLexical()
    );
}
/**
 * Reads the resource as binary content through {@code BinaryTextSupport.readBinary}.
 *
 * The stream is kept in {@code inputStream} so that {@code destroy()} can close it.
 *
 * @param output       handler receiving the document locator and the content
 * @param contentType  content type passed through to the reader
 * @param lastModified last modified value passed through to the reader
 */
public void readBinary(ContentHandler output, String contentType, Long lastModified) throws IOException {
    inputStream = ResourceManagerWrapper.instance().getContentAsStream(getKey());
    // Expose the URL of the resource as the location of the generated events
    final String systemId = config.getURL().toExternalForm();
    output.setDocumentLocator(new URLLocator(systemId));
    final int statusCode = getConnectionStatusCode();
    // The last two arguments are not provided by this handler
    BinaryTextSupport.readBinary(inputStream, output, contentType, lastModified, statusCode, null, null);
}
/**
 * Reads the resource as JSON by delegating to the stream-based {@code readJSON}.
 *
 * The stream is kept in {@code inputStream} so that {@code destroy()} can close it.
 *
 * @param output       receiver of the resulting events
 * @param contentType  not used by this implementation
 * @param lastModified not used by this implementation
 */
public void readJSON(XMLReceiver output, String contentType, Long lastModified) throws IOException {
    final String key = getKey();
    inputStream = ResourceManagerWrapper.instance().getContentAsStream(key);
    readJSON(inputStream, output);
}
/**
 * Returns the resource manager key for the configured URL.
 *
 * The key is the file part of the URL; it is computed on first use and then cached.
 */
private String getKey() {
    if (resourceManagerKey != null) {
        return resourceManagerKey;
    }
    resourceManagerKey = config.getURL().getFile();
    return resourceManagerKey;
}
}
/**
 * Resource handler which reads its content through {@code Connection}.
 *
 * The connection is opened lazily by the first method that needs it and is opened at most once per
 * handler instance. The resulting stream stays open until {@link #destroy()} is called.
 */
private static class URLResourceHandler extends ResourceHandlerBase {

    // Result of the connection; null until openConnection() has run
    private ConnectionResult connectionResult;
    // Content stream of the connection result; closed by destroy()
    private InputStream inputStream;
    // Headers collected from the connection result according to config.getReadHeaders()
    private scala.collection.immutable.List<Tuple2<String, String>> headersToPropagate;

    public URLResourceHandler(Config config) {
        super(config);
    }

    /**
     * Opens the connection if needed and returns the media type of the connection result, asking for
     * {@code null} as the default.
     */
    public String getResourceMediaType() throws IOException {
        openConnection();
        return connectionResult.mediatypeOrDefault(null);
    }

    /**
     * Opens the connection if needed and returns the charset reported by the connection result.
     */
    public String getConnectionEncoding() throws IOException {
        openConnection();
        return connectionResult.charsetJava();
    }

    /**
     * Opens the connection if needed and returns the status code of the connection result.
     */
    public int getConnectionStatusCode() throws IOException {
        openConnection();
        return connectionResult.statusCode();
    }

    /**
     * Opens the connection if needed and returns the last modified value of the connection result.
     *
     * @return {@code null} when the status code denotes a failure, otherwise the value reported by the
     *         connection result
     */
    public Long getValidity() throws IOException {
        openConnection();
        return isFailureStatusCode() ? null : connectionResult.lastModifiedJava();
    }

    /**
     * Same as {@link #getValidity()}, but passes {@code lastModified} along when opening the connection.
     *
     * NOTE: If the connection has already been opened, {@code lastModified} has no effect.
     *
     * @param lastModified last modified value to pass when connecting, or {@code null}
     */
    public Long getConditional(Long lastModified) throws IOException {
        openConnection(lastModified);
        return getValidity();
    }

    public void destroy() throws IOException {
        // Make sure the connection is closed because when
        // getting the last modified date, the stream is
        // actually opened. When using the file: protocol, the
        // file can be locked on disk.
        if (inputStream != null)
            inputStream.close();
    }

    // Opens the connection without a last modified value
    private void openConnection() throws IOException {
        openConnection(null);
    }

    /**
     * Connects with a GET if no connection result is available yet, then remembers the result, its
     * content stream and the headers to propagate.
     *
     * @param lastModified value handed to {@code URLGeneratorBase.setIfModifiedIfNeeded}, or {@code null}
     */
    private void openConnection(Long lastModified) throws IOException {
        if (connectionResult == null) {
            // TODO: pass logging callback
            final Map<String, String[]> newHeaders =
                URLGeneratorBase.setIfModifiedIfNeeded(config.getHeaderNameValues(), lastModified);

            // Credentials are only built when a username is configured
            final Credentials credentials = config.getUsername() == null ?
                null :
                Credentials.apply(
                    config.getUsername(),
                    config.getPassword(),
                    config.isPreemptiveAuth() ? "true" : "false", // passed as a string
                    config.getDomain()
                );

            final URI url;
            try {
                url = config.getURL().toURI();
            } catch (URISyntaxException e) {
                throw new OXFException(e);
            }

            final scala.collection.immutable.Map<String, scala.collection.immutable.List<String>> headers =
                Connection.jBuildConnectionHeadersCapitalizedIfNeeded(
                    url.getScheme(),
                    credentials != null,
                    newHeaders,
                    config.getForwardHeaders(),
                    Connection.getHeaderFromRequest(NetUtils.getExternalContext().getRequest()),
                    indentedLogger
                );

            connectionResult =
                Connection.jApply("GET", url, credentials, null, headers, true, false, indentedLogger).connect(true);
            inputStream = connectionResult.content().inputStream();

            headersToPropagate = URLGeneratorBase.collectHeaders(connectionResult, config.getReadHeaders());
            URLGeneratorBase.storeHeadersIntoRequest(connectionResult, headersToPropagate);
        }
    }

    /**
     * Reads the content as HTML.
     *
     * @throws HttpStatusCodeException if the status code denotes a failure
     */
    public void readHTML(XMLReceiver xmlReceiver) throws IOException {
        openConnection();
        checkStatusCode();
        ResourceHandlerBase.readHTML(inputStream, config.getTidyConfig(), getExternalEncoding(), xmlReceiver);
    }

    /**
     * Reads the content as text. The status code is not checked here, it is passed to the reader instead.
     */
    public void readText(ContentHandler output, String contentType, Long lastModified) throws IOException {
        openConnection();
        output.setDocumentLocator(new URLLocator(config.getURL().toExternalForm()));
        BinaryTextSupport.readText(inputStream, getExternalEncoding(), output, contentType, lastModified, getConnectionStatusCode());
    }

    /**
     * Reads the content as JSON. The status code is not checked here.
     */
    public void readJSON(XMLReceiver output, String contentType, Long lastModified) throws IOException {
        openConnection();
        readJSON(inputStream, output);
    }

    /**
     * Reads the content as binary. The status code is not checked here, it is passed to the reader
     * together with the headers to propagate.
     */
    public void readBinary(ContentHandler output, String contentType, Long lastModified) throws IOException {
        openConnection();
        output.setDocumentLocator(new URLLocator(config.getURL().toExternalForm()));
        BinaryTextSupport.readBinary(inputStream, output, contentType, lastModified, getConnectionStatusCode(), null, headersToPropagate);
    }

    /**
     * Parses the content as XML and sends the SAX events to the receiver.
     *
     * @param pipelineContext current pipeline context (not used by this implementation)
     * @param xmlReceiver     receiver of the SAX events
     * @param uriReferences   URI references handed to the parser configuration
     * @throws HttpStatusCodeException if the status code denotes a failure
     * @throws OXFException            wrapping any {@code SAXException} raised while parsing
     */
    public void readXML(PipelineContext pipelineContext, XMLReceiver xmlReceiver, URIProcessorOutputImpl.URIReferences uriReferences) throws IOException {
        openConnection();
        checkStatusCode();
        final XMLParsing.ParserConfiguration parserConfiguration = new XMLParsing.ParserConfiguration(config.getParserConfiguration(), uriReferences);
        try {
            final XMLReader reader = XMLParsing.newXMLReader(parserConfiguration);
            reader.setContentHandler(xmlReceiver);
            // Only install the lexical handler when the configuration asks for it, as the other
            // handlers do by passing config.isHandleLexical() to XMLParsing
            if (config.isHandleLexical())
                reader.setProperty(XMLConstants.SAX_LEXICAL_HANDLER, xmlReceiver);
            final InputSource inputSource;
            if (getExternalEncoding() != null) {
                // The encoding is set externally, either forced by the user, or set by the connection
                inputSource = new InputSource(new InputStreamReader(inputStream, getExternalEncoding()));
            } else {
                // This is the regular case where the XML parser autodetects the encoding
                inputSource = new InputSource(inputStream);
            }
            inputSource.setSystemId(config.getURL().toExternalForm());
            reader.parse(inputSource);
        } catch (SAXException e) {
            throw new OXFException(e);
        }
    }

    /**
     * Tells whether the connection returned a status code which is not a success code.
     */
    public boolean isFailureStatusCode() throws IOException {
        // NOTE: We accept -1 internally to indicate we don't have an actual status code
        final int statusCode = getConnectionStatusCode();
        return statusCode > 0 && ! NetUtils.isSuccessCode(statusCode);
    }

    // Throws HttpStatusCodeException, carrying the URL, when the status code denotes a failure
    private void checkStatusCode() throws IOException {
        if (isFailureStatusCode())
            throw new HttpStatusCodeException(getConnectionStatusCode(), Option.apply(config.getURL().toExternalForm()), Option.<Throwable>apply(null));
    }
}
/**
 * Read from System.in.
 *
 * There is no connection behind this handler: no media type, encoding, status code or validity is
 * available, and there is nothing to release in {@link #destroy()}.
 */
private static class SystemResourceHandler extends ResourceHandlerBase {

    public SystemResourceHandler(Config config) {
        super(config);
    }

    public String getResourceMediaType() throws IOException {
        // We generally don't know the "connection" content-type
        return null;
    }

    public String getConnectionEncoding() throws IOException {
        // We generally don't know the "connection" encoding
        // NOTE: We could know, if the underlying protocol was for example HTTP. But we may
        // want to abstract that anyway, so that the behavior is consistent whatever the sandbox
        // is.
        return null;
    }

    /**
     * @return always {@code -1}, as there is no actual status code
     */
    public int getConnectionStatusCode() throws IOException {
        return -1;
    }

    /**
     * @return always {@code false}
     */
    public boolean isFailureStatusCode() throws IOException {
        return false;
    }

    /**
     * @return always {@code null}: no last modified information is available
     */
    public Long getValidity() throws IOException {
        return null;
    }

    /**
     * @param lastModified ignored by this handler
     * @return same as {@link #getValidity()}
     */
    public Long getConditional(Long lastModified) throws IOException {
        return getValidity();
    }

    public void destroy() throws IOException {
        // Nothing to release: System.in is not owned by this handler
    }

    /**
     * Returns the encoding determined by the superclass, falling back to the platform default charset
     * when the superclass has none. As a consequence this method never returns {@code null}.
     */
    @Override
    protected String getExternalEncoding() throws IOException {
        final String encoding = super.getExternalEncoding();
        return encoding != null ? encoding : java.nio.charset.Charset.defaultCharset().name();
    }

    public void readHTML(XMLReceiver xmlReceiver) throws IOException {
        ResourceHandlerBase.readHTML(System.in, config.getTidyConfig(), getExternalEncoding(), xmlReceiver);
    }

    public void readText(ContentHandler output, String contentType, Long lastModified) throws IOException {
        BinaryTextSupport.readText(System.in, getExternalEncoding(), output, contentType, lastModified, getConnectionStatusCode());
    }

    /**
     * Parses System.in as XML and sends the SAX events to the receiver.
     *
     * The platform charset fallback of {@link #getExternalEncoding()} is deliberately not used here:
     * unless an encoding is determined by the superclass, the raw byte stream is handed to the XML
     * parser so that it can autodetect the encoding of the document.
     *
     * @param pipelineContext current pipeline context (not used by this implementation)
     * @param xmlReceiver     receiver of the SAX events
     * @param uriReferences   URI references handed to the parser configuration
     * @throws OXFException wrapping any {@code SAXException} raised while parsing in the autodetect case
     */
    public void readXML(PipelineContext pipelineContext, XMLReceiver xmlReceiver, URIProcessorOutputImpl.URIReferences uriReferences) throws IOException {
        final XMLParsing.ParserConfiguration parserConfiguration = new XMLParsing.ParserConfiguration(config.getParserConfiguration(), uriReferences);
        // Bypass the override above, which never returns null and would make autodetection unreachable
        final String explicitEncoding = super.getExternalEncoding();
        if (explicitEncoding != null) {
            // The encoding is set externally, either forced by the user, or set by the connection
            XMLParsing.readerToSAX(new InputStreamReader(System.in, explicitEncoding), config.getURL().toExternalForm(),
                xmlReceiver, parserConfiguration, config.isHandleLexical());
        } else {
            // Regular case: the XML parser reads the bytes of System.in and autodetects the encoding
            try {
                final XMLReader reader = XMLParsing.newXMLReader(parserConfiguration);
                reader.setContentHandler(xmlReceiver);
                if (config.isHandleLexical())
                    reader.setProperty(XMLConstants.SAX_LEXICAL_HANDLER, xmlReceiver);
                final InputSource inputSource = new InputSource(System.in);
                inputSource.setSystemId(config.getURL().toExternalForm());
                reader.parse(inputSource);
            } catch (SAXException e) {
                throw new OXFException(e);
            }
        }
    }

    public void readBinary(ContentHandler output, String contentType, Long lastModified) throws IOException {
        BinaryTextSupport.readBinary(System.in, output, contentType, lastModified, getConnectionStatusCode(), null, null);
    }

    public void readJSON(XMLReceiver output, String contentType, Long lastModified) throws IOException {
        readJSON(System.in, output);
    }
}
/**
 * The idea of URLGeneratorState is that, during a pipeline execution with a given PipelineContext,
 * there is typically:
 *
 * - a call to getValidity()
 * - followed by a call to read()
 *
 * In order to avoid dereferencing the URL twice, the handler is stored in the state so it can be
 * accessed by read().
 */
private static class URLGeneratorState {

    // Handler for the main URL, created on demand by ensureMainResourceHandler()
    private ResourceHandler mainResourceHandler;
    // URL string -> Long last modified value, or "" as a marker for a known null value
    private Map<String, Object> map;
    private SAXStore document;

    /**
     * Remembers the last modified value of the given URL.
     *
     * @param urlString    URL used as key
     * @param lastModified value to remember; {@code null} is remembered too, as an empty string marker
     */
    public void setLastModified(String urlString, Long lastModified) {
        if (map == null)
            map = new HashMap<String, Object>();
        map.put(urlString, lastModified == null ? "" : lastModified);
    }

    /**
     * Tells whether {@link #setLastModified} was called for the given URL, including with {@code null}.
     */
    public boolean isLastModifiedSet(String urlString) {
        return map != null && map.get(urlString) != null;
    }

    /**
     * Returns the last modified value remembered for the given URL.
     *
     * @return the remembered value, or {@code null} if the remembered value is {@code null} or if
     *         nothing was remembered for this URL
     */
    public Long getLastModified(String urlString) {
        // Nothing was ever set: behave like isLastModifiedSet() instead of failing on a null map
        if (map == null)
            return null;
        final Object result = map.get(urlString);
        return (result instanceof String) ? null : (Long) result;
    }

    /**
     * Returns the handler for the main URL, creating it on first call based on the protocol of the
     * configured URL. A newly created handler is registered for destruction with the pipeline context.
     *
     * @param pipelineContext context on which the destruction listener is registered
     * @param config          configuration providing the URL
     * @return the existing or newly created handler
     */
    public ResourceHandler ensureMainResourceHandler(PipelineContext pipelineContext, Config config) {
        if (mainResourceHandler == null) {
            // Create and remember handler
            final String protocol = config.getURL().getProtocol();
            final ResourceHandler handler;
            if (OXFHandler.PROTOCOL.equals(protocol))
                handler = new OXFResourceHandler(config);
            else if (SystemHandler.PROTOCOL.equals(protocol))
                handler = new SystemResourceHandler(config);
            else
                handler = new URLResourceHandler(config);
            mainResourceHandler = handler;
            // Make sure it is destroyed when the pipeline ends at the latest
            pipelineContext.addContextListener(new PipelineContext.ContextListener() {
                public void contextDestroyed(boolean success) {
                    try {
                        // Destroy the handler this listener was registered for
                        handler.destroy();
                    } catch (IOException e) {
                        logger.error("Exception caught while destroying ResourceHandler", e);
                    }
                }
            });
        }
        return mainResourceHandler;
    }

    public void setDocument(SAXStore document) {
        this.document = document;
    }

    public SAXStore getDocument() {
        return document;
    }
}
/**
 * Installs a fresh {@code URLGeneratorState} in the pipeline context unless one is already present.
 *
 * @param pipelineContext context in which the state is stored
 */
private void makeSureStateIsSet(PipelineContext pipelineContext) {
    if (hasState(pipelineContext)) {
        // A state is already available: keep it
        return;
    }
    final URLGeneratorState freshState = new URLGeneratorState();
    setState(pipelineContext, freshState);
}
/**
 * Replaces whatever state is stored in the pipeline context with a fresh {@code URLGeneratorState}.
 *
 * @param pipelineContext context in which the state is stored
 */
@Override
public void reset(PipelineContext pipelineContext) {
    final URLGeneratorState freshState = new URLGeneratorState();
    setState(pipelineContext, freshState);
}
}