/** * Copyright (C) 2010 Orbeon, Inc. * * This program is free software; you can redistribute it and/or modify it under the terms of the * GNU Lesser General Public License as published by the Free Software Foundation; either version * 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Lesser General Public License for more details. * * The full text of the license is available at http://www.gnu.org/copyleft/lesser.html */ package org.orbeon.oxf.processor; import org.apache.log4j.Logger; import org.orbeon.dom.QName; import org.orbeon.oxf.cache.*; import org.orbeon.oxf.common.OXFException; import org.orbeon.oxf.common.ValidationException; import org.orbeon.oxf.pipeline.api.PipelineContext; import org.orbeon.oxf.pipeline.api.TransformerXMLReceiver; import org.orbeon.oxf.xml.XMLParsing; import org.orbeon.oxf.xml.XMLReceiver; import org.orbeon.oxf.processor.impl.DelegatingProcessorInput; import org.orbeon.oxf.processor.impl.ProcessorInputImpl; import org.orbeon.oxf.processor.validation.MSVValidationProcessor; import org.orbeon.oxf.properties.Properties; import org.orbeon.oxf.properties.PropertySet; import org.orbeon.oxf.util.LoggerFactory; import org.orbeon.oxf.util.PipelineUtils; import org.orbeon.oxf.xml.SchemaRepository; import org.orbeon.oxf.xml.TransformerUtils; import org.orbeon.oxf.xml.dom4j.LocationData; import org.orbeon.oxf.xml.dom4j.LocationSAXContentHandler; import org.orbeon.saxon.Configuration; import org.orbeon.saxon.om.DocumentInfo; import org.orbeon.saxon.tinytree.TinyBuilder; import org.w3c.dom.Document; import javax.xml.transform.dom.DOMResult; import java.lang.ref.WeakReference; import java.util.*; /** * Helper class that implements default method of the Processor interface. */ public abstract class ProcessorImpl implements Processor { public static Logger logger = LoggerFactory.createLogger(ProcessorImpl.class); public static final String INPUT_DATA = "data"; public static final String INPUT_CONFIG = "config"; public static final String OUTPUT_DATA = "data"; public static final String PROCESSOR_VALIDATION_FLAG = "oxf.validation.processor"; public static final String USER_VALIDATION_FLAG = "oxf.validation.user"; public static final String SAX_INSPECTION_FLAG = "oxf.sax.inspection"; private static final List<ProcessorInput> EMPTY_INPUT_LIST = Collections.emptyList(); public static int PROCESSOR_SEQUENCE_NUMBER = 0; private final int sequenceNumber; private String id; private QName name; private final Map<String, List<ProcessorInput>> inputMap = new LinkedHashMap<String, List<ProcessorInput>>(); private final Map<String, ProcessorOutput> outputMap = new LinkedHashMap<String, ProcessorOutput>(); private int outputCount = 0; private final List<ProcessorInputOutputInfo> inputsInfo = new ArrayList<ProcessorInputOutputInfo>(0); private final List<ProcessorInputOutputInfo> outputsInfo = new ArrayList<ProcessorInputOutputInfo>(0); private LocationData locationData; public static final String PROCESSOR_INPUT_SCHEME = "input:"; public static final String PROCESSOR_OUTPUT_SCHEME = "output:"; /** * This is for internal pipeline engine use. */ protected static final String PARENT_PROCESSORS = "parent-processors"; protected ProcessorImpl() { sequenceNumber = PROCESSOR_SEQUENCE_NUMBER++; } public int getSequenceNumber() { return sequenceNumber; } /** * Return a property set for this processor. */ protected PropertySet getPropertySet() { return Properties.instance().getPropertySet(getName()); } public LocationData getLocationData() { return locationData; } public void setLocationData( final LocationData loc ) { locationData = loc; } public void setId(String id) { this.id = id; } public String getId() { return id; } public QName getName() { return name; } public void setName(QName name) { this.name = name; } public ProcessorInput getInputByName(String name) { final List<ProcessorInput> inputs = inputMap.get(name); if (inputs == null) throw new ValidationException("Cannot find input \"" + name + "\"", getLocationData()); if (inputs.size() != 1) throw new ValidationException("Found more than one input \"" + name + "\"", getLocationData()); return inputs.get(0); } public List<ProcessorInput> getInputsByName(String name) { final List<ProcessorInput> result = inputMap.get(name); return result == null ? EMPTY_INPUT_LIST : result; } public ProcessorInput createInput(final String name) { final ProcessorInputOutputInfo inputInfo = getInputInfo(name); // The PropertySet can be null during properties initialization. This should be one of the // rare places where this should be tested on. By default, enable validation so the // properties can be validated! final PropertySet propertySet = Properties.instance().getPropertySet(); final boolean inputValidationEnabled = (propertySet == null) ? true : propertySet.getBoolean(PROCESSOR_VALIDATION_FLAG, true); final ProcessorInput input; if (inputValidationEnabled && inputInfo != null && inputInfo.getSchemaURI() != null) { if (logger.isDebugEnabled()) logger.debug("Creating validator for input name '" + name + "' and schema-uri '" + inputInfo.getSchemaURI() + "'"); // Create and hook-up input validation processor if needed final Processor validatorProcessor = new MSVValidationProcessor(inputInfo.getSchemaURI()); // Connect schema to validator final Processor schemaURLGenerator = PipelineUtils.createURLGenerator(SchemaRepository.instance().getSchemaLocation(inputInfo.getSchemaURI())); PipelineUtils.connect(schemaURLGenerator, OUTPUT_DATA, validatorProcessor, MSVValidationProcessor.INPUT_SCHEMA); PipelineUtils.connect(MSVValidationProcessor.NO_DECORATION_CONFIG, OUTPUT_DATA, validatorProcessor, INPUT_CONFIG); // Create data input and output final ProcessorInput inputValData = validatorProcessor.createInput(INPUT_DATA); final ProcessorOutput outputValData = validatorProcessor.createOutput(OUTPUT_DATA); input = new DelegatingProcessorInput(this, name, inputValData, outputValData); } else { input = new ProcessorInputImpl(this, name); } addInput(name, input); return input; } public void addInput(String inputName, ProcessorInput input) { List<ProcessorInput> inputs = inputMap.get(inputName); if (inputs == null) { inputs = new ArrayList<ProcessorInput>(); inputMap.put(inputName, inputs); } // if (inputs.size() > 0) // logger.info("Processor " + getClass().getName() + " has more than 1 input called " + name); inputs.add(input); } public void deleteInput(ProcessorInput input) { deleteFromListMap(inputMap, input); } public ProcessorOutput getOutputByName(String outputName) { ProcessorOutput ret = outputMap.get(outputName); if (ret == null ) throw new ValidationException("Exactly one output " + outputName + " is required", getLocationData()); return ret; } public ProcessorOutput createOutput(String outputName) { throw new ValidationException("Outputs are not supported", getLocationData()); } public ProcessorOutput addOutput(String name, ProcessorOutput output) { // NOTE: One exception to the rule that we only have one output with a given name is the TeeProcessor, which // adds multiple outputs called "data". outputMap.put(name, output); outputCount++; return output; } public int getOutputCount() { return outputCount; } // NOTE: As of 2009-06-26, this is never called. public void deleteOutput(ProcessorOutput output) { final Collection outputs = outputMap.values(); outputs.remove(output); // NOTE: This won't be correct with the TeeProcessor. outputCount--; } protected void addInputInfo(ProcessorInputOutputInfo inputInfo) { inputsInfo.add(inputInfo); } protected void addOutputInfo(ProcessorInputOutputInfo outputInfo) { outputsInfo.add(outputInfo); } public Set<String> getInputNames() { return inputMap.keySet(); } public List<ProcessorInputOutputInfo> getInputsInfo() { return inputsInfo; } public Map<String, List<ProcessorInput>> getConnectedInputs() { return Collections.unmodifiableMap(inputMap); } public ProcessorInputOutputInfo getInputInfo(String name) { for (Iterator i = inputsInfo.iterator(); i.hasNext();) { ProcessorInputOutputInfo inputInfo = (ProcessorInputOutputInfo) i.next(); if (inputInfo.getName().equals(name)) return inputInfo; } return null; } public List<ProcessorInputOutputInfo> getOutputsInfo() { return outputsInfo; } public Map<String, ProcessorOutput> getConnectedOutputs() { return Collections.unmodifiableMap(outputMap); } /** * The fundamental read method based on SAX. */ public static void readInputAsSAX(PipelineContext context, final ProcessorInput input, XMLReceiver xmlReceiver) { input.getOutput().read(context, xmlReceiver); } public void readInputAsSAX(PipelineContext context, String inputName, XMLReceiver xmlReceiver) { readInputAsSAX(context, getInputByName(inputName), xmlReceiver); } public Document readInputAsDOM(PipelineContext context, ProcessorInput input) { final TransformerXMLReceiver identity = TransformerUtils.getIdentityTransformerHandler(); final DOMResult domResult = new DOMResult(XMLParsing.createDocument()); identity.setResult(domResult); readInputAsSAX(context, input, identity); return (Document) domResult.getNode(); } public org.orbeon.dom.Document readInputAsOrbeonDom(PipelineContext context, ProcessorInput input) { LocationSAXContentHandler ch = new LocationSAXContentHandler(); readInputAsSAX(context, input, ch); return ch.getDocument(); } // TODO: https://github.com/orbeon/orbeon-forms/issues/3088 public org.orbeon.dom.Document readInputAsDOM4J(PipelineContext context, ProcessorInput input) { return readInputAsOrbeonDom(context, input); } public DocumentInfo readInputAsTinyTree(PipelineContext context, ProcessorInput input, Configuration configuration) { final TinyBuilder treeBuilder = new TinyBuilder(); final TransformerXMLReceiver identity = TransformerUtils.getIdentityTransformerHandler(configuration); identity.setResult(treeBuilder); readInputAsSAX(context, input, identity); return (DocumentInfo) treeBuilder.getCurrentRoot(); } public org.orbeon.dom.Document readInputAsOrbeonDom(PipelineContext context, String inputName) { return readInputAsOrbeonDom(context, getInputByName(inputName)); } public Document readCacheInputAsDOM(PipelineContext context, String inputName) { return readCacheInputAsObject(context, getInputByName(inputName), new CacheableInputReader<Document>() { public Document read(PipelineContext context, ProcessorInput input) { return readInputAsDOM(context, input); } }); } public org.orbeon.dom.Document readCacheInputAsDOM4J(PipelineContext context, String inputName) { return readCacheInputAsObject(context, getInputByName(inputName), new CacheableInputReader<org.orbeon.dom.Document>() { public org.orbeon.dom.Document read(PipelineContext context, ProcessorInput input) { return readInputAsOrbeonDom(context, input); } }); } public DocumentInfo readCacheInputAsTinyTree(PipelineContext pipelineContext, final Configuration configuration, String inputName) { return readCacheInputAsObject(pipelineContext, getInputByName(inputName), new CacheableInputReader<DocumentInfo>() { public DocumentInfo read(PipelineContext context, ProcessorInput input) { return readInputAsTinyTree(context, input, configuration); } }); } /** * To be used in the readImpl implementation of a processor when an object * is created based on an input (an the object only depends on the input). * * @param input The input the object depends on * @param reader The code constructing the object based on the input * @return The object returned by the reader (either directly returned, * or from the cache) */ public <T> T readCacheInputAsObject(PipelineContext pipelineContext, ProcessorInput input, CacheableInputReader<T> reader) { // Get associated output final ProcessorOutput output = input.getOutput(); final String debugInfo = logger.isDebugEnabled() ? "[" + output.getName() + ", " + output.getProcessorClass() + ", " + input.getName() + ", " + input.getProcessorClass() + "]" : null; // Get cache instance final Cache cache = ObjectCache.instance(); // Check in cache first KeyValidity keyValidity = getInputKeyValidity(pipelineContext, input); if (keyValidity != null && keyValidity.key != null && keyValidity.validity != null) { // We got a key and a validity final Object inputObject = cache.findValid(keyValidity.key, keyValidity.validity); if (inputObject != null) { // Return cached object if (logger.isDebugEnabled()) logger.debug("Cache " + debugInfo + ": source cacheable and found for key '" + keyValidity.key + "'. FOUND object: " + inputObject); reader.foundInCache(); return (T) inputObject; } } if (logger.isDebugEnabled()) logger.debug("Cache " + debugInfo + ": READING."); final T result = reader.read(pipelineContext, input); if (reader.allowCaching()) { // Cache new result if possible, asking again for KeyValidity if needed if (keyValidity == null || keyValidity.key == null || keyValidity.validity == null) keyValidity = getInputKeyValidity(pipelineContext, input); if (keyValidity != null && keyValidity.key != null && keyValidity.validity != null) { if (logger.isDebugEnabled()) logger.debug("Cache " + debugInfo + ": source cacheable for key '" + keyValidity.key + "'. STORING object:" + result); cache.add(keyValidity.key, keyValidity.validity, result); reader.storedInCache(); } } return result; } /** * This method is used to retrieve the state information set with setState(). * * This method may be called from start() and ProcessorOutput.readImpl(). * * @param pipelineContext current context * @return state object set by the caller of setState() */ public Object getState(PipelineContext pipelineContext) { final Object state = pipelineContext.getAttribute(getProcessorKey(pipelineContext)); if (state == null) { throw new OXFException("No state in context"); } return state; } /** * This method is used by processor implementations to store state information tied to the * current execution of the current processor, across processor initialization as well as reads * of all the processor's outputs. * * This method should be called from the reset() method. * * @param context current PipelineContext object * @param state user-defined object containing state information */ public void setState(PipelineContext context, Object state) { context.setAttribute(getProcessorKey(context), state); } public boolean hasState(PipelineContext context) { return context.getAttribute(getProcessorKey(context)) != null; } /** * Returns a key that should be used to store the state of the processor in the context. * * This method must be called in ProcessorOutput.readImpl() or start() of the processors before read/start is * called on other processors. (The key returned by getProcessorKey can be used after read/start is called.) */ public ProcessorKey getProcessorKey(PipelineContext context) { final Stack<ProcessorImpl> parents = (Stack<ProcessorImpl>) context.getAttribute(PARENT_PROCESSORS); return new ProcessorKey(parents, this); } public void start(PipelineContext pipelineContext) { throw new ValidationException("Start not supported; processor implemented by '" + getClass().getName() + "'", locationData); } public void reset(PipelineContext pipelineContext) { // nop } /** * Utility methods to remove an item from a map of lists. */ private void deleteFromListMap(Map map, Object toRemove) { for (Iterator i = map.keySet().iterator(); i.hasNext();) { List list = (List) map.get(i.next()); for (Iterator j = list.iterator(); j.hasNext();) { Object current = j.next(); if (current == toRemove) { j.remove(); } } if (list.size() == 0) { i.remove(); } } } /** * Check if the given URI is referring to a processor input. */ public static boolean isProcessorInputScheme(String uri) { return uri.startsWith(PROCESSOR_INPUT_SCHEME) && !uri.startsWith(PROCESSOR_INPUT_SCHEME + "/"); } /** * Check if the given URI is referring to a processor output. */ public static boolean isProcessorOutputScheme(String uri) { return uri.startsWith(PROCESSOR_OUTPUT_SCHEME) && !uri.startsWith(PROCESSOR_OUTPUT_SCHEME + "/"); } /** * Return the input name if the URI is referring to a processor input, null otherwise. */ public static String getProcessorInputSchemeInputName(String uri) { return isProcessorInputScheme(uri) ? uri.substring(PROCESSOR_INPUT_SCHEME.length()) : null; } /** * Return the output name if the URI is referring to a processor output, null otherwise. */ public static String getProcessorOutputSchemeInputName(String uri) { return isProcessorOutputScheme(uri) ? uri.substring(PROCESSOR_OUTPUT_SCHEME.length()) : null; } public static OutputCacheKey getInputKey(PipelineContext context, ProcessorInput input) { return input.getOutput().getKey(context); } public static Object getInputValidity(PipelineContext context, ProcessorInput input) { return input.getOutput().getValidity(context); } /** * Subclasses can use this utility method when implementing the getKey * and getValidity methods to make sure that they don't read the whole * config (if we don't already have it) just to return a key/validity. */ public boolean isInputInCache(PipelineContext context, ProcessorInput input) { final KeyValidity keyValidity = getInputKeyValidity(context, input); return keyValidity != null && ObjectCache.instance().findValid(keyValidity.key, keyValidity.validity) != null; } public boolean isInputInCache(PipelineContext context, String inputName) { return isInputInCache(context, getInputByName(inputName)); } public boolean isInputInCache(PipelineContext context, KeyValidity keyValidity) { return ObjectCache.instance().findValid(keyValidity.key, keyValidity.validity) != null; } /** * Subclasses can use this utility method to obtain the key and validity associated with an * input when implementing the getKey and getValidity methods. * * @return a KeyValidity object containing non-null key and validity, or null */ public KeyValidity getInputKeyValidity(PipelineContext context, ProcessorInput input) { final OutputCacheKey outputCacheKey = getInputKey(context, input); if (outputCacheKey == null) return null; final InputCacheKey inputCacheKey = new InputCacheKey(input, outputCacheKey); final Object inputValidity = getInputValidity(context, input); if (inputValidity == null) return null; return new KeyValidity(inputCacheKey, inputValidity); } public KeyValidity getInputKeyValidity(PipelineContext context, String inputName) { return getInputKeyValidity(context, getInputByName(inputName)); } /** * Find the last modified timestamp of a particular input. * * @param pipelineContext pipeline context * @param input input to check * @param inputMustBeInCache if true, also return 0 if the input is not currently in cache * @return timestamp, <= 0 if unknown */ public long findInputLastModified(PipelineContext pipelineContext, ProcessorInput input, boolean inputMustBeInCache) { final long lastModified; { final KeyValidity keyValidity = getInputKeyValidity(pipelineContext, input); if (keyValidity == null || inputMustBeInCache && !isInputInCache(pipelineContext, keyValidity)) { lastModified = 0; } else { lastModified = (keyValidity.validity != null) ? findLastModified(keyValidity.validity) : 0; } } if (logger.isDebugEnabled()) logger.debug("Last modified: " + lastModified); return lastModified; } /** * Recursively find the last modified timestamp of a validity object. Supported types are Long and List<Long>. The * latest timestamp is returned. * * @param validity validity object * @return timestamp, <= 0 if unknown */ public static long findLastModified(Object validity) { if (validity instanceof Long) { return ((Long) validity).longValue(); } else if (validity instanceof List) { final List list = (List) validity; long latest = 0; for (Iterator i = list.iterator(); i.hasNext();) { final Object o = i.next(); latest = Math.max(latest, findLastModified(o)); } return latest; } else { return 0; } } public static class ProcessorKey { private int hash = 0; private List<WeakReference<ProcessorImpl>> processors; public ProcessorKey(Stack<ProcessorImpl> parents, ProcessorImpl child) { processors = new ArrayList<WeakReference<ProcessorImpl>>(); if (parents != null) for (ProcessorImpl parent: parents) processors.add(new WeakReference(parent)); processors.add(new WeakReference(child)); // NOTE: Use get() which appears to be faster (profiling) than using an iterator in such a bottleneck for (int i = 0; i < processors.size(); i++) { WeakReference<ProcessorImpl> processor = processors.get(i); hash += processor.get().hashCode() * 31; } } public int hashCode() { return hash; } public boolean equals(Object other) { if (!(other instanceof ProcessorKey)) return false; final List<WeakReference<ProcessorImpl>> otherProcessors = ((ProcessorKey) other).processors; int processorsSize = processors.size(); if (processorsSize != otherProcessors.size()) return false; // NOTE: Use get() which appears to be faster (profiling) than using an iterator in such a bottleneck for (int i = 0; i < processorsSize; i++) { ProcessorImpl p1 = processors.get(i).get(); ProcessorImpl p2 = otherProcessors.get(i).get(); if (p1 == null || p2 == null || p1 != p2) return false; } return true; } public String toString() { StringBuilder result = null; for (WeakReference<ProcessorImpl> processor: processors) { if (result == null) { result = new StringBuilder(hash + ": ["); } else { result.append(", "); } if (processor == null) { result.append("Garbage collected processor"); } else { result.append(Integer.toString(processor.get().hashCode())); result.append(": "); result.append(processor.get().getClass().getName()); } } result.append("]"); return result.toString(); } } public static class KeyValidity { public KeyValidity(CacheKey key, Object validity) { this.key = key; this.validity = validity; } public CacheKey key; public Object validity; } // For backward compatibility public abstract class ProcessorOutputImpl extends org.orbeon.oxf.processor.impl.ProcessorOutputImpl { public ProcessorOutputImpl(Class processorClass, String name) { super(processorClass, name); } public ProcessorOutputImpl(Processor processor, String name) { super(processor, name); } } }