/* $HeadURL:: $ * $Id$ * * Copyright (c) 2006-2010 by Public Library of Science * http://plos.org * http://ambraproject.org * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ambraproject.xml.transform; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.StringReader; import java.util.Collections; import java.util.HashSet; import java.util.Set; /** * Cache entities for an {@link org.xml.sax.XMLReader}. * * For example: (also see {@link org.ambraproject.xml.transform.cache.CachedSource}) * <pre> * Transformer transformer = ... * URLRetriever retriever = ... * InputSource myInputSource = new InputSource(new FileReader(inFileName)); * EntityResolver resolver = new CustomEntityResolver(retriever); * EntityResolvingSource source = new EntityResolvingSource(myInputSource, resolver); * transformer.transform(source, new StreamResult(outFileName)); * </pre> * * @author Eric Brown and Ronald Tschalär * @version $Id$ */ public class CustomEntityResolver implements EntityResolver { private static final Logger log = LoggerFactory.getLogger(CustomEntityResolver.class); private URLRetriever retriever; private Set<String> entityUrlsToIgnore; /** * Create a new EntityResolver that will use the supplied <code>URLRetriever</code> to * retrieve entities. * * @param retriever the object to retrieve entities. */ public CustomEntityResolver(URLRetriever retriever) { this.retriever = retriever; entityUrlsToIgnore = Collections.EMPTY_SET; } /** * Sets the URLs of entities that should never attempted to be loaded. * If this method is called more than once, only the last call will take effect. * * @param toIgnore array of entity URLs that should not be loaded */ public void setEntityUrlsToIgnore(String... toIgnore) { Set<String> set = new HashSet<String>(); for (String s : toIgnore) { set.add(s); } entityUrlsToIgnore = Collections.unmodifiableSet(set); } /** * Resolve the specified entity using the configured <code>URLRetriever</code> (and any * of its delegates). * * @param publicId The public identifier of the external entity being referenced, or null if * none was supplied. * @param systemId The system identifier of the external entity being referenced. * @return An InputSource object describing the new input source, or null to request that the * parser open a regular URI connection to the system identifier. * @throws IOException Indicates a problem retrieving the entity. */ public InputSource resolveEntity(String publicId, String systemId) throws IOException { if (log.isDebugEnabled()) log.debug("Resolving entity '" + systemId + "'"); if (entityUrlsToIgnore.contains(systemId)) { log.debug("Intentionally ignoring " + systemId); // Returning null will cause the parent EntityLoader to issue an HTTP request, so we // just return the empty string. return new InputSource(new StringReader("")); } else { byte[] res = retriever.retrieve(systemId, publicId); if (log.isDebugEnabled()) log.debug("Entity '" + systemId + "' " + (res != null ? "found" : "not found")); if (res == null) return null; InputSource is = new InputSource(new ByteArrayInputStream(res)); is.setPublicId(publicId); is.setSystemId(systemId); return is; } } }