/*
* $HeadURL$
* $Id$
* Copyright (c) 2006-2013 by Public Library of Science http://plos.org http://ambraproject.org
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ambraproject.solr;
import net.sf.saxon.TransformerFactoryImpl;
import net.sf.saxon.lib.SaxonOutputKeys;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
/**
 * Class that transforms an NLM DTD journal article into the document that we send to
 * solr for indexing.
 * <p/>
 * This code is currently not used anywhere in the ambra project proper, but is used
 * by the (old) plos queue and the (new) indexer minion.
 * <p/>
 * Typical use: call {@link #setXslTemplateMap(Map)} and {@link #init()} (in either
 * order), then call {@link #transform(Document)} for each article. Compiled
 * stylesheets are cached as {@link Templates}; every call to
 * {@link #transform(Document)} works on its own {@link Transformer} instance.
 */
public class XmlTransformer {

  private static final Logger log = LoggerFactory.getLogger(XmlTransformer.class);

  /** Output properties applied to every transformer; {@code null} until {@link #init()} runs. */
  private Properties defaultProperties;

  /** Compiled stylesheets keyed by DTD version; {@code null} until {@link #setXslTemplateMap(Map)} runs. */
  private Map<String, Templates> templatesMap;

  /**
   * Sets up the default serialization properties (UTF-8, XML method, indented,
   * no XML declaration) that are applied to each transformer used by
   * {@link #transform(Document)}.
   */
  public void init() {
    // Build locally and publish in one assignment so the field never holds a partial set.
    Properties props = new Properties();
    props.setProperty(OutputKeys.ENCODING, "UTF-8");
    props.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    props.setProperty(OutputKeys.INDENT, "yes");
    props.setProperty(OutputKeys.METHOD, "xml");
    props.setProperty(OutputKeys.MEDIA_TYPE, "text/xml");
    defaultProperties = props;
  }

  /**
   * Transform the document to a format solr can understand.
   *
   * This supports multiple versions of the DTD. This method detects the DTD
   * and uses the appropriate transformer.
   *
   * @param article the article XML; must not be null
   * @return the transformed document, serialized as a string
   * @throws TransformerException if the transformer cannot be created or the transformation fails
   * @throws ParserConfigurationException declared for backward compatibility with existing callers
   * @throws IllegalArgumentException if the article is null, has no root element, or its
   *         DTD version is missing or unsupported
   * @throws IllegalStateException if no XSL templates have been configured
   */
  public String transform(Document article)
      throws TransformerException, ParserConfigurationException {
    if (article == null) {
      throw new IllegalArgumentException("Article document must not be null");
    }
    // Serialize to characters, not bytes: the result is returned as a String, so
    // going through a byte stream would require decoding with exactly the charset
    // the serializer used (the previous code decoded with the platform default).
    StringWriter out = new StringWriter();
    try {
      Transformer transformer = getTranslet(article);
      transformer.transform(new DOMSource(article), new StreamResult(out));
    } catch (TransformerException e) {
      // Pass the exception to the logger so the stack trace lands in the log
      // rather than on stderr.
      log.error("Transformation error: " + e.getMessage(), e);
      throw e;
    }
    return out.toString();
  }

  /**
   * Get the XSL specific to the NLM DTD Version
   *
   * @param doc article XML
   *
   * @return a new XSL transformer that can be used on doc, with the default
   *         output properties applied if {@link #init()} has been called
   * @throws TransformerException if the transformer cannot be created from the compiled stylesheet
   * @throws IllegalArgumentException if the document has no root element, or the DTD
   *         version is missing or has no configured stylesheet
   * @throws IllegalStateException if no XSL templates have been configured
   */
  private Transformer getTranslet(Document doc) throws TransformerException {
    if (templatesMap == null) {
      throw new IllegalStateException("XSL template map has not been set");
    }
    if (doc.getDocumentElement() == null) {
      throw new IllegalArgumentException("Cannot identify DTD version of the article XML");
    }
    String key = doc.getDocumentElement().getAttribute("dtd-version");
    log.debug("Got a dtd-version of: {}", key);

    // Trim before the emptiness check so a whitespace-only attribute is reported
    // as "cannot identify" rather than as an unsupported (blank) version.
    String version = (key == null) ? "" : key.trim();
    if (version.length() == 0) {
      throw new IllegalArgumentException("Cannot identify DTD version of the article XML");
    }

    Templates templates = templatesMap.get(version);
    if (templates == null) {
      throw new IllegalArgumentException("DTD version " + version + " is not supported");
    }

    // A Transformer must not be shared between concurrently running threads, so a
    // fresh one is created per call from the reusable compiled Templates.
    Transformer transformer = templates.newTransformer();
    // Applied here rather than in setXslTemplateMap so the result does not depend
    // on whether init() ran before or after the setter.
    if (defaultProperties != null) {
      transformer.setOutputProperties(defaultProperties);
    }
    return transformer;
  }

  /**
   * Sets the .xsl files used by this class to transform NLM article documents.
   *
   * @param xslTemplateMap a map from a string indicating a DTD version (e.g. "3.0")
   *                       to a string that is a (classpath-relative) filename of the .xsl file for
   *                       that version
   * @throws TransformerConfigurationException if a style sheet cannot be compiled
   * @throws IllegalArgumentException if the map is null or a style sheet cannot be found
   */
  public void setXslTemplateMap(Map<String, String> xslTemplateMap)
      throws TransformerConfigurationException {
    if (xslTemplateMap == null) {
      throw new IllegalArgumentException("transformerMap property not initialized");
    }
    // NOTE(review): newInstance() here appears to be the static method inherited from
    // javax.xml.transform.TransformerFactory, so the implementation is chosen by the
    // JAXP lookup rather than being forced to Saxon -- confirm this is intended.
    final TransformerFactory tFactory = TransformerFactoryImpl.newInstance();

    // Compile everything into a local map first; the field is only replaced once
    // every style sheet has loaded, so a failure leaves the previous state intact.
    Map<String, Templates> compiled = new HashMap<String, Templates>(xslTemplateMap.size());
    for (Map.Entry<String, String> entry : xslTemplateMap.entrySet()) {
      compiled.put(entry.getKey(), loadTemplates(tFactory, entry.getValue()));
    }
    templatesMap = compiled;
  }

  /**
   * Compiles one classpath-relative style sheet, closing the resource stream afterwards.
   *
   * @param factory  the factory used to compile the style sheet
   * @param filename classpath-relative name of the .xsl file
   * @return the compiled style sheet
   * @throws TransformerConfigurationException if the style sheet cannot be compiled
   * @throws IllegalArgumentException if the style sheet cannot be found on the classpath
   */
  private Templates loadTemplates(TransformerFactory factory, String filename)
      throws TransformerConfigurationException {
    InputStream is = getClass().getClassLoader().getResourceAsStream(filename);
    if (is == null) {
      throw new IllegalArgumentException("Could not locate style sheet: " + filename);
    }
    try {
      return factory.newTemplates(new StreamSource(is));
    } finally {
      try {
        is.close();
      } catch (IOException e) {
        // A close failure must not mask the compilation outcome.
        log.warn("Could not close style sheet stream: " + filename, e);
      }
    }
  }
}