/*
* Copyright (C) 2012 Jan Pokorsky
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cz.cas.lib.proarc.common.xml;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.URIResolver;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
/**
 * Transforms MARCXML and MODS documents to various {@link Format formats}.
 *
 * <p>Every stylesheet registered for a {@link Format} is compiled once, when
 * this class is initialized, and cached as {@link Templates}. A stylesheet
 * that cannot be resolved or compiled is logged with {@link Level#SEVERE} and
 * its format is left without a template; asking for such a format later fails
 * with a {@link TransformerConfigurationException}.
 *
 * @author Jan Pokorsky
 * @see <a href='http://www.loc.gov/standards/marcxml/'>MARC 21 XML Schema</a>
 * @see <a href='http://www.loc.gov/standards/mods/mods-conversions.html'>MODS Conversions</a>
 */
public final class Transformers {

    private static final Logger LOG = Logger.getLogger(Transformers.class.getName());

    // Stylesheet identifiers. The http:// ones are mapped to classpath
    // resources by SimpleResolver.CATALOG; the others are classpath paths.
//    private static final String DC_RDF_XSL_PATH = "http://www.loc.gov/standards/marcxml/xslt/MARC21slim2RDFDC.xsl";
    private static final String MODS_3_XSL_PATH = "http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3-4.xsl";
    private static final String OAIMARC2MARC21slim_XSL_PATH = "http://www.loc.gov/standards/marcxml/xslt/OAIMARC2MARC21slim.xsl";
    private static final String MARC21slim2HTML_XSL_PATH = "http://www.loc.gov/standards/marcxml/xslt/MARC21slim2HTML.xsl";
    private static final String MODS2HTML_XSL_PATH = "http://www.loc.gov/standards/mods/mods.xsl";
    private static final String MODS2TITLE_XSL_PATH = "/xml/mods2Title.xsl";
    private static final String MODS2FEDORA_LABEL_XSL_PATH = "/xml/mods2FedoraLabel.xsl";
    private static final String ALEPHXSERVERFIX_XSL_PATH = "/xml/alephOaiMarcFix.xsl";

    /** Stylesheet identifier registered for each format. */
    private static final Map<Format, String> FORMAT2XSL;

    /** Compiled stylesheets; contains only formats whose stylesheet was compiled successfully. */
    private static final Map<Format, Templates> FORMAT2TEMPLATES;

    static {
        FORMAT2TEMPLATES = new EnumMap<Format, Templates>(Format.class);
        FORMAT2XSL = new EnumMap<Format, String>(Format.class);
//        FORMAT2XSL.put(Format.MarcxmlAsDcRdf, DC_RDF_XSL_PATH);
        FORMAT2XSL.put(Format.MarcxmlAsMods34, MODS_3_XSL_PATH);
        FORMAT2XSL.put(Format.MarcxmlAsMods3, MODS_3_XSL_PATH);
        FORMAT2XSL.put(Format.OaimarcAsMarc21slim, OAIMARC2MARC21slim_XSL_PATH);
        FORMAT2XSL.put(Format.MarcxmlAsHtml, MARC21slim2HTML_XSL_PATH);
        FORMAT2XSL.put(Format.ModsAsHtml, MODS2HTML_XSL_PATH);
        FORMAT2XSL.put(Format.ModsAsTitle, MODS2TITLE_XSL_PATH);
        FORMAT2XSL.put(Format.ModsAsFedoraLabel, MODS2FEDORA_LABEL_XSL_PATH);
        FORMAT2XSL.put(Format.AlephOaiMarcFix, ALEPHXSERVERFIX_XSL_PATH);
        initTemplates();
    }

    /**
     * Transforms the input with the stylesheet of the given format and no
     * stylesheet parameters.
     *
     * @param input document to transform
     * @param format requested transformation
     * @return the result as a new in-memory source
     * @throws TransformerException if the format has no compiled stylesheet
     *         or the transformation fails
     */
    public Source transform(Source input, Format format) throws TransformerException {
        return transform(input, format, Collections.<String, Object>emptyMap());
    }

    /**
     * Transforms the input with the stylesheet of the given format.
     *
     * @param input document to transform
     * @param format requested transformation
     * @param params stylesheet parameters; {@code null} is treated as no parameters
     * @return the result as a new in-memory source
     * @throws TransformerException if the format has no compiled stylesheet
     *         or the transformation fails
     */
    public Source transform(Source input, Format format, Map<String, Object> params) throws TransformerException {
        return toSource(transformAsBytes(input, format, params));
    }

    /**
     * Transforms the input with the stylesheet of the given format and no
     * stylesheet parameters.
     *
     * @param input document to transform
     * @param format requested transformation
     * @return the serialized result
     * @throws TransformerException if the format has no compiled stylesheet
     *         or the transformation fails
     */
    public byte[] transformAsBytes(Source input, Format format) throws TransformerException {
        return transformAsBytes(input, format, Collections.<String, Object>emptyMap());
    }

    /**
     * Transforms the input with the stylesheet of the given format.
     *
     * @param input document to transform
     * @param format requested transformation
     * @param params stylesheet parameters; {@code null} is treated as no parameters
     * @return the serialized result
     * @throws TransformerException if the format has no compiled stylesheet
     *         or the transformation fails
     */
    public byte[] transformAsBytes(Source input, Format format, Map<String, Object> params) throws TransformerException {
        Transformer t = createTransformer(format);
        if (params != null) {
            for (Map.Entry<String, Object> param : params.entrySet()) {
                t.setParameter(param.getKey(), param.getValue());
            }
        }
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        Result output = new StreamResult(buffer);
        t.transform(input, output);
        return buffer.toByteArray();
    }

    /**
     * Wraps the bytes in a stream source.
     *
     * @param buffer serialized document
     * @return a source reading from the given array
     */
    public Source toSource(byte[] buffer) {
        return new StreamSource(new ByteArrayInputStream(buffer));
    }

    /**
     * Serializes the source with an identity transformation, appends the
     * serialized text to {@code dump} and returns a fresh source with the
     * same content, so the caller can keep processing the document.
     *
     * @param source document to dump
     * @param dump receives the serialized document decoded as UTF-8
     * @return a new in-memory source holding the serialized document
     * @throws IllegalStateException if the serialization fails
     */
    public Source dump(Source source, StringBuilder dump) {
        try {
            byte[] content = serialize(source);
            dump.append(new String(content, "UTF-8"));
            return toSource(content);
        } catch (TransformerException ex) {
            throw new IllegalStateException(ex);
        } catch (UnsupportedEncodingException ex) {
            throw new IllegalStateException(ex);
        }
    }

    /**
     * Serializes the source with an identity transformation and returns a
     * fresh source with the same content.
     *
     * <p>Writing the copy to a temporary file is currently disabled, so
     * {@code filename} is not used.
     *
     * @param source document to copy
     * @param filename currently ignored
     * @return a new in-memory source holding the serialized document
     * @throws IllegalStateException if the serialization fails
     */
    public Source dump2Temp(Source source, String filename) {
        try {
            byte[] content = serialize(source);
//            t.transform(new StreamSource(new ByteArrayInputStream(buffer.toByteArray())),
//                    new StreamResult(new File("/tmp/aleph/" + filename)));
            return toSource(content);
        } catch (TransformerException ex) {
            throw new IllegalStateException(ex);
        }
    }

    /** Runs an identity transformation of the source into a byte array. */
    private static byte[] serialize(Source source) throws TransformerException {
        Transformer identity = TransformerFactory.newInstance().newTransformer();
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        identity.transform(source, new StreamResult(buffer));
        return buffer.toByteArray();
    }

    /**
     * Resolves the stylesheet registered for the format.
     *
     * @param format requested transformation
     * @param resolver resolver asked for the registered stylesheet identifier
     * @return whatever the resolver returns; {@code null} when it cannot
     *         resolve the identifier
     * @throws TransformerException if the resolver fails
     */
    static Source getXsl(Format format, URIResolver resolver) throws TransformerException {
        String path = FORMAT2XSL.get(format);
        return resolver.resolve(path, path);
    }

    /**
     * Compiles the stylesheet of the given format.
     *
     * @throws TransformerException if the stylesheet cannot be resolved or compiled
     */
    private static Templates createTemplates(Format recordFormat) throws TransformerException {
        TransformerFactory factory = TransformerFactory.newInstance();
//        factory.setAttribute("debug", true);
        SimpleResolver resolver = new SimpleResolver();
        factory.setURIResolver(resolver);
        Source xsl = getXsl(recordFormat, resolver);
        if (xsl == null) {
            // A null source must not reach the factory: it would fail with a
            // runtime exception that initTemplates does not catch and the
            // whole class would fail to initialize.
            throw new TransformerConfigurationException(
                    "Cannot resolve stylesheet of " + recordFormat + ": " + FORMAT2XSL.get(recordFormat));
        }
        return factory.newTemplates(xsl);
    }

    /**
     * Creates a transformer from the cached templates of the given format.
     *
     * @throws TransformerConfigurationException if no templates are cached for the format
     */
    private static Transformer createTransformer(Format recordFormat) throws TransformerConfigurationException {
        Templates templates = FORMAT2TEMPLATES.get(recordFormat);
        if (templates == null) {
            throw new TransformerConfigurationException("Cannot transform " + recordFormat);
        }
        return templates.newTransformer();
    }

    /**
     * Compiles all registered stylesheets. A failure is logged and the
     * remaining stylesheets are still processed.
     */
    private static void initTemplates() {
        for (Map.Entry<Format, String> entry : FORMAT2XSL.entrySet()) {
            try {
                Templates templates = createTemplates(entry.getKey());
                FORMAT2TEMPLATES.put(entry.getKey(), templates);
            } catch (TransformerException ex) {
                LOG.log(Level.SEVERE, entry.getValue(), ex);
            }
        }
    }

    /**
     * Resolves stylesheet references against resources of this class, which
     * allows to run Transformers offline.
     */
    static final class SimpleResolver implements URIResolver {

        /** mapping to offline resources */
        private static final Map<String, String> CATALOG = new HashMap<String, String>();

        static {
            CATALOG.put(MODS_3_XSL_PATH, "/xml/MARC21slim2MODS3.xsl");
//            CATALOG.put(DC_RDF_XSL_PATH, "/xslts/MARC21slim2RDFDC.xsl");
            CATALOG.put(OAIMARC2MARC21slim_XSL_PATH, "/xml/OAIMARC2MARC21slim.xsl");
            CATALOG.put(MARC21slim2HTML_XSL_PATH, "/xml/MARC21slim2HTML.xsl");
            CATALOG.put(MODS2HTML_XSL_PATH, "/xml/mods2html.xsl");
            CATALOG.put("http://www.loc.gov/standards/mods/modsDictionary.xml", "/xml/modsDictionary.xml");
            CATALOG.put("http://www.loc.gov/standards/marcxml/xslt/MARC21slimUtils.xsl", "/xml/MARC21slimUtils.xsl");
            CATALOG.put(MODS2TITLE_XSL_PATH, MODS2TITLE_XSL_PATH);
            CATALOG.put(MODS2FEDORA_LABEL_XSL_PATH, MODS2FEDORA_LABEL_XSL_PATH);
            CATALOG.put(ALEPHXSERVERFIX_XSL_PATH, ALEPHXSERVERFIX_XSL_PATH);
        }

        /**
         * Looks the reference up in the catalog; a reference missing in the
         * catalog is searched as a resource under {@code /xml/}.
         *
         * @param href reference to resolve
         * @param base not used
         * @return a source pointing to the resource, or {@code null} when no
         *         such resource exists
         */
        @Override
        public Source resolve(String href, String base) throws TransformerException {
            String path = CATALOG.get(href);
            if (path == null) {
                path = "/xml/" + href;
            }
            URL resource = SimpleResolver.class.getResource(path);
            if (resource != null) {
//                Transformers.LOG.info(String.format("\nhref: %s, \nbase: %s, \npath: %s, \nres: %s", href, base, path, resource));
                return new StreamSource(resource.toExternalForm());
            }
            // delegates to system resolver
            return null;
        }
    }

    /** Supported transformations. */
    public enum Format {

        /**
         * Use {@link #MarcxmlAsMods3} instead.
         * @deprecated
         */
        @Deprecated
        MarcxmlAsMods34,
        /** The latest MODS 3 version. */
        MarcxmlAsMods3,
        /*MarcxmlAsDcRdf,*/
        OaimarcAsMarc21slim,
        MarcxmlAsHtml,
        ModsAsHtml,
        ModsAsTitle,
        ModsAsFedoraLabel,
        AlephOaiMarcFix;
    }
}