/**
* Copyright 2008 - 2009 Pro-Netics S.P.A.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.pronetics.madstore.crawler.transformer.impl;
import it.pronetics.madstore.crawler.model.Page;
import it.pronetics.madstore.crawler.transformer.Transformer;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.URIResolver;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* {@link it.pronetics.madstore.crawler.transformer.Transformer} implementation using XSLT transformations for hAtom microformats
* to Atom document conversion.
*
* @author Salvatore Incandela
* @author Sergio Bossa
*/
public class HAtomToAtomTransformer implements Transformer {

    private static final Logger LOG = LoggerFactory.getLogger(HAtomToAtomTransformer.class);
    private static final String XSL_LOCATION = "xsl/";
    private static final String XSL_FILE = "hAtom2Atom.xsl";
    private static final String ENCODING = "UTF-8";

    /**
     * Compiled stylesheet, or <code>null</code> when it could not be loaded at construction time.
     * A fresh {@link javax.xml.transform.Transformer} is created from it for every transformation.
     */
    private final Templates templates;

    /**
     * Create the transformer, compiling the stylesheet found on the context class loader
     * under <code>xsl/hAtom2Atom.xsl</code>.
     * <p>
     * A stylesheet that is missing or cannot be compiled is logged as an error and does not make
     * construction fail; in that case {@link #transform(Page)} always returns an empty array.
     */
    public HAtomToAtomTransformer() {
        this.templates = loadTemplates();
    }

    /**
     * Apply the stylesheet to the page data, passing the page link as the
     * <code>source-uri</code> stylesheet parameter.
     *
     * @param page The page to transform; its data is fed to the XSLT processor as UTF-8 bytes.
     * @return The bytes produced by the transformation, or an empty array if the page data is empty,
     * the stylesheet is not available, or the transformation fails (failures are logged, not rethrown).
     */
    public byte[] transform(Page page) {
        if (templates == null) {
            // The stylesheet failed to load in the constructor: report it explicitly rather than
            // failing with a NullPointerException on every call.
            LOG.warn("Stylesheet {} is not available, skipping transformation: {}", XSL_LOCATION + XSL_FILE, page.getLink());
            return new byte[0];
        }
        try {
            LOG.info("Transforming: {}", page.getLink());
            byte[] data = page.getData().getBytes(ENCODING);
            if (data.length == 0) {
                return new byte[0];
            }
            // In-memory streams hold no system resources, so no explicit close is required.
            ByteArrayOutputStream result = new ByteArrayOutputStream();
            javax.xml.transform.Transformer transformer = templates.newTransformer();
            transformer.setParameter("source-uri", page.getLink().getLink());
            // NOTE(review): page data presumably comes from crawled (untrusted) documents and is parsed
            // with the default XML parser settings; confirm whether external entity resolution
            // should be restricted here.
            transformer.transform(new StreamSource(new ByteArrayInputStream(data)), new StreamResult(result));
            return result.toByteArray();
        } catch (Exception ex) {
            // Best effort: a page that cannot be transformed yields an empty result.
            LOG.info("Transformation abnormally terminated: {}", page.getLink());
            LOG.warn(ex.getMessage());
            LOG.debug(ex.getMessage(), ex);
            return new byte[0];
        }
    }

    /**
     * Load and compile the stylesheet from the context class loader.
     *
     * @return The compiled stylesheet, or <code>null</code> if it is missing or invalid.
     */
    private static Templates loadTemplates() {
        String resource = XSL_LOCATION + XSL_FILE;
        InputStream xslStream = Thread.currentThread().getContextClassLoader().getResourceAsStream(resource);
        if (xslStream == null) {
            LOG.error("Stylesheet not found on the classpath: {}", resource);
            return null;
        }
        try {
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            transformerFactory.setURIResolver(new CustomUriResolver());
            return transformerFactory.newTemplates(new StreamSource(xslStream));
        } catch (TransformerConfigurationException e) {
            LOG.error(e.getMessage(), e);
            return null;
        } finally {
            closeQuietly(xslStream);
        }
    }

    /**
     * Close the given stream, logging (rather than propagating) any failure.
     */
    private static void closeQuietly(InputStream stream) {
        try {
            stream.close();
        } catch (IOException e) {
            LOG.debug(e.getMessage(), e);
        }
    }

    /**
     * Resolves every URI referenced by the stylesheet against the <code>xsl/</code>
     * location on the context class loader, ignoring the base URI.
     */
    private static class CustomUriResolver implements URIResolver {

        /**
         * @param href The referenced URI, looked up relative to <code>xsl/</code>.
         * @param base Ignored.
         * @return A source reading the referenced classpath resource.
         * @throws TransformerException If the resource cannot be found.
         */
        public Source resolve(String href, String base) throws TransformerException {
            String resource = XSL_LOCATION + href;
            InputStream stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(resource);
            if (stream == null) {
                // Fail with the resource name instead of handing a source without any input to the processor.
                throw new TransformerException("Cannot resolve stylesheet resource on the classpath: " + resource);
            }
            return new StreamSource(stream);
        }
    }
}