/* * Copyright 2010 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.io.xml; import static org.apache.commons.io.IOUtils.closeQuietly; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.OutputStream; import java.net.URL; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.CASException; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.fit.descriptor.MimeTypeCapability; import org.apache.uima.fit.descriptor.TypeCapability; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.util.CasToInlineXml; import de.tudarmstadt.ukp.dkpro.core.api.io.JCasFileWriter_ImplBase; import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes; import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils; /** * Writes an approximation of the content of a textual CAS as an inline XML file. Optionally applies * an XSLT stylesheet. * <p> * Note this component inherits the restrictions from {@link CasToInlineXml}: * * <ul> * <li>Features whose values are FeatureStructures are not represented.</li> * <li>Feature values which are strings longer than 64 characters are truncated.</li> * <li>Feature values which are arrays of primitives are represented by strings that look like [ * xxx, xxx ]</li> * <li>The Subject of analysis is presumed to be a text string.</li> * <li>Some characters in the document's Subject-of-analysis are replaced by blanks, because the * characters aren't valid in xml documents.</li> * <li>It doesn't work for annotations which are overlapping, because these cannot be properly * represented as properly - nested XML.</li> * </ul> * * @since 1.1.0 */ @MimeTypeCapability({MimeTypes.APPLICATION_XML, MimeTypes.TEXT_XML}) @TypeCapability( inputs={ "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData"}) public class InlineXmlWriter extends JCasFileWriter_ImplBase { /** * XSLT stylesheet to apply. */ public static final String PARAM_XSLT = "Xslt"; @ConfigurationParameter(name=PARAM_XSLT, mandatory=false) private String xslt; private CasToInlineXml cas2xml; private Transformer transformer; @Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); if (xslt != null) { TransformerFactory tf = TransformerFactory.newInstance(); try { URL url = ResourceUtils.resolveLocation(xslt, this, getContext()); transformer = tf.newTransformer(new StreamSource(url.openStream())); } catch (Exception e) { throw new ResourceInitializationException(e); } } cas2xml = new CasToInlineXml(); } @Override public void process(final JCas aJCas) throws AnalysisEngineProcessException { OutputStream docOS = null; try { docOS = getOutputStream(aJCas, ".xml"); final String xmlAnnotations = cas2xml.generateXML(aJCas.getCas()); if (transformer != null) { transformer.transform( new StreamSource(new ByteArrayInputStream(xmlAnnotations.getBytes("UTF-8"))), new StreamResult(docOS)); } else { docOS.write(xmlAnnotations.getBytes("UTF-8")); } } catch (final CASException e) { throw new AnalysisEngineProcessException(e); } catch (final IOException e) { throw new AnalysisEngineProcessException(e); } catch (TransformerException e) { throw new AnalysisEngineProcessException(e); } finally { closeQuietly(docOS); } } }