/** * Copyright (C) 2012 cogroo <cogroo@cogroo.org> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package cogroo.uima; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import org.apache.uima.cas.CAS; import org.apache.uima.cas.CASException; import org.apache.uima.cas.FSIterator; import org.apache.uima.cas.impl.XmiCasSerializer; import org.apache.uima.collection.CasConsumer_ImplBase; import org.apache.uima.examples.SourceDocumentInformation; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.resource.ResourceProcessException; import org.apache.uima.util.XMLSerializer; import org.xml.sax.SAXException; /** * A simple CAS consumer that writes the CAS to XMI format. * <p> * This CAS Consumer takes one parameter: * <ul> * <li><code>OutputDirectory</code> - path to directory into which output files * will be written</li> * </ul> */ public class XmiWriterCasConsumer extends CasConsumer_ImplBase { /** * Name of configuration parameter that must be set to the path of a directory * into which the output files will be written. */ public static final String PARAM_OUTPUTDIR = "OutputDirectory"; private File mOutputDir; private int mDocNum; public void initialize() throws ResourceInitializationException { mDocNum = 0; mOutputDir = new File((String) getConfigParameterValue(PARAM_OUTPUTDIR)); if (!mOutputDir.exists()) { mOutputDir.mkdirs(); } } /** * Processes the CAS which was populated by the TextAnalysisEngines. <br> * In this case, the CAS is converted to XMI and written into the output file * . * * @param aCAS * a CAS which has been populated by the TAEs * * @throws ResourceProcessException * if there is an error in processing the Resource * * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS) */ public void processCas(CAS aCAS) throws ResourceProcessException { String modelFileName = null; JCas jcas; try { jcas = aCAS.getJCas(); } catch (CASException e) { throw new ResourceProcessException(e); } // retreive the filename of the input file from the CAS FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type) .iterator(); File outFile = null; if (it.hasNext()) { SourceDocumentInformation fileLoc = (SourceDocumentInformation) it.next(); File inFile; try { inFile = new File(new URL(fileLoc.getUri()).getPath()); String outFileName = inFile.getName(); if (fileLoc.getOffsetInSource() > 0) { outFileName += ("_" + fileLoc.getOffsetInSource()); } outFileName += ".xmi"; outFile = new File(mOutputDir, outFileName); modelFileName = mOutputDir.getAbsolutePath() + "/" + inFile.getName() + ".ecore"; } catch (MalformedURLException e1) { // invalid URL, use default processing below } } if (outFile == null) { FSIterator s = jcas.getAnnotationIndex(GoldenSentence.type).iterator(); if (s.hasNext()) { GoldenSentence st = (GoldenSentence) s.next(); String id = st.getId(); id = id.substring(0, id.indexOf("-")); if (id != null && id.length() > 0) { outFile = new File(mOutputDir, id + ".xmi"); } } } if (outFile == null) { outFile = new File(mOutputDir, "doc" + mDocNum++ + ".xmi"); } // serialize XCAS and write to output file try { writeXmi(jcas.getCas(), outFile, modelFileName); } catch (IOException e) { throw new ResourceProcessException(e); } catch (SAXException e) { throw new ResourceProcessException(e); } } /** * Serialize a CAS to a file in XMI format * * @param aCas * CAS to serialize * @param name * output file * @throws SAXException * @throws Exception * * @throws ResourceProcessException */ private void writeXmi(CAS aCas, File name, String modelFileName) throws IOException, SAXException { FileOutputStream out = null; try { // write XMI out = new FileOutputStream(name); XmiCasSerializer ser = new XmiCasSerializer(aCas.getTypeSystem()); XMLSerializer xmlSer = new XMLSerializer(out, false); ser.serialize(aCas, xmlSer.getContentHandler()); } finally { if (out != null) { out.close(); } } } }