Java Examples for org.apache.uima.cas.impl.XmiCasSerializer
The following Java examples illustrate the usage of org.apache.uima.cas.impl.XmiCasSerializer. These source code samples are taken from different open source projects.
Example 1
Project: OMTwitter-master File: XmiWriteConsumer.java View source code |
/**
 * Serializes the given CAS as XMI into a sequentially numbered file
 * (name pattern {@code %010d.xmi}) below {@code outputDir}.
 *
 * @param aCAS the CAS to write out
 * @throws ResourceProcessException if the file cannot be written or the XMI serialization fails
 */
public void processCas(CAS aCAS) throws ResourceProcessException {
    File outFile = new File(outputDir, String.format("%010d.xmi", ++entityCnt));
    // try-with-resources closes the writer even when serialization fails part-way,
    // so a failed document no longer leaks an open file handle.
    try (OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(outFile), "UTF-8")) {
        XmiCasSerializer serializer = new XmiCasSerializer(aCAS.getTypeSystem());
        XMLSerializer xmlSerializer = new XMLSerializer(osw, true);
        serializer.serialize(aCAS, xmlSerializer.getContentHandler());
    } catch (IOException | SAXException e) {
        logger.log(Level.WARNING, e.getMessage());
        throw new ResourceProcessException(e);
    }
}
Example 2
Project: sad-analyzer-master File: Ecore2UimaTypeSystem.java View source code |
/**
 * Derives a UIMA namespace from an Ecore namespace URI.
 *
 * For a URI with a path, leading slashes are dropped, a trailing ".ecore" is removed and the
 * remaining slashes become dots. For a URI without a path (a URN), the scheme-specific part is
 * used with colons turned into dots. In both cases hyphens are replaced by underscores.
 *
 * @param nsUri the namespace URI to convert
 * @return the UIMA namespace, or {@code null} for the special "no namespace" URI
 * @throws URISyntaxException if {@code nsUri} cannot be parsed as a URI
 */
private static String namespaceUri2UimaNamespace(String nsUri) throws URISyntaxException {
    // The special "no namespace URI" maps to the null UIMA namespace.
    if (XmiCasSerializer.DEFAULT_NAMESPACE_URI.equals(nsUri)) {
        return null;
    }

    final java.net.URI parsed = new java.net.URI(nsUri);
    final String path = parsed.getPath();

    String namespace;
    if (path != null) {
        // URL form: skip every leading slash, then strip the ".ecore" suffix if present.
        int start = 0;
        while (start < path.length() && path.charAt(start) == '/') {
            start++;
        }
        namespace = path.substring(start);
        if (namespace.endsWith(".ecore")) {
            namespace = namespace.substring(0, namespace.length() - ".ecore".length());
        }
        namespace = namespace.replace('/', '.');
    } else {
        // URN form: there is no path, so work from the scheme-specific part.
        namespace = parsed.getSchemeSpecificPart().replace(':', '.');
    }
    return namespace.replace('-', '_');
}
Example 3
Project: dkpro-csniper-master File: CasHolder.java View source code |
/**
 * Writes the held CAS to the given object output.
 *
 * When no CAS is present a single {@code int} 0 is written. Otherwise the type system
 * description (XML) and the CAS (XMI) are written in that order, each as an {@code int}
 * length followed by the raw bytes.
 *
 * @param aOut the output to write to
 * @throws IOException if writing fails or if XML serialization raises a SAXException
 */
@Override
public void writeExternal(ObjectOutput aOut) throws IOException {
    // No CAS: emit only the zero marker and stop.
    if (cas == null) {
        aOut.writeInt(0);
        return;
    }

    final ByteArrayOutputStream xmiBuffer = new ByteArrayOutputStream();
    final ByteArrayOutputStream typeSystemBuffer = new ByteArrayOutputStream();
    try {
        XmiCasSerializer.serialize(cas, xmiBuffer);
        TypeSystemUtil.typeSystem2TypeSystemDescription(cas.getTypeSystem()).toXML(typeSystemBuffer);
    } catch (SAXException e) {
        throw new IOException(e);
    }

    // Type system description first ...
    final byte[] typeSystemBytes = typeSystemBuffer.toByteArray();
    aOut.writeInt(typeSystemBytes.length);
    aOut.write(typeSystemBytes);

    // ... followed by the CAS itself.
    final byte[] xmiBytes = xmiBuffer.toByteArray();
    aOut.writeInt(xmiBytes.length);
    aOut.write(xmiBytes);
}
Example 4
Project: SparkStreamingCTK-master File: CtakesFunction.java View source code |
@Override public String call(String paragraph) throws Exception { this.jcas.setDocumentText(paragraph); // final AnalysisEngineDescription aed = getFastPipeline(); // Outputs // from default and fast pipelines are identical ByteArrayOutputStream baos = new ByteArrayOutputStream(); SimplePipeline.runPipeline(this.jcas, this.aed); XmiCasSerializer xmiSerializer = new XmiCasSerializer(jcas.getTypeSystem()); XMLSerializer xmlSerializer = new XMLSerializer(baos, true); xmiSerializer.serialize(jcas.getCas(), xmlSerializer.getContentHandler()); this.jcas.reset(); return baos.toString("utf-8"); }
Example 5
Project: dkpro-core-master File: XmiWriter.java View source code |
/**
 * Writes the CAS as XMI to the stream obtained for the ".xmi" extension and, as long as
 * {@code typeSystemWritten} is still false, also writes the type system.
 *
 * @param aJCas the CAS to serialize
 * @throws AnalysisEngineProcessException wrapping any exception raised while writing
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    try (OutputStream xmiOut = getOutputStream(aJCas, ".xmi")) {
        XmiCasSerializer.serialize(aJCas.getCas(), null, xmiOut, prettyPrint, null);
        if (typeSystemWritten) {
            // Type system already emitted by an earlier call; nothing more to do.
            return;
        }
        writeTypeSystem(aJCas);
        typeSystemWritten = true;
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Example 6
Project: TemporalSearch-master File: XMIResultFormatter.java View source code |
@Override public String format(JCas jcas) throws Exception { ByteArrayOutputStream outStream = null; try { // Write XMI outStream = new ByteArrayOutputStream(); XmiCasSerializer ser = new XmiCasSerializer(jcas.getTypeSystem()); XMLSerializer xmlSer = new XMLSerializer(outStream, false); ser.serialize(jcas.getCas(), xmlSer.getContentHandler()); // Convert output stream to string // String newOut = outStream.toString("UTF-8"); String newOut = outStream.toString(); // System.err.println("NEWOUT:"+newOut); return newOut; } finally { if (outStream != null) { outStream.close(); } } }
Example 7
Project: uima_prolog-master File: Ecore2UimaTypeSystem.java View source code |
/**
 * Maps an Ecore namespace URI to the corresponding UIMA namespace.
 *
 * Convention: the UIMA namespace is the URI path with leading slashes removed, a trailing
 * ".ecore" removed and internal slashes converted to dots. URIs without a path (URNs) use
 * their scheme-specific part with colons converted to dots. Hyphens always become
 * underscores.
 *
 * @param nsUri the namespace URI to map
 * @return the UIMA namespace, or {@code null} when {@code nsUri} is the "no namespace" URI
 * @throws URISyntaxException if {@code nsUri} is not a syntactically valid URI
 */
private static String namespaceUri2UimaNamespace(String nsUri) throws URISyntaxException {
    if (XmiCasSerializer.DEFAULT_NAMESPACE_URI.equals(nsUri)) {
        // special "no namespace URI" -> null UIMA namespace
        return null;
    }

    java.net.URI uri = new java.net.URI(nsUri);
    String result = uri.getPath();
    boolean isUrn = (result == null);

    if (isUrn) {
        result = uri.getSchemeSpecificPart();
        result = result.replace(':', '.');
    } else {
        // Drop the whole run of leading slashes in one step.
        result = result.replaceFirst("^/+", "");
        if (result.endsWith(".ecore")) {
            int suffixStart = result.length() - 6;
            result = result.substring(0, suffixStart);
        }
        result = result.replace('/', '.');
    }

    result = result.replace('-', '_');
    return result;
}
Example 8
Project: termsuite-core-master File: TermSuitePreprocessor.java View source code |
/**
 * Exports the CAS of a document to a file below the output directory.
 *
 * The target path is the document URL with the input root directory replaced by the output
 * directory and the extension replaced by the lower-cased output format name. Parent
 * directories are created as needed.
 *
 * @param document the document the CAS belongs to; its URL determines the target path
 * @param cas the CAS to export
 * @throws TermSuiteException if the target path cannot be computed or the export fails
 */
private void exportCas(Document document, JCas cas) {
    String toFilePath;
    try {
        toFilePath = FileUtils.replaceRootDir(document.getUrl(), new File(inputDirectory).getCanonicalPath(), outputDirectory.get());
        // Locale.ROOT keeps the extension stable regardless of the JVM default locale.
        toFilePath = FileUtils.replaceExtensionWith(toFilePath, this.outputFormat.toString().toLowerCase(java.util.Locale.ROOT));
        new File(toFilePath).getParentFile().mkdirs();
        try {
            if (outputFormat == OutputFormat.XMI) {
                // Open exactly one stream on the target file and close it deterministically;
                // previously a second, never-closed stream was opened on the same path.
                try (FileOutputStream xmiOut = new FileOutputStream(toFilePath)) {
                    XmiCasSerializer.serialize(cas.getCas(), cas.getTypeSystem(), xmiOut);
                }
            } else {
                // As before, the file is created (possibly empty) for every non-XMI format.
                try (Writer writer = new FileWriter(toFilePath)) {
                    if (outputFormat == OutputFormat.JSON) {
                        TermSuiteJsonCasSerializer.serialize(writer, cas);
                    }
                }
            }
        } catch (Exception e) {
            throw new TermSuiteException("Could not export cas to " + toFilePath + " for cas " + document.getUrl(), e);
        }
    } catch (IOException e1) {
        throw new TermSuiteException("Could not export cas " + document.getUrl(), e1);
    }
}
Example 9
Project: dkpro-bigdata-master File: XmiSequenceFileWriter.java View source code |
/**
 * Serializes the CAS as XMI and appends it to the sequence file writer, keyed by a
 * document-relative path.
 *
 * The key is chosen as follows: if a base URI is set, the document URI with the base URI
 * prefix removed; otherwise the document id; otherwise the document title; otherwise a
 * generated name of the form {@code doc_N}.
 *
 * @param aJCas the CAS to write
 * @throws AnalysisEngineProcessException wrapping any failure during serialization or append
 * @throws IllegalStateException if a base URI is set but is not a prefix of the document URI
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    final DocumentMetaData meta = DocumentMetaData.get(aJCas);
    final String baseUri = meta.getDocumentBaseUri();
    final String docUri = meta.getDocumentUri();
    // The counter advances on every call, as before, even when the name ends up unused.
    final String generatedName = "doc_" + this.i++;

    String relativeDocumentPath;
    if (baseUri != null) {
        if ((docUri == null) || !docUri.startsWith(baseUri)) {
            throw new IllegalStateException("Base URI [" + baseUri + "] is not a prefix of document URI [" + docUri + "]");
        }
        relativeDocumentPath = docUri.substring(baseUri.length());
    } else if (meta.getDocumentId() != null) {
        relativeDocumentPath = meta.getDocumentId();
    } else if (meta.getDocumentTitle() != null) {
        // TODO: Bad Hack! Neither base URI/document URI nor document ID is set,
        // so the title stands in as the key.
        relativeDocumentPath = meta.getDocumentTitle();
    } else {
        // Nothing usable in the metadata: fall back to the generated name instead of
        // handing a null key to the writer.
        relativeDocumentPath = generatedName;
    }

    try {
        final ByteArrayOutputStream xmiBytes = new ByteArrayOutputStream();
        XmiCasSerializer.serialize(aJCas.getCas(), xmiBytes);
        this.writer.append(new Text(relativeDocumentPath), new Text(xmiBytes.toString("UTF-8")));
    } catch (final Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Example 10
Project: tika-master File: CTAKESUtils.java View source code |
/**
 * Writes a CAS to a stream using the requested serializer.
 *
 * {@code XCAS} and {@code XMI} select the matching serializer; any other value
 * (including {@code null}) falls back to the generic XML CAS serializer.
 *
 * @param jcas
 *            CAS (Common Analysis System) to be serialized.
 * @param type
 *            type of cTAKES (UIMA) serializer used to write the CAS.
 * @param prettyPrint
 *            {@code true} to pretty-print the output; not passed to the fallback
 *            serializer.
 * @param stream
 *            {@link OutputStream} that receives the serialized CAS.
 * @throws SAXException
 *             if there was a SAX exception.
 * @throws IOException
 *             if any I/O error occurs.
 */
public static void serialize(JCas jcas, CTAKESSerializer type, boolean prettyPrint, OutputStream stream) throws SAXException, IOException {
    if (type == CTAKESSerializer.XCAS) {
        XCASSerializer.serialize(jcas.getCas(), stream, prettyPrint);
        return;
    }
    if (type == CTAKESSerializer.XMI) {
        XmiCasSerializer.serialize(jcas.getCas(), jcas.getTypeSystem(), stream, prettyPrint, new XmiSerializationSharedData());
        return;
    }
    // Fallback for every other serializer type.
    XmlCasSerializer.serialize(jcas.getCas(), jcas.getTypeSystem(), stream);
}
Example 11
Project: uima-collection-tools-master File: DBXMICASConsumer.java View source code |
/** * Processes the CAS which was populated by the TextAnalysisEngines. <br> * In this case, the CAS is converted to XMI and written into the output file . * * @param aCAS * a CAS which has been populated by the TAEs * * @throws ResourceProcessException * if there is an error in processing the Resource * * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS) */ public void processCas(CAS cas) throws ResourceProcessException { try { ByteArrayOutputStream xmi_baos = new ByteArrayOutputStream(); XmiCasSerializer ser = new XmiCasSerializer(cas.getTypeSystem()); XMLSerializer xmlSer = new XMLSerializer(xmi_baos, false); ser.serialize(cas, xmlSer.getContentHandler()); // Retrieve XMI id String xmi_id = new String(); JCas jcas = cas.getJCas(); FSIterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator(); if (it.hasNext()) { SourceDocumentInformation sdi = (SourceDocumentInformation) it.next(); if (this.fullURI) { xmi_id = sdi.getUri().toString(); } else { xmi_id = new File(new URL(sdi.getUri()).getPath()).getName(); } if (sdi.getOffsetInSource() > 0 || !sdi.getLastSegment()) { xmi_id += ("_" + sdi.getOffsetInSource() + "_" + sdi.getDocumentSize()); } } if (xmi_id.length() == 0) { xmi_id = "doc" + this.mDocNum++; } /////////////////////////////////////////////////////////////////////////////////// XMIDTO xmidto = null; //XMI compression if (this.do_compression) { ByteArrayOutputStream compressed_xmi_baos = new ByteArrayOutputStream(); Deflater deflater = new Deflater(); DeflaterOutputStream deflaterOutputStream = new DeflaterOutputStream(compressed_xmi_baos, deflater); deflaterOutputStream.write(xmi_baos.toByteArray()); deflaterOutputStream.close(); xmidto = new XMIDTO(xmi_id, compressed_xmi_baos.toByteArray()); } else { xmidto = new XMIDTO(xmi_id, xmi_baos.toByteArray()); } this.xmiDAO.insert(xmidto); } catch (Exception e) { logger.log(Level.SEVERE, e.getMessage()); throw new ResourceProcessException(e); } }
Example 12
Project: ChemSpot-master File: App.java View source code |
/**
 * Tags one document with ChemSpot and optionally evaluates and writes the results.
 *
 * Entities that do not come from the gold standard are evaluated and removed first, and a
 * {@code PubmedDocument} annotation spanning the whole text is added when none exists.
 * If the corresponding output options are set, the annotations are written to
 * {@code outputPath} and the CAS is written as XMI below {@code pathToXMIOutput}.
 * I/O failures while writing are reported on stderr and do not abort the run.
 *
 * @param chemspot the ChemSpot instance used for tagging
 * @param jcas the CAS holding the document
 * @param outputPath path of the annotation output file; also used to derive the XMI file
 *            name; may be {@code null} to skip writing
 * @param evaluate whether to run the ChemSpot evaluator after tagging
 * @return the mentions found by ChemSpot
 */
private static List<Mention> runChemSpot(ChemSpot chemspot, JCas jcas, String outputPath, boolean evaluate) {
    boolean hasOtherEntities = false;
    for (NamedEntity ne : JCasUtil.iterate(jcas, NamedEntity.class)) {
        if (!Constants.GOLDSTANDARD.equals(ne.getSource())) {
            hasOtherEntities = true;
            break;
        }
    }
    if (hasOtherEntities) {
        System.out.println("Pre-existing entities found in document. Evaluating and removing them.");
        otherEvaluator.evaluate(jcas);
        removeOtherEntities(jcas);
    }

    if (!JCasUtil.iterator(jcas, PubmedDocument.class).hasNext()) {
        PubmedDocument pd = new PubmedDocument(jcas);
        pd.setBegin(0);
        pd.setEnd(jcas.getDocumentText().length());
        pd.setPmid("");
        pd.addToIndexes(jcas);
    }

    List<Mention> mentions = chemspot.tag(jcas);
    if (evaluate) {
        chemspot.getEvaluator().evaluate(jcas);
    }

    if (pathToOutputFile != null && outputPath != null) {
        String output = convertToIOB ? ChemSpot.convertToIOB(jcas) : ChemSpot.serializeAnnotations(jcas);
        // try-with-resources closes the writer even if write() fails.
        try (FileWriter outputFile = new FileWriter(new File(outputPath))) {
            outputFile.write(output);
            System.out.println("Output written to: " + outputPath);
        } catch (IOException e) {
            System.err.println("Error while writing ChemSpot output");
            e.printStackTrace();
        }
    }

    if (pathToXMIOutput != null && outputPath != null) {
        try {
            // Make sure the XMI directory ends with a separator before appending the file name.
            pathToXMIOutput += !pathToXMIOutput.endsWith("/") && !pathToXMIOutput.endsWith("\\") ? "/" : "";
            // File name = outputPath without its directory part and without its extension.
            File xmiOutputFile = new File(pathToXMIOutput + outputPath.replaceFirst(".*/", "").replaceFirst("\\.[^\\.]+$", "") + ".xmi");
            xmiOutputFile.getParentFile().mkdirs();
            try (OutputStream out = new FileOutputStream(xmiOutputFile)) {
                XmiCasSerializer serializer = new XmiCasSerializer(jcas.getTypeSystem());
                XMLSerializer xmlSerializer = new XMLSerializer(out, false);
                serializer.serialize(jcas.getCas(), xmlSerializer.getContentHandler());
            }
            System.out.println("XMI file written to: " + xmiOutputFile.getCanonicalPath());
        } catch (SAXException | IOException e) {
            e.printStackTrace();
        }
    }
    return mentions;
}
Example 13
Project: heideltime-master File: XMIResultFormatter.java View source code |
@Override public String format(JCas jcas) throws Exception { ByteArrayOutputStream outStream = null; try { // Write XMI outStream = new ByteArrayOutputStream(); XmiCasSerializer ser = new XmiCasSerializer(jcas.getTypeSystem()); XMLSerializer xmlSer = new XMLSerializer(outStream, false); ser.serialize(jcas.getCas(), xmlSer.getContentHandler()); // Convert output stream to string // String newOut = outStream.toString("UTF-8"); String newOut = outStream.toString(); // System.err.println("NEWOUT:"+newOut); return newOut; } finally { if (outStream != null) { outStream.close(); } } }
Example 14
Project: webanno-master File: XmiWriter.java View source code |
/**
 * Writes the CAS as XMI to the stream obtained for the ".xmi" extension. The type system
 * is written as well unless it has already been written and {@code typeSystemFile} is set.
 *
 * @param aJCas the CAS to serialize
 * @throws AnalysisEngineProcessException wrapping any exception raised while writing
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    try (OutputStream xmiOut = getOutputStream(aJCas, ".xmi")) {
        XmiCasSerializer.serialize(aJCas.getCas(), null, xmiOut, prettyPrint, null);
        final boolean alreadyDone = typeSystemWritten && typeSystemFile != null;
        if (alreadyDone) {
            return;
        }
        writeTypeSystem(aJCas);
        typeSystemWritten = true;
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Example 15
Project: cogroo4-master File: XmiWriterCasConsumer.java View source code |
/** * Serialize a CAS to a file in XMI format * * @param aCas * CAS to serialize * @param name * output file * @throws SAXException * @throws Exception * * @throws ResourceProcessException */ private void writeXmi(CAS aCas, File name, String modelFileName) throws IOException, SAXException { FileOutputStream out = null; try { // write XMI out = new FileOutputStream(name); XmiCasSerializer ser = new XmiCasSerializer(aCas.getTypeSystem()); XMLSerializer xmlSer = new XMLSerializer(out, false); ser.serialize(aCas, xmlSer.getContentHandler()); } finally { if (out != null) { out.close(); } } }
Example 16
Project: dkpro-lab-master File: XmiAdapter.java View source code |
/**
 * Writes the held CAS to the given stream in XMI format, without pretty-printing.
 *
 * @param aStream the stream that receives the XMI document
 * @throws Exception if the serialization fails
 */
@Override
public void write(OutputStream aStream) throws Exception {
    // Receiver is evaluated before the argument, so the XMI serializer is still
    // created first and the XML serializer second.
    new XmiCasSerializer(cas.getTypeSystem())
            .serialize(cas, new XMLSerializer(aStream, false).getContentHandler());
}