package jav.correctionBackend;

import java.io.*;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 *Copyright (c) 2012, IMPACT working group at the Centrum für Informations- und Sprachverarbeitung, University of Munich.
 *All rights reserved.
 *Redistribution and use in source and binary forms, with or without
 *modification, are permitted provided that the following conditions are met:
 *Redistributions of source code must retain the above copyright
 *notice, this list of conditions and the following disclaimer.
 *Redistributions in binary form must reproduce the above copyright
 *notice, this list of conditions and the following disclaimer in the
 *documentation and/or other materials provided with the distribution.
 *THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 *IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 *TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 *PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This file is part of the ocr-postcorrection tool developed
 * by the IMPACT working group at the Centrum für Informations- und Sprachverarbeitung, University of Munich.
 * For further information and contacts visit http://ocr.cis.uni-muenchen.de/
 *
 * Writes the tokens of a {@link Document} as a flat XML file: a single
 * {@code <document>} root element containing one empty {@code <token>}
 * element per token that is neither a space nor a newline.
 *
 * @author anna
 */
public class SimpleXmlExporter {

    public SimpleXmlExporter() {
    }

    /**
     * Exports the document to the given file, encoded as UTF-8.
     *
     * I/O failures are not propagated; they are logged at SEVERE level.
     *
     * @param doc the document whose tokens are exported
     * @param outfile path of the file to create or overwrite
     * @param exportCandidates passed through to
     *        {@link #export(Document, BufferedWriter, boolean)}, which
     *        currently ignores it
     */
    public void export(Document doc, String outfile, boolean exportCandidates) {
        try {
            BufferedWriter out = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outfile), "UTF8"));
            try {
                export(doc, out, exportCandidates);
            } finally {
                // close() flushes the buffer; without it the tail of the
                // output (or all of it, for small documents) never reaches
                // the file and the file handle leaks.
                out.close();
            }
        } catch (IOException ex) {
            Logger.getLogger(SimpleXmlExporter.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Writes the XML representation of the document to {@code out}.
     *
     * The writer is neither flushed nor closed here; that is left to the
     * caller, who owns it.
     *
     * @param doc the document whose tokens are exported
     * @param out destination writer
     * @param exportCandidates currently unused by this exporter
     * @throws IOException if writing to {@code out} fails
     */
    public void export(Document doc, BufferedWriter out, boolean exportCandidates) throws IOException {
        out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        out.write("<document>\n");

        Iterator<Page> pageIt = doc.pageIterator();
        while (pageIt.hasNext()) {
            Page page = pageIt.next();
            Iterator<Token> tokenIt = doc.tokenIterator(page);
            while (tokenIt.hasNext()) {
                Token token = tokenIt.next();
                SpecialSequenceType sst = token.getSpecialSeq();
                // Whitespace tokens carry no text worth exporting.
                if (!sst.equals(SpecialSequenceType.SPACE) && !sst.equals(SpecialSequenceType.NEWLINE)) {
                    // NOTE(review): the attribute is named wOCR but is filled
                    // from getWCOR(); confirm this is intentional.
                    // The text is escaped so that tokens containing quotes,
                    // ampersands or angle brackets still yield well-formed XML.
                    out.write(" <token id=\"" + token.getID()
                            + "\" wOCR=\"" + escapeXml(String.valueOf(token.getWCOR())) + "\"/>\n");
                }
            }
        }

        out.write("</document>\n");
    }

    /** Replaces the five XML-reserved characters with their predefined entities. */
    private static String escapeXml(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&apos;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }
}