/* * CATMA Computer Aided Text Markup and Analysis * * Copyright (C) 2008-2010 University Of Hamburg * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package de.catma.document.source.contenthandler; import java.util.Scanner; import nu.xom.Element; /** * A content handler HTML based {@link de.catma.document.source.SourceDocument}s. * * @author marco.petris@web.de * */ public class TEIContentHandler extends XMLContentHandler { public TEIContentHandler() { inlineElements.add("abbr"); inlineElements.add("add"); inlineElements.add("expan"); inlineElements.add("corr"); inlineElements.add("date"); inlineElements.add("del"); inlineElements.add("distinct"); inlineElements.add("emph"); inlineElements.add("foreign"); inlineElements.add("gap"); inlineElements.add("gloss"); inlineElements.add("hi"); inlineElements.add("index"); inlineElements.add("measure"); inlineElements.add("mentioned"); inlineElements.add("milestone"); inlineElements.add("name"); inlineElements.add("num"); inlineElements.add("orig"); inlineElements.add("q"); inlineElements.add("quote"); inlineElements.add("ref"); inlineElements.add("reg"); inlineElements.add("rs"); inlineElements.add("said"); inlineElements.add("sic"); inlineElements.add("soCalled"); inlineElements.add("sp"); inlineElements.add("street"); inlineElements.add("term"); inlineElements.add("time"); inlineElements.add("unclear"); } @Override public void addTextContent(StringBuilder contentBuilder, Element element, String content) { boolean inline = inlineElements.contains(element.getLocalName()); // make things look good... if (!content.trim().isEmpty()) { if (inline) { contentBuilder.append(" "); } try (Scanner lineScanner = new Scanner(content.trim())) { String conc = ""; while (lineScanner.hasNextLine()) { contentBuilder.append(conc); contentBuilder.append(lineScanner.nextLine().trim()); conc = " "; } } if (inline) { contentBuilder.append(" "); } } } @Override public void addEmptyElement(StringBuilder contentBuilder, Element element) { // show linebreaks as actual linebreaks if (element.getLocalName().equals("lb")) { contentBuilder.append("\n"); } else { super.addEmptyElement(contentBuilder, element); } } }