/* * SmartDoc : Ultimate document format based on XML * Copyright (C) 1998-2004 ASAMI, Tomoharu (asami@XMLSmartDoc.org) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package org.xmlsmartdoc.SmartDoc.xhtml; import java.io.IOException; import java.net.URL; import org.xmlsmartdoc.SmartDoc.Anchor; import org.xmlsmartdoc.SmartDoc.Blockquote; import org.xmlsmartdoc.SmartDoc.Body; import org.xmlsmartdoc.SmartDoc.Bold; import org.xmlsmartdoc.SmartDoc.Chapter; import org.xmlsmartdoc.SmartDoc.CharBlock; import org.xmlsmartdoc.SmartDoc.Cite; import org.xmlsmartdoc.SmartDoc.Code; import org.xmlsmartdoc.SmartDoc.Col; import org.xmlsmartdoc.SmartDoc.Colgroup; import org.xmlsmartdoc.SmartDoc.Container; import org.xmlsmartdoc.SmartDoc.Dd; import org.xmlsmartdoc.SmartDoc.Dfn; import org.xmlsmartdoc.SmartDoc.Div; import org.xmlsmartdoc.SmartDoc.Dl; import org.xmlsmartdoc.SmartDoc.Doc; import org.xmlsmartdoc.SmartDoc.DocContext; import org.xmlsmartdoc.SmartDoc.Dt; import org.xmlsmartdoc.SmartDoc.Em; import org.xmlsmartdoc.SmartDoc.Head; import org.xmlsmartdoc.SmartDoc.Italic; import org.xmlsmartdoc.SmartDoc.Li; import org.xmlsmartdoc.SmartDoc.Ol; import org.xmlsmartdoc.SmartDoc.Paragraph; import org.xmlsmartdoc.SmartDoc.Pre; import org.xmlsmartdoc.SmartDoc.Quote; import org.xmlsmartdoc.SmartDoc.Section; import org.xmlsmartdoc.SmartDoc.SmartDocModel; import org.xmlsmartdoc.SmartDoc.Span; import org.xmlsmartdoc.SmartDoc.SubSection; import org.xmlsmartdoc.SmartDoc.SubSubSection; import org.xmlsmartdoc.SmartDoc.Symbol; import org.xmlsmartdoc.SmartDoc.TBody; import org.xmlsmartdoc.SmartDoc.TFoot; import org.xmlsmartdoc.SmartDoc.THead; import org.xmlsmartdoc.SmartDoc.Table; import org.xmlsmartdoc.SmartDoc.Td; import org.xmlsmartdoc.SmartDoc.Th; import org.xmlsmartdoc.SmartDoc.Tr; import org.xmlsmartdoc.SmartDoc.Tt; import org.xmlsmartdoc.SmartDoc.Ul; import com.AsamiOffice.text.UString; import com.AsamiOffice.jaba2.xml.IProcessor; import com.AsamiOffice.jaba2.xml.ProcessorFactory; import org.w3c.dom.CDATASection; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Entity; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.Text; import com.AsamiOffice.xml.UDOM; /** * XHTMLImporter * * @since Dec. 27, 1999 * @version Jan. 25, 2004 * @author ASAMI, Tomoharu (asami@XMLSmartDoc.org) */ public class XHTMLImporter { protected SmartDocModel sdocModel_; // XXX protected CharacterEntityMap cemap_ = new CharacterEntityMap(); public XHTMLImporter(SmartDocModel model) { sdocModel_ = model; } public String getID() { return ("xhtml"); } public String getName() { return ("XHTML Importer"); } public boolean canImport(URL url) { String suffix = UString.getSuffix(url.toExternalForm()); return ("xhtml".equals(suffix)); } public void importSource( URL url, DocContext context ) throws IOException { SmartDocModel model = sdocModel_; IProcessor processor = ProcessorFactory.getProcessor(); // XXX Document xml = processor.parseDocument(url); Doc doc = _buildDoc(xml, context); model.setDoc(doc); } protected Doc _buildDoc(Document xml, DocContext context) { Element html = xml.getDocumentElement(); return (_buildHTML(html)); } protected Doc _buildHTML(Element html) { Doc doc = new Doc(); _buildContents(html, doc); return (doc); } protected void _buildHead(Element element, Container target) { Head head = new Head(); target.addContent(head); _buildContents(element, head); } protected void _buildBody(Element element, Container target) { Body body = new Body(); target.addContent(body); _buildContents(element, body); } protected void _buildTitle(Element element, Container target) { // XXX } protected void _buildBase(Element element, Container target) { // do nothing } protected void _buildIsindex(Element element, Container target) { // do nothing } protected void _buildLink(Element element, Container target) { // XXX } protected void _buildMeta(Element element, Container target) { // XXX } protected void _buildStyle(Element element, Container target) { // XXX } protected void _buildAbbr(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildAcronym(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildCode(Element element, Container target) { Code code = new Code(); target.addContent(code); _buildContents(element, code); } protected void _buildCite(Element element, Container target) { Cite cite = new Cite(); // cite.setName(UString.checkNull(element.getAttribute("name"))); target.addContent(cite); _buildContents(element, cite); } protected void _buildDfn(Element element, Container target) { Dfn dfn = new Dfn(); target.addContent(dfn); _buildContents(element, dfn); } protected void _buildEm(Element element, Container target) { Em em = new Em(); target.addContent(em); _buildContents(element, em); } protected void _buildStrong(Element element, Container target) { Em em = new Em(); target.addContent(em); _buildContents(element, em); } protected void _buildKbd(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildSamp(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildVar(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildSub(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildSup(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildBold(Element element, Container target) { Bold bold = new Bold(); target.addContent(bold); _buildContents(element, bold); } protected void _buildItalic(Element element, Container target) { Italic italic = new Italic(); target.addContent(italic); _buildContents(element, italic); } protected void _buildBig(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildSmall(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildStrike(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildUnderline(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildTt(Element element, Container target) { Tt tt = new Tt(); target.addContent(tt); _buildContents(element, tt); } protected void _buildFont(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildBasefont(Element element, Container target) { // XXX } protected Container _buildH1(Element element, Container target) { Chapter chapter = new Chapter(); chapter.setTitle(UDOM.getDataValue(element)); return (chapter); } protected Container _buildH2(Element element, Container target) { Section section = new Section(); section.setTitle(UDOM.getDataValue(element)); return (section); } protected Container _buildH3(Element element, Container target) { SubSection subsection = new SubSection(); subsection.setTitle(UDOM.getDataValue(element)); return (subsection); } protected Container _buildH4(Element element, Container target) { SubSubSection subsubsection = new SubSubSection(); subsubsection.setTitle(UDOM.getDataValue(element)); return (subsubsection); } protected Container _buildH5(Element element, Container target) { throw (new UnsupportedOperationException()); } protected Container _buildH6(Element element, Container target) { throw (new UnsupportedOperationException()); } protected void _buildAddress(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildDel(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildIns(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildParagraph(Element element, Container target) { Paragraph paragraph = new Paragraph(); target.addContent(paragraph); _buildContents(element, paragraph); } protected void _buildBlockquote(Element element, Container target) { Blockquote blockquote = new Blockquote(); target.addContent(blockquote); _buildContents(element, blockquote); } protected void _buildQuote(Element element, Container target) { Quote quote = new Quote(); target.addContent(quote); _buildContents(element, quote); } protected void _buildBr(Element element, Container target) { // XXX } protected void _buildPre(Element element, Container target) { Pre pre = new Pre(); target.addContent(pre); _buildContents(element, pre); } protected void _buildBdo(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildCenter(Element element, Container target) { Div div = new Div(); div.setStyle("text-align: center;"); target.addContent(div); _buildContents(element, div); } protected void _buildDiv(Element element, Container target) { Div div = new Div(); target.addContent(div); _buildContents(element, div); } protected void _buildSpan(Element element, Container target) { Span span = new Span(); target.addContent(span); _buildContents(element, span); } protected void _buildAnchor(Element element, Container target) { Anchor anchor = new Anchor(); target.addContent(anchor); _buildContents(element, anchor); } protected void _buildImg(Element element, Container target) { // XXX } protected void _buildMap(Element element, Container target) { // do nothing } protected void _buildArea(Element element, Container target) { // do nothing } protected void _buildHr(Element element, Container target) { // XXX } protected void _buildDir(Element element, Container target) { // XXX } protected void _buildMenu(Element element, Container target) { // XXX } protected void _buildOl(Element element, Container target) { Ol ol = new Ol(); target.addContent(ol); _buildContents(element, ol); } protected void _buildUl(Element element, Container target) { Ul ul = new Ul(); target.addContent(ul); _buildContents(element, ul); } protected void _buildLi(Element element, Container target) { Li li = new Li(); target.addContent(li); _buildContents(element, li); } protected void _buildDl(Element element, Container target) { Dl dl = new Dl(); target.addContent(dl); _buildContents(element, dl); } protected void _buildDt(Element element, Container target) { Dt dt = new Dt(); target.addContent(dt); _buildContents(element, dt); } protected void _buildDd(Element element, Container target) { Dd dd = new Dd(); target.addContent(dd); _buildContents(element, dd); } protected void _buildTable(Element element, Container target) { Table table = new Table(); target.addContent(table); _buildContents(element, table); } protected void _buildTr(Element element, Container target) { Tr tr = new Tr(); target.addContent(tr); _buildContents(element, tr); } protected void _buildTh(Element element, Container target) { Th th = new Th(); target.addContent(th); _buildContents(element, th); } protected void _buildTd(Element element, Container target) { Td td = new Td(); target.addContent(td); _buildContents(element, td); } protected void _buildThead(Element element, Container target) { THead thead = new THead(); target.addContent(thead); _buildContents(element, thead); } protected void _buildTbody(Element element, Container target) { TBody tbody = new TBody(); target.addContent(tbody); _buildContents(element, tbody); } protected void _buildTfoot(Element element, Container target) { TFoot tfoot = new TFoot(); target.addContent(tfoot); _buildContents(element, tfoot); } protected void _buildColgroup(Element element, Container target) { Colgroup colgroup = new Colgroup(); target.addContent(colgroup); _buildContents(element, colgroup); } protected void _buildCol(Element element, Container target) { Col col = new Col(); target.addContent(col); } protected void _buildCaption(Element element, Container target) { // XXX } protected void _buildFrameset(Element element, Container target) { // XXX } protected void _buildFrame(Element element, Container target) { // XXX } protected void _buildNoframes(Element element, Container target) { // XXX } protected void _buildIframe(Element element, Container target) { // XXX } protected void _buildForm(Element element, Container target) { // XXX } protected void _buildFieldset(Element element, Container target) { // XXX } protected void _buildLegend(Element element, Container target) { // XXX } protected void _buildLabel(Element element, Container target) { // XXX } protected void _buildInput(Element element, Container target) { // XXX } protected void _buildButton(Element element, Container target) { // XXX } protected void _buildTextarea(Element element, Container target) { // XXX } protected void _buildSelect(Element element, Container target) { // XXX } protected void _buildOptgroup(Element element, Container target) { // XXX } protected void _buildOption(Element element, Container target) { // XXX } protected void _buildScript(Element element, Container target) { // XXX } protected void _buildNoscript(Element element, Container target) { // XXX } protected void _buildObject(Element element, Container target) { // XXX } protected void _buildParam(Element element, Container target) { // XXX } protected void _buildApplet(Element element, Container target) { // XXX } protected void _buildSymbol(Element element, Container target) { Symbol symbol = new Symbol(); target.addContent(symbol); _buildContents(element, symbol); } protected void _buildContents(Node parent, Container target) { Container[] headerContexts = new Container[6]; Container newTarget; NodeList nodes = parent.getChildNodes(); int size = nodes.getLength(); for (int i = 0;i < size;i++) { Node node = nodes.item(i); switch (node.getNodeType()) { case Node.ELEMENT_NODE: Element child = (Element)node; String tagName = child.getTagName(); if (_isTag("head", tagName)) { _buildHead(child, target); } else if (_isTag("body", tagName)) { _buildBody(child, target); } else if (_isTag("title", tagName)) { _buildTitle(child, target); } else if (_isTag("base", tagName)) { _buildBase(child, target); } else if (_isTag("isindex", tagName)) { _buildIsindex(child, target); } else if (_isTag("link", tagName)) { _buildLink(child, target); } else if (_isTag("meta", tagName)) { _buildMeta(child, target); } else if (_isTag("style", tagName)) { _buildStyle(child, target); } else if (_isTag("abbr", tagName)) { _buildAbbr(child, target); } else if (_isTag("acronym", tagName)) { _buildAcronym(child, target); } else if (_isTag("code", tagName)) { _buildCode(child, target); } else if (_isTag("cite", tagName)) { _buildCite(child, target); } else if (_isTag("dfn", tagName)) { _buildDfn(child, target); } else if (_isTag("em", tagName)) { _buildEm(child, target); } else if (_isTag("strong", tagName)) { _buildStrong(child, target); } else if (_isTag("kbd", tagName)) { _buildKbd(child, target); } else if (_isTag("samp", tagName)) { _buildSamp(child, target); } else if (_isTag("var", tagName)) { _buildVar(child, target); } else if (_isTag("sub", tagName)) { _buildSub(child, target); } else if (_isTag("sup", tagName)) { _buildSup(child, target); } else if (_isTag("b", tagName)) { _buildBold(child, target); } else if (_isTag("i", tagName)) { _buildItalic(child, target); } else if (_isTag("big", tagName)) { _buildBig(child, target); } else if (_isTag("small", tagName)) { _buildSmall(child, target); } else if (_isTag("s", tagName)) { _buildStrike(child, target); } else if (_isTag("strike", tagName)) { _buildStrike(child, target); } else if (_isTag("u", tagName)) { _buildUnderline(child, target); } else if (_isTag("tt", tagName)) { _buildTt(child, target); } else if (_isTag("font", tagName)) { _buildFont(child, target); } else if (_isTag("basefont", tagName)) { _buildBasefont(child, target); } else if (_isTag("h1", tagName)) { if (headerContexts[0] != null) { target = headerContexts[0]; } else { headerContexts[0] = target; } headerContexts[1] = null; headerContexts[2] = null; headerContexts[3] = null; headerContexts[4] = null; headerContexts[5] = null; newTarget = _buildH1(child, target); target.addContent(newTarget); target = newTarget; } else if (_isTag("h2", tagName)) { if (headerContexts[1] != null) { target = headerContexts[1]; } else { headerContexts[1] = target; } headerContexts[2] = null; headerContexts[3] = null; headerContexts[4] = null; headerContexts[5] = null; newTarget = _buildH2(child, target); target.addContent(newTarget); target = newTarget; } else if (_isTag("h3", tagName)) { if (headerContexts[2] != null) { target = headerContexts[1]; } else { headerContexts[2] = target; } headerContexts[3] = null; headerContexts[4] = null; headerContexts[5] = null; newTarget = _buildH3(child, target); target.addContent(newTarget); target = newTarget; } else if (_isTag("h4", tagName)) { if (headerContexts[3] != null) { target = headerContexts[1]; } else { headerContexts[3] = target; } headerContexts[4] = null; headerContexts[5] = null; newTarget = _buildH4(child, target); target.addContent(newTarget); target = newTarget; } else if (_isTag("h5", tagName)) { if (headerContexts[4] != null) { target = headerContexts[1]; } else { headerContexts[4] = target; } headerContexts[5] = null; newTarget = _buildH5(child, target); target.addContent(newTarget); target = newTarget; } else if (_isTag("h6", tagName)) { if (headerContexts[5] != null) { target = headerContexts[1]; } else { headerContexts[5] = target; } newTarget = _buildH6(child, target); target.addContent(newTarget); target = newTarget; } else if (_isTag("address", tagName)) { _buildAddress(child, target); } else if (_isTag("del", tagName)) { _buildDel(child, target); } else if (_isTag("ins", tagName)) { _buildIns(child, target); } else if (_isTag("p", tagName)) { _buildParagraph(child, target); } else if (_isTag("blockquote", tagName)) { _buildBlockquote(child, target); } else if (_isTag("q", tagName)) { _buildQuote(child, target); } else if (_isTag("br", tagName)) { _buildBr(child, target); } else if (_isTag("pre", tagName)) { _buildPre(child, target); } else if (_isTag("bdo", tagName)) { _buildBdo(child, target); } else if (_isTag("center", tagName)) { _buildCenter(child, target); } else if (_isTag("div", tagName)) { _buildDiv(child, target); } else if (_isTag("span", tagName)) { _buildSpan(child, target); } else if (_isTag("a", tagName)) { _buildAnchor(child, target); } else if (_isTag("img", tagName)) { _buildImg(child, target); } else if (_isTag("map", tagName)) { _buildMap(child, target); } else if (_isTag("area", tagName)) { _buildArea(child, target); } else if (_isTag("hr", tagName)) { _buildHr(child, target); } else if (_isTag("dir", tagName)) { _buildDir(child, target); } else if (_isTag("menu", tagName)) { _buildMenu(child, target); } else if (_isTag("ol", tagName)) { _buildOl(child, target); } else if (_isTag("ul", tagName)) { _buildUl(child, target); } else if (_isTag("li", tagName)) { _buildLi(child, target); } else if (_isTag("dl", tagName)) { _buildDl(child, target); } else if (_isTag("dt", tagName)) { _buildDt(child, target); } else if (_isTag("dd", tagName)) { _buildDd(child, target); } else if (_isTag("table", tagName)) { _buildTable(child, target); } else if (_isTag("tr", tagName)) { _buildTr(child, target); } else if (_isTag("th", tagName)) { _buildTh(child, target); } else if (_isTag("td", tagName)) { _buildTd(child, target); } else if (_isTag("thead", tagName)) { _buildThead(child, target); } else if (_isTag("tbody", tagName)) { _buildTbody(child, target); } else if (_isTag("tfoot", tagName)) { _buildTfoot(child, target); } else if (_isTag("colgroup", tagName)) { _buildColgroup(child, target); } else if (_isTag("col", tagName)) { _buildCol(child, target); } else if (_isTag("caption", tagName)) { _buildCaption(child, target); } else if (_isTag("framset", tagName)) { _buildFrameset(child, target); } else if (_isTag("frame", tagName)) { _buildFrame(child, target); } else if (_isTag("noframes", tagName)) { _buildNoframes(child, target); } else if (_isTag("iframe", tagName)) { _buildIframe(child, target); } else if (_isTag("form", tagName)) { _buildForm(child, target); } else if (_isTag("fieldset", tagName)) { _buildFieldset(child, target); } else if (_isTag("legend", tagName)) { _buildLegend(child, target); } else if (_isTag("label", tagName)) { _buildLabel(child, target); } else if (_isTag("input", tagName)) { _buildInput(child, target); } else if (_isTag("button", tagName)) { _buildButton(child, target); } else if (_isTag("textarea", tagName)) { _buildTextarea(child, target); } else if (_isTag("select", tagName)) { _buildSelect(child, target); } else if (_isTag("optgroup", tagName)) { _buildOptgroup(child, target); } else if (_isTag("option", tagName)) { _buildOption(child, target); } else if (_isTag("script", tagName)) { _buildScript(child, target); } else if (_isTag("noscript", tagName)) { _buildNoscript(child, target); } else if (_isTag("object", tagName)) { _buildObject(child, target); } else if (_isTag("param", tagName)) { _buildParam(child, target); } else if (_isTag("applet", tagName)) { _buildApplet(child, target); } else { _buildSymbol(child, target); } break; case Node.TEXT_NODE: String text = ((Text)node).getData(); target.addContent(new CharBlock(text)); break; case Node.ENTITY_NODE: String name = ((Entity)node).getNotationName(); if (name != null) { char c = cemap_.getCharacter(name); if (c != 0) { target.addContent(new CharBlock(c)); } else { throw (new InternalError("bad entity")); } } break; case Node.ENTITY_REFERENCE_NODE: // UArray.addAll(list, _buildContents(node)); // break; throw (new UnsupportedOperationException()); case Node.COMMENT_NODE: // do nothing break; case Node.CDATA_SECTION_NODE: String cdata = ((CDATASection)node).getData(); target.addContent(new CharBlock(cdata)); break; default: throw (new InternalError("bad node type = " + node.getNodeType())); // XXX : debug } } } protected final boolean _isTag(String name, String tagName) { return (name.equals(tagName.toLowerCase())); } }