/** * <a href="http://www.openolat.org"> * OpenOLAT - Online Learning and Training</a><br> * <p> * Licensed under the Apache License, Version 2.0 (the "License"); <br> * you may not use this file except in compliance with the License.<br> * You may obtain a copy of the License at the * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> * <p> * Unless required by applicable law or agreed to in writing,<br> * software distributed under the License is distributed on an "AS IS" BASIS, <br> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> * See the License for the specific language governing permissions and <br> * limitations under the License. * <p> * Initial code contributed and copyrighted by<br> * frentix GmbH, http://www.frentix.com * <p> */ package org.olat.core.util.openxml; import java.io.File; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; import java.util.Deque; import java.util.List; import org.olat.core.util.StringHelper; import org.olat.core.util.openxml.OpenXMLDocument.Border; import org.olat.core.util.openxml.OpenXMLDocument.Indent; import org.olat.core.util.openxml.OpenXMLDocument.ListParagraph; import org.olat.core.util.openxml.OpenXMLDocument.PredefinedStyle; import org.olat.core.util.openxml.OpenXMLDocument.Spacing; import org.olat.core.util.openxml.OpenXMLDocument.Style; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; /** * Convert HTML code to OpenXML * * * Initial date: 05.09.2013<br> * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com * */ public class HTMLToOpenXMLHandler extends DefaultHandler { private static final Border QUOTE_BORDER = new Border(400, 24, "EEEEEE"); private boolean latex = false; private StringBuilder textBuffer; private Spacing startSpacing; private boolean appendToCursor = true; protected final OpenXMLDocument factory; protected List<Node> content = new ArrayList<>(); protected Deque<StyleStatus> styleStack = new ArrayDeque<>(); protected Table currentTable; protected Element currentParagraph; protected ListParagraph currentListParagraph; protected boolean pNeedNewParagraph = true; public HTMLToOpenXMLHandler(OpenXMLDocument document) { this.factory = document; } /** * @param document The OpenXML document * @param paragraph The current paragraph * @param appendToCursor If true, append automatically to the document */ public HTMLToOpenXMLHandler(OpenXMLDocument document, Element paragraph, boolean appendToCursor) { this(document); this.currentParagraph = paragraph; this.appendToCursor = appendToCursor; } public HTMLToOpenXMLHandler(OpenXMLDocument document, Spacing spacing) { this(document); this.startSpacing = spacing; } public void setInitialParagraph(Element paragraph) { this.currentParagraph = paragraph; } public List<Node> getContent() { return content; } /** * Flush the text if a new paragraph is created. Trailing text is flushed * in the previous paragraph. * @param create * @return */ protected Element getCurrentParagraph(boolean create) { if(create || currentParagraph == null) { //flush the text if(textBuffer != null) { flushText(); addContent(currentParagraph); } Indent indent = getCurrentIndent(); Border leftBorder = getCurrentLeftBorder(); PredefinedStyle predefinedStyle = getCurrentPredefinedStyle(); currentParagraph = factory.createParagraphEl(indent, leftBorder, startSpacing, predefinedStyle); startSpacing = null;//consumed } return currentParagraph; } protected Element appendParagraph(Spacing spacing) { //flush the text if(textBuffer != null) { flushText(); addContent(currentParagraph); } Indent indent = getCurrentIndent(); Border leftBorder = getCurrentLeftBorder(); PredefinedStyle predefinedStyle = getCurrentPredefinedStyle(); currentParagraph = factory.createParagraphEl(indent, leftBorder, spacing, predefinedStyle); return currentParagraph; } protected Element getCurrentListParagraph(boolean create) { if(create || currentParagraph == null) { //flush the text if(textBuffer != null) { flushText(); addContent(currentParagraph); } currentParagraph = factory.createListParagraph(currentListParagraph); } return currentParagraph; } protected void closeParagraph() { flushText(); currentParagraph = addContent(currentParagraph); textBuffer = null; latex = false; } protected Element addContent(Node element) { if(element == null) return null; if(currentTable != null) { currentTable.getCurrentCell().appendChild(element); } else { content.add(element); } return null; } protected void flushText() { if(textBuffer == null) return; if(latex) { //begin a new paragraph if(currentParagraph != null) { currentParagraph = addContent(currentParagraph); } List<Node> nodes = factory.convertLaTeX(textBuffer.toString()); for(Node node:nodes) { addContent(node); } } else { Element currentRun = getCurrentRun(); String text = textBuffer.toString().replace("\n", "").replace("\r", ""); if(text.length() > 0 && Character.isSpaceChar(text.charAt(0))) { currentRun.appendChild(factory.createPreserveSpaceEl()); } currentRun.appendChild(factory.createTextEl(text)); if(text.length() > 1 && Character.isSpaceChar(text.charAt(text.length() - 1))) { currentRun.appendChild(factory.createPreserveSpaceEl()); } } latex = false; textBuffer = null; } /** * Get or create a run on the current paragraph * @return */ protected Element getCurrentRun() { Element paragraphEl; if(currentParagraph == null) { Indent indent = getCurrentIndent(); Border leftBorder = getCurrentLeftBorder(); PredefinedStyle predefinedStyle = getCurrentPredefinedStyle(); paragraphEl = currentParagraph = factory.createParagraphEl(indent, leftBorder, startSpacing, predefinedStyle); startSpacing = null; } else { paragraphEl = currentParagraph; } Node lastChild = paragraphEl.getLastChild(); if(lastChild != null && "w:r".equals(lastChild.getNodeName())) { return (Element)lastChild; } PredefinedStyle runStyle = getCurrentPredefinedStyle(); return (Element)paragraphEl.appendChild(factory.createRunEl(null, runStyle)); } protected Style[] setTextPreferences(String cssStyles) { if(cssStyles == null) { return setTextPreferences(); } else { List<Style> styles = new ArrayList<Style>(4); if(cssStyles.contains("bold")) styles.add(Style.bold); if(cssStyles.contains("italic")) styles.add(Style.italic); if(cssStyles.contains("underline")) styles.add(Style.underline); if(cssStyles.contains("line-through")) styles.add(Style.strike); return setTextPreferences(styles.toArray(new Style[styles.size()])); } } /** * Create a new run with preferences */ protected Style[] setTextPreferences(Style... styles) { Node runPrefs = getRunForTextPreferences(); factory.createRunPrefsEl(runPrefs, styles); return styles; } protected Style[] unsetTextPreferences(Style... styles) { Node runPrefs = getRunForTextPreferences(); factory.createRunReversePrefsEl(runPrefs, styles); return styles; } protected Node getRunForTextPreferences() { Element paragraphEl = getCurrentParagraph(false); Node runPrefs = null; Node run = paragraphEl.getLastChild(); if(run != null && "w:r".equals(run.getNodeName())) { Node prefs = run.getLastChild(); if("w:rPr".equals(prefs.getNodeName())){ runPrefs = prefs; } } if(runPrefs == null) { PredefinedStyle style = getCurrentPredefinedStyle(); run = paragraphEl.appendChild(factory.createRunEl(null, style)); runPrefs = run.appendChild(factory.createRunPrefsEl()); } if(!"w:rPr".equals(runPrefs.getNodeName())){ runPrefs = run.appendChild(factory.createRunPrefsEl()); } return runPrefs; } public Style[] getCurrentStyle() { if(styleStack.isEmpty()) return null; return styleStack.getLast().getStyles(); } public Indent getCurrentIndent() { if(styleStack.isEmpty()) return null; int indent = 0; for(StyleStatus style:styleStack) { if(style.isQuote()) { indent++; } } int emuIndent = 0; if(indent > 0) { emuIndent = 700; } if(indent > 1) { emuIndent += (indent - 1) * 100; } return emuIndent == 0 ? null : new Indent(emuIndent); } public Border getCurrentLeftBorder() { if(styleStack.isEmpty()) return null; int indent = 0; for(StyleStatus style:styleStack) { if(style.isQuote()) { indent++; } } String val; switch(indent) { case 1: val = "single"; break; case 2: val = "double"; break; default: val = "triple"; } return indent == 0 ? null : new Border(QUOTE_BORDER, val); } public PredefinedStyle getCurrentPredefinedStyle() { if(styleStack.isEmpty()) return null; boolean quote = false; for(StyleStatus style:styleStack) { quote |= style.isQuote(); } return quote ? PredefinedStyle.quote : null; } public Style[] popStyle(String tag) { StyleStatus status = styleStack.pollLast(); if(status != null && status.getTag().equals(tag)) { return status.getStyles(); } return null; } protected void setImage(String path) { Element imgEl = factory.createImageEl(path); if(imgEl != null) { PredefinedStyle style = getCurrentPredefinedStyle(); Element runEl = factory.createRunEl(Collections.singletonList(imgEl), style); Element paragrapheEl = getCurrentParagraph(false); paragrapheEl.appendChild(runEl); } } protected void setImage(File file) { Element imgEl = factory.createImageEl(file); if(imgEl != null) { PredefinedStyle style = getCurrentPredefinedStyle(); Element runEl = factory.createRunEl(Collections.singletonList(imgEl), style); Element paragrapheEl = getCurrentParagraph(false); paragrapheEl.appendChild(runEl); } } protected void startGraphic(File backgroundImage, List<OpenXMLGraphic> elements) { Element paragrapheEl = getCurrentParagraph(true); Element graphicEl = factory.createGraphicEl(backgroundImage, elements); Element runEl = factory.createRunEl(); runEl.appendChild(graphicEl); paragrapheEl.appendChild(runEl); closeParagraph(); } protected void startTable() { closeParagraph(); currentTable = new Table(); } protected void startTable(Integer... width) { closeParagraph(); currentTable = new Table(width); } protected void startCurrentTableRow() { currentTable.addRowEl(); } protected void closeCurrentTableRow() { if(currentTable != null) { currentTable.closeRow(); } textBuffer = null; latex = false; currentParagraph = null; } protected void endTable() { if(currentTable != null) { content.add(currentTable.getTableEl()); } currentTable = null; currentParagraph = null; } @Override public void startElement(String uri, String localName, String qName, Attributes attributes) { String tag = localName.toLowerCase(); if("p".equalsIgnoreCase(tag)) { getCurrentParagraph(pNeedNewParagraph); } else if("span".equalsIgnoreCase(tag)) { flushText(); Style[] styles = null; String cl = attributes.getValue("class"); if("math".equals(cl)) { latex = true; } else { String cssStyles = attributes.getValue("style"); styles = setTextPreferences(cssStyles); } styleStack.add(new StyleStatus(tag, styles)); } else if("br".equals(tag)) { closeParagraph(); } else if("em".equalsIgnoreCase(tag)) { flushText(); Style[] styles = setTextPreferences(Style.italic); styleStack.add(new StyleStatus(tag, styles)); } else if("strong".equalsIgnoreCase(tag)) { flushText(); Style[] styles = setTextPreferences(Style.bold); styleStack.add(new StyleStatus(tag, styles)); } else if("img".equals(tag)) { String path = attributes.getValue("src"); setImage(path); } else if("table".equalsIgnoreCase(tag)) { startTable(); } else if("tr".equals(tag)) { startCurrentTableRow(); } else if("td".equals(tag) || "th".equals(tag)) { int colspan = OpenXMLUtils.getSpanAttribute("colspan", attributes); int rowspan = OpenXMLUtils.getSpanAttribute("rowspan", attributes); currentTable.addCellEl(colspan, rowspan); } else if("ul".equals(tag) || "ol".equals(tag)) { currentListParagraph = factory.createListParagraph(); } else if("li".equals(tag)) { getCurrentListParagraph(true); } else if("blockquote".equals(tag)) { Style[] styles = setTextPreferences(Style.italic); styleStack.add(new StyleStatus(tag, true, styles)); appendParagraph(new Spacing(90, 0)); pNeedNewParagraph = false; } else if("div".equals(tag)) { String cl = attributes.getValue("class"); if(StringHelper.containsNonWhitespace(cl)) { if(cl.contains("o_quote_author")) { Style[] styles = setTextPreferences(Style.italic); styleStack.add(new StyleStatus(tag, true, styles)); appendParagraph(new Spacing(120, 0)); pNeedNewParagraph = false; } else { styleStack.add(new StyleStatus(tag, new Style[0])); } } else { styleStack.add(new StyleStatus(tag, new Style[0])); } } } @Override public void characters(char[] ch, int start, int length) { if(textBuffer == null) { textBuffer = new StringBuilder(); } textBuffer.append(ch, start, length); } @Override public void endElement(String uri, String localName, String qName) { String tag = localName.toLowerCase(); if("p".equals(tag)) { closeParagraph(); //flush text nodes to current paragraph } else if("span".equals(tag) ) { flushText(); Style[] currentStyles = popStyle(tag); unsetTextPreferences(currentStyles); } else if("em".equalsIgnoreCase(tag)) { flushText(); unsetTextPreferences(Style.italic); popStyle(tag); } else if("strong".equalsIgnoreCase(tag)) { flushText(); unsetTextPreferences(Style.bold); popStyle(tag); } else if("table".equals(tag)) { endTable(); } else if("td".equals(tag) || "th".equals(tag)) { flushText(); currentParagraph = addContent(currentParagraph); } else if("tr".equals(tag)) { closeCurrentTableRow(); } else if("ul".equals(tag) || "ol".equals(tag)) { closeParagraph(); currentListParagraph = null; } else if("li".equals(tag)) { //do nothing } else if("blockquote".equals(tag)) { popStyle(tag); } else if("div".equals(tag)) { popStyle(tag); } } @Override public void endDocument() throws SAXException { //clean up trailing text and pack it in a last paragraph closeParagraph(); if(appendToCursor) { for(Node node:content) { factory.getCursor().appendChild(node); } } } public static class StyleStatus { private final String tag; private final Style[] styles; private final boolean quote; public StyleStatus(String tag, Style[] styles) { this(tag, false, styles); } public StyleStatus(String tag, boolean quote, Style[] styles) { this.tag = tag; this.quote = quote; this.styles = styles; } public String getTag() { return tag; } public boolean isQuote() { return quote; } public Style[] getStyles() { return styles; } } public class Table { private final Element tableEl; private int nextCol; private Node currentRowEl; private Element currentCellEl; private Span[] rowSpans = new Span[128]; public Table() { tableEl = factory.createTable(); } public Table(Integer... width) { tableEl = factory.createTable(width); } public Element getTableEl() { return tableEl; } public Node addRowEl() { for(int i=rowSpans.length; i-->0; ) { if(rowSpans[i] != null) { rowSpans[i].unDone(); } } nextCol = 0; currentRowEl = tableEl.getOwnerDocument().createElement("w:tr"); return tableEl.appendChild(currentRowEl); } public void closeRow() { closeCell(rowSpans.length-1); } /* <w:tc> <w:tcPr> <w:gridSpan w:val="2" /> <w:vMerge w:val="restart" /> */ public Node addCellEl(int colSpan, int rowSpan) { nextCol += closeCell(nextCol); currentCellEl = currentRowEl.getOwnerDocument().createElement("w:tc"); Node prefs = null; if(colSpan > 1) { prefs = currentCellEl.appendChild(currentCellEl.getOwnerDocument().createElement("w:tcPr")); Element gridSpan = (Element)prefs.appendChild(prefs.getOwnerDocument().createElement("w:gridSpan")); gridSpan.setAttribute("w:val", Integer.toString(colSpan)); } if(rowSpan > 1) { prefs = prefs != null ? prefs : currentCellEl.appendChild(currentCellEl.getOwnerDocument().createElement("w:tcPr")); Element vMerge = (Element)prefs.appendChild(prefs.getOwnerDocument().createElement("w:vMerge")); vMerge.setAttribute("w:val", "restart"); } if(colSpan == 1 && rowSpan == 1) { rowSpans[nextCol] = Span.OneOnOne; } else { rowSpans[nextCol] = new Span(colSpan, rowSpan); } nextCol += (colSpan <= 1 ? 1 : colSpan); return currentRowEl.appendChild(currentCellEl); } public Element addCellEl(Element cellEl, int colSpan) { nextCol += closeCell(nextCol); currentCellEl = cellEl; nextCol += (colSpan <= 1 ? 1 : colSpan); return (Element)currentRowEl.appendChild(currentCellEl); } public int closeCell(int lastIndex) { for(int i=lastIndex+1; i-->0; ) { Span span = rowSpans[i]; if(span != null) { if(span.getRowSpan() > 1 && !span.isDone()) { currentCellEl = (Element)currentRowEl.appendChild(currentRowEl.getOwnerDocument().createElement("w:tc")); Node prefs = currentCellEl.appendChild(currentCellEl.getOwnerDocument().createElement("w:tcPr")); if(span.getColSpan() > 1) { Element gridSpan = (Element)prefs.appendChild(prefs.getOwnerDocument().createElement("w:gridSpan")); gridSpan.setAttribute("w:val", Integer.toString(span.getColSpan())); } prefs.appendChild(prefs.getOwnerDocument().createElement("w:vMerge")); currentCellEl.appendChild(currentCellEl.getOwnerDocument().createElement("w:p")); span.decrementRowSpan(); return span.getColSpan(); } else { break; } } } return 0; } public Element getCurrentCell() { return currentCellEl; } } private static class Span { public static final Span OneOnOne = new Span(1,1); private int colspan; private int rowspan; private boolean done = true; private Span(int colspan, int rowspan) { this.colspan = colspan; this.rowspan = rowspan; } public int getColSpan() { return colspan; } public int getRowSpan() { return rowspan; } public void decrementRowSpan() { rowspan--; } public boolean isDone() { return done; } public void unDone() { done = false; } } }