/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.jooby.internal.raml;

import java.util.List;
import java.util.stream.Collectors;

import org.jooby.raml.Raml;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Splitter;

/**
 * Static helpers that turn an HTML fragment into Markdown text and then format that text as a
 * YAML scalar.
 */
public class Doc {

  /** The logging system. */
  // NOTE(review): the logger is created for Raml.class rather than Doc.class -- confirm intended.
  private static final Logger log = LoggerFactory.getLogger(Raml.class);

  /**
   * Converts an HTML fragment into Markdown.
   *
   * <p>Every line feed in the input is replaced by {@code <br>} before parsing, so line breaks
   * of the input show up as line breaks in the output.
   *
   * @param html HTML fragment to convert.
   * @return Markdown text built by walking the parsed fragment.
   * @throws NullPointerException if {@code html} is {@code null}.
   */
  public static String toMarkDown(final String html) {
    Document doc = Jsoup.parseBodyFragment(html.replace("\n", "<br>"));
    StringBuilder buff = new StringBuilder();
    recurseElement(doc.body(), buff);
    return buff.toString();
  }

  /**
   * Formats text as a YAML scalar.
   *
   * <p>When exactly one line of {@code text} is non-blank the result is the trimmed text wrapped
   * in single quotes, with embedded single quotes doubled. Otherwise the result is a {@code |-}
   * block scalar: every non-blank line is prefixed with {@code level + 2} indent units, blank
   * lines are emitted empty and leading empty lines are dropped.
   *
   * @param text Text to format.
   * @param level Nesting level used to compute the indentation of block scalar lines.
   * @return YAML scalar representation of {@code text}.
   */
  public static String toYaml(final String text, final int level) {
    List<String> lines = Splitter.on("\n").splitToList(text);
    long count = lines.stream()
        .filter(l -> l.trim().length() > 0)
        .count();
    if (count == 1) {
      return "'" + text.trim().replace("'", "''") + "'";
    }
    StringBuilder indent = new StringBuilder();
    for (int i = 0; i < level + 2; i++) {
      indent.append(" ");
    }
    // Build the prefix once instead of converting the builder for every line.
    String prefix = indent.toString();
    String body = lines.stream()
        .map(line -> line.trim().length() > 0 ? prefix + line : "")
        .collect(Collectors.joining("\n"))
        // Drop empty lines at the very beginning of the block.
        .replaceAll("^[\\n]+", "");
    return "|-\n" + body;
  }

  // Source: https://github.com/foursquare/sites-to-markdown/blob/master/src/jon/Convert.java
  /**
   * Walks {@code element} and all of its descendants, appending Markdown to {@code builder}.
   *
   * @param element Root of the tree to convert.
   * @param builder Receives the generated Markdown.
   */
  private static void recurseElement(final Element element, final StringBuilder builder) {
    new NodeTraversor(new MarkdownVisitor(builder)).traverse(element);
  }

  /**
   * Converts an HTML fragment into Markdown and formats it as a trimmed YAML scalar.
   *
   * @param doc HTML fragment.
   * @param level Nesting level forwarded to {@link #toYaml(String, int)}.
   * @return Result of {@code toYaml(toMarkDown(doc), level)} with surrounding whitespace removed.
   */
  public static String parse(final String doc, final int level) {
    return toYaml(toMarkDown(doc), level).trim();
  }

  /**
   * Node visitor that writes the Markdown opening marker of a tag in {@link #head} and the
   * matching closing marker in {@link #tail}.
   */
  private static final class MarkdownVisitor implements NodeVisitor {

    /** Output buffer. */
    private final StringBuilder builder;

    /** Number of {@code ol}/{@code ul} elements currently open; drives list item indentation. */
    private int listDepth = 0;

    MarkdownVisitor(final StringBuilder builder) {
      this.builder = builder;
    }

    @Override
    public void head(final Node node, final int depth) {
      if (node instanceof TextNode) {
        TextNode textNode = (TextNode) node;
        String txt = textNode.text().replaceAll("\u00a0", " "); // non-break spaces
        builder.append(txt);
        return;
      }
      if (!(node instanceof Element)) {
        return;
      }
      Element el = (Element) node;
      switch (el.tagName()) {
        case "span":
        case "blockquote":
          // ignored
          break;
        case "ol":
        case "ul":
          listDepth += 1;
          // falls through: a list starts on a new line, like br/p
        case "br":
        case "p":
          builder.append("\n");
          break;
        case "div":
          builder.append("\n");
          break;
        case "h1":
          builder.append("\n# ");
          break;
        case "h2":
          builder.append("\n## ");
          break;
        case "h3":
          builder.append("\n### ");
          break;
        case "h4":
          builder.append("\n#### ");
          // Without this break the heading fell through and got a stray, never closed "**".
          break;
        case "b":
        case "strong":
          builder.append("**");
          break;
        case "cite":
        case "i":
        case "u":
          builder.append("*");
          break;
        case "a":
          builder.append('[');
          break;
        case "li":
          // One indent unit per enclosing list beyond the first.
          for (int i = 0; i < listDepth - 1; i++) {
            builder.append(" ");
          }
          builder.append("ol".equals(el.parent().tagName()) ? "1. " : "* ");
          break;
        case "code":
          builder.append("`");
          break;
        case "strike":
          // Passed through as HTML.
          builder.append("<").append(el.tagName()).append(">");
          break;
        case "img": {
          String src = el.attr("src");
          String alt = el.attr("alt");
          alt = alt == null ? "" : alt;
          if (src != null) {
            builder.append("![").append(alt).append("](").append(src).append(")\n");
          }
          break;
        }
        case "pre":
          builder.append("```\n");
          break;
        case "hr":
          builder.append("\n***\n");
          break;
        case "font": {
          String face = el.attr("face");
          if (face != null && face.contains("monospace")) {
            builder.append("`");
          }
          break;
        }
        default:
          log.debug("Unhandled element {}", el.tagName());
      }
    }

    @Override
    public void tail(final Node node, final int depth) {
      if (!(node instanceof Element)) {
        return;
      }
      Element el = (Element) node;
      switch (el.tagName()) {
        case "b":
        case "strong":
          builder.append("**");
          break;
        case "ol":
        case "ul":
          listDepth -= 1;
          break;
        case "cite":
        case "i":
        case "u":
          builder.append("*");
          break;
        case "strike":
          builder.append("</").append(el.tagName()).append(">");
          break;
        case "a": {
          String href = el.attr("href");
          if (href != null) {
            builder.append(']').append('(').append(href).append(')');
          }
          break;
        }
        case "pre":
          builder.append("\n```\n");
          break;
        case "code":
          builder.append("`");
          break;
        case "font": {
          String face = el.attr("face");
          if (face != null && face.contains("monospace")) {
            builder.append("`");
          }
          break;
        }
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "li":
          builder.append("\n");
          break;
        default:
          break;
      }
    }
  }
}