/*
 * Copyright 2013 gitblit.com.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.gitblit.wicket;

import static org.pegdown.FastEncoder.encode;

import java.io.Serializable;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.wicket.Page;
import org.apache.wicket.RequestCycle;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.mylyn.wikitext.confluence.core.ConfluenceLanguage;
import org.eclipse.mylyn.wikitext.core.parser.Attributes;
import org.eclipse.mylyn.wikitext.core.parser.MarkupParser;
import org.eclipse.mylyn.wikitext.core.parser.builder.HtmlDocumentBuilder;
import org.eclipse.mylyn.wikitext.core.parser.markup.MarkupLanguage;
import org.eclipse.mylyn.wikitext.mediawiki.core.MediaWikiLanguage;
import org.eclipse.mylyn.wikitext.textile.core.TextileLanguage;
import org.eclipse.mylyn.wikitext.tracwiki.core.TracWikiLanguage;
import org.eclipse.mylyn.wikitext.twiki.core.TWikiLanguage;
import org.pegdown.DefaultVerbatimSerializer;
import org.pegdown.LinkRenderer;
import org.pegdown.ToHtmlSerializer;
import org.pegdown.VerbatimSerializer;
import org.pegdown.ast.ExpImageNode;
import org.pegdown.ast.RefImageNode;
import org.pegdown.ast.WikiLinkNode;
import org.pegdown.plugins.ToHtmlSerializerPlugin;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.gitblit.IStoredSettings;
import com.gitblit.Keys;
import com.gitblit.models.PathModel;
import com.gitblit.servlet.RawServlet;
import com.gitblit.utils.JGitUtils;
import com.gitblit.utils.MarkdownUtils;
import com.gitblit.utils.StringUtils;
import com.gitblit.utils.XssFilter;
import com.gitblit.wicket.pages.DocPage;
import com.google.common.base.Joiner;

/**
 * Processes markup content and generates html with repository-relative page and
 * image linking.
 *
 * @author James Moger
 *
 */
public class MarkupProcessor {

    /** The markup dialects this processor can distinguish by file extension. */
    public enum MarkupSyntax {
        PLAIN, MARKDOWN, TWIKI, TRACWIKI, TEXTILE, MEDIAWIKI, CONFLUENCE
    }

    private final Logger logger = LoggerFactory.getLogger(getClass());

    private final IStoredSettings settings;

    private final XssFilter xssFilter;

    /**
     * Collects the file extensions configured for every supported markup
     * syntax (confluence, markdown, mediawiki, textile, tracwiki, twiki).
     *
     * @param settings the settings to read the extension lists from
     * @return a new, mutable list of the configured extensions
     */
    public static List<String> getMarkupExtensions(IStoredSettings settings) {
        List<String> list = new ArrayList<String>();
        list.addAll(settings.getStrings(Keys.web.confluenceExtensions));
        list.addAll(settings.getStrings(Keys.web.markdownExtensions));
        list.addAll(settings.getStrings(Keys.web.mediawikiExtensions));
        list.addAll(settings.getStrings(Keys.web.textileExtensions));
        list.addAll(settings.getStrings(Keys.web.tracwikiExtensions));
        list.addAll(settings.getStrings(Keys.web.twikiExtensions));
        return list;
    }

    /**
     * Creates a processor.
     *
     * @param settings  source of the extension lists, document roots, blob
     *                  encodings and the forward-slash replacement character
     * @param xssFilter filter applied to the html generated from markup
     */
    public MarkupProcessor(IStoredSettings settings, XssFilter xssFilter) {
        this.settings = settings;
        this.xssFilter = xssFilter;
    }

    /**
     * @return the configured markup extensions of this processor's settings
     */
    public List<String> getMarkupExtensions() {
        return getMarkupExtensions(settings);
    }

    /**
     * @return the configured markup extensions plus "txt" and "TXT"
     */
    public List<String> getAllExtensions() {
        List<String> list = getMarkupExtensions(settings);
        list.add("txt");
        list.add("TXT");
        return list;
    }

    /** Returns the document base names configured under {@code web.documents}. */
    private List<String> getRoots() {
        return settings.getStrings(Keys.web.documents);
    }

    /** Returns the configured blob encodings as an array. */
    private String[] getEncodings() {
        return settings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
    }

    /**
     * Maps a document path to a markup syntax by looking its lower-cased file
     * extension up in the configured extension lists. Paths with no extension,
     * or with an extension that matches no list, are treated as plain text.
     */
    private MarkupSyntax determineSyntax(String documentPath) {
        String ext = StringUtils.getFileExtension(documentPath).toLowerCase();
        if (StringUtils.isEmpty(ext)) {
            return MarkupSyntax.PLAIN;
        }

        if (settings.getStrings(Keys.web.confluenceExtensions).contains(ext)) {
            return MarkupSyntax.CONFLUENCE;
        } else if (settings.getStrings(Keys.web.markdownExtensions).contains(ext)) {
            return MarkupSyntax.MARKDOWN;
        } else if (settings.getStrings(Keys.web.mediawikiExtensions).contains(ext)) {
            return MarkupSyntax.MEDIAWIKI;
        } else if (settings.getStrings(Keys.web.textileExtensions).contains(ext)) {
            return MarkupSyntax.TEXTILE;
        } else if (settings.getStrings(Keys.web.tracwikiExtensions).contains(ext)) {
            return MarkupSyntax.TRACWIKI;
        } else if (settings.getStrings(Keys.web.twikiExtensions).contains(ext)) {
            return MarkupSyntax.TWIKI;
        }

        return MarkupSyntax.PLAIN;
    }

    /**
     * Checks whether the repository contains at least one root document, i.e.
     * a non-tree path whose lower-cased base name is one of the configured
     * roots and whose extension is either empty or a known extension.
     *
     * @param r the repository to inspect
     * @return true if a root document was found
     */
    public boolean hasRootDocs(Repository r) {
        List<String> roots = getRoots();
        List<String> extensions = getAllExtensions();
        List<PathModel> paths = JGitUtils.getFilesInPath(r, null, null);
        for (PathModel path : paths) {
            if (!path.isTree()) {
                String ext = StringUtils.getFileExtension(path.name).toLowerCase();
                String name = StringUtils.stripFileExtension(path.name).toLowerCase();

                if (roots.contains(name)) {
                    if (StringUtils.isEmpty(ext) || extensions.contains(ext)) {
                        return true;
                    }
                }
            }
        }
        return false;
    }

    /**
     * Parses the configured root documents of the given commit.
     *
     * @param r              the repository
     * @param repositoryName name used when generating links
     * @param commitId       the commit to read the documents from
     * @return the parsed documents, ordered like the configured roots
     */
    public List<MarkupDocument> getRootDocs(Repository r, String repositoryName, String commitId) {
        List<String> roots = getRoots();
        List<MarkupDocument> list = getDocs(r, repositoryName, commitId, roots);
        return list;
    }

    /**
     * Parses the document whose base name is "readme" in the given commit.
     *
     * @param r              the repository
     * @param repositoryName name used when generating links
     * @param commitId       the commit to read the document from
     * @return the parsed readme, or null if there is none
     */
    public MarkupDocument getReadme(Repository r, String repositoryName, String commitId) {
        List<MarkupDocument> list = getDocs(r, repositoryName, commitId, Arrays.asList("readme"));
        if (list.isEmpty()) {
            return null;
        }
        return list.get(0);
    }

    /**
     * Finds and parses the documents whose lower-cased base name is in
     * {@code names} and whose extension is empty or a known extension.
     *
     * @return the parsed documents in the order of {@code names}; empty if the
     *         commit could not be obtained or nothing matched
     */
    private List<MarkupDocument> getDocs(Repository r, String repositoryName, String commitId, List<String> names) {
        List<String> extensions = getAllExtensions();
        String[] encodings = getEncodings();
        Map<String, MarkupDocument> map = new HashMap<String, MarkupDocument>();
        RevCommit commit = JGitUtils.getCommit(r, commitId);
        if (commit == null) {
            // getFilesInPath is also called with a null commit (see hasRootDocs),
            // so it may still return paths here; without this guard the
            // commit.getTree() call below would throw a NullPointerException.
            return new ArrayList<MarkupDocument>();
        }
        List<PathModel> paths = JGitUtils.getFilesInPath(r, null, commit);
        for (PathModel path : paths) {
            if (!path.isTree()) {
                String ext = StringUtils.getFileExtension(path.name).toLowerCase();
                String name = StringUtils.stripFileExtension(path.name).toLowerCase();

                if (names.contains(name)) {
                    if (StringUtils.isEmpty(ext) || extensions.contains(ext)) {
                        String markup = JGitUtils.getStringContent(r, commit.getTree(), path.name, encodings);
                        MarkupDocument doc = parse(repositoryName, commitId, path.name, markup);
                        map.put(name, doc);
                    }
                }
            }
        }
        // return document list in requested order
        List<MarkupDocument> list = new ArrayList<MarkupDocument>();
        for (String name : names) {
            if (map.containsKey(name)) {
                list.add(map.get(name));
            }
        }
        return list;
    }

    /**
     * Transforms markup text into html, choosing the parser from the document
     * path's extension. If the text is null or the transformation throws, the
     * returned document's html is an error alert followed by the fallback
     * text.
     *
     * @param repositoryName name used when generating links
     * @param commitId       commit used when generating links
     * @param documentPath   path of the document inside the repository
     * @param markupText     the raw markup; may be null if the document was
     *                       not found
     * @return the document with its {@code html} field populated
     */
    public MarkupDocument parse(String repositoryName, String commitId, String documentPath, String markupText) {
        final MarkupSyntax syntax = determineSyntax(documentPath);
        final MarkupDocument doc = new MarkupDocument(documentPath, markupText, syntax);

        if (markupText != null) {
            try {
                switch (syntax) {
                case CONFLUENCE:
                    parse(doc, repositoryName, commitId, new ConfluenceLanguage());
                    break;
                case MARKDOWN:
                    parse(doc, repositoryName, commitId);
                    break;
                case MEDIAWIKI:
                    parse(doc, repositoryName, commitId, new MediaWikiLanguage());
                    break;
                case TEXTILE:
                    parse(doc, repositoryName, commitId, new TextileLanguage());
                    break;
                case TRACWIKI:
                    parse(doc, repositoryName, commitId, new TracWikiLanguage());
                    break;
                case TWIKI:
                    parse(doc, repositoryName, commitId, new TWikiLanguage());
                    break;
                default:
                    doc.html = MarkdownUtils.transformPlainText(markupText);
                    break;
                }
            } catch (Exception e) {
                // deliberate best-effort: a broken document must not break the page
                logger.error("failed to transform " + syntax, e);
            }
        }

        if (doc.html == null) {
            // failed to transform markup
            if (markupText == null) {
                markupText = String.format("Document <b>%1$s</b> not found in <em>%2$s</em>", documentPath, repositoryName);
            }
            // NOTE(review): unlike the successful paths, this fallback does not
            // pass the text through xssFilter -- confirm whether the raw markup
            // and the document path need escaping here.
            markupText = MessageFormat.format("<div class=\"alert alert-error\"><strong>{0}:</strong> {1}</div>{2}", "Error", "failed to parse markup", markupText);
            doc.html = StringUtils.breakLinesForHtml(markupText);
        }

        return doc;
    }

    /**
     * Parses the markup using the specified markup language
     *
     * @param doc            the document to parse; its {@code html} is set
     * @param repositoryName name used when generating links
     * @param commitId       commit used when generating links
     * @param lang           the wikitext markup language to parse with
     */
    private void parse(final MarkupDocument doc, final String repositoryName, final String commitId, MarkupLanguage lang) {
        StringWriter writer = new StringWriter();
        HtmlDocumentBuilder builder = new HtmlDocumentBuilder(writer) {

            @Override
            public void image(Attributes attributes, String imagePath) {
                String url;
                if (imagePath.indexOf("://") == -1) {
                    // relative image
                    String path = doc.getRelativePath(imagePath);
                    String contextUrl = RequestCycle.get().getRequest().getRelativePathPrefixToContextRoot();
                    url = RawServlet.asLink(contextUrl, repositoryName, commitId, path);
                } else {
                    // absolute image
                    url = imagePath;
                }
                super.image(attributes, url);
            }

            @Override
            public void link(Attributes attributes, String hrefOrHashName, String text) {
                String url;
                if (hrefOrHashName == null || hrefOrHashName.isEmpty()) {
                    // nothing to resolve; charAt(0) below would throw on an empty href
                    url = hrefOrHashName;
                } else if (hrefOrHashName.charAt(0) != '#') {
                    if (hrefOrHashName.indexOf("://") == -1) {
                        // relative link
                        String path = doc.getRelativePath(hrefOrHashName);
                        url = getWicketUrl(DocPage.class, repositoryName, commitId, path);
                    } else {
                        // absolute link
                        url = hrefOrHashName;
                    }
                } else {
                    // page-relative hash link
                    url = hrefOrHashName;
                }
                super.link(attributes, url, text);
            }
        };

        // avoid the <html> and <body> tags
        builder.setEmitAsDocument(false);

        MarkupParser parser = new MarkupParser(lang);
        parser.setBuilder(builder);
        parser.parse(doc.markup);

        final String content = writer.toString();
        final String safeContent = xssFilter.relaxed(content);

        doc.html = safeContent;
    }

    /**
     * Parses the document as Markdown using Pegdown.
     *
     * @param doc            the document to parse; its {@code html} is set
     * @param repositoryName name used when generating links
     * @param commitId       commit used when generating links
     */
    private void parse(final MarkupDocument doc, final String repositoryName, final String commitId) {
        LinkRenderer renderer = new LinkRenderer() {

            @Override
            public Rendering render(ExpImageNode node, String text) {
                if (node.url.indexOf("://") == -1) {
                    // repository-relative image link
                    String path = doc.getRelativePath(node.url);
                    String contextUrl = RequestCycle.get().getRequest().getRelativePathPrefixToContextRoot();
                    String url = RawServlet.asLink(contextUrl, repositoryName, commitId, path);
                    return new Rendering(url, text);
                }
                // absolute image link
                return new Rendering(node.url, text);
            }

            @Override
            public Rendering render(RefImageNode node, String url, String title, String alt) {
                Rendering rendering;
                if (url.indexOf("://") == -1) {
                    // repository-relative image link
                    String path = doc.getRelativePath(url);
                    String contextUrl = RequestCycle.get().getRequest().getRelativePathPrefixToContextRoot();
                    String wurl = RawServlet.asLink(contextUrl, repositoryName, commitId, path);
                    rendering = new Rendering(wurl, alt);
                } else {
                    // absolute image link
                    rendering = new Rendering(url, alt);
                }
                return StringUtils.isEmpty(title) ? rendering : rendering.withAttribute("title", encode(title));
            }

            @Override
            public Rendering render(WikiLinkNode node) {
                String path = doc.getRelativePath(node.getText());
                String name = getDocumentName(path);
                String url = getWicketUrl(DocPage.class, repositoryName, commitId, path);
                return new Rendering(url, name);
            }
        };

        final String content = MarkdownUtils.transformMarkdown(doc.markup, renderer);
        final String safeContent = xssFilter.relaxed(content);

        doc.html = safeContent;
    }

    /**
     * Builds the url of a page for a document path. Spaces in the path are
     * replaced by '-', the path is url-encoded as UTF-8, and both literal and
     * encoded forward slashes are replaced by the configured forward-slash
     * character.
     */
    private String getWicketUrl(Class<? extends Page> pageClass, final String repositoryName, final String commitId, final String document) {
        String fsc = settings.getString(Keys.web.forwardSlashCharacter, "/");
        String encodedPath = document.replace(' ', '-');
        try {
            encodedPath = URLEncoder.encode(encodedPath, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // fall through with the unencoded path
            logger.error("failed to url-encode document path as UTF-8", e);
        }
        encodedPath = encodedPath.replace("/", fsc).replace("%2F", fsc);

        String url = RequestCycle.get().urlFor(pageClass, WicketUtils.newPathParameter(repositoryName, commitId, encodedPath)).toString();
        return url;
    }

    /**
     * Derives a display name from a document path: the extension is stripped,
     * underscores become spaces and any leading folders are dropped.
     */
    private String getDocumentName(final String document) {
        // extract document name
        String name = StringUtils.stripFileExtension(document);
        name = name.replace('_', ' ');
        if (name.indexOf('/') > -1) {
            name = name.substring(name.lastIndexOf('/') + 1);
        }
        return name;
    }

    /**
     * A markup document: its repository path, raw markup, detected syntax and
     * the html generated from it.
     */
    public static class MarkupDocument implements Serializable {

        private static final long serialVersionUID = 1L;

        public final String documentPath;
        public final String markup;
        public final MarkupSyntax syntax;
        public String html;

        MarkupDocument(String documentPath, String markup, MarkupSyntax syntax) {
            this.documentPath = documentPath;
            this.markup = markup;
            this.syntax = syntax;
        }

        /**
         * Returns the folder part of the document path, including the trailing
         * '/' but without a leading '/', or an empty string when the document
         * path contains no '/'.
         */
        String getCurrentPath() {
            String basePath = "";
            if (documentPath.indexOf('/') > -1) {
                basePath = documentPath.substring(0, documentPath.lastIndexOf('/') + 1);
                if (basePath.charAt(0) == '/') {
                    return basePath.substring(1);
                }
            }
            return basePath;
        }

        /**
         * Resolves a reference found in this document to a repository path.
         * A reference starting with '/' is taken as repository-absolute; any
         * other reference is resolved against this document's folder, with each
         * leading "../" dropping one folder. Surplus "../" segments are ignored
         * once the repository root is reached.
         *
         * @param ref the reference as written in the markup
         * @return the resolved path, or an empty string for a null or empty
         *         reference
         */
        String getRelativePath(String ref) {
            if (ref == null || ref.isEmpty()) {
                // nothing to resolve; charAt(0) below would throw on an empty string
                return "";
            }
            if (ref.charAt(0) == '/') {
                // absolute path in repository
                return ref.substring(1);
            }

            // resolve relative repository path
            String cp = getCurrentPath();
            if (StringUtils.isEmpty(cp)) {
                return ref;
            }

            // this is a simple relative path resolver
            List<String> currPathStrings = new ArrayList<String>(Arrays.asList(cp.split("/")));
            String file = ref;
            while (file.startsWith("../")) {
                // strip ../ from the file reference
                file = file.substring(3);
                // drop the last path element, but never climb above the
                // repository root: remove(-1) would throw
                if (!currPathStrings.isEmpty()) {
                    currPathStrings.remove(currPathStrings.size() - 1);
                }
            }
            currPathStrings.add(file);
            return Joiner.on("/").join(currPathStrings);
        }
    }

    /**
     * This class implements a workaround for a bug reported in issue-379.
     * The bug was introduced by my own pegdown pull request #115.
     *
     * @author James Moger
     *
     */
    public static class WorkaroundHtmlSerializer extends ToHtmlSerializer {

        public WorkaroundHtmlSerializer(final LinkRenderer linkRenderer) {
            super(linkRenderer,
                    Collections.<String, VerbatimSerializer>singletonMap(VerbatimSerializer.DEFAULT, DefaultVerbatimSerializer.INSTANCE),
                    Collections.<ToHtmlSerializerPlugin>emptyList());
        }

        /** Prints {@code  name="value"} (with a leading space) to the printer. */
        private void printAttribute(String name, String value) {
            printer.print(' ').print(name).print('=').print('"').print(value).print('"');
        }

        /* Reimplement print image tag to eliminate a trailing double-quote */
        @Override
        protected void printImageTag(LinkRenderer.Rendering rendering) {
            printer.print("<img");
            printAttribute("src", rendering.href);
            printAttribute("alt", rendering.text);
            for (LinkRenderer.Attribute attr : rendering.attributes) {
                printAttribute(attr.name, attr.value);
            }
            printer.print("/>");
        }
    }
}