/**
* Copyright (c) 2000-present Liferay, Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*/
package com.liferay.knowledge.base.internal.importer.util;
import com.liferay.document.library.kernel.util.DLUtil;
import com.liferay.knowledge.base.exception.KBArticleImportException;
import com.liferay.knowledge.base.markdown.converter.MarkdownConverter;
import com.liferay.knowledge.base.markdown.converter.factory.MarkdownConverterFactoryUtil;
import com.liferay.knowledge.base.model.KBArticle;
import com.liferay.portal.kernel.exception.PortalException;
import com.liferay.portal.kernel.log.Log;
import com.liferay.portal.kernel.log.LogFactoryUtil;
import com.liferay.portal.kernel.model.ModelHintsUtil;
import com.liferay.portal.kernel.repository.model.FileEntry;
import com.liferay.portal.kernel.util.CharPool;
import com.liferay.portal.kernel.util.FileUtil;
import com.liferay.portal.kernel.util.HtmlUtil;
import com.liferay.portal.kernel.util.StringBundler;
import com.liferay.portal.kernel.util.StringPool;
import com.liferay.portal.kernel.util.StringUtil;
import com.liferay.portal.kernel.util.Validator;
import com.liferay.portal.kernel.zip.ZipReader;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
 * Converts the Markdown source of a knowledge base article into HTML and
 * extracts the pieces needed to import it: the title, the URL title, the
 * source URL, and the article body with image references resolved.
 *
 * <p>
 * The title heading is expected to be the first <code>&lt;h1&gt;</code> element
 * of the converted HTML and to carry an ID marker of the form
 * <code>[](id=some-id)</code>.
 * </p>
 *
 * @author Sergio González
 */
public class KBArticleMarkdownConverter {

	/**
	 * Converts the Markdown to HTML and extracts the title, URL title, body,
	 * and source URL.
	 *
	 * @param  markdown the Markdown source to convert
	 * @param  fileEntryName the name of the file the Markdown came from; it is
	 *         used in error messages and to build the source URL
	 * @param  metadata the import metadata; the <code>base.source.url</code>
	 *         entry, if present, is used to build the source URL
	 * @throws KBArticleImportException if the Markdown cannot be converted, if
	 *         no <code>&lt;h1&gt;</code> title heading is found, or if the
	 *         title heading has no usable ID marker
	 */
	public KBArticleMarkdownConverter(
			String markdown, String fileEntryName, Map<String, String> metadata)
		throws KBArticleImportException {

		MarkdownConverter markdownConverter =
			MarkdownConverterFactoryUtil.create();

		String html = null;

		try {
			html = markdownConverter.convert(markdown);
		}
		catch (IOException ioe) {
			throw new KBArticleImportException(
				"Unable to convert Markdown to HTML: " +
					ioe.getLocalizedMessage(),
				ioe);
		}

		String heading = getHeading(html);

		if (Validator.isNull(heading)) {
			throw new KBArticleImportException(
				"Unable to extract title heading from file: " + fileEntryName);
		}

		_urlTitle = getUrlTitle(heading);

		if (Validator.isNull(_urlTitle)) {
			throw new KBArticleImportException(
				"Missing title heading ID in file: " + fileEntryName);
		}

		// The title is the heading text that precedes the ID marker

		String title = HtmlUtil.unescape(heading);

		int x = title.indexOf(_ID_MARKER);

		if (x != -1) {
			title = title.substring(0, x);
		}

		_title = title;

		// Heading IDs must be stripped before the title heading is removed,
		// because stripHeading keeps only what follows the first "</h1>"

		html = stripIds(html);

		_html = stripHeading(html);

		String baseSourceURL = metadata.get(_METADATA_BASE_SOURCE_URL);

		_sourceURL = buildSourceURL(baseSourceURL, fileEntryName);
	}

	/**
	 * Returns the source URL built from the <code>base.source.url</code>
	 * metadata entry and the file entry name.
	 *
	 * @return the source URL, or <code>null</code> if the base source URL was
	 *         not a valid URL
	 */
	public String getSourceURL() {
		return _sourceURL;
	}

	/**
	 * Returns the article title extracted from the title heading.
	 *
	 * @return the article title
	 */
	public String getTitle() {
		return _title;
	}

	/**
	 * Returns the URL title derived from the title heading's ID marker.
	 *
	 * @return the URL title
	 */
	public String getUrlTitle() {
		return _urlTitle;
	}

	/**
	 * Returns the article HTML with every self-closed <code>&lt;img&gt;</code>
	 * tag rewritten to point at the corresponding imported image file entry.
	 *
	 * <p>
	 * Each image tag's opening portion (everything before its
	 * <code>/&gt;</code>) is replaced with a new <code>alt</code> and
	 * <code>src</code>; the original <code>/&gt;</code> is kept. If an image
	 * file entry cannot be found, a placeholder tag with an empty
	 * <code>src</code> is written instead. If an image tag has no closing
	 * <code>/&gt;</code>, the remaining HTML is appended unchanged.
	 * </p>
	 *
	 * @param  userId the ID of the user importing the images
	 * @param  kbArticle the article the images are attached to
	 * @param  zipReader the reader of the ZIP archive containing the images
	 * @param  fileEntriesMap the map of already known image file entries
	 * @return the article HTML with its image references processed
	 * @throws PortalException if an image file entry could not be added
	 */
	public String processAttachmentsReferences(
			long userId, KBArticle kbArticle, ZipReader zipReader,
			Map<String, FileEntry> fileEntriesMap)
		throws PortalException {

		// Collect the start position of every image tag in ascending order

		Set<Integer> indexes = new TreeSet<>();

		int index = 0;

		while ((index = _html.indexOf("<img", index)) > -1) {
			indexes.add(index);

			index += 4;
		}

		if (indexes.isEmpty()) {
			return _html;
		}

		StringBundler sb = new StringBundler();

		int previousIndex = 0;

		for (int curIndex : indexes) {
			if (curIndex > previousIndex) {

				// Append text from previous position up to image tag

				sb.append(_html.substring(previousIndex, curIndex));
			}

			int pos = _html.indexOf("/>", curIndex);

			if (pos < 0) {
				if (_log.isDebugEnabled()) {
					_log.debug(
						"Expected close tag for image " +
							_html.substring(curIndex));
				}

				sb.append(_html.substring(curIndex));

				// The rest of the HTML has just been appended, so mark it as
				// consumed; otherwise the tail would be appended a second
				// time after the loop

				previousIndex = _html.length();

				break;
			}

			String text = _html.substring(curIndex, pos);

			String imageFileName = KBArticleImporterUtil.extractImageFileName(
				text);

			FileEntry imageFileEntry = KBArticleImporterUtil.addImageFileEntry(
				imageFileName, userId, kbArticle, zipReader, fileEntriesMap);

			if (imageFileEntry == null) {
				if (_log.isWarnEnabled()) {
					_log.warn("Unable to find image source " + text);
				}

				sb.append("<img alt=\"missing image\" src=\"\" ");
			}
			else {
				String imageSrc = StringPool.BLANK;

				try {
					imageSrc = DLUtil.getPreviewURL(
						imageFileEntry, imageFileEntry.getFileVersion(), null,
						StringPool.BLANK);
				}
				catch (PortalException pe) {

					// Best effort: keep the image tag with an empty source

					if (_log.isWarnEnabled()) {
						_log.warn(
							"Unable to obtain image URL from file entry " +
								imageFileEntry.getFileEntryId(),
							pe);
					}
				}

				sb.append("<img alt=\"");
				sb.append(HtmlUtil.escapeAttribute(imageFileEntry.getTitle()));
				sb.append("\" src=\"");
				sb.append(imageSrc);
				sb.append("\" ");
			}

			// Resume at the original "/>" so that it closes the new tag

			previousIndex = pos;
		}

		if (previousIndex < _html.length()) {
			sb.append(_html.substring(previousIndex));
		}

		return sb.toString();
	}

	/**
	 * Builds the source URL of a file by joining the base source URL and the
	 * file entry name with exactly one slash contributed by this method.
	 *
	 * @param  baseSourceURL the base source URL; trailing slashes are removed
	 * @param  fileEntryName the name of the file entry
	 * @return the source URL, or <code>null</code> if the base source URL is
	 *         not a valid URL
	 */
	protected String buildSourceURL(
		String baseSourceURL, String fileEntryName) {

		if (!Validator.isUrl(baseSourceURL)) {
			return null;
		}

		// Find the last character that is not a trailing slash

		int pos = baseSourceURL.length() - 1;

		while ((pos >= 0) && (baseSourceURL.charAt(pos) == CharPool.SLASH)) {
			pos--;
		}

		StringBundler sb = new StringBundler(3);

		sb.append(baseSourceURL.substring(0, pos + 1));

		if (!fileEntryName.startsWith(StringPool.SLASH)) {
			sb.append(StringPool.SLASH);
		}

		sb.append(FileUtil.replaceSeparator(fileEntryName));

		return sb.toString();
	}

	/**
	 * Returns the content of the first <code>&lt;h1&gt;</code> element.
	 *
	 * @param  html the HTML to search
	 * @return the text between the first <code>&lt;h1&gt;</code> and the first
	 *         <code>&lt;/h1&gt;</code>, or <code>null</code> if either tag is
	 *         missing or the closing tag comes first
	 */
	protected String getHeading(String html) {
		int x = html.indexOf("<h1>");
		int y = html.indexOf("</h1>");

		if ((x == -1) || (y == -1) || (x > y)) {
			return null;
		}

		return html.substring(x + 4, y);
	}

	/**
	 * Derives the URL title from the ID marker of a heading.
	 *
	 * <p>
	 * The ID is lower cased, its spaces are replaced with dashes, and a
	 * leading slash is added if missing. If the result is longer than the
	 * maximum length of the <code>urlTitle</code> field, it is shortened at
	 * the last dash, or cut at the maximum length when no dash remains.
	 * </p>
	 *
	 * @param  heading the heading text containing the ID marker
	 * @return the URL title, or <code>null</code> if the heading has no ID
	 *         marker or the marker is not closed
	 */
	protected String getUrlTitle(String heading) {
		int x = heading.indexOf(_ID_MARKER);

		if (x == -1) {
			return null;
		}

		int y = heading.indexOf(StringPool.CLOSE_PARENTHESIS, x);

		if (y <= (x + 1)) {

			// No closing parenthesis after the marker

			return null;
		}

		int equalsSign = heading.indexOf(StringPool.EQUAL, x);

		String urlTitle = heading.substring(equalsSign + 1, y);

		urlTitle = StringUtil.replace(urlTitle, CharPool.SPACE, CharPool.DASH);

		urlTitle = StringUtil.toLowerCase(urlTitle);

		if (!urlTitle.startsWith(StringPool.SLASH)) {
			urlTitle = StringPool.SLASH + urlTitle;
		}

		int urlTitleMaxLength = ModelHintsUtil.getMaxLength(
			KBArticle.class.getName(), "urlTitle");

		while (urlTitle.length() > urlTitleMaxLength) {
			int pos = urlTitle.lastIndexOf(StringPool.DASH);

			if (pos == -1) {
				urlTitle = urlTitle.substring(0, urlTitleMaxLength);
			}
			else {
				urlTitle = urlTitle.substring(0, pos);
			}
		}

		return urlTitle;
	}

	/**
	 * Removes everything up to and including the first
	 * <code>&lt;/h1&gt;</code> tag.
	 *
	 * @param  html the HTML to strip
	 * @return the HTML following the first <code>&lt;/h1&gt;</code>, or the
	 *         unchanged HTML if there is no such tag
	 */
	protected String stripHeading(String html) {
		int index = html.indexOf("</h1>");

		if (index == -1) {
			return html;
		}

		return html.substring(index + 5);
	}

	/**
	 * Removes the ID markers (<code>[](id=...)</code>) that sit at the very
	 * end of a heading, that is, markers immediately followed by
	 * <code>&lt;/h</code>. Markers found anywhere else are left in place.
	 *
	 * @param  content the HTML content to process
	 * @return the content without trailing heading ID markers
	 */
	protected String stripIds(String content) {
		int index = content.indexOf(_ID_MARKER);

		if (index == -1) {
			return content;
		}

		StringBundler sb = new StringBundler();

		do {
			int x = content.indexOf(StringPool.EQUAL, index);

			int y = content.indexOf(StringPool.CLOSE_PARENTHESIS, x);

			if (y != -1) {
				int z = content.indexOf("</h", y);

				if (z != (y + 1)) {

					// Not at the end of a heading, keep the marker

					sb.append(content.substring(0, y + 1));
				}
				else {

					// Drop the marker and any whitespace preceding it

					sb.append(
						StringUtil.trimTrailing(content.substring(0, index)));
				}

				content = content.substring(y + 1);
			}
			else {
				if (_log.isWarnEnabled()) {
					String msg = content.substring(index);

					// Get the invalid id text from the content. The search
					// must start at the marker: a space located before it
					// would make the substring range invalid.

					int spaceIndex = content.indexOf(StringPool.SPACE, index);

					if (spaceIndex != -1) {
						msg = content.substring(index, spaceIndex);
					}

					_log.warn(
						"Missing ')' for web content containing header id " +
							msg);
				}

				// Since no close parenthesis remains in the content, stop
				// stripping out IDs and simply include all of the remaining
				// content

				break;
			}
		}
		while ((index = content.indexOf(_ID_MARKER)) != -1);

		sb.append(content);

		return sb.toString();
	}

	/**
	 * The text that opens a heading ID marker.
	 */
	private static final String _ID_MARKER = "[](id=";

	/**
	 * The metadata key holding the base source URL.
	 */
	private static final String _METADATA_BASE_SOURCE_URL = "base.source.url";

	private static final Log _log = LogFactoryUtil.getLog(
		KBArticleMarkdownConverter.class);

	/**
	 * The converted HTML without heading IDs and without the title heading.
	 */
	private final String _html;

	private final String _sourceURL;
	private final String _title;
	private final String _urlTitle;

}