/**
* Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*/
package com.liferay.portlet.wiki.importers.mediawiki;
import com.liferay.portal.kernel.exception.PortalException;
import com.liferay.portal.kernel.exception.SystemException;
import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
import com.liferay.portal.kernel.log.Log;
import com.liferay.portal.kernel.log.LogFactoryUtil;
import com.liferay.portal.kernel.util.CharPool;
import com.liferay.portal.kernel.util.MapUtil;
import com.liferay.portal.kernel.util.ObjectValuePair;
import com.liferay.portal.kernel.util.ProgressTracker;
import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
import com.liferay.portal.kernel.util.SetUtil;
import com.liferay.portal.kernel.util.StreamUtil;
import com.liferay.portal.kernel.util.StringBundler;
import com.liferay.portal.kernel.util.StringPool;
import com.liferay.portal.kernel.util.StringUtil;
import com.liferay.portal.kernel.util.Validator;
import com.liferay.portal.kernel.xml.Attribute;
import com.liferay.portal.kernel.xml.Document;
import com.liferay.portal.kernel.xml.DocumentException;
import com.liferay.portal.kernel.xml.Element;
import com.liferay.portal.kernel.xml.SAXReaderUtil;
import com.liferay.portal.kernel.zip.ZipReader;
import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
import com.liferay.portal.model.User;
import com.liferay.portal.service.ServiceContext;
import com.liferay.portal.service.UserLocalServiceUtil;
import com.liferay.portal.util.PropsValues;
import com.liferay.portlet.asset.NoSuchTagException;
import com.liferay.portlet.asset.model.AssetTag;
import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
import com.liferay.portlet.asset.util.AssetUtil;
import com.liferay.portlet.documentlibrary.store.DLStoreUtil;
import com.liferay.portlet.wiki.ImportFilesException;
import com.liferay.portlet.wiki.NoSuchPageException;
import com.liferay.portlet.wiki.importers.WikiImporter;
import com.liferay.portlet.wiki.importers.WikiImporterKeys;
import com.liferay.portlet.wiki.model.WikiNode;
import com.liferay.portlet.wiki.model.WikiPage;
import com.liferay.portlet.wiki.model.WikiPageConstants;
import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author Alvaro del Castillo
* @author Jorge Ferrer
*/
public class MediaWikiImporter implements WikiImporter {
public static final String SHARED_IMAGES_CONTENT = "See attachments";
public static final String SHARED_IMAGES_TITLE = "SharedImages";
/**
 * Imports a MediaWiki export into the given wiki node.
 *
 * <p>
 * The input streams are positional: index 0 is the pages XML file and is
 * mandatory, index 1 is the users file and index 2 is the images archive.
 * The last two may be <code>null</code> or absent from the array.
 * </p>
 *
 * <p>
 * Processing order is: special pages, regular pages, images, and finally
 * the optional front page move.
 * </p>
 *
 * @param  userId the user ID handed to the page, tag and attachment
 *         services
 * @param  node the wiki node that receives the imported content
 * @param  inputStreams the pages, users and images streams, in that order
 * @param  options the import options read by the page processing steps
 * @throws PortalException if the pages stream is missing, if the XML
 *         cannot be parsed, if a file cannot be read, or if any import
 *         step fails
 */
@Override
public void importPages(
        long userId, WikiNode node, InputStream[] inputStreams,
        Map<String, String[]> options)
    throws PortalException {

    if ((inputStreams == null) || (inputStreams.length < 1) ||
        (inputStreams[0] == null)) {

        throw new PortalException("The pages file is mandatory");
    }

    InputStream pagesInputStream = inputStreams[0];

    // The users and images streams are optional, so a caller may pass an
    // array that is shorter than three elements

    InputStream usersInputStream = null;

    if (inputStreams.length > 1) {
        usersInputStream = inputStreams[1];
    }

    InputStream imagesInputStream = null;

    if (inputStreams.length > 2) {
        imagesInputStream = inputStreams[2];
    }

    try {
        Document document = SAXReaderUtil.read(pagesInputStream);

        Map<String, String> usersMap = readUsersFile(usersInputStream);

        Element rootElement = document.getRootElement();

        List<String> specialNamespaces = readSpecialNamespaces(rootElement);

        processSpecialPages(userId, node, rootElement, specialNamespaces);
        processRegularPages(
            userId, node, rootElement, specialNamespaces, usersMap,
            imagesInputStream, options);
        processImages(userId, node, imagesInputStream);

        moveFrontPage(userId, node, options);
    }
    catch (DocumentException de) {

        // Keep the parser failure as the cause so it can be diagnosed

        throw new ImportFilesException("Invalid XML file provided", de);
    }
    catch (IOException ioe) {
        throw new ImportFilesException(
            "Error reading the files provided", ioe);
    }
    catch (PortalException pe) {
        throw pe;
    }
    catch (Exception e) {
        throw new PortalException(e);
    }
}
/**
 * Resolves the portal user that corresponds to a MediaWiki author.
 *
 * <p>
 * If the users map holds a non-empty email address for the author, the
 * user is looked up by that email address; otherwise the lower-cased
 * author name is looked up as a screen name. Both lookups are scoped to
 * the company of the node.
 * </p>
 *
 * @param  userId the fallback user ID
 * @param  node the wiki node whose company ID scopes the lookup
 * @param  author the MediaWiki author name
 * @param  usersMap the mapping from author name to email address
 * @return the ID of the matching user, or <code>userId</code> if no user
 *         was found
 * @throws SystemException if a user lookup fails
 */
protected long getUserId(
        long userId, WikiNode node, String author,
        Map<String, String> usersMap)
    throws SystemException {

    String mappedEmailAddress = usersMap.get(author);

    User matchedUser;

    if (Validator.isNull(mappedEmailAddress)) {
        matchedUser = UserLocalServiceUtil.fetchUserByScreenName(
            node.getCompanyId(), StringUtil.toLowerCase(author));
    }
    else {
        matchedUser = UserLocalServiceUtil.fetchUserByEmailAddress(
            node.getCompanyId(), mappedEmailAddress);
    }

    return (matchedUser == null) ? userId : matchedUser.getUserId();
}
/**
 * Stores one page revision in the wiki node.
 *
 * <p>
 * The parent title, redirect title and asset tag names are read from the
 * raw content. If there is no redirect title, the content is run through
 * the MediaWiki to Creole translator; otherwise the content is replaced by
 * a link to the redirect title. The page is created first if it does not
 * exist yet, and then updated with the new content in <code>creole</code>
 * format.
 * </p>
 *
 * @param  userId the fallback user ID, also used when reading asset tags
 * @param  author the MediaWiki author of the revision
 * @param  node the wiki node that receives the page
 * @param  title the page title
 * @param  content the raw revision content
 * @param  summary the revision summary
 * @param  usersMap the mapping from author name to email address
 * @param  strictImportMode the flag passed to the translator
 * @throws PortalException if any step fails; the original exception is
 *         kept as the cause
 */
protected void importPage(
        long userId, String author, WikiNode node, String title,
        String content, String summary, Map<String, String> usersMap,
        boolean strictImportMode)
    throws PortalException {

    try {
        long ownerUserId = getUserId(userId, node, author, usersMap);

        String parent = readParentTitle(content);
        String redirect = readRedirectTitle(content);

        ServiceContext context = new ServiceContext();

        context.setAddGroupPermissions(true);
        context.setAddGuestPermissions(true);

        String[] tagNames = readAssetTagNames(userId, node, content);

        context.setAssetTagNames(tagNames);

        String pageContent;

        if (Validator.isNotNull(redirect)) {

            // A redirect page only carries a link to its target

            pageContent =
                StringPool.DOUBLE_OPEN_BRACKET + redirect +
                    StringPool.DOUBLE_CLOSE_BRACKET;
        }
        else {
            _translator.setStrictImportMode(strictImportMode);

            pageContent = _translator.translate(content);
        }

        long nodeId = node.getNodeId();

        WikiPage existingPage;

        try {
            existingPage = WikiPageLocalServiceUtil.getPage(nodeId, title);
        }
        catch (NoSuchPageException nspe) {
            existingPage = WikiPageLocalServiceUtil.addPage(
                ownerUserId, nodeId, title, WikiPageConstants.NEW, null,
                true, context);
        }

        WikiPageLocalServiceUtil.updatePage(
            ownerUserId, nodeId, title, existingPage.getVersion(),
            pageContent, summary, true, "creole", parent, redirect,
            context);
    }
    catch (Exception e) {
        throw new PortalException("Error importing page " + title, e);
    }
}
/**
 * Tells whether a page title belongs to one of the special namespaces.
 *
 * @param  title the page title
 * @param  specialNamespaces the namespace names to test against
 * @return <code>true</code> if the title starts with any of the namespace
 *         names followed by a colon; <code>false</code> otherwise
 */
protected boolean isSpecialMediaWikiPage(
    String title, List<String> specialNamespaces) {

    for (String specialNamespace : specialNamespaces) {
        String prefix = specialNamespace + StringPool.COLON;

        if (!title.startsWith(prefix)) {
            continue;
        }

        return true;
    }

    return false;
}
/**
 * Decides whether an archive entry should be imported as an image.
 *
 * <p>
 * An entry is rejected when its first or second path segment is one of
 * the special MediaWiki directories, or when the document library store
 * validation of its file name and stream fails.
 * </p>
 *
 * @param  paths the path segments of the entry; the last one is used as
 *         the file name
 * @param  inputStream the content of the entry
 * @return <code>true</code> if the entry passed every check;
 *         <code>false</code> otherwise
 */
protected boolean isValidImage(String[] paths, InputStream inputStream) {
    boolean inSpecialDir =
        _specialMediaWikiDirs.contains(paths[0]) ||
            ((paths.length > 1) &&
             _specialMediaWikiDirs.contains(paths[1]));

    if (inSpecialDir) {
        return false;
    }

    try {
        DLStoreUtil.validate(paths[paths.length - 1], true, inputStream);

        return true;
    }
    catch (PortalException pe) {
        return false;
    }
    catch (SystemException se) {
        return false;
    }
}
/**
 * Moves the page named by the front page option to the front page title.
 *
 * <p>
 * Nothing happens when the option is empty or when no page with the
 * normalized title is counted in the node. Any failure is logged as a
 * warning and is not propagated.
 * </p>
 *
 * @param userId the user ID passed to the move operation
 * @param node the wiki node that holds the pages
 * @param options the import options; the front page option is read from
 *        here
 */
protected void moveFrontPage(
    long userId, WikiNode node, Map<String, String[]> options) {

    String frontPageTitle = MapUtil.getString(
        options, WikiImporterKeys.OPTIONS_FRONT_PAGE);

    if (Validator.isNull(frontPageTitle)) {
        return;
    }

    frontPageTitle = normalizeTitle(frontPageTitle);

    try {
        if (WikiPageLocalServiceUtil.getPagesCount(
                node.getNodeId(), frontPageTitle, true) <= 0) {

            return;
        }

        ServiceContext context = new ServiceContext();

        context.setAddGroupPermissions(true);
        context.setAddGuestPermissions(true);

        WikiPageLocalServiceUtil.movePage(
            userId, node.getNodeId(), frontPageTitle,
            WikiPageConstants.FRONT_PAGE, false, context);
    }
    catch (Exception e) {
        if (_log.isWarnEnabled()) {
            _log.warn(
                "Could not move " + WikiPageConstants.FRONT_PAGE +
                    " to the title provided: " + frontPageTitle,
                e);
        }
    }
}
/**
 * Trims a category name, converts it with <code>AssetUtil.toWord</code>
 * and shortens the result.
 *
 * @param  categoryName the raw category name
 * @param  length the length passed to <code>StringUtil.shorten</code>
 * @return the normalized name
 */
protected String normalize(String categoryName, int length) {
    String word = AssetUtil.toWord(categoryName.trim());

    return StringUtil.shorten(word, length);
}
/**
 * Strips category links from a description and normalizes what is left
 * with a length of 255.
 *
 * @param  description the raw description; may be <code>null</code>
 * @return the normalized description, or a blank string if the
 *         description is <code>null</code>
 */
protected String normalizeDescription(String description) {

    // A category page without text has no description; treat it as blank
    // instead of failing with a NullPointerException

    if (description == null) {
        return StringPool.BLANK;
    }

    // Use the compiled pattern rather than recompiling its source text

    Matcher matcher = _categoriesPattern.matcher(description);

    return normalize(matcher.replaceAll(StringPool.BLANK), 255);
}
/**
 * Removes everything that matches the configured wiki title removal
 * regular expression and shortens the result with a length of 75.
 *
 * @param  title the raw page title
 * @return the normalized title
 */
protected String normalizeTitle(String title) {
    String cleanedTitle = title.replaceAll(
        PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);

    return StringUtil.shorten(cleanedTitle, 75);
}
/**
 * Attaches the valid images of an archive to the shared images page of
 * the node.
 *
 * <p>
 * The shared images page is created when the archive has entries and the
 * page does not exist yet. Entries rejected by
 * {@link #isValidImage(String[], InputStream)} are skipped. Attachments
 * are added in batches, and progress is reported between 50 and 99
 * percent.
 * </p>
 *
 * <p>
 * Every entry stream opened here is released once it is no longer needed,
 * and the zip reader is closed even when the import fails part way.
 * </p>
 *
 * @param  userId the user ID passed to the page and attachment services
 * @param  node the wiki node that receives the images
 * @param  imagesInputStream the images archive; nothing is done if it is
 *         <code>null</code>
 * @throws Exception if reading the archive or adding attachments fails
 */
protected void processImages(
        long userId, WikiNode node, InputStream imagesInputStream)
    throws Exception {

    if (imagesInputStream == null) {
        return;
    }

    ProgressTracker progressTracker =
        ProgressTrackerThreadLocal.getProgressTracker();

    int count = 0;

    ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(
        imagesInputStream);

    List<ObjectValuePair<String, InputStream>> inputStreamOVPs =
        new ArrayList<ObjectValuePair<String, InputStream>>();

    try {
        List<String> entries = zipReader.getEntries();

        int total = entries.size();

        if (total > 0) {
            try {
                WikiPageLocalServiceUtil.getPage(
                    node.getNodeId(), SHARED_IMAGES_TITLE);
            }
            catch (NoSuchPageException nspe) {
                ServiceContext serviceContext = new ServiceContext();

                serviceContext.setAddGroupPermissions(true);
                serviceContext.setAddGuestPermissions(true);

                WikiPageLocalServiceUtil.addPage(
                    userId, node.getNodeId(), SHARED_IMAGES_TITLE,
                    SHARED_IMAGES_CONTENT, null, true, serviceContext);
            }
        }

        for (int i = 0; i < total; i++) {
            String entry = entries.get(i);

            InputStream inputStream = zipReader.getEntryAsInputStream(
                entry);

            String[] paths = StringUtil.split(entry, CharPool.SLASH);

            if (!isValidImage(paths, inputStream)) {
                if (_log.isInfoEnabled()) {
                    _log.info("Ignoring " + entry);
                }

                // A rejected entry is never attached, so its stream must
                // be released here

                StreamUtil.cleanUp(inputStream);

                continue;
            }

            String fileName = StringUtil.toLowerCase(
                paths[paths.length - 1]);

            inputStreamOVPs.add(
                new ObjectValuePair<String, InputStream>(
                    fileName, inputStream));

            count++;

            // The batch boundary is based on the entry index, not on the
            // number of accepted images

            if ((i % 5) == 0) {
                WikiPageLocalServiceUtil.addPageAttachments(
                    userId, node.getNodeId(), SHARED_IMAGES_TITLE,
                    inputStreamOVPs);

                closeInputStreams(inputStreamOVPs);

                progressTracker.setPercent(
                    Math.min(50 + (i * 50) / total, 99));
            }
        }

        if (!inputStreamOVPs.isEmpty()) {
            WikiPageLocalServiceUtil.addPageAttachments(
                userId, node.getNodeId(), SHARED_IMAGES_TITLE,
                inputStreamOVPs);
        }
    }
    finally {
        closeInputStreams(inputStreamOVPs);

        zipReader.close();
    }

    if (_log.isInfoEnabled()) {
        _log.info("Imported " + count + " images into " + node.getName());
    }
}

/**
 * Closes every stream held by the pairs and empties the list.
 */
private void closeInputStreams(
    List<ObjectValuePair<String, InputStream>> inputStreamOVPs) {

    for (ObjectValuePair<String, InputStream> inputStreamOVP :
            inputStreamOVPs) {

        StreamUtil.cleanUp(inputStreamOVP.getValue());
    }

    inputStreamOVPs.clear();
}
/**
 * Imports every page that is not in a special namespace.
 *
 * <p>
 * For each page either all revisions or only the last one are imported,
 * depending on the import latest version option. A revision that fails
 * to import is logged as a warning and does not stop the import.
 * Progress is reported from 10 percent up to 50 percent, or up to 99
 * percent when there is no images archive.
 * </p>
 *
 * @param userId the fallback user ID for page authorship
 * @param node the wiki node that receives the pages
 * @param rootElement the root element of the pages document
 * @param specialNamespaces the namespace names whose pages are skipped
 * @param usersMap the mapping from author name to email address
 * @param imagesInputStream the images archive; only tested for
 *        <code>null</code> to pick the progress range
 * @param options the import options
 */
protected void processRegularPages(
    long userId, WikiNode node, Element rootElement,
    List<String> specialNamespaces, Map<String, String> usersMap,
    InputStream imagesInputStream, Map<String, String[]> options) {

    boolean importLatestVersion = MapUtil.getBoolean(
        options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
    boolean strictImportMode = MapUtil.getBoolean(
        options, WikiImporterKeys.OPTIONS_STRICT_IMPORT_MODE);

    ProgressTracker progressTracker =
        ProgressTrackerThreadLocal.getProgressTracker();

    int count = 0;

    int minPercentage = 10;
    int maxPercentage = 50;

    if (imagesInputStream == null) {
        maxPercentage = 99;
    }

    List<Element> pageElements = rootElement.elements("page");

    int total = pageElements.size();

    for (int i = 0; i < total; i++) {
        Element pageElement = pageElements.get(i);

        String title = pageElement.elementText("title");

        if (title == null) {
            if (_log.isWarnEnabled()) {
                _log.warn("Ignoring page without a title");
            }

            continue;
        }

        if (isSpecialMediaWikiPage(title, specialNamespaces)) {
            continue;
        }

        title = normalizeTitle(title);

        // Scale against the fixed range; using the previous percentage
        // here would shrink the range on every iteration

        int percentage = Math.min(
            minPercentage +
                (i * (maxPercentage - minPercentage)) / total,
            maxPercentage);

        progressTracker.setPercent(percentage);

        List<Element> revisionElements = pageElement.elements("revision");

        if (importLatestVersion && !revisionElements.isEmpty()) {
            Element lastRevisionElement = revisionElements.get(
                revisionElements.size() - 1);

            revisionElements = new ArrayList<Element>();

            revisionElements.add(lastRevisionElement);
        }

        for (Element revisionElement : revisionElements) {
            Element contributorElement = revisionElement.element(
                "contributor");

            // A revision may lack a contributor; importPage then falls
            // back to the importing user

            String author = null;

            if (contributorElement != null) {
                author = contributorElement.elementText("username");
            }

            String content = revisionElement.elementText("text");
            String summary = revisionElement.elementText("comment");

            try {
                importPage(
                    userId, author, node, title, content, summary, usersMap,
                    strictImportMode);
            }
            catch (Exception e) {
                if (_log.isWarnEnabled()) {
                    _log.warn(
                        "Page with title " + title +
                            " could not be imported",
                        e);
                }
            }
        }

        count++;
    }

    if (_log.isInfoEnabled()) {
        _log.info("Imported " + count + " pages into " + node.getName());
    }
}
/**
 * Handles the pages that are not regular wiki content.
 *
 * <p>
 * Pages whose title starts with <code>Category:</code> are turned into
 * asset tags in the group of the node: the tag is created if it does not
 * exist, and the page text is stored as its <code>description</code>
 * property when tag properties are enabled and the text is not empty.
 * Other pages in a special namespace, and pages without a title, are
 * removed from the root element.
 * </p>
 *
 * @param  userId the user ID passed to the tag services
 * @param  node the wiki node whose group owns the tags
 * @param  rootElement the root element of the pages document
 * @param  specialNamespaces the namespace names that mark special pages
 * @throws PortalException if a tag operation fails with a portal
 *         exception other than a missing tag
 */
protected void processSpecialPages(
        long userId, WikiNode node, Element rootElement,
        List<String> specialNamespaces)
    throws PortalException {

    String categoryPrefix = "Category:";

    ProgressTracker progressTracker =
        ProgressTrackerThreadLocal.getProgressTracker();

    List<Element> pageElements = rootElement.elements("page");

    for (int i = 0; i < pageElements.size(); i++) {
        Element pageElement = pageElements.get(i);

        String title = pageElement.elementText("title");

        if (title == null) {

            // An untitled page cannot be imported; drop it so that later
            // processing of the root element does not trip over it

            if (_log.isWarnEnabled()) {
                _log.warn("Ignoring page without a title");
            }

            rootElement.remove(pageElement);

            continue;
        }

        if (!title.startsWith(categoryPrefix)) {
            if (isSpecialMediaWikiPage(title, specialNamespaces)) {
                rootElement.remove(pageElement);
            }

            continue;
        }

        String categoryName = normalize(
            title.substring(categoryPrefix.length()), 75);

        // A category page may lack a revision or its text; the tag is
        // then created without a description

        String description = StringPool.BLANK;

        Element revisionElement = pageElement.element("revision");

        if (revisionElement != null) {
            String text = revisionElement.elementText("text");

            if (text != null) {
                description = normalizeDescription(text);
            }
        }

        try {
            AssetTag assetTag = null;

            try {
                assetTag = AssetTagLocalServiceUtil.getTag(
                    node.getGroupId(), categoryName);
            }
            catch (NoSuchTagException nste) {
                ServiceContext serviceContext = new ServiceContext();

                serviceContext.setAddGroupPermissions(true);
                serviceContext.setAddGuestPermissions(true);
                serviceContext.setScopeGroupId(node.getGroupId());

                assetTag = AssetTagLocalServiceUtil.addTag(
                    userId, categoryName, null, serviceContext);

                if (PropsValues.ASSET_TAG_PROPERTIES_ENABLED &&
                    Validator.isNotNull(description)) {

                    AssetTagPropertyLocalServiceUtil.addTagProperty(
                        userId, assetTag.getTagId(), "description",
                        description);
                }
            }
        }
        catch (SystemException se) {
            _log.error(se, se);
        }

        if ((i % 5) == 0) {
            progressTracker.setPercent((i * 10) / pageElements.size());
        }
    }
}
/**
 * Collects the asset tag names referenced by a page.
 *
 * <p>
 * Every category link in the content yields one tag name; the tag is
 * created in the group of the node when it does not exist yet. If the
 * content contains the work in progress marker, the work in progress tag
 * name is appended as well.
 * </p>
 *
 * @param  userId the user ID used when a tag has to be created
 * @param  node the wiki node whose group owns the tags
 * @param  content the raw page content
 * @return the tag names, in the order they were found
 * @throws PortalException if a tag operation fails with a portal
 *         exception other than a missing tag
 * @throws SystemException if a tag operation fails with a system
 *         exception
 */
protected String[] readAssetTagNames(
        long userId, WikiNode node, String content)
    throws PortalException, SystemException {

    Matcher categoryMatcher = _categoriesPattern.matcher(content);

    List<String> tagNames = new ArrayList<String>();

    while (categoryMatcher.find()) {
        String tagName = normalize(categoryMatcher.group(1), 75);

        AssetTag tag;

        try {
            tag = AssetTagLocalServiceUtil.getTag(
                node.getGroupId(), tagName);
        }
        catch (NoSuchTagException nste) {
            ServiceContext context = new ServiceContext();

            context.setAddGroupPermissions(true);
            context.setAddGuestPermissions(true);
            context.setScopeGroupId(node.getGroupId());

            tag = AssetTagLocalServiceUtil.addTag(
                userId, tagName, null, context);
        }

        tagNames.add(tag.getName());
    }

    if (content.contains(_WORK_IN_PROGRESS)) {
        tagNames.add(_WORK_IN_PROGRESS_TAG);
    }

    String[] result = new String[tagNames.size()];

    return tagNames.toArray(result);
}
/**
 * Reads the parent page title from the first match of the parent pattern
 * in the content.
 *
 * @param  content the raw page content
 * @return the normalized captured title followed by
 *         <code>" (disambiguation)"</code>, or a blank string if the
 *         pattern does not match
 */
protected String readParentTitle(String content) {
    Matcher parentMatcher = _parentPattern.matcher(content);

    if (!parentMatcher.find()) {
        return StringPool.BLANK;
    }

    String parentTitle = normalizeTitle(parentMatcher.group(1));

    return parentTitle + " (disambiguation)";
}
/**
 * Reads the redirect target from the first match of the redirect pattern
 * in the content.
 *
 * @param  content the raw page content
 * @return the normalized target title, or a blank string if the pattern
 *         does not match
 */
protected String readRedirectTitle(String content) {
    Matcher redirectMatcher = _redirectPattern.matcher(content);

    if (!redirectMatcher.find()) {
        return StringPool.BLANK;
    }

    return normalizeTitle(redirectMatcher.group(1));
}
/**
 * Reads the names of the special namespaces declared in the site info of
 * the pages document.
 *
 * <p>
 * A namespace is collected when its <code>key</code> attribute is not
 * <code>0</code>. Namespace elements without a <code>key</code> attribute
 * are ignored.
 * </p>
 *
 * @param  root the root element of the pages document
 * @return the text of every collected namespace element
 * @throws ImportFilesException if the document has no
 *         <code>siteinfo</code> element or no <code>namespaces</code>
 *         element inside it
 */
protected List<String> readSpecialNamespaces(Element root)
    throws ImportFilesException {

    Element siteinfoElement = root.element("siteinfo");

    if (siteinfoElement == null) {
        throw new ImportFilesException("Invalid pages XML file");
    }

    Element namespacesElement = siteinfoElement.element("namespaces");

    // Report a missing namespaces element the same way as a missing
    // siteinfo element instead of failing with a NullPointerException

    if (namespacesElement == null) {
        throw new ImportFilesException("Invalid pages XML file");
    }

    List<String> namespaces = new ArrayList<String>();

    List<Element> namespaceElements = namespacesElement.elements(
        "namespace");

    for (Element namespaceElement : namespaceElements) {
        Attribute attribute = namespaceElement.attribute("key");

        if (attribute == null) {
            continue;
        }

        String value = attribute.getValue();

        if (!value.equals("0")) {
            namespaces.add(namespaceElement.getText());
        }
    }

    return namespaces;
}
/**
 * Parses the users file into a map.
 *
 * <p>
 * Each line is split with <code>StringUtil.split</code>. A line is kept
 * only when it yields exactly two non-empty columns; the first column
 * becomes the key and the second the value. Other lines are logged at
 * info level and skipped.
 * </p>
 *
 * @param  usersInputStream the users file; may be <code>null</code>
 * @return the parsed map, or an empty map if the stream is
 *         <code>null</code>
 * @throws IOException if the stream cannot be read
 */
protected Map<String, String> readUsersFile(InputStream usersInputStream)
    throws IOException {

    if (usersInputStream == null) {
        return Collections.emptyMap();
    }

    // NOTE(review): no charset is given, so the platform default is used
    // to decode the file - confirm that this is intended

    UnsyncBufferedReader reader = new UnsyncBufferedReader(
        new InputStreamReader(usersInputStream));

    Map<String, String> usersMap = new HashMap<String, String>();

    for (String line = reader.readLine(); line != null;
            line = reader.readLine()) {

        String[] columns = StringUtil.split(line);

        boolean wellFormed =
            (columns.length == 2) && Validator.isNotNull(columns[0]) &&
                Validator.isNotNull(columns[1]);

        if (!wellFormed) {
            if (_log.isInfoEnabled()) {
                _log.info(
                    "Ignoring line " + line +
                        " because it does not contain exactly 2 columns");
            }

            continue;
        }

        usersMap.put(columns[0], columns[1]);
    }

    return usersMap;
}
// Marker searched for in page content, and the tag name added when it is
// found

private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";

private static final String _WORK_IN_PROGRESS_TAG = "work in progress";

private static final Log _log = LogFactoryUtil.getLog(
    MediaWikiImporter.class);

// Matches a [[Category:name]] or [[category:name]] link plus any trailing
// newlines; group 1 is the category name

private static final Pattern _categoriesPattern = Pattern.compile(
    "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");

// Matches {{OtherTopics|title}}; group 1 is the title

private static final Pattern _parentPattern = Pattern.compile(
    "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");

// Matches #REDIRECT [[title]]; group 1 is the title

private static final Pattern _redirectPattern = Pattern.compile(
    "#REDIRECT \\[\\[([^\\]]*)\\]\\]");

// Directory names whose entries are rejected when importing images

private static final Set<String> _specialMediaWikiDirs = SetUtil.fromArray(
    new String[] {"archive", "temp", "thumb"});

private final MediaWikiToCreoleTranslator _translator =
    new MediaWikiToCreoleTranslator();
}