/**
* This file Copyright (c) 2003-2012 Magnolia International
* Ltd. (http://www.magnolia-cms.com). All rights reserved.
*
*
* This file is dual-licensed under both the Magnolia
* Network Agreement and the GNU General Public License.
* You may elect to use one or the other of these licenses.
*
* This file is distributed in the hope that it will be
* useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
* Redistribution, except as permitted by whichever of the GPL
* or MNA you select, is prohibited.
*
* 1. For the GPL license (GPL), you can redistribute and/or
* modify this file under the terms of the GNU General
* Public License, Version 3, as published by the Free Software
* Foundation. You should have received a copy of the GNU
* General Public License, Version 3 along with this program;
* if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* 2. For the Magnolia Network Agreement (MNA), this file
* and the accompanying materials are made available under the
* terms of the MNA which accompanies this distribution, and
* is available at http://www.magnolia-cms.com/mna.html
*
* Any modifications to this file must keep this entire header
* intact.
*
*/
package info.magnolia.importexport;
import info.magnolia.cms.beans.runtime.Document;
import info.magnolia.cms.core.Content;
import info.magnolia.cms.core.HierarchyManager;
import info.magnolia.cms.core.ItemType;
import info.magnolia.cms.core.SystemProperty;
import info.magnolia.cms.util.ContentUtil;
import info.magnolia.cms.util.NodeDataUtil;
import info.magnolia.context.MgnlContext;
import info.magnolia.importexport.filters.ImportXmlRootFilter;
import info.magnolia.importexport.filters.MagnoliaV2Filter;
import info.magnolia.importexport.filters.MetadataUuidFilter;
import info.magnolia.importexport.filters.RemoveMixversionableFilter;
import info.magnolia.importexport.filters.VersionFilter;
import info.magnolia.importexport.postprocessors.MetaDataImportPostProcessor;
import info.magnolia.jcr.util.NodeUtil;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.text.MessageFormat;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
import javax.jcr.ImportUUIDBehavior;
import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.PathNotFoundException;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.Workspace;
import javax.xml.transform.Source;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLFilter;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
/**
* Utility class for manipulation of XML files (mainly JCR XML).
* @author <a href="mailto:niko@macnica.com">Nicolas Modrzyk</a>
* @author Oliver Lietz
*
* @version $Id$
*/
public class DataTransporter {
private static final Pattern DOT_NAME_PATTERN = Pattern.compile("[\\w\\-]*\\.*[\\w\\-]*");
private static final int INDENT_VALUE = 2;
private static Logger log = LoggerFactory.getLogger(DataTransporter.class.getName());
final static int BOOTSTRAP_IMPORT_MODE = ImportUUIDBehavior.IMPORT_UUID_COLLISION_REPLACE_EXISTING;
public static final String ZIP = ".zip";
public static final String GZ = ".gz";
public static final String XML = ".xml";
public static final String PROPERTIES = ".properties";
public static final String DOT = ".";
public static final String SLASH = "/";
public static final String UTF8 = "UTF-8";
public static final String JCR_ROOT = "jcr:root";
/**
* Converts a xml document into a file.
* @param xmlDocument uploaded file
* @param repositoryName selected repository
* @param basepath base path in repository
* @param keepVersionHistory if <code>false</code> version info will be stripped before importing the document
* @param importMode a valid value for ImportUUIDBehavior
* @param saveAfterImport
* @param createBasepathIfNotExist
* @throws IOException
* @see ImportUUIDBehavior
*/
public static synchronized void importDocument(Document xmlDocument, String repositoryName, String basepath,
boolean keepVersionHistory, int importMode, boolean saveAfterImport,
boolean createBasepathIfNotExist)
throws IOException {
File xmlFile = xmlDocument.getFile();
importFile(xmlFile, repositoryName, basepath, keepVersionHistory, importMode, saveAfterImport,
createBasepathIfNotExist);
}
/**
* Creates an <code>InputStream</code> backed by the specified xml file.
* @param xmlFile (zipped/gzipped) XML file to import
* @param repositoryName selected repository
* @param basepath base path in repository
* @param keepVersionHistory if <code>false</code> version info will be stripped before importing the document
* @param importMode a valid value for ImportUUIDBehavior
* @param saveAfterImport
* @param createBasepathIfNotExist
* @throws IOException
* @see ImportUUIDBehavior
*/
public static synchronized void importFile(File xmlFile, String repositoryName, String basepath,
boolean keepVersionHistory, int importMode, boolean saveAfterImport,
boolean createBasepathIfNotExist)
throws IOException {
String name = xmlFile.getAbsolutePath();
InputStream xmlStream = getInputStreamForFile(xmlFile);
importXmlStream(xmlStream, repositoryName, basepath, name, keepVersionHistory, importMode, saveAfterImport,
createBasepathIfNotExist);
}
/**
* @param xmlFile
* @param repositoryName
* @throws IOException
*/
public static void executeBootstrapImport(File xmlFile, String repositoryName) throws IOException {
String filenameWithoutExt = StringUtils.substringBeforeLast(xmlFile.getName(), DOT);
if (filenameWithoutExt.endsWith(XML)) {
// if file ends in .xml.gz or .xml.zip
// need to keep the .xml to be able to view it after decompression
filenameWithoutExt = StringUtils.substringBeforeLast(xmlFile.getName(), DOT);
}
String pathName = StringUtils.substringAfter(StringUtils.substringBeforeLast(filenameWithoutExt, DOT), DOT);
pathName = decodePath(pathName, UTF8);
String basepath = SLASH + StringUtils.replace(pathName, DOT, SLASH);
if (xmlFile.getName().endsWith(PROPERTIES)) {
Properties properties = new Properties();
FileInputStream stream = new FileInputStream(xmlFile);
properties.load(stream);
stream.close();
importProperties(properties, repositoryName);
} else {
DataTransporter.importFile(xmlFile, repositoryName, basepath, false, BOOTSTRAP_IMPORT_MODE, true, true);
}
}
/**
* Deprecated.
* @param properties
* @param repositoryName
* @deprecated since 4.0 - use the PropertiesImportExport class instead.
*/
public static void importProperties(Properties properties, String repositoryName) {
for (Iterator iter = properties.keySet().iterator(); iter.hasNext();) {
String key = (String) iter.next();
String value = (String) properties.get(key);
String name = StringUtils.substringAfterLast(key, ".");
String path = StringUtils.substringBeforeLast(key, ".").replace('.', '/');
Content node = ContentUtil.getContent(repositoryName, path);
if (node != null) {
try {
NodeDataUtil.getOrCreate(node, name).setValue(value);
node.save();
}
catch (RepositoryException e) {
log.error("can't set property " + key, e);
}
}
}
}
/**
* Imports XML stream into repository.
* XML is filtered by <code>MagnoliaV2Filter</code>, <code>VersionFilter</code> and <code>ImportXmlRootFilter</code>
* if <code>keepVersionHistory</code> is set to <code>false</code>
* @param xmlStream XML stream to import
* @param repositoryName selected repository
* @param basepath base path in repository
* @param name (absolute path of <code>File</code>)
* @param keepVersionHistory if <code>false</code> version info will be stripped before importing the document
* @param importMode a valid value for ImportUUIDBehavior
* @param saveAfterImport
* @param createBasepathIfNotExist
* @throws IOException
* @see ImportUUIDBehavior
* @see ImportXmlRootFilter
* @see VersionFilter
* @see MagnoliaV2Filter
*/
public static synchronized void importXmlStream(InputStream xmlStream, String repositoryName, String basepath,
String name, boolean keepVersionHistory, int importMode,
boolean saveAfterImport, boolean createBasepathIfNotExist)
throws IOException {
// TODO hopefully this will be fixed with a more useful message with the Bootstrapper refactoring
if (xmlStream == null) {
throw new IOException("Can't import a null stream into repository: " + repositoryName + ", basepath: " + basepath + ", name: " + name);
}
HierarchyManager hm = MgnlContext.getHierarchyManager(repositoryName);
if (hm == null) {
throw new IllegalStateException("Can't import " + name + " since repository " + repositoryName + " does not exist.");
}
Workspace ws = hm.getWorkspace();
if (log.isDebugEnabled()) {
log.debug("Importing content into repository: [{}] from: [{}] into path: [{}]",
new Object[]{repositoryName, name, basepath});
}
if (!hm.isExist(basepath) && createBasepathIfNotExist) {
try {
ContentUtil.createPath(hm, basepath, ItemType.CONTENT);
}
catch (RepositoryException e) {
log.error("can't create path [{}]", basepath);
}
}
Session session = ws.getSession();
try {
// Collects a list with all nodes at the basepath before import so we can see exactly which nodes were imported afterwards
List<Node> nodesBeforeImport = NodeUtil.asList(NodeUtil.asIterable(session.getNode(basepath).getNodes()));
if (keepVersionHistory) {
// do not manipulate
session.importXML(basepath, xmlStream, importMode);
} else {
// create readers/filters and chain
XMLReader initialReader = XMLReaderFactory.createXMLReader(org.apache.xerces.parsers.SAXParser.class.getName());
try{
initialReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
}catch (SAXException e) {
log.error("could not set parser feature");
}
XMLFilter magnoliaV2Filter = null;
// if stream is from regular file, test for belonging XSL file to apply XSL transformation to XML
if (new File(name).isFile()) {
InputStream xslStream = getXslStreamForXmlFile(new File(name));
if (xslStream != null) {
Source xslSource = new StreamSource(xslStream);
SAXTransformerFactory saxTransformerFactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
XMLFilter xslFilter = saxTransformerFactory.newXMLFilter(xslSource);
magnoliaV2Filter = new MagnoliaV2Filter(xslFilter);
}
}
if (magnoliaV2Filter == null) {
magnoliaV2Filter = new MagnoliaV2Filter(initialReader);
}
XMLFilter versionFilter = new VersionFilter(magnoliaV2Filter);
// enable this to strip useless "name" properties from dialogs
// versionFilter = new UselessNameFilter(versionFilter);
// enable this to strip mix:versionable from pre 3.6 xml files
versionFilter = new RemoveMixversionableFilter(versionFilter);
XMLReader finalReader = new ImportXmlRootFilter(versionFilter);
ContentHandler handler = session.getImportContentHandler(basepath, importMode);
finalReader.setContentHandler(handler);
// parse XML, import is done by handler from session
try {
finalReader.parse(new InputSource(xmlStream));
}
finally {
IOUtils.closeQuietly(xmlStream);
}
if (((ImportXmlRootFilter) finalReader).rootNodeFound) {
String path = basepath;
if (!path.endsWith(SLASH)) {
path += SLASH;
}
Node dummyRoot = (Node) session.getItem(path + JCR_ROOT);
for (Iterator iter = dummyRoot.getNodes(); iter.hasNext();) {
Node child = (Node) iter.next();
// move childs to real root
if (session.itemExists(path + child.getName())) {
session.getItem(path + child.getName()).remove();
}
session.move(child.getPath(), path + child.getName());
}
// delete the dummy node
dummyRoot.remove();
}
// Post process all nodes that were imported
NodeIterator nodesAfterImport = session.getNode(basepath).getNodes();
while (nodesAfterImport.hasNext()) {
Node nodeAfterImport = nodesAfterImport.nextNode();
boolean existedBeforeImport = false;
for (Node nodeBeforeImport : nodesBeforeImport) {
if (NodeUtil.isSame(nodeAfterImport, nodeBeforeImport)) {
existedBeforeImport = true;
break;
}
}
if (!existedBeforeImport) {
postProcessAfterImport(nodeAfterImport);
}
}
}
}
catch (Exception e) {
throw new RuntimeException("Error importing " + name + ": " + e.getMessage(), e);
}
finally {
IOUtils.closeQuietly(xmlStream);
}
try {
if (saveAfterImport) {
session.save();
}
}
catch (RepositoryException e) {
log.error(MessageFormat.format(
"Unable to save changes to the [{0}] repository due to a {1} Exception: {2}.",
new Object[]{repositoryName, e.getClass().getName(), e.getMessage()}), e);
throw new IOException(e.getMessage());
}
}
private static void postProcessAfterImport(Node node) throws RepositoryException {
try {
new MetaDataImportPostProcessor().postProcessNode(node);
} catch (RepositoryException e) {
throw new RepositoryException("Failed to post process imported nodes at path " + NodeUtil.getNodePathIfPossible(node) + ": " + e.getMessage(), e);
}
}
/**
* @param file
* @return XSL stream for Xml file or <code>null</code>
*/
protected static InputStream getXslStreamForXmlFile(File file) {
InputStream xslStream = null;
String xlsFilename = StringUtils.substringBeforeLast(file.getAbsolutePath(), ".") + ".xsl";
File xslFile = new File(xlsFilename);
if (xslFile.exists()) {
try {
xslStream = new FileInputStream(xslFile);
log.info("XSL file for [" + file.getName() + "] found (" + xslFile.getName() + ")");
} catch (FileNotFoundException e) { // should never happen (xslFile.exists())
e.printStackTrace();
}
}
return xslStream;
}
/**
* Creates a stream from the (zipped/gzipped) XML file.
* @param xmlFile
* @return stream of the file
* @throws IOException
*/
private static InputStream getInputStreamForFile(File xmlFile) throws IOException {
InputStream xmlStream;
// looks like the zip one is buggy. It throws exception when trying to use it
if (xmlFile.getName().endsWith(ZIP)) {
xmlStream = new ZipInputStream((new FileInputStream(xmlFile)));
} else if (xmlFile.getName().endsWith(GZ)) {
xmlStream = new GZIPInputStream((new FileInputStream(xmlFile)));
} else { // if(fileName.endsWith(XML))
xmlStream = new FileInputStream(xmlFile);
}
return xmlStream;
}
public static void executeExport(OutputStream baseOutputStream, boolean keepVersionHistory, boolean format,
Session session, String basepath, String repository, String ext) throws IOException {
OutputStream outputStream = baseOutputStream;
if (ext.endsWith(ZIP)) {
outputStream = new ZipOutputStream(baseOutputStream);
} else if (ext.endsWith(GZ)) {
outputStream = new GZIPOutputStream(baseOutputStream);
}
try {
if (keepVersionHistory) {
// use exportSystemView in order to preserve property types
// http://issues.apache.org/jira/browse/JCR-115
if (!format) {
session.exportSystemView(basepath, outputStream, false, false);
} else {
parseAndFormat(outputStream, null, repository, basepath, session, false);
}
} else {
// use XMLSerializer and a SAXFilter in order to rewrite the
// file
XMLReader reader = new VersionFilter(XMLReaderFactory
.createXMLReader(org.apache.xerces.parsers.SAXParser.class.getName()));
parseAndFormat(outputStream, reader, repository, basepath, session, false);
}
}
catch (IOException e) {
throw new RuntimeException(e);
}
catch (SAXException e) {
throw new RuntimeException(e);
}
catch (RepositoryException e) {
throw new RuntimeException(e);
}
// finish the stream properly if zip stream
// this is not done by the IOUtils
if (outputStream instanceof DeflaterOutputStream) {
((DeflaterOutputStream) outputStream).finish();
}
baseOutputStream.flush();
IOUtils.closeQuietly(baseOutputStream);
}
/**
* Exports the content of the repository, and format it if necessary.
* @param stream the stream to write the content to
* @param reader the reader to use to parse the xml content (so that we can perform filtering), if null instanciate
* a default one
* @param repository the repository to export
* @param basepath the basepath in the repository
* @param session the session to use to export the data from the repository
* @param noRecurse
* @throws IOException
* @throws SAXException
* @throws RepositoryException
* @throws PathNotFoundException
*/
public static void parseAndFormat(OutputStream stream, XMLReader reader, String repository, String basepath,
Session session, boolean noRecurse)
throws IOException, SAXException, PathNotFoundException, RepositoryException {
if (reader == null) {
reader = XMLReaderFactory.createXMLReader(org.apache.xerces.parsers.SAXParser.class.getName());
}
// write to a temp file and then re-read it to remove version history
File tempFile = File.createTempFile("export-" + repository + session.getUserID(), ".xml");
OutputStream fileStream = new FileOutputStream(tempFile);
try {
session.exportSystemView(basepath, fileStream, false, noRecurse);
}
finally {
IOUtils.closeQuietly(fileStream);
}
readFormatted(reader, tempFile, stream);
if (!tempFile.delete()) {
log.warn("Could not delete temporary export file {}", tempFile.getAbsolutePath());
}
}
/**
* @param reader
* @param inputFile
* @param outputStream
* @throws FileNotFoundException
* @throws IOException
* @throws SAXException
*/
protected static void readFormatted(XMLReader reader, File inputFile, OutputStream outputStream)
throws FileNotFoundException, IOException, SAXException {
InputStream fileInputStream = new FileInputStream(inputFile);
readFormatted(reader, fileInputStream, outputStream);
IOUtils.closeQuietly(fileInputStream);
}
/**
* @param reader
* @param inputStream
* @param outputStream
* @throws FileNotFoundException
* @throws IOException
* @throws SAXException
*/
protected static void readFormatted(XMLReader reader, InputStream inputStream, OutputStream outputStream)
throws FileNotFoundException, IOException, SAXException {
OutputFormat outputFormat = new OutputFormat();
outputFormat.setPreserveSpace(false); // this is ok, doesn't affect text nodes??
outputFormat.setIndenting(true);
outputFormat.setIndent(INDENT_VALUE);
outputFormat.setLineWidth(120); // need to be set after setIndenting()!
final boolean removeUnwantedNamespaces = !SystemProperty.getBooleanProperty("magnolia.export.keep_extra_namespaces"); // MAGNOLIA-2960
MetadataUuidFilter metadataUuidFilter = new MetadataUuidFilter(reader, removeUnwantedNamespaces); // MAGNOLIA-1650
metadataUuidFilter.setContentHandler(new XMLSerializer(outputStream, outputFormat));
metadataUuidFilter.parse(new InputSource(inputStream));
IOUtils.closeQuietly(inputStream);
}
/**
*
* @param path path to encode
* @param separator "." (dot) or "/", it will be not encoded if found
* @param enc charset
* @return the path encoded
*/
public static String encodePath(String path, String separator, String enc)
{
StringBuilder pathEncoded = new StringBuilder();
try
{
if (!StringUtils.contains(path, separator))
{
return URLEncoder.encode(path, enc);
}
for(int i=0; i < path.length(); i++) {
String ch = String.valueOf(path.charAt(i));
if(separator.equals(ch)) {
pathEncoded.append(ch);
} else {
pathEncoded.append(URLEncoder.encode(ch, enc));
}
}
}
catch (UnsupportedEncodingException e)
{
return path;
}
return pathEncoded.toString();
}
/**
* decode a path (ex. %D0%9D%D0%B0.%B2%D0%BE%D0%BB%D0%BD)
* @param path path to decode
* @param enc charset
* @return the path decoded
*/
public static String decodePath(String path, String enc)
{
String pathEncoded = StringUtils.EMPTY;
try
{
pathEncoded = URLDecoder.decode(path, enc);
}
catch (UnsupportedEncodingException e)
{
return path;
}
return pathEncoded;
}
/**
* Prior to 4.5 Magnolia used to produce export xml filenames where the / (slash) separating sub nodes was replaced by a dot.
* Since 4.5, Magnolia enables dots in path names, therefore dots which are part of the node name have to be escaped by doubling them.
* I.e. given a path like this <code>/foo/bar.baz/test../dir/baz..bar</code>, this method will produce
* <code>.foo.bar..baz.test.....dir.baz....bar</code>.
*/
public static String createExportPath(String path) {
//TODO if someone is smarter than me (not an impossible thing) and can do this with one single elegant regex, please do it.
String newPath = path.replace(".", "..");
newPath = newPath.replace("/", ".");
return newPath;
}
/**
* The opposite of {@link #createExportPath(String)}.
* I.e. given a path like this <code>.foo.bar..baz.test.....dir.baz....bar</code>, this method will produce <code>/foo/bar.baz/test../dir/baz..bar</code>.
*/
public static String revertExportPath(String exportPath) {
if(".".equals(exportPath)) {
return "/";
}
//TODO I have a feeling there's a simpler way to achieve our goal.
Matcher matcher = DOT_NAME_PATTERN.matcher(exportPath);
StringBuilder reversed = new StringBuilder(exportPath.length());
while(matcher.find()){
String group = matcher.group();
int dotsNumber = StringUtils.countMatches(group, ".");
if(dotsNumber == 1) {
reversed.append(group.replaceFirst("\\.", "/"));
} else {
String dots = StringUtils.substringBeforeLast(group, ".").replace("..", ".");
String name = StringUtils.substringAfterLast(group, ".");
reversed.append(dots);
//if number is odd, the last dot has to be replaced with a slash
if(dotsNumber % 2 != 0) {
reversed.append("/");
}
reversed.append(name);
}
}
return reversed.toString();
}
}