/*
* eXist Open Source Native XML Database
* Copyright (C) 2004-2009 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* $Id$
*/
package org.exist.xquery.functions.xmldb;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import org.exist.dom.QName;
import org.exist.util.DirectoryScanner;
import org.exist.util.FileUtils;
import org.exist.util.MimeTable;
import org.exist.util.MimeType;
import org.exist.xmldb.EXistResource;
import org.exist.xquery.Cardinality;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceIterator;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;
import org.exist.xquery.value.ValueSequence;
import org.xmldb.api.base.Collection;
import org.xmldb.api.base.Resource;
import org.xmldb.api.base.XMLDBException;
/**
* @author wolf
*/
public class XMLDBLoadFromPattern extends XMLDBAbstractCollectionManipulator {
protected static final Logger logger = LogManager.getLogger(XMLDBLoadFromPattern.class);
protected final static QName FUNCTION_NAME = new QName("store-files-from-pattern", XMLDBModule.NAMESPACE_URI, XMLDBModule.PREFIX);
protected final static String FUNCTION_DESCRIPTION = "Stores new resources into the database. Resources are read from the server's " +
"file system, using file patterns. " +
"The function returns a sequence of all document paths added " +
"to the db. These can be directly passed to fn:doc() to retrieve the document(s).";
protected final static SequenceType PARAM_COLLECTION = new FunctionParameterSequenceType("collection-uri", Type.STRING, Cardinality.EXACTLY_ONE, "The collection-uri where resources should be stored. " + XMLDBModule.COLLECTION_URI);
protected final static SequenceType PARAM_FS_DIRECTORY = new FunctionParameterSequenceType("directory", Type.STRING, Cardinality.EXACTLY_ONE, "The directory in the file system from where the files are read.");
// fixit! - security - we should say some words about sanity
// DBA role should be required for anything short of chroot/jail
// easily setup per installation/execution host for each function. /ljo
protected final static SequenceType PARAM_FS_PATTERN = new FunctionParameterSequenceType("pattern", Type.STRING, Cardinality.ONE_OR_MORE, "The file matching pattern. Based on code from Apache's Ant, thus following the same conventions. For example: *.xml matches any file ending with .xml in the current directory, **/*.xml matches files in any directory below the current one");
protected final static SequenceType PARAM_MIME_TYPE = new FunctionParameterSequenceType("mime-type", Type.STRING, Cardinality.ZERO_OR_ONE, "If the mime-type is something other than 'text/xml' or 'application/xml', the resource will be stored as a binary resource.");
protected static final SequenceType PARAM_PRESERVE_STRUCTURE = new FunctionParameterSequenceType("preserve-structure", Type.BOOLEAN, Cardinality.EXACTLY_ONE, "If preserve-structure is true(), the filesystem directory structure will be mirrored in the collection. Otherwise all the matching resources, including the ones in sub-directories, will be stored in the collection given in the first argument flatly.");
protected final static SequenceType PARAM_EXCLUDES = new FunctionParameterSequenceType("exclude", Type.STRING, Cardinality.ZERO_OR_MORE, "A sequence of file patterns to exclude");
protected static final FunctionReturnSequenceType RETURN_TYPE = new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_MORE, "the sequence of document paths");
public final static FunctionSignature signatures[] = {
new FunctionSignature(
FUNCTION_NAME,
FUNCTION_DESCRIPTION,
new SequenceType[]{PARAM_COLLECTION, PARAM_FS_DIRECTORY, PARAM_FS_PATTERN},
RETURN_TYPE
),
new FunctionSignature(
FUNCTION_NAME,
FUNCTION_DESCRIPTION,
new SequenceType[]{PARAM_COLLECTION, PARAM_FS_DIRECTORY, PARAM_FS_PATTERN, PARAM_MIME_TYPE},
RETURN_TYPE
),
new FunctionSignature(
FUNCTION_NAME,
FUNCTION_DESCRIPTION,
new SequenceType[]{PARAM_COLLECTION, PARAM_FS_DIRECTORY, PARAM_FS_PATTERN, PARAM_MIME_TYPE, PARAM_PRESERVE_STRUCTURE},
RETURN_TYPE
),
new FunctionSignature(
FUNCTION_NAME,
FUNCTION_DESCRIPTION,
new SequenceType[]{PARAM_COLLECTION, PARAM_FS_DIRECTORY, PARAM_FS_PATTERN, PARAM_MIME_TYPE, PARAM_PRESERVE_STRUCTURE, PARAM_EXCLUDES},
RETURN_TYPE
)
};
public XMLDBLoadFromPattern(XQueryContext context, FunctionSignature signature) {
super(context, signature);
}
@Override
protected Sequence evalWithCollection(Collection collection, Sequence[] args, Sequence contextSequence)
throws XPathException {
final Path baseDir = Paths.get(args[1].getStringValue()).normalize();
logger.debug("Loading files from directory: " + baseDir.toAbsolutePath().toString());
//determine resource type - xml or binary?
MimeType mimeTypeFromArgs = null;
if (getSignature().getArgumentCount() > 3 && args[3].hasOne()) {
final String mimeTypeParam = args[3].getStringValue();
mimeTypeFromArgs = MimeTable.getInstance().getContentType(mimeTypeParam);
if (mimeTypeFromArgs == null) {
throw new XPathException(this, "Unknown mime type specified: " + mimeTypeParam);
}
}
//keep the directory structure?
boolean keepDirStructure = false;
if (getSignature().getArgumentCount() >= 5) {
keepDirStructure = args[4].effectiveBooleanValue();
}
final List<String> excludes = new ArrayList<>();
if (getSignature().getArgumentCount() == 6) {
for (final SequenceIterator i = args[5].iterate(); i.hasNext(); ) {
excludes.add(i.nextItem().getStringValue());
}
}
final ValueSequence stored = new ValueSequence();
//store according to each pattern
try {
final Sequence patterns = args[2];
for (final SequenceIterator i = patterns.iterate(); i.hasNext(); ) {
//get the files to store
final String pattern = i.nextItem().getStringValue();
final List<Path> files = DirectoryScanner.scanDir(baseDir, pattern);
logger.debug("Found: " + files.size());
Collection col = collection;
String relDir, prevDir = null;
for (final Path file : files) {
try {
logger.debug(file.toAbsolutePath().toString());
String relPath = file.toString().substring(baseDir.toString().length());
final int p = relPath.lastIndexOf(java.io.File.separatorChar);
if (checkExcludes(excludes, relPath)) {
continue;
}
if (p >= 0) {
relDir = relPath.substring(0, p);
relDir = relDir.replace(java.io.File.separatorChar, '/');
} else {
relDir = relPath;
}
if (keepDirStructure && (prevDir == null || (!relDir.equals(prevDir)))) {
col = createCollectionPath(collection, relDir);
prevDir = relDir;
}
MimeType mimeType = mimeTypeFromArgs;
if (mimeType == null) {
mimeType = MimeTable.getInstance().getContentTypeFor(FileUtils.fileName(file));
if (mimeType == null) {
mimeType = MimeType.BINARY_TYPE;
}
}
//TODO : these probably need to be encoded and checked for right mime type
final Resource resource = col.createResource(FileUtils.fileName(file), mimeType.getXMLDBType());
resource.setContent(file.toFile());
((EXistResource) resource).setMimeType(mimeType.getName());
col.storeResource(resource);
//TODO : use dedicated function in XmldbURI
stored.add(new StringValue(col.getName() + "/" + resource.getId()));
} catch (final XMLDBException e) {
logger.error("Could not store file " + file.toAbsolutePath() + ": " + e.getMessage());
}
}
}
} catch (final IOException e) {
logger.error(e);
}
return stored;
}
/**
* Check if path matches any of the exclude patterns.
*/
private static boolean checkExcludes(final List<String> excludes, String path) {
if (excludes == null || excludes.isEmpty()) {
return false;
}
if (path.charAt(0) == java.io.File.separatorChar) {
path = path.substring(1);
}
boolean skip = false;
for (final String exclude : excludes) {
if (DirectoryScanner.match(exclude, path)) {
skip = true;
break;
}
}
return skip;
}
}