/*
 * (C) Copyright 2006-2016 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Benjamin Jalon
 *     Florent Guillaume
 */
package org.nuxeo.ecm.core.utils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.model.Property;
import org.nuxeo.ecm.core.schema.DocumentType;
import org.nuxeo.ecm.core.schema.SchemaManager;
import org.nuxeo.ecm.core.schema.TypeConstants;
import org.nuxeo.ecm.core.schema.types.ComplexType;
import org.nuxeo.ecm.core.schema.types.Field;
import org.nuxeo.ecm.core.schema.types.ListType;
import org.nuxeo.ecm.core.schema.types.Schema;
import org.nuxeo.ecm.core.schema.types.Type;
import org.nuxeo.runtime.api.Framework;

/**
 * Extracts the blobs, or the properties holding them, from a document.
 * <p>
 * Blob paths are computed once per document type by walking its schemas, then optionally filtered through the
 * include/exclude configuration given to {@link #setExtractorProperties}.
 */
public class BlobsExtractor {

    /** Blob paths already computed by {@link #getBlobPaths}, keyed by document type name. */
    protected final Map<String, List<String>> docBlobPaths = new ConcurrentHashMap<>();

    /** Normalized paths explicitly included, or {@code null} if none were configured. */
    private Set<String> includedPaths;

    /** Normalized paths explicitly excluded, or {@code null} if none were configured. */
    private Set<String> excludedPaths;

    /** Whether paths that are neither included nor excluded are still accepted. */
    private boolean allBlobs;

    /** When {@code true}, no filtering is applied at all and every path is accepted. */
    private boolean isDefaultConfiguration = true;

    /**
     * Configures which blob paths are kept by {@link #getBlobsProperties} and {@link #getBlobs}.
     * <p>
     * Paths are written without a prefix when the schema definition itself has no prefix. A path designating a blob
     * property must include the trailing {@code /data} part.
     *
     * @param includedPaths the paths to keep, may be {@code null}
     * @param excludedPaths the paths to reject, may be {@code null}
     * @param allBlobs {@code true} to also keep paths that are in neither set
     */
    public void setExtractorProperties(Set<String> includedPaths, Set<String> excludedPaths, boolean allBlobs) {
        this.includedPaths = normalizePaths(includedPaths);
        this.excludedPaths = normalizePaths(excludedPaths);
        this.allBlobs = allBlobs;
        // no explicit set at all and everything requested: filtering can be bypassed entirely
        boolean noExplicitPaths = includedPaths == null && excludedPaths == null;
        isDefaultConfiguration = noExplicitPaths && allBlobs;
    }

    /**
     * Tells whether a blob path passes the configured filter.
     * <p>
     * Exclusion is checked before inclusion; a path in neither set is accepted only if all blobs were requested.
     *
     * @param path the blob path, in the form produced by {@link #getBlobPaths}
     * @return {@code true} if the path must be kept
     */
    protected boolean isInterestingPath(String path) {
        if (isDefaultConfiguration) {
            return true;
        }
        if (excludedPaths != null && excludedPaths.contains(path)) {
            return false;
        }
        boolean explicitlyIncluded = includedPaths != null && includedPaths.contains(path);
        return explicitlyIncluded || allBlobs;
    }

    /**
     * Converts configured paths to the form used internally.
     * <p>
     * The "/data" suffix used by FulltextConfiguration is dropped, and a path lacking a prefix gets the name of its
     * schema prepended ("content" -> "file:content").
     *
     * @param paths the configured paths, may be {@code null}
     * @return a new set of normalized paths, or {@code null} if {@code paths} is {@code null}
     */
    protected Set<String> normalizePaths(Set<String> paths) {
        if (paths == null) {
            return null;
        }
        final String dataSuffix = "/data";
        SchemaManager schemaManager = Framework.getService(SchemaManager.class);
        Set<String> normalized = new HashSet<>();
        for (String configuredPath : paths) {
            String path = configuredPath;
            if (path.endsWith(dataSuffix)) {
                path = path.substring(0, path.length() - dataSuffix.length());
            }
            // only unknown, prefix-less paths are candidates for a schema name prefix
            if (schemaManager.getField(path) == null && !path.contains(":")) {
                path = prefixWithSchemaName(schemaManager, path);
            }
            normalized.add(path);
        }
        return normalized;
    }

    /**
     * Prepends to the path the name of the first prefix-less schema declaring the path's first segment as a field.
     * The path is returned unchanged if no such schema exists.
     */
    private String prefixWithSchemaName(SchemaManager schemaManager, String path) {
        // TODO precompute this in SchemaManagerImpl
        int slash = path.indexOf('/');
        String firstSegment = slash < 0 ? path : path.substring(0, slash);
        for (Schema schema : schemaManager.getSchemas()) {
            if (schema.getNamespace().hasPrefix()) {
                // schemas with a prefix are not tried
                continue;
            }
            if (schema.getField(firstSegment) != null) {
                return schema.getName() + ":" + path;
            }
        }
        return path;
    }

    /**
     * Collects the blobs held by the document.
     *
     * @param doc the document
     * @return the values of the properties returned by {@link #getBlobsProperties}
     */
    public List<Blob> getBlobs(DocumentModel doc) {
        List<Blob> blobs = new ArrayList<>();
        List<Property> blobProperties = getBlobsProperties(doc);
        for (Property blobProperty : blobProperties) {
            Blob blob = (Blob) blobProperty.getValue();
            blobs.add(blob);
        }
        return blobs;
    }

    /**
     * Collects the non-null blob properties of the document whose path passes the configured filter.
     *
     * @param doc the document
     * @return the list of blob properties
     * @throws IllegalStateException if a blob path yields no segment once split on its list markers
     */
    public List<Property> getBlobsProperties(DocumentModel doc) {
        List<Property> properties = new ArrayList<>();
        for (String path : getBlobPaths(doc.getDocumentType())) {
            if (isInterestingPath(path)) {
                // the part before the first list marker is resolved on the document itself,
                // each following part is resolved on every item of the enclosing list
                String[] segments = path.split("/[*]/");
                if (segments.length == 0) {
                    throw new IllegalStateException("Path detected not well-formed: " + path);
                }
                List<String> segmentList = Arrays.asList(segments);
                Property root = doc.getProperty(segments[0]);
                findBlobsProperties(root, segmentList.subList(1, segmentList.size()), properties);
            }
        }
        return properties;
    }

    /**
     * Returns the paths of the blob properties declared by the schemas of a document type. The include/exclude
     * configuration is not taken into account here.
     * <p>
     * The result is computed on first request for a given type name and then served from {@link #docBlobPaths}.
     *
     * @param documentType the document type
     * @return the list of blob paths
     *
     * @since 8.3
     */
    public List<String> getBlobPaths(DocumentType documentType) {
        String typeName = documentType.getName();
        List<String> cached = docBlobPaths.get(typeName);
        if (cached != null) {
            return cached;
        }
        List<String> computed = new ArrayList<>();
        for (Schema schema : documentType.getSchemas()) {
            findBlobPaths(schema, null, schema, computed);
        }
        docBlobPaths.put(typeName, computed);
        return computed;
    }

    /**
     * Recursively resolves the remaining path segments below a property and accumulates the blob properties found.
     *
     * @param property the property reached so far
     * @param split the path segments still to resolve; when empty, {@code property} itself is the candidate
     * @param properties the accumulator receiving every candidate whose value is not {@code null}
     */
    protected void findBlobsProperties(Property property, List<String> split, List<Property> properties) {
        if (split.isEmpty()) {
            if (property.getValue() != null) {
                properties.add(property);
            }
            return;
        }
        String segment = split.get(0);
        List<String> remaining = split.subList(1, split.size());
        // each child is one item of the list; the segment is looked up inside every item
        for (Property item : property.getChildren()) {
            findBlobsProperties(item.get(segment), remaining, properties);
        }
    }

    /**
     * Recursively walks the fields of a complex type and records the path of every field of content type.
     * <p>
     * Top-level fields of a schema without prefix are prefixed with the schema name; list levels are marked with
     * {@code /*}. Simple fields and lists of non-complex items are skipped.
     * <p>
     * NOTE(review): the item type of a list is never tested with {@code TypeConstants.isContentType}, it is only
     * recursed into, so a list whose items are directly blobs may yield no path - confirm this is intended.
     *
     * @param complexType the type whose fields are inspected
     * @param path the path of {@code complexType}, or {@code null} when inspecting the schema itself
     * @param schema the schema being walked
     * @param paths the accumulator receiving the blob paths
     */
    protected void findBlobPaths(ComplexType complexType, String path, Schema schema, List<String> paths) {
        for (Field field : complexType.getFields()) {
            String fieldName = field.getName().getPrefixedName();
            String fieldPath;
            if (path != null) {
                fieldPath = path + "/" + fieldName;
            } else if (schema.getNamespace().hasPrefix()) {
                fieldPath = fieldName;
            } else {
                // add schema name as prefix if the schema doesn't have a prefix
                fieldPath = schema.getName() + ":" + fieldName;
            }

            Type type = field.getType();
            if (type.isSimpleType()) {
                continue; // not binary text
            }
            if (type.isListType()) {
                Type itemType = ((ListType) type).getFieldType();
                if (itemType.isComplexType()) {
                    findBlobPaths((ComplexType) itemType, fieldPath + "/*", schema, paths);
                }
                // otherwise: list of simple values, not binary text
                continue;
            }
            // neither simple nor list: complex type
            ComplexType nestedType = (ComplexType) type;
            if (TypeConstants.isContentType(type)) {
                // note this path
                paths.add(fieldPath);
            } else {
                findBlobPaths(nestedType, fieldPath, schema, paths);
            }
        }
    }

}