/* * Copyright (c) 2006-2011 Nuxeo SA (http://nuxeo.com/) and others. * * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Florent Guillaume */ package org.eclipse.ecr.core.utils; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.eclipse.ecr.core.api.Blob; import org.eclipse.ecr.core.api.ClientException; import org.eclipse.ecr.core.api.DocumentModel; import org.eclipse.ecr.core.api.model.Property; import org.eclipse.ecr.core.schema.DocumentType; import org.eclipse.ecr.core.schema.SchemaManager; import org.eclipse.ecr.core.schema.TypeConstants; import org.eclipse.ecr.core.schema.types.ComplexType; import org.eclipse.ecr.core.schema.types.Field; import org.eclipse.ecr.core.schema.types.ListType; import org.eclipse.ecr.core.schema.types.Schema; import org.eclipse.ecr.core.schema.types.Type; import org.eclipse.ecr.runtime.api.Framework; /** * Extractor for all the blobs of a document. * * @author Florent Guillaume * @author Benjamin Jalon */ public class BlobsExtractor { protected static final Log log = LogFactory.getLog(BlobsExtractor.class); protected final Map<String, Map<String, List<String>>> blobFieldPaths = new HashMap<String, Map<String, List<String>>>(); protected List<String> docTypeCached = new ArrayList<String>(); protected SchemaManager schemaManager; private Set<String> pathProperties; private Set<String> excludedPathProperties; private boolean indexAllBinary = false; private boolean isDefaultConfiguration = true; protected SchemaManager getSchemaManager() throws Exception { if (schemaManager == null) { schemaManager = Framework.getService(SchemaManager.class); } return schemaManager; } /** * Get properties of the given document that contain a blob value. This * method uses the cache engine to find these properties. */ public List<Property> getBlobsProperties(DocumentModel doc) throws Exception { List<Property> result = new ArrayList<Property>(); for (String schema : getBlobFieldPathForDocumentType(doc.getType()).keySet()) { List<String> pathsList = getBlobFieldPathForDocumentType( doc.getType()).get(schema); for (String path : pathsList) { if (!isInterestingBlobProperty(path, schemaManager.getSchema(schema).getNamespace().prefix)) { continue; } List<String> pathSplitted = Arrays.asList(path.split("/[*]/")); if (pathSplitted.size() == 0) { throw new IllegalStateException("Path detected not wellformed: " + pathsList); } Property prop = doc.getProperty(schema + ":" + pathSplitted.get(0)); if (pathSplitted.size() >= 1) { List<String> subPath = pathSplitted.subList(1, pathSplitted.size()); getBlobValue(prop, subPath, path, result); } } } return result; } /** * Get path list of properties that may contain a blob for the given * document type. * * @param documentType document type name * @return return the property names that contain blob * @throws Exception */ public Map<String, List<String>> getBlobFieldPathForDocumentType( String documentType) throws Exception { DocumentType docType = getSchemaManager().getDocumentType(documentType); if (!docTypeCached.contains(documentType)) { Map<String, List<String>> paths = new HashMap<String, List<String>>(); blobFieldPaths.put(docType.getName(), paths); createCacheForDocumentType(docType); } return blobFieldPaths.get(documentType); } public void invalidateDocumentTypeCache(String docType) { if (docTypeCached.contains(docType)) { docTypeCached.remove(docType); } } public void invalidateCache() { docTypeCached = new ArrayList<String>(); } protected void createCacheForDocumentType(DocumentType docType) throws Exception { for (Schema schema : docType.getSchemas()) { findInteresting(docType, schema, "", schema); } if (!docTypeCached.contains(docType.getName())) { docTypeCached.add(docType.getName()); } } /** * Analyzes the document's schemas to find which fields and complex types * contain blobs. For each blob fields type found, * {@link BlobsExtractor#blobMatched(DocumentType, Schema, String, Field)} is * called and for each property that contains a subProperty containing a * Blob, * {@link BlobsExtractor#containsBlob(DocumentType, Schema, String, Field)} * is called * * @param schema The parent schema that contains the field * @param ct Current type parsed * @return {@code true} if the passed complex type contains at least one * blob field * @throws Exception thrown if a field is named '*' (name forbidden) */ protected boolean findInteresting(DocumentType docType, Schema schema, String path, ComplexType ct) throws Exception { boolean interesting = false; for (Field field : ct.getFields()) { Type type = field.getType(); if (type.isSimpleType()) { continue; // not binary text } else if (type.isListType()) { Type ftype = ((ListType) type).getField().getType(); if (ftype.isComplexType()) { String blobMatchedPath = path + String.format("/%s/*", field.getName().getLocalName()); if ("*".equals(field.getName())) { throw new Exception( "A field can't be named '*' please check this field: " + path); } if (findInteresting(docType, schema, blobMatchedPath, (ComplexType) ftype)) { containsBlob(docType, schema, blobMatchedPath, field); interesting |= true; } } else { continue; // not binary text } } else { // complex type ComplexType ctype = (ComplexType) type; if (type.getName().equals(TypeConstants.CONTENT)) { // CB: Fix for NXP-3847 - do not accumulate field name in // the path String blobMatchedPath = path + String.format("/%s", field.getName().getLocalName()); blobMatched(docType, schema, blobMatchedPath, field); interesting = true; } else { String blobMatchedPath = path + String.format("/%s", field.getName().getLocalName()); interesting |= findInteresting(docType, schema, blobMatchedPath, ctype); } } } if (interesting) { containsBlob(docType, schema, path, null); } return interesting; } /** * Call during the parsing of the schema structure in * {@link BlobsExtractor#findInteresting} if field is a * Blob Type. This method stores the path to that Field. * * @param schema The parent schema that contains the field * @param field Field that is a BlobType */ protected void blobMatched(DocumentType docType, Schema schema, String path, Field field) { Map<String, List<String>> blobPathsForDocType = blobFieldPaths.get(docType.getName()); List<String> pathsList = blobPathsForDocType.get(schema.getName()); if (pathsList == null) { pathsList = new ArrayList<String>(); blobPathsForDocType.put(schema.getName(), pathsList); blobFieldPaths.put(docType.getName(), blobPathsForDocType); } pathsList.add(path); } /** * Called during the parsing of the schema structure in * {@link BlobsExtractor#findInteresting} if field * contains a subfield of type Blob. This method does nothing. * * @param schema The parent schema that contains the field * @param field Field that contains a subField of type BlobType */ protected void containsBlob(DocumentType docType, Schema schema, String path, Field field) { } protected void getBlobValue(Property prop, List<String> subPath, String completePath, List<Property> result) throws Exception { if (subPath.size() == 0) { if (!(prop.getValue() instanceof Blob)) { log.debug("Path Field not contains a blob value: " + completePath); return; } result.add(prop); return; } for (Property childProp : prop.getChildren()) { if ("/*".equals(subPath.get(0))) { log.debug("TODO : BLOB IN A LIST NOT IMPLEMENTED for this path " + completePath); } Property childSubProp = childProp.get(subPath.get(0)); getBlobValue(childSubProp, subPath.subList(1, subPath.size()), completePath, result); } } /** * Finds all the blobs of the document. * <p> * This method is not thread-safe. * * @param doc the document * @return the list of blobs in the document */ public List<Blob> getBlobs(DocumentModel doc) throws ClientException { List<Blob> result = new ArrayList<Blob>(); try { for (Property blobField : getBlobsProperties(doc)) { Blob blob = (Blob) blobField.getValue(); result.add(blob); } } catch (Exception e) { throw new ClientException(e); } return result; } public void setExtractorProperties(Set<String> pathProps, Set<String> excludedPathProps, boolean indexBlobs) { pathProperties = pathProps; excludedPathProperties = excludedPathProps; indexAllBinary = indexBlobs; isDefaultConfiguration = (pathProps == null && excludedPathProps == null && Boolean.TRUE.equals(indexBlobs)); } private boolean isInterestingBlobProperty(String path, String prefix) { if (isDefaultConfiguration) { return true; } else if (pathProperties != null && matchProperty(prefix, path, pathProperties)) { return true; } else if (excludedPathProperties != null && matchProperty(prefix, path, excludedPathProperties)) { return false; } else if (Boolean.TRUE.equals(indexAllBinary)) { return true; } return false; } private boolean matchProperty(String prefix, String fieldPath, Set<String> propPaths) { String pathToMatch = (prefix == "" ? "" : prefix + ":") + fieldPath.substring(1); for (String propPath : propPaths) { if (propPath.startsWith(pathToMatch)) { return true; } } return false; } }