/* * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and others. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Contributors: * Florent Guillaume */ package org.nuxeo.ecm.core.storage; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.nuxeo.ecm.core.schema.DocumentType; import org.nuxeo.ecm.core.schema.FacetNames; import org.nuxeo.ecm.core.schema.Namespace; import org.nuxeo.ecm.core.schema.SchemaManager; import org.nuxeo.ecm.core.schema.TypeConstants; import org.nuxeo.ecm.core.schema.types.ComplexType; import org.nuxeo.ecm.core.schema.types.Field; import org.nuxeo.ecm.core.schema.types.ListType; import org.nuxeo.ecm.core.schema.types.Schema; import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl; import org.nuxeo.ecm.core.schema.types.Type; import org.nuxeo.ecm.core.schema.types.primitives.BinaryType; import org.nuxeo.ecm.core.schema.types.primitives.StringType; import org.nuxeo.ecm.core.storage.FulltextDescriptor.FulltextIndexDescriptor; import org.nuxeo.runtime.api.Framework; /** * Info about the fulltext configuration. */ public class FulltextConfiguration { private static final Log log = LogFactory.getLog(FulltextConfiguration.class); public static final String ROOT_TYPE = "Root"; public static final String PROP_TYPE_STRING = "string"; public static final String PROP_TYPE_BLOB = "blob"; public static final String FULLTEXT_DEFAULT_INDEX = "default"; /** All index names. */ public final Set<String> indexNames = new LinkedHashSet<String>(); /** Indexes holding exactly one field. */ public final Map<String, String> fieldToIndexName = new HashMap<String, String>(); /** Indexes containing all simple properties. */ public final Set<String> indexesAllSimple = new HashSet<String>(); /** Indexes containing all binaries properties. */ public final Set<String> indexesAllBinary = new HashSet<String>(); /** Indexes for each specific simple property path. */ public final Map<String, Set<String>> indexesByPropPathSimple = new HashMap<String, Set<String>>(); /** Indexes for each specific binary property path. */ // DBSTransactionState.findDirtyDocuments expects this to contain unprefixed versions for schemas // without prefix, like "content/data". public final Map<String, Set<String>> indexesByPropPathBinary = new HashMap<String, Set<String>>(); /** Indexes for each specific simple property path excluded. */ public final Map<String, Set<String>> indexesByPropPathExcludedSimple = new HashMap<String, Set<String>>(); /** Indexes for each specific binary property path excluded. */ public final Map<String, Set<String>> indexesByPropPathExcludedBinary = new HashMap<String, Set<String>>(); // inverse of above maps public final Map<String, Set<String>> propPathsByIndexSimple = new HashMap<String, Set<String>>(); public final Map<String, Set<String>> propPathsByIndexBinary = new HashMap<String, Set<String>>(); public final Map<String, Set<String>> propPathsExcludedByIndexSimple = new HashMap<String, Set<String>>(); public final Map<String, Set<String>> propPathsExcludedByIndexBinary = new HashMap<String, Set<String>>(); public final Set<String> excludedTypes = new HashSet<String>(); public final Set<String> includedTypes = new HashSet<String>(); public final boolean fulltextSearchDisabled; public final int fulltextFieldSizeLimit; public FulltextConfiguration(FulltextDescriptor fulltextDescriptor) { SchemaManager schemaManager = Framework.getService(SchemaManager.class); fulltextFieldSizeLimit = fulltextDescriptor.getFulltextFieldSizeLimit(); fulltextSearchDisabled = fulltextDescriptor.getFulltextSearchDisabled(); // find what paths we mean by "all" // for schemas without prefix, we add both the unprefixed and the prefixed version Set<String> allSimplePaths = new HashSet<>(); Set<String> allBinaryPaths = new HashSet<>(); PathsFinder pathsFinder = new PathsFinder(allSimplePaths, allBinaryPaths); for (Schema schema : schemaManager.getSchemas()) { pathsFinder.walkSchema(schema); } List<FulltextIndexDescriptor> descs = fulltextDescriptor.getFulltextIndexes(); if (descs == null) { descs = new ArrayList<FulltextIndexDescriptor>(1); } if (descs.isEmpty()) { descs.add(new FulltextIndexDescriptor()); } for (FulltextIndexDescriptor desc : descs) { String name = desc.name == null ? FULLTEXT_DEFAULT_INDEX : desc.name; indexNames.add(name); if (desc.fields == null) { desc.fields = new HashSet<String>(); } if (desc.excludeFields == null) { desc.excludeFields = new HashSet<String>(); } if (desc.fields.size() == 1 && desc.excludeFields.isEmpty()) { fieldToIndexName.put(desc.fields.iterator().next(), name); } if (desc.fieldType != null) { if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_STRING)) { indexesAllSimple.add(name); } else if (desc.fieldType.equals(FulltextConfiguration.PROP_TYPE_BLOB)) { indexesAllBinary.add(name); } else { log.error("Ignoring unknow repository fulltext configuration fieldType: " + desc.fieldType); } } if (desc.fields.isEmpty() && desc.fieldType == null) { // no fields specified and no field type -> all of them indexesAllSimple.add(name); indexesAllBinary.add(name); } if (indexesAllSimple.contains(name)) { propPathsByIndexSimple.put(name, new HashSet<>(allSimplePaths)); for (String path : allSimplePaths) { indexesByPropPathSimple.computeIfAbsent(path, p -> new HashSet<>()).add(name); } } if (indexesAllBinary.contains(name)) { propPathsByIndexBinary.put(name, new HashSet<>(allBinaryPaths)); for (String path : allBinaryPaths) { indexesByPropPathBinary.computeIfAbsent(path, p -> new HashSet<>()).add(name); } } if (fulltextDescriptor.getFulltextExcludedTypes() != null) { excludedTypes.addAll(fulltextDescriptor.getFulltextExcludedTypes()); } if (fulltextDescriptor.getFulltextIncludedTypes() != null) { includedTypes.addAll(fulltextDescriptor.getFulltextIncludedTypes()); } for (Set<String> fields : Arrays.asList(desc.fields, desc.excludeFields)) { boolean include = fields == desc.fields; for (String path : fields) { Field field = schemaManager.getField(path); if (field == null && !path.contains(":")) { // check without prefix // TODO precompute this in SchemaManagerImpl int slash = path.indexOf('/'); String first = slash == -1 ? path : path.substring(0, slash); for (Schema schema : schemaManager.getSchemas()) { if (!schema.getNamespace().hasPrefix()) { // schema without prefix, try it if (schema.getField(first) != null) { path = schema.getName() + ":" + path; field = schemaManager.getField(path); break; } } } } if (field == null) { log.error(String.format("Ignoring unknown property '%s' in fulltext configuration: %s", path, name)); continue; } Type baseType = getBaseType(field.getType()); Map<String, Set<String>> indexesByPropPath; Map<String, Set<String>> propPathsByIndex; if (baseType instanceof ComplexType && TypeConstants.isContentType(baseType)) { baseType = ((ComplexType) baseType).getField(BaseDocument.BLOB_DATA).getType(); // BinaryType } if (baseType instanceof StringType) { indexesByPropPath = include ? indexesByPropPathSimple : indexesByPropPathExcludedSimple; propPathsByIndex = include ? propPathsByIndexSimple : propPathsExcludedByIndexSimple; } else if (baseType instanceof BinaryType) { indexesByPropPath = include ? indexesByPropPathBinary : indexesByPropPathExcludedBinary; propPathsByIndex = include ? propPathsByIndexBinary : propPathsExcludedByIndexBinary; if (!path.endsWith("/" + BaseDocument.BLOB_DATA)) { path += "/" + BaseDocument.BLOB_DATA; // needed for indexesByPropPathBinary as DBSTransactionState.findDirtyDocuments expects this // to be in the same format as what DirtyPathsFinder expects, like "content/data". } } else { log.error(String.format("Ignoring property '%s' with bad type %s in fulltext configuration: %s", path, field.getType(), name)); continue; } indexesByPropPath.computeIfAbsent(path, p -> new HashSet<>()).add(name); propPathsByIndex.computeIfAbsent(name, n -> new HashSet<>()).add(path); } } } // Add document types with the NotFulltextIndexable facet for (DocumentType documentType : schemaManager.getDocumentTypes()) { if (documentType.hasFacet(FacetNames.NOT_FULLTEXT_INDEXABLE)) { excludedTypes.add(documentType.getName()); } } } protected Type getBaseType(Type type) { if (type instanceof SimpleTypeImpl) { return getBaseType(type.getSuperType()); } if (type instanceof ListType) { return getBaseType(((ListType) type).getFieldType()); } return type; } /** * Accumulates paths for string and binary properties in schemas passed to {@link #walkSchema}. * <p> * For schemas without prefix the path is accumulated both with and without prefix. * <p> * For binaries the path includes the final "/data" part. */ // TODO precompute this in SchemaManagerImpl public static class PathsFinder { protected final Set<String> simplePaths; protected final Set<String> binaryPaths; public PathsFinder(Set<String> simplePaths, Set<String> binaryPaths) { this.simplePaths = simplePaths; this.binaryPaths = binaryPaths; } public void walkSchema(Schema schema) { String addPrefix = schema.getNamespace().hasPrefix() ? null : schema.getName(); walkComplexType(schema, null, addPrefix); } protected void walkComplexType(ComplexType complexType, String path, String addPrefix) { for (Field field : complexType.getFields()) { String name = field.getName().getPrefixedName(); String fieldPath = path == null ? name : path + '/' + name; walkType(field.getType(), fieldPath, addPrefix); } } protected void walkType(Type type, String path, String addPrefix) { if (type.isSimpleType()) { walkSimpleType(type, path, addPrefix); } else if (type.isListType()) { String listPath = path + "/*"; Type ftype = ((ListType) type).getField().getType(); if (ftype.isComplexType()) { // complex list walkComplexType((ComplexType) ftype, listPath, addPrefix); } else { // array walkSimpleType(ftype, listPath, addPrefix); } } else { // complex type ComplexType ctype = (ComplexType) type; walkComplexType(ctype, path, addPrefix); } } protected void walkSimpleType(Type type, String path, String addPrefix) { while (type instanceof SimpleTypeImpl) { // type with constraint type = type.getSuperType(); } if (type instanceof StringType) { simplePaths.add(path); if (addPrefix != null) { simplePaths.add(addPrefix + ":" + path); } } else if (type instanceof BinaryType) { binaryPaths.add(path); if (addPrefix != null) { binaryPaths.add(addPrefix + ":" + path); } } } } public boolean isFulltextIndexable(String typeName) { if (ROOT_TYPE.equals(typeName)) { return false; } if (includedTypes.contains(typeName) || (includedTypes.isEmpty() && !excludedTypes.contains(typeName))) { return true; } return false; } }