/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.update.processor; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.apache.solr.common.SolrException; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.update.processor.FieldMutatingUpdateProcessor.FieldNameSelector; import org.apache.solr.util.plugin.SolrCoreAware; import static org.apache.solr.update.processor.FieldMutatingUpdateProcessor.SELECT_ALL_FIELDS; /** * Base class for implementing Factories for FieldMutatingUpdateProcessors and * FieldValueMutatingUpdateProcessors. * * <p> * This class provides all of the plumbing for configuring the * FieldNameSelector using the following init params to specify selection * criteria... * </p> * <ul> * <li><code>fieldName</code> - selecting specific fields by field name lookup</li> * <li><code>fieldRegex</code> - selecting specific fields by field name regex match (regexes are checked in the order specified)</li> * <li><code>typeName</code> - selecting specific fields by fieldType name lookup</li> * <li><code>typeClass</code> - selecting specific fields by fieldType class lookup, including inheritence and interfaces</li> * </ul> * * <p> * Each criteria can specified as either an <arr> of <str>, or * multiple <str> with the same name. When multiple criteria of a * single type exist, fields must match <b>at least one</b> to be selected. * If more then one type of criteria exist, fields must match * <b>at least one of each</b> to be selected. * </p> * <p> * The following additional selector may be specified as a <bool> - when specified * as false, only fields that <b>do not</b> match a schema field/dynamic field are selected; * when specified as true, only fields that <b>do</b> match a schema field/dynamic field are * selected: * </p> * <ul> * <li><code>fieldNameMatchesSchemaField</code> - selecting specific fields based on whether or not they match a schema field</li> * </ul> * <p> * One or more <code>excludes</code> <lst> params may also be specified, * containing any of the above criteria, identifying fields to be excluded * from seelction even if they match the selection criteria. As with the main * selection critiera a field must match all of criteria in a single exclusion * in order to be excluded, but multiple exclusions may be specified to get an * <code>OR</code> behavior * </p> * * <p> * In the ExampleFieldMutatingUpdateProcessorFactory configured below, * fields will be mutated if the name starts with "foo" <i>or</i> "bar"; * <b>unless</b> the field name contains the substring "SKIP" <i>or</i> * the fieldType is (or subclasses) TrieDateField. Meaning a field named * "foo_SKIP" is guaranteed not to be selected, but a field named "bar_smith" * that uses StrField will be selected. * </p> * <pre class="prettyprint"> * <processor class="solr.ExampleFieldMutatingUpdateProcessorFactory"> * <str name="fieldRegex">foo.*</str> * <str name="fieldRegex">bar.*</str> * <!-- each set of exclusions is checked independently --> * <lst name="exclude"> * <str name="fieldRegex">.*SKIP.*</str> * </lst> * <lst name="exclude"> * <str name="typeClass">solr.TrieDateField</str> * </lst> * </processor></pre> * * <p> * Subclasses define the default selection behavior to be applied if no * criteria is configured by the user. User configured "exclude" criteria * will be applied to the subclass defined default selector. * </p> * * @see FieldMutatingUpdateProcessor * @see FieldValueMutatingUpdateProcessor * @see FieldNameSelector */ public abstract class FieldMutatingUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware { public static final class SelectorParams { public Set<String> fieldName = Collections.emptySet(); public Set<String> typeName = Collections.emptySet(); public Collection<String> typeClass = Collections.emptyList(); public Collection<Pattern> fieldRegex = Collections.emptyList(); public Boolean fieldNameMatchesSchemaField = null; // null => not specified public boolean noSelectorsSpecified() { return typeClass.isEmpty() && typeName.isEmpty() && fieldRegex.isEmpty() && fieldName.isEmpty() && null == fieldNameMatchesSchemaField; } } private SelectorParams inclusions = new SelectorParams(); private Collection<SelectorParams> exclusions = new ArrayList<>(); private FieldNameSelector selector = null; protected final FieldNameSelector getSelector() { if (null != selector) return selector; throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "selector was never initialized, inform(SolrCore) never called???"); } public static SelectorParams parseSelectorParams(NamedList args) { SelectorParams params = new SelectorParams(); params.fieldName = new HashSet<>(args.removeConfigArgs("fieldName")); params.typeName = new HashSet<>(args.removeConfigArgs("typeName")); // we can compile the patterns now Collection<String> patterns = args.removeConfigArgs("fieldRegex"); if (! patterns.isEmpty()) { params.fieldRegex = new ArrayList<>(patterns.size()); for (String s : patterns) { try { params.fieldRegex.add(Pattern.compile(s)); } catch (PatternSyntaxException e) { throw new SolrException (SolrException.ErrorCode.SERVER_ERROR, "Invalid 'fieldRegex' pattern: " + s, e); } } } // resolve this into actual Class objects later params.typeClass = args.removeConfigArgs("typeClass"); // Returns null if the arg is not specified params.fieldNameMatchesSchemaField = args.removeBooleanArg("fieldNameMatchesSchemaField"); return params; } public static Collection<SelectorParams> parseSelectorExclusionParams(NamedList args) { Collection<SelectorParams> exclusions = new ArrayList<>(); List<Object> excList = args.getAll("exclude"); for (Object excObj : excList) { if (null == excObj) { throw new SolrException (SolrException.ErrorCode.SERVER_ERROR, "'exclude' init param can not be null"); } if (! (excObj instanceof NamedList) ) { throw new SolrException (SolrException.ErrorCode.SERVER_ERROR, "'exclude' init param must be <lst/>"); } NamedList exc = (NamedList) excObj; exclusions.add(parseSelectorParams(exc)); if (0 < exc.size()) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unexpected 'exclude' init sub-param(s): '" + args.getName(0) + "'"); } // call once per instance args.remove("exclude"); } return exclusions; } /** * Handles common initialization related to source fields for * constructing the FieldNameSelector to be used. * * Will error if any unexpected init args are found, so subclasses should * remove any subclass-specific init args before calling this method. */ @SuppressWarnings("unchecked") @Override public void init(NamedList args) { inclusions = parseSelectorParams(args); exclusions = parseSelectorExclusionParams(args); if (0 < args.size()) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unexpected init param(s): '" + args.getName(0) + "'"); } } @Override public void inform(final SolrCore core) { selector = FieldMutatingUpdateProcessor.createFieldNameSelector (core.getResourceLoader(), core, inclusions, getDefaultSelector(core)); for (SelectorParams exc : exclusions) { selector = FieldMutatingUpdateProcessor.wrap (selector, FieldMutatingUpdateProcessor.createFieldNameSelector (core.getResourceLoader(), core, exc, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS)); } } /** * Defines the default selection behavior when the user has not * configured any specific criteria for selecting fields. The Default * implementation matches all fields, and should be overridden by subclasses * as needed. * * @see FieldMutatingUpdateProcessor#SELECT_ALL_FIELDS */ protected FieldNameSelector getDefaultSelector(SolrCore core) { return SELECT_ALL_FIELDS; } }