/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.update.processor; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; import org.apache.solr.common.params.CommonParams; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.component.RealTimeGetComponent; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.CopyField; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.NumericValueFieldType; import org.apache.solr.schema.SchemaField; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.util.RefCounted; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.apache.solr.common.params.CommonParams.ID; /** * @lucene.experimental */ public class AtomicUpdateDocumentMerger { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); protected final IndexSchema schema; protected final SchemaField idField; public AtomicUpdateDocumentMerger(SolrQueryRequest queryReq) { schema = queryReq.getSchema(); idField = schema.getUniqueKeyField(); } /** * Utility method that examines the SolrInputDocument in an AddUpdateCommand * and returns true if the documents contains atomic update instructions. */ public static boolean isAtomicUpdate(final AddUpdateCommand cmd) { SolrInputDocument sdoc = cmd.getSolrInputDocument(); for (SolrInputField sif : sdoc.values()) { if (sif.getValue() instanceof Map) { return true; } } return false; } /** * Merges the fromDoc into the toDoc using the atomic update syntax. * * @param fromDoc SolrInputDocument which will merged into the toDoc * @param toDoc the final SolrInputDocument that will be mutated with the values from the fromDoc atomic commands * @return toDoc with mutated values */ public SolrInputDocument merge(final SolrInputDocument fromDoc, SolrInputDocument toDoc) { for (SolrInputField sif : fromDoc.values()) { Object val = sif.getValue(); if (val instanceof Map) { for (Entry<String,Object> entry : ((Map<String,Object>) val).entrySet()) { String key = entry.getKey(); Object fieldVal = entry.getValue(); boolean updateField = false; switch (key) { case "add": updateField = true; doAdd(toDoc, sif, fieldVal); break; case "set": updateField = true; doSet(toDoc, sif, fieldVal); break; case "remove": updateField = true; doRemove(toDoc, sif, fieldVal); break; case "removeregex": updateField = true; doRemoveRegex(toDoc, sif, fieldVal); break; case "inc": updateField = true; doInc(toDoc, sif, fieldVal); break; default: //Perhaps throw an error here instead? log.warn("Unknown operation for the an atomic update, operation ignored: " + key); break; } // validate that the field being modified is not the id field. if (updateField && idField.getName().equals(sif.getName())) { throw new SolrException(ErrorCode.BAD_REQUEST, "Invalid update of id field: " + sif); } } } else { // normal fields are treated as a "set" toDoc.put(sif.getName(), sif); } } return toDoc; } /** * Given a schema field, return whether or not such a field is supported for an in-place update. * Note: If an update command has updates to only supported fields (and _version_ is also supported), * only then is such an update command executed as an in-place update. */ private static boolean isSupportedFieldForInPlaceUpdate(SchemaField schemaField) { return !(schemaField.indexed() || schemaField.stored() || !schemaField.hasDocValues() || schemaField.multiValued() || !(schemaField.getType() instanceof NumericValueFieldType)); } /** * Given an add update command, compute a list of fields that can be updated in-place. If there is even a single * field in the update that cannot be updated in-place, the entire update cannot be executed in-place (and empty set * will be returned in that case). * * @return Return a set of fields that can be in-place updated. */ public static Set<String> computeInPlaceUpdatableFields(AddUpdateCommand cmd) throws IOException { SolrInputDocument sdoc = cmd.getSolrInputDocument(); IndexSchema schema = cmd.getReq().getSchema(); final SchemaField uniqueKeyField = schema.getUniqueKeyField(); final String uniqueKeyFieldName = null == uniqueKeyField ? null : uniqueKeyField.getName(); final Set<String> candidateFields = new HashSet<>(); // if _version_ field is not supported for in-place update, bail out early SchemaField versionField = schema.getFieldOrNull(CommonParams.VERSION_FIELD); if (versionField == null || !isSupportedFieldForInPlaceUpdate(versionField)) { return Collections.emptySet(); } // first pass, check the things that are virtually free, // and bail out early if anything is obviously not a valid in-place update for (String fieldName : sdoc.getFieldNames()) { if (fieldName.equals(uniqueKeyFieldName) || fieldName.equals(CommonParams.VERSION_FIELD)) { continue; } Object fieldValue = sdoc.getField(fieldName).getValue(); if (! (fieldValue instanceof Map) ) { // not an in-place update if there are fields that are not maps return Collections.emptySet(); } // else it's a atomic update map... for (String op : ((Map<String, Object>)fieldValue).keySet()) { if (!op.equals("set") && !op.equals("inc")) { // not a supported in-place update op return Collections.emptySet(); } } candidateFields.add(fieldName); } if (candidateFields.isEmpty()) { return Collections.emptySet(); } // second pass over the candidates for in-place updates // this time more expensive checks involving schema/config settings for (String fieldName: candidateFields) { SchemaField schemaField = schema.getField(fieldName); if (!isSupportedFieldForInPlaceUpdate(schemaField)) { return Collections.emptySet(); } // if this field has copy target which is not supported for in place, then empty for (CopyField copyField: schema.getCopyFieldsList(fieldName)) { if (!isSupportedFieldForInPlaceUpdate(copyField.getDestination())) return Collections.emptySet(); } } // third pass: requiring checks against the actual IndexWriter due to internal DV update limitations SolrCore core = cmd.getReq().getCore(); RefCounted<IndexWriter> holder = core.getSolrCoreState().getIndexWriter(core); Set<String> fieldNamesFromIndexWriter = null; Set<String> segmentSortingFields = null; try { IndexWriter iw = holder.get(); fieldNamesFromIndexWriter = iw.getFieldNames(); // This shouldn't be needed once LUCENE-7659 is resolved segmentSortingFields = iw.getConfig().getIndexSortFields(); } finally { holder.decref(); } for (String fieldName: candidateFields) { if (! fieldNamesFromIndexWriter.contains(fieldName) ) { return Collections.emptySet(); // if this field doesn't exist, DV update can't work } if (segmentSortingFields.contains(fieldName) ) { return Collections.emptySet(); // if this is used for segment sorting, DV updates can't work } } return candidateFields; } /** * Given an AddUpdateCommand containing update operations (e.g. set, inc), merge and resolve the operations into * a partial document that can be used for indexing the in-place updates. The AddUpdateCommand is modified to contain * the partial document (instead of the original document which contained the update operations) and also * the prevVersion that this in-place update depends on. * Note: updatedFields passed into the method can be changed, i.e. the version field can be added to the set. * @return If in-place update cannot succeed, e.g. if the old document is deleted recently, then false is returned. A false * return indicates that this update can be re-tried as a full atomic update. Returns true if the in-place update * succeeds. */ public boolean doInPlaceUpdateMerge(AddUpdateCommand cmd, Set<String> updatedFields) throws IOException { SolrInputDocument inputDoc = cmd.getSolrInputDocument(); BytesRef idBytes = cmd.getIndexedId(); updatedFields.add(CommonParams.VERSION_FIELD); // add the version field so that it is fetched too SolrInputDocument oldDocument = RealTimeGetComponent.getInputDocument (cmd.getReq().getCore(), idBytes, null, // don't want the version to be returned true, // avoid stored fields from index updatedFields, true); // resolve the full document if (oldDocument == RealTimeGetComponent.DELETED || oldDocument == null) { // This doc was deleted recently. In-place update cannot work, hence a full atomic update should be tried. return false; } if (oldDocument.containsKey(CommonParams.VERSION_FIELD) == false) { throw new SolrException (ErrorCode.INVALID_STATE, "There is no _version_ in previous document. id=" + cmd.getPrintableId()); } Long oldVersion = (Long) oldDocument.remove(CommonParams.VERSION_FIELD).getValue(); // If the oldDocument contains any other field apart from updatedFields (or id/version field), then remove them. // This can happen, despite requesting for these fields in the call to RTGC.getInputDocument, if the document was // fetched from the tlog and had all these fields (possibly because it was a full document ADD operation). if (updatedFields != null) { Collection<String> names = new HashSet<String>(oldDocument.getFieldNames()); for (String fieldName: names) { if (fieldName.equals(CommonParams.VERSION_FIELD)==false && fieldName.equals(ID)==false && updatedFields.contains(fieldName)==false) { oldDocument.remove(fieldName); } } } // Copy over all supported DVs from oldDocument to partialDoc // // Assuming multiple updates to the same doc: field 'dv1' in one update, then field 'dv2' in a second // update, and then again 'dv1' in a third update (without commits in between), the last update would // fetch from the tlog the partial doc for the 2nd (dv2) update. If that doc doesn't copy over the // previous updates to dv1 as well, then a full resolution (by following previous pointers) would // need to be done to calculate the dv1 value -- so instead copy all the potentially affected DV fields. SolrInputDocument partialDoc = new SolrInputDocument(); String uniqueKeyField = schema.getUniqueKeyField().getName(); for (String fieldName : oldDocument.getFieldNames()) { SchemaField schemaField = schema.getField(fieldName); if (fieldName.equals(uniqueKeyField) || isSupportedFieldForInPlaceUpdate(schemaField)) { partialDoc.addField(fieldName, oldDocument.getFieldValue(fieldName)); } } merge(inputDoc, partialDoc); // Populate the id field if not already populated (this can happen since stored fields were avoided during fetch from RTGC) if (!partialDoc.containsKey(schema.getUniqueKeyField().getName())) { partialDoc.addField(idField.getName(), inputDoc.getField(schema.getUniqueKeyField().getName()).getFirstValue()); } cmd.prevVersion = oldVersion; cmd.solrDoc = partialDoc; return true; } protected void doSet(SolrInputDocument toDoc, SolrInputField sif, Object fieldVal) { SchemaField sf = schema.getField(sif.getName()); toDoc.setField(sif.getName(), sf.getType().toNativeType(fieldVal)); } protected void doAdd(SolrInputDocument toDoc, SolrInputField sif, Object fieldVal) { SchemaField sf = schema.getField(sif.getName()); toDoc.addField(sif.getName(), sf.getType().toNativeType(fieldVal)); } protected void doInc(SolrInputDocument toDoc, SolrInputField sif, Object fieldVal) { SolrInputField numericField = toDoc.get(sif.getName()); SchemaField sf = schema.getField(sif.getName()); if (numericField != null || sf.getDefaultValue() != null) { // TODO: fieldtype needs externalToObject? String oldValS = (numericField != null) ? numericField.getFirstValue().toString(): sf.getDefaultValue().toString(); BytesRefBuilder term = new BytesRefBuilder(); sf.getType().readableToIndexed(oldValS, term); Object oldVal = sf.getType().toObject(sf, term.get()); String fieldValS = fieldVal.toString(); Number result; if (oldVal instanceof Long) { result = ((Long) oldVal).longValue() + Long.parseLong(fieldValS); } else if (oldVal instanceof Float) { result = ((Float) oldVal).floatValue() + Float.parseFloat(fieldValS); } else if (oldVal instanceof Double) { result = ((Double) oldVal).doubleValue() + Double.parseDouble(fieldValS); } else { // int, short, byte result = ((Integer) oldVal).intValue() + Integer.parseInt(fieldValS); } toDoc.setField(sif.getName(), result); } else { toDoc.setField(sif.getName(), fieldVal); } } protected void doRemove(SolrInputDocument toDoc, SolrInputField sif, Object fieldVal) { final String name = sif.getName(); SolrInputField existingField = toDoc.get(name); if (existingField == null) return; SchemaField sf = schema.getField(name); if (sf != null) { final Collection<Object> original = existingField.getValues(); if (fieldVal instanceof Collection) { for (Object object : (Collection) fieldVal) { Object o = sf.getType().toNativeType(object); original.remove(o); } } else { original.remove(sf.getType().toNativeType(fieldVal)); } toDoc.setField(name, original); } } protected void doRemoveRegex(SolrInputDocument toDoc, SolrInputField sif, Object valuePatterns) { final String name = sif.getName(); final SolrInputField existingField = toDoc.get(name); if (existingField != null) { final Collection<Object> valueToRemove = new HashSet<>(); final Collection<Object> original = existingField.getValues(); final Collection<Pattern> patterns = preparePatterns(valuePatterns); for (Object value : original) { for(Pattern pattern : patterns) { final Matcher m = pattern.matcher(value.toString()); if (m.matches()) { valueToRemove.add(value); } } } original.removeAll(valueToRemove); toDoc.setField(name, original); } } private Collection<Pattern> preparePatterns(Object fieldVal) { final Collection<Pattern> patterns = new LinkedHashSet<>(1); if (fieldVal instanceof Collection) { Collection<String> patternVals = (Collection<String>) fieldVal; for (String patternVal : patternVals) { patterns.add(Pattern.compile(patternVal)); } } else { patterns.add(Pattern.compile(fieldVal.toString())); } return patterns; } }