/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.update; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Set; import org.apache.lucene.document.Document; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; import org.apache.solr.schema.CopyField; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import com.google.common.collect.Sets; /** * Builds a Lucene {@link Document} from a {@link SolrInputDocument}. */ public class DocumentBuilder { // accessible only for tests static int MIN_LENGTH_TO_MOVE_LAST = Integer.getInteger("solr.docBuilder.minLengthToMoveLast", 4*1024); // internal setting /** * Add a field value to a given document. * @param doc Document that the field needs to be added to * @param field The schema field object for the field * @param val The value for the field to be added * @param forInPlaceUpdate Whether the field is to be added for in-place update. If true, * only numeric docValues based fields are added to the document. This can be true * when constructing a Lucene document for writing an in-place update, and we don't need * presence of non-updatable fields (non NDV) in such a document. */ private static void addField(Document doc, SchemaField field, Object val, boolean forInPlaceUpdate) { if (val instanceof IndexableField) { if (forInPlaceUpdate) { assert val instanceof NumericDocValuesField: "Expected in-place update to be done on" + " NDV fields only."; } doc.add((IndexableField)val); return; } for (IndexableField f : field.getType().createFields(field, val)) { if (f != null) { // null fields are not added // HACK: workaround for SOLR-9809 // even though at this point in the code we know the field is single valued and DV only // TrieField.createFields() may still return (usless) IndexableField instances that are not // NumericDocValuesField instances. // // once SOLR-9809 is resolved, we should be able to replace this conditional with... // assert f instanceof NumericDocValuesField if (forInPlaceUpdate) { if (f instanceof NumericDocValuesField) { doc.add(f); } } else { doc.add(f); } } } } private static String getID( SolrInputDocument doc, IndexSchema schema ) { String id = ""; SchemaField sf = schema.getUniqueKeyField(); if( sf != null ) { id = "[doc="+doc.getFieldValue( sf.getName() )+"] "; } return id; } /** * @see DocumentBuilder#toDocument(SolrInputDocument, IndexSchema, boolean) */ public static Document toDocument( SolrInputDocument doc, IndexSchema schema ) { return toDocument(doc, schema, false); } /** * Convert a SolrInputDocument to a lucene Document. * * This function should go elsewhere. This builds the Document without an * extra Map<> checking for multiple values. For more discussion, see: * http://www.nabble.com/Re%3A-svn-commit%3A-r547493---in--lucene-solr-trunk%3A-.--src-java-org-apache-solr-common--src-java-org-apache-solr-schema--src-java-org-apache-solr-update--src-test-org-apache-solr-common--tf3931539.html * * TODO: /!\ NOTE /!\ This semantics of this function are still in flux. * Something somewhere needs to be able to fill up a SolrDocument from * a lucene document - this is one place that may happen. It may also be * moved to an independent function * * @since solr 1.3 * * @param doc SolrInputDocument from which the document has to be built * @param schema Schema instance * @param forInPlaceUpdate Whether the output document would be used for an in-place update or not. When this is true, * default fields values and copy fields targets are not populated. * @return Built Lucene document */ public static Document toDocument( SolrInputDocument doc, IndexSchema schema, boolean forInPlaceUpdate ) { final SchemaField uniqueKeyField = schema.getUniqueKeyField(); final String uniqueKeyFieldName = null == uniqueKeyField ? null : uniqueKeyField.getName(); Document out = new Document(); Set<String> usedFields = Sets.newHashSet(); // Load fields from SolrDocument to Document for( SolrInputField field : doc ) { String name = field.getName(); SchemaField sfield = schema.getFieldOrNull(name); boolean used = false; // Make sure it has the correct number if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued field " + sfield.getName() + ": " +field.getValue() ); } List<CopyField> copyFields = schema.getCopyFieldsList(name); if( copyFields.size() == 0 ) copyFields = null; // load each field value boolean hasField = false; try { for( Object v : field ) { if( v == null ) { continue; } hasField = true; if (sfield != null) { used = true; addField(out, sfield, v, name.equals(uniqueKeyFieldName) ? false : forInPlaceUpdate); // record the field as having a value usedFields.add(sfield.getName()); } // Check if we should copy this field value to any other fields. // This could happen whether it is explicit or not. if (copyFields != null) { // Do not copy this field if this document is to be used for an in-place update, // and this is the uniqueKey field (because the uniqueKey can't change so no need to "update" the copyField). if ( ! (forInPlaceUpdate && name.equals(uniqueKeyFieldName)) ) { for (CopyField cf : copyFields) { SchemaField destinationField = cf.getDestination(); final boolean destHasValues = usedFields.contains(destinationField.getName()); // check if the copy field is a multivalued or not if (!destinationField.multiValued() && destHasValues) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " + destinationField.getName() + ": " + v); } used = true; // Perhaps trim the length of a copy field Object val = v; if( val instanceof String && cf.getMaxChars() > 0 ) { val = cf.getLimitedValue((String)val); } addField(out, destinationField, val, destinationField.getName().equals(uniqueKeyFieldName) ? false : forInPlaceUpdate); // record the field as having a value usedFields.add(destinationField.getName()); } } } } } catch( SolrException ex ) { throw ex; } catch( Exception ex ) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "ERROR: "+getID(doc, schema)+"Error adding field '" + field.getName() + "'='" +field.getValue()+"' msg=" + ex.getMessage(), ex ); } // make sure the field was used somehow... if( !used && hasField ) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "ERROR: "+getID(doc, schema)+"unknown field '" +name + "'"); } } // Now validate required fields or add default values // fields with default values are defacto 'required' // Note: We don't need to add default fields if this document is to be used for // in-place updates, since this validation and population of default fields would've happened // during the full indexing initially. if (!forInPlaceUpdate) { for (SchemaField field : schema.getRequiredFields()) { if (out.getField(field.getName() ) == null) { if (field.getDefaultValue() != null) { addField(out, field, field.getDefaultValue(), false); } else { String msg = getID(doc, schema) + "missing required field: " + field.getName(); throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, msg ); } } } } if (!forInPlaceUpdate) { moveLargestFieldLast(out); } return out; } /** Move the largest stored field last, because Lucene can avoid loading that one if it's not needed. */ private static void moveLargestFieldLast(Document doc) { String largestField = null; int largestFieldLen = -1; boolean largestIsLast = true; for (IndexableField field : doc) { if (!field.fieldType().stored()) { continue; } if (largestIsLast && !field.name().equals(largestField)) { largestIsLast = false; } if (field.numericValue() != null) { // just ignore these as non-competitive (avoid toString'ing their number) continue; } String strVal = field.stringValue(); if (strVal != null) { if (strVal.length() > largestFieldLen) { largestField = field.name(); largestFieldLen = strVal.length(); largestIsLast = true; } } else { BytesRef bytesRef = field.binaryValue(); if (bytesRef != null && bytesRef.length > largestFieldLen) { largestField = field.name(); largestFieldLen = bytesRef.length; largestIsLast = true; } } } if (!largestIsLast && largestField != null && largestFieldLen > MIN_LENGTH_TO_MOVE_LAST) { // only bother if the value isn't tiny LinkedList<IndexableField> addToEnd = new LinkedList<>(); Iterator<IndexableField> iterator = doc.iterator(); while (iterator.hasNext()) { IndexableField field = iterator.next(); if (field.name().equals(largestField)) { addToEnd.add(field); iterator.remove(); // Document may not have "remove" but it's iterator allows mutation } } for (IndexableField field : addToEnd) { doc.add(field); } } } }