/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.update; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexableField; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; import org.apache.solr.schema.*; /** * */ // Not thread safe - by design. Create a new builder for each thread. public class DocumentBuilder { private final IndexSchema schema; private Document doc; private HashMap<String,String> map; public DocumentBuilder(IndexSchema schema) { this.schema = schema; } public void startDoc() { doc = new Document(); map = new HashMap<String,String>(); } protected void addSingleField(SchemaField sfield, String val, float boost) { //System.out.println("###################ADDING FIELD "+sfield+"="+val); // we don't check for a null val ourselves because a solr.FieldType // might actually want to map it to something. If createField() // returns null, then we don't store the field. if (sfield.isPolyField()) { IndexableField[] fields = sfield.createFields(val, boost); if (fields.length > 0) { if (!sfield.multiValued()) { String oldValue = map.put(sfield.getName(), val); if (oldValue != null) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "ERROR: multiple values encountered for non multiValued field " + sfield.getName() + ": first='" + oldValue + "' second='" + val + "'"); } } // Add each field for (IndexableField field : fields) { doc.add(field); } } } else { IndexableField field = sfield.createField(val, boost); if (field != null) { if (!sfield.multiValued()) { String oldValue = map.put(sfield.getName(), val); if (oldValue != null) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR: multiple values encountered for non multiValued field " + sfield.getName() + ": first='" + oldValue + "' second='" + val + "'"); } } } doc.add(field); } } /** * Add the specified {@link org.apache.solr.schema.SchemaField} to the document. Does not invoke the copyField mechanism. * @param sfield The {@link org.apache.solr.schema.SchemaField} to add * @param val The value to add * @param boost The boost factor * * @see #addField(String, String) * @see #addField(String, String, float) * @see #addSingleField(org.apache.solr.schema.SchemaField, String, float) */ public void addField(SchemaField sfield, String val, float boost) { addSingleField(sfield,val,boost); } /** * Add the Field and value to the document, invoking the copyField mechanism * @param name The name of the field * @param val The value to add * * @see #addField(String, String, float) * @see #addField(org.apache.solr.schema.SchemaField, String, float) * @see #addSingleField(org.apache.solr.schema.SchemaField, String, float) */ public void addField(String name, String val) { addField(name, val, 1.0f); } /** * Add the Field and value to the document with the specified boost, invoking the copyField mechanism * @param name The name of the field. * @param val The value to add * @param boost The boost * * @see #addField(String, String) * @see #addField(org.apache.solr.schema.SchemaField, String, float) * @see #addSingleField(org.apache.solr.schema.SchemaField, String, float) * */ public void addField(String name, String val, float boost) { SchemaField sfield = schema.getFieldOrNull(name); if (sfield != null) { addField(sfield,val,boost); } // Check if we should copy this field to any other fields. // This could happen whether it is explicit or not. final List<CopyField> copyFields = schema.getCopyFieldsList(name); if (copyFields != null) { for(CopyField cf : copyFields) { addSingleField(cf.getDestination(), cf.getLimitedValue( val ), boost); } } // error if this field name doesn't match anything if (sfield==null && (copyFields==null || copyFields.size()==0)) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR:unknown field '" + name + "'"); } } public void endDoc() { } // specific to this type of document builder public Document getDoc() throws IllegalArgumentException { // Check for all required fields -- Note, all fields with a // default value are defacto 'required' fields. List<String> missingFields = null; for (SchemaField field : schema.getRequiredFields()) { if (doc.getField(field.getName() ) == null) { if (field.getDefaultValue() != null) { addField(doc, field, field.getDefaultValue(), 1.0f); } else { if (missingFields==null) { missingFields = new ArrayList<String>(1); } missingFields.add(field.getName()); } } } if (missingFields != null) { StringBuilder builder = new StringBuilder(); // add the uniqueKey if possible if( schema.getUniqueKeyField() != null ) { String n = schema.getUniqueKeyField().getName(); String v = doc.getField( n ).stringValue(); builder.append( "Document ["+n+"="+v+"] " ); } builder.append("missing required fields: " ); for (String field : missingFields) { builder.append(field); builder.append(" "); } throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, builder.toString()); } Document ret = doc; doc=null; return ret; } private static void addField(Document doc, SchemaField field, Object val, float boost) { if (field.isPolyField()) { IndexableField[] farr = field.getType().createFields(field, val, boost); for (IndexableField f : farr) { if (f != null) doc.add(f); // null fields are not added } } else { IndexableField f = field.createField(val, boost); if (f != null) doc.add(f); // null fields are not added } } private static String getID( SolrInputDocument doc, IndexSchema schema ) { String id = ""; SchemaField sf = schema.getUniqueKeyField(); if( sf != null ) { id = "[doc="+doc.getFieldValue( sf.getName() )+"] "; } return id; } /** * Convert a SolrInputDocument to a lucene Document. * * This function should go elsewhere. This builds the Document without an * extra Map<> checking for multiple values. For more discussion, see: * http://www.nabble.com/Re%3A-svn-commit%3A-r547493---in--lucene-solr-trunk%3A-.--src-java-org-apache-solr-common--src-java-org-apache-solr-schema--src-java-org-apache-solr-update--src-test-org-apache-solr-common--tf3931539.html * * TODO: /!\ NOTE /!\ This semantics of this function are still in flux. * Something somewhere needs to be able to fill up a SolrDocument from * a lucene document - this is one place that may happen. It may also be * moved to an independent function * * @since solr 1.3 */ public static Document toDocument( SolrInputDocument doc, IndexSchema schema ) { Document out = new Document(); final float docBoost = doc.getDocumentBoost(); // Load fields from SolrDocument to Document for( SolrInputField field : doc ) { String name = field.getName(); SchemaField sfield = schema.getFieldOrNull(name); boolean used = false; float boost = field.getBoost(); boolean applyBoost = sfield != null && sfield.indexed() && !sfield.omitNorms(); // Make sure it has the correct number if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued field " + sfield.getName() + ": " +field.getValue() ); } if (applyBoost == false && boost != 1.0F) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "ERROR: "+getID(doc, schema)+"cannot set an index-time boost, unindexed or norms are omitted for field " + sfield.getName() + ": " +field.getValue() ); } // Lucene no longer has a native docBoost, so we have to multiply // it ourselves (do this after the applyBoost error check so we don't // give an error on fields that don't support boost just because of a // docBoost) boost *= docBoost; // load each field value boolean hasField = false; try { for( Object v : field ) { if( v == null ) { continue; } hasField = true; if (sfield != null) { used = true; addField(out, sfield, v, applyBoost ? boost : 1f); } // Check if we should copy this field to any other fields. // This could happen whether it is explicit or not. List<CopyField> copyFields = schema.getCopyFieldsList(name); for (CopyField cf : copyFields) { SchemaField destinationField = cf.getDestination(); // check if the copy field is a multivalued or not if (!destinationField.multiValued() && out.getField(destinationField.getName()) != null) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " + destinationField.getName() + ": " + v); } used = true; // Perhaps trim the length of a copy field Object val = v; if( val instanceof String && cf.getMaxChars() > 0 ) { val = cf.getLimitedValue((String)val); } addField(out, destinationField, val, destinationField.indexed() && !destinationField.omitNorms() ? boost : 1F); } // The boost for a given field is the product of the // *all* boosts on values of that field. // For multi-valued fields, we only want to set the boost on the // first field. boost = 1.0f; } } catch( SolrException ex ) { throw ex; } catch( Exception ex ) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "ERROR: "+getID(doc, schema)+"Error adding field '" + field.getName() + "'='" +field.getValue()+"' msg=" + ex.getMessage(), ex ); } // make sure the field was used somehow... if( !used && hasField ) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "ERROR: "+getID(doc, schema)+"unknown field '" +name + "'"); } } // Now validate required fields or add default values // fields with default values are defacto 'required' for (SchemaField field : schema.getRequiredFields()) { if (out.getField(field.getName() ) == null) { if (field.getDefaultValue() != null) { addField(out, field, field.getDefaultValue(), 1.0f); } else { String msg = getID(doc, schema) + "missing required field: " + field.getName(); throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, msg ); } } } return out; } }