/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.update; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; import org.apache.solr.schema.*; /** * @version $Id: DocumentBuilder.java 911595 2010-02-18 21:29:08Z hossman $ */ // Not thread safe - by design. Create a new builder for each thread. public class DocumentBuilder { private final IndexSchema schema; private Document doc; private HashMap<String,String> map; public DocumentBuilder(IndexSchema schema) { this.schema = schema; } public void startDoc() { doc = new Document(); map = new HashMap<String,String>(); } protected void addSingleField(SchemaField sfield, String val, float boost) { //System.out.println("###################ADDING FIELD "+sfield+"="+val); // we don't check for a null val ourselves because a solr.FieldType // might actually want to map it to something. If createField() // returns null, then we don't store the field. if (sfield.isPolyField()) { Fieldable[] fields = sfield.createFields(val, boost); if (fields.length > 0) { if (!sfield.multiValued()) { String oldValue = map.put(sfield.getName(), val); if (oldValue != null) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "ERROR: multiple values encountered for non multiValued field " + sfield.getName() + ": first='" + oldValue + "' second='" + val + "'"); } } // Add each field for (Fieldable field : fields) { doc.add(field); } } } else { Field field = sfield.createField(val, boost); if (field != null) { if (!sfield.multiValued()) { String oldValue = map.put(sfield.getName(), val); if (oldValue != null) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR: multiple values encountered for non multiValued field " + sfield.getName() + ": first='" + oldValue + "' second='" + val + "'"); } } } doc.add(field); } } /** * Add the specified {@link org.apache.solr.schema.SchemaField} to the document. Does not invoke the copyField mechanism. * @param sfield The {@link org.apache.solr.schema.SchemaField} to add * @param val The value to add * @param boost The boost factor * * @see #addField(String, String) * @see #addField(String, String, float) * @see #addSingleField(org.apache.solr.schema.SchemaField, String, float) */ public void addField(SchemaField sfield, String val, float boost) { addSingleField(sfield,val,boost); } /** * Add the Field and value to the document, invoking the copyField mechanism * @param name The name of the field * @param val The value to add * * @see #addField(String, String, float) * @see #addField(org.apache.solr.schema.SchemaField, String, float) * @see #addSingleField(org.apache.solr.schema.SchemaField, String, float) */ public void addField(String name, String val) { addField(name, val, 1.0f); } /** * Add the Field and value to the document with the specified boost, invoking the copyField mechanism * @param name The name of the field. * @param val The value to add * @param boost The boost * * @see #addField(String, String) * @see #addField(org.apache.solr.schema.SchemaField, String, float) * @see #addSingleField(org.apache.solr.schema.SchemaField, String, float) * */ public void addField(String name, String val, float boost) { SchemaField sfield = schema.getFieldOrNull(name); if (sfield != null) { addField(sfield,val,boost); } // Check if we should copy this field to any other fields. // This could happen whether it is explicit or not. final List<CopyField> copyFields = schema.getCopyFieldsList(name); if (copyFields != null) { for(CopyField cf : copyFields) { addSingleField(cf.getDestination(), cf.getLimitedValue( val ), boost); } } // error if this field name doesn't match anything if (sfield==null && (copyFields==null || copyFields.size()==0)) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR:unknown field '" + name + "'"); } } public void setBoost(float boost) { doc.setBoost(boost); } public void endDoc() { } // specific to this type of document builder public Document getDoc() throws IllegalArgumentException { // Check for all required fields -- Note, all fields with a // default value are defacto 'required' fields. List<String> missingFields = null; for (SchemaField field : schema.getRequiredFields()) { if (doc.getField(field.getName() ) == null) { if (field.getDefaultValue() != null) { addField(doc, field, field.getDefaultValue(), 1.0f); } else { if (missingFields==null) { missingFields = new ArrayList<String>(1); } missingFields.add(field.getName()); } } } if (missingFields != null) { StringBuilder builder = new StringBuilder(); // add the uniqueKey if possible if( schema.getUniqueKeyField() != null ) { String n = schema.getUniqueKeyField().getName(); String v = doc.get( n ); builder.append( "Document ["+n+"="+v+"] " ); } builder.append("missing required fields: " ); for (String field : missingFields) { builder.append(field); builder.append(" "); } throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, builder.toString()); } Document ret = doc; doc=null; return ret; } private static void addField(Document doc, SchemaField field, String val, float boost) { if (field.isPolyField()) { Fieldable[] farr = field.getType().createFields(field, val, boost); for (Fieldable f : farr) { if (f != null) doc.add(f); // null fields are not added } } else { Field f = field.createField(val, boost); if (f != null) doc.add(f); // null fields are not added } } /** * Convert a SolrInputDocument to a lucene Document. * * This function should go elsewhere. This builds the Document without an * extra Map<> checking for multiple values. For more discussion, see: * http://www.nabble.com/Re%3A-svn-commit%3A-r547493---in--lucene-solr-trunk%3A-.--src-java-org-apache-solr-common--src-java-org-apache-solr-schema--src-java-org-apache-solr-update--src-test-org-apache-solr-common--tf3931539.html * * TODO: /!\ NOTE /!\ This semantics of this function are still in flux. * Something somewhere needs to be able to fill up a SolrDocument from * a lucene document - this is one place that may happen. It may also be * moved to an independent function * * @since solr 1.3 */ public static Document toDocument( SolrInputDocument doc, IndexSchema schema ) { Document out = new Document(); out.setBoost( doc.getDocumentBoost() ); // Load fields from SolrDocument to Document for( SolrInputField field : doc ) { String name = field.getName(); SchemaField sfield = schema.getFieldOrNull(name); boolean used = false; float boost = field.getBoost(); // Make sure it has the correct number if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) { String id = ""; SchemaField sf = schema.getUniqueKeyField(); if( sf != null ) { id = "["+doc.getFieldValue( sf.getName() )+"] "; } throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "ERROR: "+id+"multiple values encountered for non multiValued field " + sfield.getName() + ": " +field.getValue() ); } // load each field value boolean hasField = false; for( Object v : field ) { if( v == null ) { continue; } String val = null; hasField = true; boolean isBinaryField = false; if (sfield != null && sfield.getType() instanceof BinaryField) { isBinaryField = true; BinaryField binaryField = (BinaryField) sfield.getType(); Field f = binaryField.createField(sfield,v,boost); if(f != null){ out.add(f); } used = true; } else { // TODO!!! HACK -- date conversion if (sfield != null && v instanceof Date && sfield.getType() instanceof DateField) { DateField df = (DateField) sfield.getType(); val = df.toInternal((Date) v) + 'Z'; } else if (v != null) { val = v.toString(); } if (sfield != null) { used = true; addField(out, sfield, val, boost); } } // Check if we should copy this field to any other fields. // This could happen whether it is explicit or not. List<CopyField> copyFields = schema.getCopyFieldsList(name); for (CopyField cf : copyFields) { SchemaField destinationField = cf.getDestination(); // check if the copy field is a multivalued or not if (!destinationField.multiValued() && out.get(destinationField.getName()) != null) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "ERROR: multiple values encountered for non multiValued copy field " + destinationField.getName() + ": " + val); } used = true; //Don't worry about poly fields here Fieldable [] fields = null; if (isBinaryField) { if (destinationField.getType() instanceof BinaryField) { BinaryField binaryField = (BinaryField) destinationField.getType(); //TODO: safe to assume that binary fields only create one? fields = new Field[]{binaryField.createField(destinationField, v, boost)}; } } else { fields = destinationField.createFields(cf.getLimitedValue(val), boost); } if (fields != null) { // null fields are not added for (Fieldable f : fields) { out.add(f); } } } // In lucene, the boost for a given field is the product of the // document boost and *all* boosts on values of that field. // For multi-valued fields, we only want to set the boost on the // first field. boost = 1.0f; } // make sure the field was used somehow... if( !used && hasField ) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR:unknown field '" + name + "'"); } } // Now validate required fields or add default values // fields with default values are defacto 'required' for (SchemaField field : schema.getRequiredFields()) { if (out.getField(field.getName() ) == null) { if (field.getDefaultValue() != null) { addField(out, field, field.getDefaultValue(), 1.0f); } else { String id = schema.printableUniqueKey( out ); String msg = "Document ["+id+"] missing required field: " + field.getName(); throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, msg ); } } } return out; } /** * Add fields from the solr document * * TODO: /!\ NOTE /!\ This semantics of this function are still in flux. * Something somewhere needs to be able to fill up a SolrDocument from * a lucene document - this is one place that may happen. It may also be * moved to an independent function * * @since solr 1.3 */ public SolrDocument loadStoredFields( SolrDocument doc, Document luceneDoc ) { for( Object f : luceneDoc.getFields() ) { Fieldable field = (Fieldable)f; if( field.isStored() ) { SchemaField sf = schema.getField( field.name() ); if( !schema.isCopyFieldTarget( sf ) ) { doc.addField( field.name(), sf.getType().toObject( field ) ); } } } return doc; } }