/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.schema; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Fieldable; import org.apache.lucene.search.Similarity; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.util.Version; import org.apache.solr.common.ResourceLoader; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.DOMUtil; import org.apache.solr.core.SolrConfig; import org.apache.solr.core.Config; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.analysis.CharFilterFactory; import org.apache.solr.analysis.TokenFilterFactory; import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.analysis.TokenizerFactory; import org.apache.solr.search.SolrQueryParser; import org.apache.solr.util.plugin.AbstractPluginLoader; import org.apache.solr.util.plugin.SolrCoreAware; import org.w3c.dom.*; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import java.io.InputStream; import java.io.Reader; import 
java.io.IOException;
import java.util.*;
import java.lang.reflect.Constructor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * <code>IndexSchema</code> contains information about the valid fields in an index
 * and the types of those fields.
 *
 * @version $Id: IndexSchema.java 945886 2010-05-18 21:07:28Z hossman $
 */
public final class IndexSchema {
  /** Resource name used by the constructor when it is given a <code>null</code> schema name. */
  public static final String DEFAULT_SCHEMA_FILE = "schema.xml";
  /** Name of the attribute / init arg that carries the Lucene match version of an analysis component. */
  public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";

  final static Logger log = LoggerFactory.getLogger(IndexSchema.class);

  /** Configuration handed to the constructor; it supplies the resource loader and the default luceneMatchVersion. */
  private final SolrConfig solrConfig;
  /** Name of the resource this schema was loaded from. */
  private final String resourceName;
  /** Schema name read from <code>/schema/@name</code>; stays <code>null</code> when the attribute is absent. */
  private String name;
  /** Value of <code>/schema/@version</code> (1.0 when not specified). */
  private float version;
  /** Resource loader obtained from {@link #solrConfig}. */
  private final SolrResourceLoader loader;

  /** Explicitly declared (non-dynamic) fields, keyed on field name. */
  private final HashMap<String, SchemaField> fields = new HashMap<String,SchemaField>();
  /** Field types, keyed on field type name. */
  private final HashMap<String, FieldType> fieldTypes = new HashMap<String,FieldType>();
  /** Explicit fields that declare a default value. */
  private final List<SchemaField> fieldsWithDefaultValue = new ArrayList<SchemaField>();
  /** Required fields; schema loading also adds every field with a default value and, normally, the unique key. */
  private final Collection<SchemaField> requiredFields = new HashSet<SchemaField>();
  /** Dynamic field definitions, kept sorted so that the longest pattern comes first. */
  private DynamicField[] dynamicFields;

  /** Field-name aware index-time analyzer, rebuilt by refreshAnalyzers(). */
  private Analyzer analyzer;
  /** Field-name aware query-time analyzer, rebuilt by refreshAnalyzers(). */
  private Analyzer queryAnalyzer;

  /** Content of <code>/schema/defaultSearchField</code>, or <code>null</code> when none is configured. */
  private String defaultSearchFieldName=null;
  /** Value of <code>/schema/solrQueryParser/@defaultOperator</code>; "OR" unless the schema overrides it. */
  private String queryParserDefaultOperator = "OR";

  /** Copy directives with a non-wildcard source, keyed on source field name. */
  private final Map<String, List<CopyField>> copyFieldsMap = new HashMap<String, List<CopyField>>();
  /** Copy directives whose source is a wildcard pattern. */
  private DynamicCopy[] dynamicCopyFields;
  /**
   * keys are all fields copied to, count is num of copyField
   * directives that target them.
   */
  private Map<SchemaField, Integer> copyFieldTargetCounts
    = new HashMap<SchemaField, Integer>();

  /**
   * Constructs a schema using the specified file name using the normal
   * Config path directory searching rules.
   *
   * @param solrConfig configuration that supplies the resource loader
   * @param name schema resource name; <code>null</code> selects {@link #DEFAULT_SCHEMA_FILE}
   * @see Config#openResource
   * @deprecated Use {@link #IndexSchema(SolrConfig, String, InputStream)} instead.
   */
  @Deprecated
  public IndexSchema(SolrConfig solrConfig, String name) {
    this(solrConfig, name, null);
  }

  /**
   * Constructs a schema using the specified resource name and stream.
* If the is stream is null, the resource loader will load the schema resource by name. * @see SolrResourceLoader#openSchema * By default, this follows the normal config path directory searching rules. * @see Config#openResource */ public IndexSchema(SolrConfig solrConfig, String name, InputStream is) { this.solrConfig = solrConfig; if (name == null) name = DEFAULT_SCHEMA_FILE; this.resourceName = name; loader = solrConfig.getResourceLoader(); InputStream lis = is; if (lis == null) lis = loader.openSchema(name); readSchema(lis); if (lis != is) { try { lis.close(); } catch(IOException xio) {} // ignore } loader.inform( loader ); } /** * @deprecated -- get access to SolrConfig some other way... */ public SolrConfig getSolrConfig() { return solrConfig; } /** * @since solr 1.4 */ public SolrResourceLoader getResourceLoader() { return loader; } /** Gets the name of the resource used to instantiate this schema. */ public String getResourceName() { return resourceName; } /** Gets the name of the schema as specified in the schema resource. */ public String getSchemaName() { return name; } float getVersion() { return version; } /** * Direct access to the InputStream for the schemaFile used by this instance. * @see Config#openResource * @deprecated Use {@link #getSolrConfig()} and open a resource input stream * for {@link #getResourceName()} instead. */ @Deprecated public InputStream getInputStream() { return loader.openResource(resourceName); } /** Gets the name of the schema file. * @deprecated Use {@link #getResourceName()} instead. */ @Deprecated public String getSchemaFile() { return resourceName; } /** The Name of this schema (as specified in the schema file) * @deprecated Use {@link #getSchemaName()} instead. */ @Deprecated public String getName() { return name; } /** * Provides direct access to the Map containing all explicit * (ie: non-dynamic) fields in the index, keyed on field name. 
*
   * <p>
   * The returned Map is the live schema state: changing it, or any entry in it,
   * changes the real schema.
   * </p>
   *
   * <p>
   * NOTE: this method is not thread safe. It is safe to use from the standard
   * <code>inform( SolrCore core )</code> callback of <code>SolrCoreAware</code> classes;
   * used anywhere else it could potentially throw a ConcurrentModificationException.
   * </p>
   */
  public Map<String,SchemaField> getFields() {
    return this.fields;
  }

  /**
   * Gives direct access to the Map of all Field Types in the index,
   * keyed on field type name.
   *
   * <p>
   * The returned Map is the live schema state: changing it, or any entry in it,
   * changes the real schema. After any modification, call
   * {@link IndexSchema#refreshAnalyzers()} so that the Analyzers for the registered
   * fields are updated.
   * </p>
   *
   * <p>
   * NOTE: this method is not thread safe. It is safe to use from the standard
   * <code>inform( SolrCore core )</code> callback of <code>SolrCoreAware</code> classes;
   * used anywhere else it could potentially throw a ConcurrentModificationException.
   * </p>
   */
  public Map<String,FieldType> getFieldTypes() {
    return this.fieldTypes;
  }

  /**
   * Gives direct access to the List of all fields that declare a default value.
   */
  public List<SchemaField> getFieldsWithDefaultValue() {
    return this.fieldsWithDefaultValue;
  }

  /**
   * Gives direct access to the collection of all required fields. This
   * collection also contains all fields with default values.
*/ public Collection<SchemaField> getRequiredFields() { return requiredFields; } private SimilarityFactory similarityFactory; /** * Returns the Similarity used for this index */ public Similarity getSimilarity() { return similarityFactory.getSimilarity(); } /** * Returns the SimilarityFactory used for this index */ public SimilarityFactory getSimilarityFactory() { return similarityFactory; } /** * Returns the Analyzer used when indexing documents for this index * * <p> * This Analyzer is field (and dynamic field) name aware, and delegates to * a field specific Analyzer based on the field type. * </p> */ public Analyzer getAnalyzer() { return analyzer; } /** * Returns the Analyzer used when searching this index * * <p> * This Analyzer is field (and dynamic field) name aware, and delegates to * a field specific Analyzer based on the field type. * </p> */ public Analyzer getQueryAnalyzer() { return queryAnalyzer; } /** * A SolrQueryParser linked to this IndexSchema for field datatype * information, and populated with default options from the * <solrQueryParser> configuration for this IndexSchema. * * @param defaultField if non-null overrides the schema default * @deprecated */ public SolrQueryParser getSolrQueryParser(String defaultField) { SolrQueryParser qp = new SolrQueryParser(this,defaultField); String operator = getQueryParserDefaultOperator(); qp.setDefaultOperator("AND".equals(operator) ? 
QueryParser.Operator.AND : QueryParser.Operator.OR); return qp; } /** * Name of the default search field specified in the schema file */ public String getDefaultSearchFieldName() { return defaultSearchFieldName; } /** * default operator ("AND" or "OR") for QueryParser * @deprecated use getSolrQueryParser().getDefaultOperator() */ @Deprecated public String getQueryParserDefaultOperator() { return queryParserDefaultOperator; } private SchemaField uniqueKeyField; /** * Unique Key field specified in the schema file * @return null if this schema has no unique key field */ public SchemaField getUniqueKeyField() { return uniqueKeyField; } private String uniqueKeyFieldName; private FieldType uniqueKeyFieldType; /** * The raw (field type encoded) value of the Unique Key field for * the specified Document * @return null if this schema has no unique key field * @see #printableUniqueKey */ public Fieldable getUniqueKeyField(org.apache.lucene.document.Document doc) { return doc.getFieldable(uniqueKeyFieldName); // this should return null if name is null } /** * The printable value of the Unique Key field for * the specified Document * @return null if this schema has no unique key field */ public String printableUniqueKey(org.apache.lucene.document.Document doc) { Fieldable f = doc.getFieldable(uniqueKeyFieldName); return f==null ? null : uniqueKeyFieldType.toExternal(f); } private SchemaField getIndexedField(String fname) { SchemaField f = getFields().get(fname); if (f==null) { throw new RuntimeException("unknown field '" + fname + "'"); } if (!f.indexed()) { throw new RuntimeException("'"+fname+"' is not an indexed field:" + f); } return f; } /** * This will re-create the Analyzers. If you make any modifications to * the Field map ({@link IndexSchema#getFields()}, this function is required * to synch the internally cached field analyzers. 
* * @since solr 1.3 */ public void refreshAnalyzers() { analyzer = new SolrIndexAnalyzer(); queryAnalyzer = new SolrQueryAnalyzer(); } private class SolrIndexAnalyzer extends Analyzer { protected final HashMap<String,Analyzer> analyzers; SolrIndexAnalyzer() { analyzers = analyzerCache(); } protected HashMap<String,Analyzer> analyzerCache() { HashMap<String,Analyzer> cache = new HashMap<String,Analyzer>(); for (SchemaField f : getFields().values()) { Analyzer analyzer = f.getType().getAnalyzer(); cache.put(f.getName(), analyzer); } return cache; } protected Analyzer getAnalyzer(String fieldName) { Analyzer analyzer = analyzers.get(fieldName); return analyzer!=null ? analyzer : getDynamicFieldType(fieldName).getAnalyzer(); } public TokenStream tokenStream(String fieldName, Reader reader) { return getAnalyzer(fieldName).tokenStream(fieldName,reader); } @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { return getAnalyzer(fieldName).reusableTokenStream(fieldName,reader); } @Override public int getPositionIncrementGap(String fieldName) { return getAnalyzer(fieldName).getPositionIncrementGap(fieldName); } } private class SolrQueryAnalyzer extends SolrIndexAnalyzer { @Override protected HashMap<String,Analyzer> analyzerCache() { HashMap<String,Analyzer> cache = new HashMap<String,Analyzer>(); for (SchemaField f : getFields().values()) { Analyzer analyzer = f.getType().getQueryAnalyzer(); cache.put(f.getName(), analyzer); } return cache; } @Override protected Analyzer getAnalyzer(String fieldName) { Analyzer analyzer = analyzers.get(fieldName); return analyzer!=null ? 
analyzer : getDynamicFieldType(fieldName).getQueryAnalyzer(); } } private void readSchema(InputStream is) { log.info("Reading Solr Schema"); try { // pass the config resource loader to avoid building an empty one for no reason: // in the current case though, the stream is valid so we wont load the resource by name Config schemaConf = new Config(loader, "schema", is, "/schema/"); Document document = schemaConf.getDocument(); final XPath xpath = schemaConf.getXPath(); final List<SchemaAware> schemaAware = new ArrayList<SchemaAware>(); Node nd = (Node) xpath.evaluate("/schema/@name", document, XPathConstants.NODE); if (nd==null) { log.warn("schema has no name!"); } else { name = nd.getNodeValue(); log.info("Schema name=" + name); } version = schemaConf.getFloat("/schema/@version", 1.0f); final IndexSchema schema = this; AbstractPluginLoader<FieldType> fieldLoader = new AbstractPluginLoader<FieldType>( "[schema.xml] fieldType", true, true) { @Override protected FieldType create( ResourceLoader loader, String name, String className, Node node ) throws Exception { FieldType ft = (FieldType)loader.newInstance(className); ft.setTypeName(name); String expression = "./analyzer[@type='query']"; Node anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE); Analyzer queryAnalyzer = readAnalyzer(anode); // An analyzer without a type specified, or with type="index" expression = "./analyzer[not(@type)] | ./analyzer[@type='index']"; anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE); Analyzer analyzer = readAnalyzer(anode); if (queryAnalyzer==null) queryAnalyzer=analyzer; if (analyzer==null) analyzer=queryAnalyzer; if (analyzer!=null) { ft.setAnalyzer(analyzer); ft.setQueryAnalyzer(queryAnalyzer); } if (ft instanceof SchemaAware){ schemaAware.add((SchemaAware) ft); } return ft; } @Override protected void init(FieldType plugin, Node node) throws Exception { Map<String,String> params = DOMUtil.toMapExcept( node.getAttributes(), "name","class" ); 
plugin.setArgs(schema, params ); } @Override protected FieldType register(String name, FieldType plugin) throws Exception { log.trace("fieldtype defined: " + plugin ); return fieldTypes.put( name, plugin ); } }; String expression = "/schema/types/fieldtype | /schema/types/fieldType"; NodeList nodes = (NodeList) xpath.evaluate(expression, document, XPathConstants.NODESET); fieldLoader.load( loader, nodes ); // Hang on to the fields that say if they are required -- this lets us set a reasonable default for the unique key Map<String,Boolean> explicitRequiredProp = new HashMap<String, Boolean>(); ArrayList<DynamicField> dFields = new ArrayList<DynamicField>(); expression = "/schema/fields/field | /schema/fields/dynamicField"; nodes = (NodeList) xpath.evaluate(expression, document, XPathConstants.NODESET); for (int i=0; i<nodes.getLength(); i++) { Node node = nodes.item(i); NamedNodeMap attrs = node.getAttributes(); String name = DOMUtil.getAttr(attrs,"name","field definition"); log.trace("reading field def "+name); String type = DOMUtil.getAttr(attrs,"type","field " + name); FieldType ft = fieldTypes.get(type); if (ft==null) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Unknown fieldtype '" + type + "' specified on field " + name,false); } Map<String,String> args = DOMUtil.toMapExcept(attrs, "name", "type"); if( args.get( "required" ) != null ) { explicitRequiredProp.put( name, Boolean.valueOf( args.get( "required" ) ) ); } SchemaField f = SchemaField.create(name,ft,args); if (node.getNodeName().equals("field")) { SchemaField old = fields.put(f.getName(),f); if( old != null ) { String msg = "[schema.xml] Duplicate field definition for '" + f.getName() + "' [[["+old.toString()+"]]] and [[["+f.toString()+"]]]"; SolrException t = new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg ); SolrException.logOnce(log,null,t); SolrConfig.severeErrors.add( t ); throw t; } log.debug("field defined: " + f); if( f.getDefaultValue() != null ) { 
log.debug(name+" contains default value: " + f.getDefaultValue()); fieldsWithDefaultValue.add( f ); } if (f.isRequired()) { log.debug(name+" is required in this schema"); requiredFields.add(f); } } else if (node.getNodeName().equals("dynamicField")) { // make sure nothing else has the same path addDynamicField(dFields, f); } else { // we should never get here throw new RuntimeException("Unknown field type"); } } //fields with default values are by definition required //add them to required fields, and we only have to loop once // in DocumentBuilder.getDoc() requiredFields.addAll(getFieldsWithDefaultValue()); // OK, now sort the dynamic fields largest to smallest size so we don't get // any false matches. We want to act like a compiler tool and try and match // the largest string possible. Collections.sort(dFields); log.trace("Dynamic Field Ordering:" + dFields); // stuff it in a normal array for faster access dynamicFields = (DynamicField[])dFields.toArray(new DynamicField[dFields.size()]); Node node = (Node) xpath.evaluate("/schema/similarity", document, XPathConstants.NODE); if (node==null) { similarityFactory = new SimilarityFactory() { public Similarity getSimilarity() { return Similarity.getDefault(); } }; log.debug("using default similarity"); } else { final Object obj = loader.newInstance(((Element) node).getAttribute("class")); if (obj instanceof SimilarityFactory) { // configure a factory, get a similarity back SolrParams params = SolrParams.toSolrParams(DOMUtil.childNodesToNamedList(node)); similarityFactory = (SimilarityFactory)obj; similarityFactory.init(params); } else { // just like always, assume it's a Similarlity and get a ClassCastException - reasonable error handling similarityFactory = new SimilarityFactory() { public Similarity getSimilarity() { return (Similarity) obj; } }; } if (similarityFactory instanceof SchemaAware){ schemaAware.add((SchemaAware) similarityFactory); } log.debug("using similarity factory" + 
similarityFactory.getClass().getName()); } node = (Node) xpath.evaluate("/schema/defaultSearchField/text()", document, XPathConstants.NODE); if (node==null) { log.warn("no default search field specified in schema."); } else { defaultSearchFieldName=node.getNodeValue().trim(); // throw exception if specified, but not found or not indexed if (defaultSearchFieldName!=null) { SchemaField defaultSearchField = getFields().get(defaultSearchFieldName); if ((defaultSearchField == null) || !defaultSearchField.indexed()) { String msg = "default search field '" + defaultSearchFieldName + "' not defined or not indexed" ; throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, msg ); } } log.info("default search field is "+defaultSearchFieldName); } node = (Node) xpath.evaluate("/schema/solrQueryParser/@defaultOperator", document, XPathConstants.NODE); if (node==null) { log.debug("using default query parser operator (OR)"); } else { queryParserDefaultOperator=node.getNodeValue().trim(); log.info("query parser default operator is "+queryParserDefaultOperator); } node = (Node) xpath.evaluate("/schema/uniqueKey/text()", document, XPathConstants.NODE); if (node==null) { log.warn("no uniqueKey specified in schema."); } else { uniqueKeyField=getIndexedField(node.getNodeValue().trim()); if (!uniqueKeyField.stored()) { log.error("uniqueKey is not stored - distributed search will not work"); } if (uniqueKeyField.multiValued()) { log.error("uniqueKey should not be multivalued"); } uniqueKeyFieldName=uniqueKeyField.getName(); uniqueKeyFieldType=uniqueKeyField.getType(); log.info("unique key field: "+uniqueKeyFieldName); // Unless the uniqueKeyField is marked 'required=false' then make sure it exists if( Boolean.FALSE != explicitRequiredProp.get( uniqueKeyFieldName ) ) { uniqueKeyField.required = true; requiredFields.add(uniqueKeyField); } } /////////////// parse out copyField commands /////////////// // Map<String,ArrayList<SchemaField>> cfields = new 
HashMap<String,ArrayList<SchemaField>>(); // expression = "/schema/copyField"; dynamicCopyFields = new DynamicCopy[] {}; expression = "//copyField"; nodes = (NodeList) xpath.evaluate(expression, document, XPathConstants.NODESET); for (int i=0; i<nodes.getLength(); i++) { node = nodes.item(i); NamedNodeMap attrs = node.getAttributes(); String source = DOMUtil.getAttr(attrs,"source","copyField definition"); String dest = DOMUtil.getAttr(attrs,"dest", "copyField definition"); String maxChars = DOMUtil.getAttr(attrs, "maxChars"); int maxCharsInt = CopyField.UNLIMITED; if (maxChars != null) { try { maxCharsInt = Integer.parseInt(maxChars); } catch (NumberFormatException e) { log.warn("Couldn't parse maxChars attribute for copyField from " + source + " to " + dest + " as integer. The whole field will be copied."); } } registerCopyField(source, dest, maxCharsInt); } for (Map.Entry<SchemaField, Integer> entry : copyFieldTargetCounts.entrySet()) { if (entry.getValue() > 1 && !entry.getKey().multiValued()) { log.warn("Field " + entry.getKey().name + " is not multivalued "+ "and destination for multiple copyFields ("+ entry.getValue()+")"); } } //Run the callbacks on SchemaAware now that everything else is done for (SchemaAware aware : schemaAware) { aware.inform(this); } } catch (SolrException e) { SolrConfig.severeErrors.add( e ); throw e; } catch(Exception e) { // unexpected exception... 
SolrConfig.severeErrors.add( e ); throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,"Schema Parsing Failed",e,false); } // create the field analyzers refreshAnalyzers(); } private void addDynamicField(List<DynamicField> dFields, SchemaField f) { boolean dup = isDuplicateDynField(dFields, f); if( !dup ) { addDynamicFieldNoDupCheck(dFields, f); } else { String msg = "[schema.xml] Duplicate DynamicField definition for '" + f.getName() + "'"; SolrException t = new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg); SolrException.logOnce(log, null, t); SolrConfig.severeErrors.add(t); throw t; } } /** * Register one or more new Dynamic Field with the Schema. * @param f The {@link org.apache.solr.schema.SchemaField} */ public void registerDynamicField(SchemaField ... f) { List<DynamicField> dynFields = new ArrayList<DynamicField>(Arrays.asList(dynamicFields)); for (SchemaField field : f) { if (isDuplicateDynField(dynFields, field) == false) { log.debug("dynamic field creation for schema field: " + field.getName()); addDynamicFieldNoDupCheck(dynFields, field); } else { log.debug("dynamic field already exists: dynamic field: [" + field.getName() + "]"); } } Collections.sort(dynFields); dynamicFields = dynFields.toArray(new DynamicField[dynFields.size()]); } private void addDynamicFieldNoDupCheck(List<DynamicField> dFields, SchemaField f) { dFields.add(new DynamicField(f)); log.debug("dynamic field defined: " + f); } private boolean isDuplicateDynField(List<DynamicField> dFields, SchemaField f) { for( DynamicField df : dFields ) { if( df.regex.equals( f.name ) ) return true; } return false; } public void registerCopyField( String source, String dest ) { registerCopyField(source, dest, CopyField.UNLIMITED); } /** * <p> * NOTE: this function is not thread safe. However, it is safe to use within the standard * <code>inform( SolrCore core )</code> function for <code>SolrCoreAware</code> classes. 
* Outside <code>inform</code>, this could potentially throw a ConcurrentModificationException * </p> * * @see SolrCoreAware */ public void registerCopyField( String source, String dest, int maxChars ) { boolean sourceIsPattern = isWildCard(source); boolean destIsPattern = isWildCard(dest); log.debug("copyField source='"+source+"' dest='"+dest+"' maxChars='"+maxChars); SchemaField d = getFieldOrNull(dest); if(d == null){ throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "copyField destination :'"+dest+"' does not exist" ); } if(sourceIsPattern) { if( destIsPattern ) { DynamicField df = null; for( DynamicField dd : dynamicFields ) { if( dd.regex.equals( dest ) ) { df = dd; break; } } if( df == null ) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "copyField dynamic destination must match a dynamicField." ); } registerDynamicCopyField(new DynamicDestCopy(source, df, maxChars )); } else { registerDynamicCopyField(new DynamicCopy(source, d, maxChars)); } } else if( destIsPattern ) { String msg = "copyField only supports a dynamic destination if the source is also dynamic" ; throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, msg ); } else { // retrieve the field to force an exception if it doesn't exist SchemaField f = getField(source); List<CopyField> copyFieldList = copyFieldsMap.get(source); if (copyFieldList == null) { copyFieldList = new ArrayList<CopyField>(); copyFieldsMap.put(source, copyFieldList); } copyFieldList.add(new CopyField(f, d, maxChars)); copyFieldTargetCounts.put(d, (copyFieldTargetCounts.containsKey(d) ? 
copyFieldTargetCounts.get(d) + 1 : 1)); } } private void registerDynamicCopyField( DynamicCopy dcopy ) { if( dynamicCopyFields == null ) { dynamicCopyFields = new DynamicCopy[] {dcopy}; } else { DynamicCopy[] temp = new DynamicCopy[dynamicCopyFields.length+1]; System.arraycopy(dynamicCopyFields,0,temp,0,dynamicCopyFields.length); temp[temp.length -1] = dcopy; dynamicCopyFields = temp; } log.trace("Dynamic Copy Field:" + dcopy ); } private static Object[] append(Object[] orig, Object item) { Object[] newArr = (Object[])java.lang.reflect.Array.newInstance(orig.getClass().getComponentType(), orig.length+1); System.arraycopy(orig, 0, newArr, 0, orig.length); newArr[orig.length] = item; return newArr; } // // <analyzer><tokenizer class="...."/><tokenizer class="...." arg="...."> // // private Analyzer readAnalyzer(Node node) throws XPathExpressionException { // parent node used to be passed in as "fieldtype" // if (!fieldtype.hasChildNodes()) return null; // Node node = DOMUtil.getChild(fieldtype,"analyzer"); if (node == null) return null; NamedNodeMap attrs = node.getAttributes(); String analyzerName = DOMUtil.getAttr(attrs,"class"); if (analyzerName != null) { // No need to be core-aware as Analyzers are not in the core-aware list final Class<? extends Analyzer> clazz = loader.findClass(analyzerName).asSubclass(Analyzer.class); try { try { // first try to use a ctor with version parameter (needed for many new Analyzers that have no default one anymore) Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class); final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM); final Version luceneMatchVersion = (matchVersionStr == null) ? 
solrConfig.luceneMatchVersion : Config.parseLuceneVersionString(matchVersionStr); if (luceneMatchVersion == null) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Configuration Error: Analyzer '" + clazz.getName() + "' needs a 'luceneMatchVersion' parameter"); } return cnstr.newInstance(luceneMatchVersion); } catch (NoSuchMethodException nsme) { // otherwise use default ctor return clazz.newInstance(); } } catch (Exception e) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Cannot load analyzer: "+analyzerName ); } } XPath xpath = XPathFactory.newInstance().newXPath(); // Load the CharFilters // -------------------------------------------------------------------------------- final ArrayList<CharFilterFactory> charFilters = new ArrayList<CharFilterFactory>(); AbstractPluginLoader<CharFilterFactory> charFilterLoader = new AbstractPluginLoader<CharFilterFactory>( "[schema.xml] analyzer/charFilter", false, false ) { @Override protected void init(CharFilterFactory plugin, Node node) throws Exception { if( plugin != null ) { final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class"); // copy the luceneMatchVersion from config, if not set if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM)) params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString()); plugin.init( params ); charFilters.add( plugin ); } } @Override protected CharFilterFactory register(String name, CharFilterFactory plugin) throws Exception { return null; // used for map registration } }; charFilterLoader.load( solrConfig.getResourceLoader(), (NodeList)xpath.evaluate("./charFilter", node, XPathConstants.NODESET) ); // Load the Tokenizer // Although an analyzer only allows a single Tokenizer, we load a list to make sure // the configuration is ok // -------------------------------------------------------------------------------- final ArrayList<TokenizerFactory> tokenizers = new ArrayList<TokenizerFactory>(1); 
AbstractPluginLoader<TokenizerFactory> tokenizerLoader = new AbstractPluginLoader<TokenizerFactory>( "[schema.xml] analyzer/tokenizer", false, false ) { @Override protected void init(TokenizerFactory plugin, Node node) throws Exception { if( !tokenizers.isEmpty() ) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "The schema defines multiple tokenizers for: "+node ); } final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class"); // copy the luceneMatchVersion from config, if not set if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM)) params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString()); plugin.init( params ); tokenizers.add( plugin ); } @Override protected TokenizerFactory register(String name, TokenizerFactory plugin) throws Exception { return null; // used for map registration } }; tokenizerLoader.load( loader, (NodeList)xpath.evaluate("./tokenizer", node, XPathConstants.NODESET) ); // Make sure something was loaded if( tokenizers.isEmpty() ) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,"analyzer without class or tokenizer & filter list"); } // Load the Filters // -------------------------------------------------------------------------------- final ArrayList<TokenFilterFactory> filters = new ArrayList<TokenFilterFactory>(); AbstractPluginLoader<TokenFilterFactory> filterLoader = new AbstractPluginLoader<TokenFilterFactory>( "[schema.xml] analyzer/filter", false, false ) { @Override protected void init(TokenFilterFactory plugin, Node node) throws Exception { if( plugin != null ) { final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(),"class"); // copy the luceneMatchVersion from config, if not set if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM)) params.put(LUCENE_MATCH_VERSION_PARAM, solrConfig.luceneMatchVersion.toString()); plugin.init( params ); filters.add( plugin ); } } @Override protected TokenFilterFactory register(String name, TokenFilterFactory 
plugin) throws Exception { return null; // used for map registration } }; filterLoader.load( loader, (NodeList)xpath.evaluate("./filter", node, XPathConstants.NODESET) ); return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]), tokenizers.get(0), filters.toArray(new TokenFilterFactory[filters.size()])); }; static abstract class DynamicReplacement implements Comparable<DynamicReplacement> { final static int STARTS_WITH=1; final static int ENDS_WITH=2; final String regex; final int type; final String str; protected DynamicReplacement(String regex) { this.regex = regex; if (regex.startsWith("*")) { type=ENDS_WITH; str=regex.substring(1); } else if (regex.endsWith("*")) { type=STARTS_WITH; str=regex.substring(0,regex.length()-1); } else { throw new RuntimeException("dynamic field name must start or end with *"); } } public boolean matches(String name) { if (type==STARTS_WITH && name.startsWith(str)) return true; else if (type==ENDS_WITH && name.endsWith(str)) return true; else return false; } /** * Sort order is based on length of regex. Longest comes first. * @param other The object to compare to. * @return a negative integer, zero, or a positive integer * as this object is less than, equal to, or greater than * the specified object. */ public int compareTo(DynamicReplacement other) { return other.regex.length() - regex.length(); } } // // Instead of storing a type, this could be implemented as a hierarchy // with a virtual matches(). // Given how often a search will be done, however, speed is the overriding // concern and I'm not sure which is faster. // final static class DynamicField extends DynamicReplacement { final SchemaField prototype; DynamicField(SchemaField prototype) { super(prototype.name); this.prototype=prototype; } SchemaField makeSchemaField(String name) { // could have a cache instead of returning a new one each time, but it might // not be worth it. 
// Actually, a higher level cache could be worth it to avoid too many // .startsWith() and .endsWith() comparisons. it depends on how many // dynamic fields there are. return new SchemaField(prototype, name); } public String toString() { return prototype.toString(); } } static class DynamicCopy extends DynamicReplacement { final SchemaField targetField; final int maxChars; DynamicCopy(String regex, SchemaField targetField) { this(regex, targetField, CopyField.UNLIMITED); } DynamicCopy(String regex, SchemaField targetField, int maxChars) { super(regex); this.targetField = targetField; this.maxChars = maxChars; } public SchemaField getTargetField( String sourceField ) { return targetField; } @Override public String toString() { return targetField.toString(); } } static class DynamicDestCopy extends DynamicCopy { final DynamicField dynamic; final int dtype; final String dstr; DynamicDestCopy(String source, DynamicField dynamic) { this(source, dynamic, CopyField.UNLIMITED); } DynamicDestCopy(String source, DynamicField dynamic, int maxChars) { super(source, dynamic.prototype, maxChars); this.dynamic = dynamic; String dest = dynamic.regex; if (dest.startsWith("*")) { dtype=ENDS_WITH; dstr=dest.substring(1); } else if (dest.endsWith("*")) { dtype=STARTS_WITH; dstr=dest.substring(0,dest.length()-1); } else { throw new RuntimeException("dynamic copyField destination name must start or end with *"); } } @Override public SchemaField getTargetField( String sourceField ) { String dyn = ( type==STARTS_WITH ) ? sourceField.substring( str.length() ) : sourceField.substring( 0, sourceField.length()-str.length() ); String name = (dtype==STARTS_WITH) ? 
(dstr+dyn) : (dyn+dstr); return dynamic.makeSchemaField( name ); } @Override public String toString() { return targetField.toString(); } } public SchemaField[] getDynamicFieldPrototypes() { SchemaField[] df = new SchemaField[dynamicFields.length]; for (int i=0;i<dynamicFields.length;i++) { df[i] = dynamicFields[i].prototype; } return df; } public String getDynamicPattern(String fieldName) { for (DynamicField df : dynamicFields) { if (df.matches(fieldName)) return df.regex; } return null; } /** * Does the schema have the specified field defined explicitly, i.e. * not as a result of a copyField declaration with a wildcard? We * consider it explicitly defined if it matches a field or dynamicField * declaration. * @param fieldName * @return true if explicitly declared in the schema. */ public boolean hasExplicitField(String fieldName) { if(fields.containsKey(fieldName)) { return true; } for (DynamicField df : dynamicFields) { if (df.matches(fieldName)) return true; } return false; } /** * Is the specified field dynamic or not. * @param fieldName * @return true if the specified field is dynamic */ public boolean isDynamicField(String fieldName) { if(fields.containsKey(fieldName)) { return false; } for (DynamicField df : dynamicFields) { if (df.matches(fieldName)) return true; } return false; } /** * Returns the SchemaField that should be used for the specified field name, or * null if none exists. * * @param fieldName may be an explicitly defined field or a name that * matches a dynamic field. 
* @see #getFieldType * @see #getField(String) * @return The {@link org.apache.solr.schema.SchemaField} */ public SchemaField getFieldOrNull(String fieldName) { SchemaField f = fields.get(fieldName); if (f != null) return f; for (DynamicField df : dynamicFields) { if (df.matches(fieldName)) return df.makeSchemaField(fieldName); } return f; } /** * Returns the SchemaField that should be used for the specified field name * * @param fieldName may be an explicitly defined field or a name that * matches a dynamic field. * @throws SolrException if no such field exists * @see #getFieldType * @see #getFieldOrNull(String) * @return The {@link SchemaField} */ public SchemaField getField(String fieldName) { SchemaField f = getFieldOrNull(fieldName); if (f != null) return f; // Hmmm, default field could also be implemented with a dynamic field of "*". // It would have to be special-cased and only used if nothing else matched. /*** REMOVED -YCS if (defaultFieldType != null) return new SchemaField(fieldName,defaultFieldType); ***/ throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"undefined field "+fieldName); } /** * Returns the FieldType for the specified field name. * * <p> * This method exists because it can be more efficient then * {@link #getField} for dynamic fields if a full SchemaField isn't needed. * </p> * * @param fieldName may be an explicitly created field, or a name that * excercies a dynamic field. * @throws SolrException if no such field exists * @see #getField(String) * @see #getFieldTypeNoEx */ public FieldType getFieldType(String fieldName) { SchemaField f = fields.get(fieldName); if (f != null) return f.getType(); return getDynamicFieldType(fieldName); } /** * Given the name of a {@link org.apache.solr.schema.FieldType} (not to be confused with {@link #getFieldType(String)} which * takes in the name of a field), return the {@link org.apache.solr.schema.FieldType}. 
* @param fieldTypeName The name of the {@link org.apache.solr.schema.FieldType} * @return The {@link org.apache.solr.schema.FieldType} or null. */ public FieldType getFieldTypeByName(String fieldTypeName){ return fieldTypes.get(fieldTypeName); } /** * Returns the FieldType for the specified field name. * * <p> * This method exists because it can be more efficient then * {@link #getField} for dynamic fields if a full SchemaField isn't needed. * </p> * * @param fieldName may be an explicitly created field, or a name that * excercies a dynamic field. * @return null if field is not defined. * @see #getField(String) * @see #getFieldTypeNoEx */ public FieldType getFieldTypeNoEx(String fieldName) { SchemaField f = fields.get(fieldName); if (f != null) return f.getType(); return dynFieldType(fieldName); } /** * Returns the FieldType of the best matching dynamic field for * the specified field name * * @param fieldName may be an explicitly created field, or a name that * excercies a dynamic field. * @throws SolrException if no such field exists * @see #getField(String) * @see #getFieldTypeNoEx */ public FieldType getDynamicFieldType(String fieldName) { for (DynamicField df : dynamicFields) { if (df.matches(fieldName)) return df.prototype.getType(); } throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"undefined field "+fieldName); } private FieldType dynFieldType(String fieldName) { for (DynamicField df : dynamicFields) { if (df.matches(fieldName)) return df.prototype.getType(); } return null; }; /** * Get all copy fields, both the static and the dynamic ones. 
* @param destField * @return Array of fields copied into this field */ public SchemaField[] getCopySources(String destField) { SchemaField f = getField(destField); if (!isCopyFieldTarget(f)) { return new SchemaField[0]; } List<SchemaField> sf = new ArrayList<SchemaField>(); for (Map.Entry<String, List<CopyField>> cfs : copyFieldsMap.entrySet()) { for (CopyField copyField : cfs.getValue()) { if (copyField.getDestination().getName().equals(destField)) { sf.add(copyField.getSource()); } } } return sf.toArray(new SchemaField[sf.size()]); } /** * Get all copy fields, both the static and the dynamic ones. * * @param sourceField * @return Array of fields to copy to. * @deprecated Use {@link #getCopyFieldsList(String)} instead. */ @Deprecated public SchemaField[] getCopyFields(String sourceField) { // This is the List that holds all the results, dynamic or not. List<SchemaField> matchCopyFields = new ArrayList<SchemaField>(); // Get the dynamic results into the list. for(DynamicCopy dynamicCopy : dynamicCopyFields) { if(dynamicCopy.matches(sourceField)) { matchCopyFields.add(dynamicCopy.getTargetField(sourceField)); } } // Get the fixed ones, if there are any and add them. final List<CopyField> copyFields = copyFieldsMap.get(sourceField); if (copyFields!=null) { final Iterator<CopyField> it = copyFields.iterator(); while (it.hasNext()) { matchCopyFields.add(it.next().getDestination()); } } // Construct the results by transforming the list into an array. return matchCopyFields.toArray(new SchemaField[matchCopyFields.size()]); } /** * Get all copy fields for a specified source field, both static * and dynamic ones. * @param sourceField * @return List of CopyFields to copy to. 
* @since solr 1.4 */ // This is useful when we need the maxSize param of each CopyField public List<CopyField> getCopyFieldsList(final String sourceField){ final List<CopyField> result = new ArrayList<CopyField>(); for (DynamicCopy dynamicCopy : dynamicCopyFields) { if (dynamicCopy.matches(sourceField)) { result.add(new CopyField(getField(sourceField), dynamicCopy.getTargetField(sourceField), dynamicCopy.maxChars)); } } List<CopyField> fixedCopyFields = copyFieldsMap.get(sourceField); if (fixedCopyFields != null) { result.addAll(fixedCopyFields); } return result; } /** * Check if a field is used as the destination of a copyField operation * * @since solr 1.3 */ public boolean isCopyFieldTarget( SchemaField f ) { return copyFieldTargetCounts.containsKey( f ); } /** * Is the given field name a wildcard? I.e. does it begin or end with *? * @param name * @return true/false */ private static boolean isWildCard(String name) { return name.startsWith("*") || name.endsWith("*"); } }