/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.index.mapper;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;

import static org.elasticsearch.index.mapper.TypeParsers.parseField;

/**
 * A field mapper for keywords. This mapper accepts strings and indexes them as-is.
 */
public final class KeywordFieldMapper extends FieldMapper {

    public static final String CONTENT_TYPE = "keyword";

    /** Default values for the mapping parameters of a {@code keyword} field. */
    public static class Defaults {
        public static final MappedFieldType FIELD_TYPE = new KeywordFieldType();

        static {
            // Keywords are indexed as single, untokenized terms; norms are not
            // useful for not-analyzed content, and only doc ids are indexed by default.
            FIELD_TYPE.setTokenized(false);
            FIELD_TYPE.setOmitNorms(true);
            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
            FIELD_TYPE.freeze();
        }

        public static final String NULL_VALUE = null;
        public static final int IGNORE_ABOVE = Integer.MAX_VALUE;
    }

    /** Builder for {@link KeywordFieldMapper} instances. */
    public static class Builder extends FieldMapper.Builder<Builder, KeywordFieldMapper> {

        protected String nullValue = Defaults.NULL_VALUE;
        protected int ignoreAbove = Defaults.IGNORE_ABOVE;

        public Builder(String name) {
            super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
            builder = this;
        }

        @Override
        public KeywordFieldType fieldType() {
            return (KeywordFieldType) super.fieldType();
        }

        /**
         * Sets the maximum number of chars an indexed value may have; longer values
         * are silently skipped at parse time. Must not be negative.
         */
        public Builder ignoreAbove(int ignoreAbove) {
            if (ignoreAbove < 0) {
                throw new IllegalArgumentException("[ignore_above] must be positive, got " + ignoreAbove);
            }
            this.ignoreAbove = ignoreAbove;
            return this;
        }

        @Override
        public Builder indexOptions(IndexOptions indexOptions) {
            // Keyword fields index at most docs+freqs; positions/offsets are rejected.
            if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) > 0) {
                throw new IllegalArgumentException("The [keyword] field does not support positions, got [index_options]="
                        + indexOptionToString(indexOptions));
            }
            return super.indexOptions(indexOptions);
        }

        public Builder eagerGlobalOrdinals(boolean eagerGlobalOrdinals) {
            fieldType().setEagerGlobalOrdinals(eagerGlobalOrdinals);
            return builder;
        }

        /**
         * Sets the normalizer. It is also installed as the search analyzer so that
         * query terms are normalized the same way as indexed values.
         */
        public Builder normalizer(NamedAnalyzer normalizer) {
            fieldType().setNormalizer(normalizer);
            fieldType().setSearchAnalyzer(normalizer);
            return builder;
        }

        @Override
        public KeywordFieldMapper build(BuilderContext context) {
            setupFieldType(context);
            return new KeywordFieldMapper(
                    name, fieldType, defaultFieldType, ignoreAbove, includeInAll,
                    context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
        }
    }

    /** Parses the {@code keyword}-specific mapping parameters out of the mapping source. */
    public static class TypeParser implements Mapper.TypeParser {
        @Override
        public Mapper.Builder<?,?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
            KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder(name);
            parseField(builder, name, node, parserContext);
            // Consume (and remove) every parameter this mapper understands; entries
            // left behind in `node` are treated as unknown by the caller.
            for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
                Map.Entry<String, Object> entry = iterator.next();
                String propName = entry.getKey();
                Object propNode = entry.getValue();
                if (propName.equals("null_value")) {
                    if (propNode == null) {
                        throw new MapperParsingException("Property [null_value] cannot be null.");
                    }
                    builder.nullValue(propNode.toString());
                    iterator.remove();
                } else if (propName.equals("ignore_above")) {
                    builder.ignoreAbove(XContentMapValues.nodeIntegerValue(propNode, -1));
                    iterator.remove();
                } else if (propName.equals("norms")) {
                    // `norms: false` in the mapping maps to omitNorms == true
                    builder.omitNorms(XContentMapValues.nodeBooleanValue(propNode, "norms") == false);
                    iterator.remove();
                } else if (propName.equals("eager_global_ordinals")) {
                    builder.eagerGlobalOrdinals(XContentMapValues.nodeBooleanValue(propNode, "eager_global_ordinals"));
                    iterator.remove();
                } else if (propName.equals("normalizer")) {
                    if (propNode != null) {
                        NamedAnalyzer normalizer = parserContext.getIndexAnalyzers().getNormalizer(propNode.toString());
                        if (normalizer == null) {
                            throw new MapperParsingException("normalizer [" + propNode.toString() + "] not found for field [" + name + "]");
                        }
                        builder.normalizer(normalizer);
                    }
                    iterator.remove();
                }
            }
            return builder;
        }
    }

    /**
     * Field type for keyword fields. Adds an optional normalizer on top of the
     * inherited string field type state; the normalizer participates in
     * equality, hashing and compatibility checks.
     */
    public static final class KeywordFieldType extends StringFieldType {

        // Optional normalizer applied at index and search time; null means none configured.
        private NamedAnalyzer normalizer = null;

        public KeywordFieldType() {
            setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
            setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
        }

        protected KeywordFieldType(KeywordFieldType ref) {
            super(ref);
            this.normalizer = ref.normalizer;
        }

        public KeywordFieldType clone() {
            return new KeywordFieldType(this);
        }

        @Override
        public boolean equals(Object o) {
            if (super.equals(o) == false) {
                return false;
            }
            return Objects.equals(normalizer, ((KeywordFieldType) o).normalizer);
        }

        @Override
        public void checkCompatibility(MappedFieldType otherFT, List<String> conflicts, boolean strict) {
            super.checkCompatibility(otherFT, conflicts, strict);
            KeywordFieldType other = (KeywordFieldType) otherFT;
            // Changing the normalizer of an existing field is always reported as a conflict.
            if (Objects.equals(normalizer, other.normalizer) == false) {
                conflicts.add("mapper [" + name() + "] has different [normalizer]");
            }
        }

        @Override
        public int hashCode() {
            return 31 * super.hashCode() + Objects.hashCode(normalizer);
        }

        @Override
        public String typeName() {
            return CONTENT_TYPE;
        }

        public NamedAnalyzer normalizer() {
            return normalizer;
        }

        public void setNormalizer(NamedAnalyzer normalizer) {
            checkIfFrozen();
            this.normalizer = normalizer;
        }

        @Override
        public Query nullValueQuery() {
            if (nullValue() == null) {
                return null;
            }
            return termQuery(nullValue(), null);
        }

        @Override
        public IndexFieldData.Builder fielddataBuilder() {
            failIfNoDocValues();
            return new DocValuesIndexFieldData.Builder();
        }

        @Override
        public Object valueForDisplay(Object value) {
            if (value == null) {
                return null;
            }
            // keywords are internally stored as utf8 bytes
            BytesRef binaryValue = (BytesRef) value;
            return binaryValue.utf8ToString();
        }

        @Override
        protected BytesRef indexedValueForSearch(Object value) {
            if (searchAnalyzer() == Lucene.KEYWORD_ANALYZER) {
                // keyword analyzer with the default attribute source which encodes terms using UTF8
                // in that case we skip normalization, which may be slow if there many terms need to
                // parse (eg. large terms query) since Analyzer.normalize involves things like creating
                // attributes through reflection
                // This if statement will be used whenever a normalizer is NOT configured
                return super.indexedValueForSearch(value);
            }
            if (value == null) {
                return null;
            }
            if (value instanceof BytesRef) {
                value = ((BytesRef) value).utf8ToString();
            }
            return searchAnalyzer().normalize(name(), value.toString());
        }
    }

    // Tri-state: null means "not explicitly configured"; serialized accordingly in doXContentBody.
    private Boolean includeInAll;
    // Values with more chars than this are skipped at parse time (see parseCreateField).
    private int ignoreAbove;

    protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
                                 int ignoreAbove, Boolean includeInAll,
                                 Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
        super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
        // Builder.indexOptions already rejects anything beyond docs+freqs; double-check here.
        assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0;
        this.ignoreAbove = ignoreAbove;
        this.includeInAll = includeInAll;
    }

    /** Values that have more chars than the return value of this method will
     *  be skipped at parsing time. */
    // pkg-private for testing
    int ignoreAbove() {
        return ignoreAbove;
    }

    @Override
    protected KeywordFieldMapper clone() {
        return (KeywordFieldMapper) super.clone();
    }

    @Override
    public KeywordFieldType fieldType() {
        return (KeywordFieldType) super.fieldType();
    }

    // pkg-private for testing
    Boolean includeInAll() {
        return includeInAll;
    }

    @Override
    protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
        String value;
        if (context.externalValueSet()) {
            value = context.externalValue().toString();
        } else {
            XContentParser parser = context.parser();
            if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
                value = fieldType().nullValueAsString();
            } else {
                value = parser.textOrNull();
            }
        }

        // Nulls (with no null_value configured) and over-long values produce no fields at all.
        if (value == null || value.length() > ignoreAbove) {
            return;
        }

        final NamedAnalyzer normalizer = fieldType().normalizer();
        if (normalizer != null) {
            // A normalizer must emit exactly one token for the whole value; zero or
            // multiple tokens indicate a misconfigured analysis chain and are rejected.
            try (TokenStream ts = normalizer.tokenStream(name(), value)) {
                final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                ts.reset();
                if (ts.incrementToken() == false) {
                    throw new IllegalStateException("The normalization token stream is "
                            + "expected to produce exactly 1 token, but got 0 for analyzer "
                            + normalizer + " and input \"" + value + "\"");
                }
                final String newValue = termAtt.toString();
                if (ts.incrementToken()) {
                    throw new IllegalStateException("The normalization token stream is "
                            + "expected to produce exactly 1 token, but got 2+ for analyzer "
                            + normalizer + " and input \"" + value + "\"");
                }
                ts.end();
                value = newValue;
            }
        }

        if (context.includeInAll(includeInAll, this)) {
            context.allEntries().addText(fieldType().name(), value, fieldType().boost());
        }

        // convert to utf8 only once before feeding postings/dv/stored fields
        final BytesRef binaryValue = new BytesRef(value);
        if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
            Field field = new Field(fieldType().name(), binaryValue, fieldType());
            fields.add(field);
        }
        if (fieldType().hasDocValues()) {
            fields.add(new SortedSetDocValuesField(fieldType().name(), binaryValue));
        }
    }

    @Override
    protected String contentType() {
        return CONTENT_TYPE;
    }

    @Override
    protected void doMerge(Mapper mergeWith, boolean updateAllTypes) {
        super.doMerge(mergeWith, updateAllTypes);
        // On mapping update, take the incoming mapper's values for the updatable parameters.
        this.includeInAll = ((KeywordFieldMapper) mergeWith).includeInAll;
        this.ignoreAbove = ((KeywordFieldMapper) mergeWith).ignoreAbove;
    }

    @Override
    protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
        super.doXContentBody(builder, includeDefaults, params);

        if (includeDefaults || fieldType().nullValue() != null) {
            builder.field("null_value", fieldType().nullValue());
        }

        // Only emit include_in_all when it was explicitly set, unless defaults are requested.
        if (includeInAll != null) {
            builder.field("include_in_all", includeInAll);
        } else if (includeDefaults) {
            builder.field("include_in_all", true);
        }

        if (includeDefaults || ignoreAbove != Defaults.IGNORE_ABOVE) {
            builder.field("ignore_above", ignoreAbove);
        }

        if (fieldType().normalizer() != null) {
            builder.field("normalizer", fieldType().normalizer().name());
        } else if (includeDefaults) {
            builder.nullField("normalizer");
        }
    }
}