/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.index.mapper; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.Query; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; import java.io.IOException; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import static org.elasticsearch.index.mapper.TypeParsers.parseTextField; /** A {@link FieldMapper} for full-text fields. */ public class TextFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "text"; private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1; public static class Defaults { public static double FIELDDATA_MIN_FREQUENCY = 0; public static double FIELDDATA_MAX_FREQUENCY = Integer.MAX_VALUE; public static int FIELDDATA_MIN_SEGMENT_SIZE = 0; public static final MappedFieldType FIELD_TYPE = new TextFieldType(); static { FIELD_TYPE.freeze(); } /** * The default position_increment_gap is set to 100 so that phrase * queries of reasonably high slop will not match across field values. */ public static final int POSITION_INCREMENT_GAP = 100; } public static class Builder extends FieldMapper.Builder<Builder, TextFieldMapper> { private int positionIncrementGap = POSITION_INCREMENT_GAP_USE_ANALYZER; public Builder(String name) { super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); builder = this; } @Override public TextFieldType fieldType() { return (TextFieldType) super.fieldType(); } public Builder positionIncrementGap(int positionIncrementGap) { if (positionIncrementGap < 0) { throw new MapperParsingException("[positions_increment_gap] must be positive, got " + positionIncrementGap); } this.positionIncrementGap = positionIncrementGap; return this; } public Builder fielddata(boolean fielddata) { fieldType().setFielddata(fielddata); return builder; } @Override public Builder docValues(boolean docValues) { if (docValues) { throw new IllegalArgumentException("[text] fields do not support doc values"); } return super.docValues(docValues); } public Builder eagerGlobalOrdinals(boolean eagerGlobalOrdinals) { fieldType().setEagerGlobalOrdinals(eagerGlobalOrdinals); return builder; } public Builder fielddataFrequencyFilter(double minFreq, double maxFreq, int minSegmentSize) { fieldType().setFielddataMinFrequency(minFreq); fieldType().setFielddataMaxFrequency(maxFreq); fieldType().setFielddataMinSegmentSize(minSegmentSize); return builder; } @Override public TextFieldMapper build(BuilderContext context) { if (positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) { if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { throw new IllegalArgumentException("Cannot set position_increment_gap on field [" + name + "] without positions enabled"); } fieldType.setIndexAnalyzer(new NamedAnalyzer(fieldType.indexAnalyzer(), positionIncrementGap)); fieldType.setSearchAnalyzer(new NamedAnalyzer(fieldType.searchAnalyzer(), positionIncrementGap)); fieldType.setSearchQuoteAnalyzer(new NamedAnalyzer(fieldType.searchQuoteAnalyzer(), positionIncrementGap)); } setupFieldType(context); return new TextFieldMapper( name, fieldType, defaultFieldType, positionIncrementGap, includeInAll, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo); } } public static class TypeParser implements Mapper.TypeParser { @Override public Mapper.Builder parse(String fieldName, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException { TextFieldMapper.Builder builder = new TextFieldMapper.Builder(fieldName); builder.fieldType().setIndexAnalyzer(parserContext.getIndexAnalyzers().getDefaultIndexAnalyzer()); builder.fieldType().setSearchAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchAnalyzer()); builder.fieldType().setSearchQuoteAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchQuoteAnalyzer()); parseTextField(builder, fieldName, node, parserContext); for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) { Map.Entry<String, Object> entry = iterator.next(); String propName = entry.getKey(); Object propNode = entry.getValue(); if (propName.equals("position_increment_gap")) { int newPositionIncrementGap = XContentMapValues.nodeIntegerValue(propNode, -1); builder.positionIncrementGap(newPositionIncrementGap); iterator.remove(); } else if (propName.equals("fielddata")) { builder.fielddata(XContentMapValues.nodeBooleanValue(propNode, "fielddata")); iterator.remove(); } else if (propName.equals("eager_global_ordinals")) { builder.eagerGlobalOrdinals(XContentMapValues.nodeBooleanValue(propNode, "eager_global_ordinals")); iterator.remove(); } else if (propName.equals("fielddata_frequency_filter")) { Map<?,?> frequencyFilter = (Map<?, ?>) propNode; double minFrequency = XContentMapValues.nodeDoubleValue(frequencyFilter.remove("min"), 0); double maxFrequency = XContentMapValues.nodeDoubleValue(frequencyFilter.remove("max"), Integer.MAX_VALUE); int minSegmentSize = XContentMapValues.nodeIntegerValue(frequencyFilter.remove("min_segment_size"), 0); builder.fielddataFrequencyFilter(minFrequency, maxFrequency, minSegmentSize); DocumentMapperParser.checkNoRemainingFields(propName, frequencyFilter, parserContext.indexVersionCreated()); iterator.remove(); } } return builder; } } public static final class TextFieldType extends StringFieldType { private boolean fielddata; private double fielddataMinFrequency; private double fielddataMaxFrequency; private int fielddataMinSegmentSize; public TextFieldType() { setTokenized(true); fielddata = false; fielddataMinFrequency = Defaults.FIELDDATA_MIN_FREQUENCY; fielddataMaxFrequency = Defaults.FIELDDATA_MAX_FREQUENCY; fielddataMinSegmentSize = Defaults.FIELDDATA_MIN_SEGMENT_SIZE; } protected TextFieldType(TextFieldType ref) { super(ref); this.fielddata = ref.fielddata; this.fielddataMinFrequency = ref.fielddataMinFrequency; this.fielddataMaxFrequency = ref.fielddataMaxFrequency; this.fielddataMinSegmentSize = ref.fielddataMinSegmentSize; } public TextFieldType clone() { return new TextFieldType(this); } @Override public boolean equals(Object o) { if (super.equals(o) == false) { return false; } TextFieldType that = (TextFieldType) o; return fielddata == that.fielddata && fielddataMinFrequency == that.fielddataMinFrequency && fielddataMaxFrequency == that.fielddataMaxFrequency && fielddataMinSegmentSize == that.fielddataMinSegmentSize; } @Override public int hashCode() { return Objects.hash(super.hashCode(), fielddata, fielddataMinFrequency, fielddataMaxFrequency, fielddataMinSegmentSize); } @Override public void checkCompatibility(MappedFieldType other, List<String> conflicts, boolean strict) { super.checkCompatibility(other, conflicts, strict); TextFieldType otherType = (TextFieldType) other; if (strict) { if (fielddata() != otherType.fielddata()) { conflicts.add("mapper [" + name() + "] is used by multiple types. Set update_all_types to true to update [fielddata] " + "across all types."); } if (fielddataMinFrequency() != otherType.fielddataMinFrequency()) { conflicts.add("mapper [" + name() + "] is used by multiple types. Set update_all_types to true to update " + "[fielddata_frequency_filter.min] across all types."); } if (fielddataMaxFrequency() != otherType.fielddataMaxFrequency()) { conflicts.add("mapper [" + name() + "] is used by multiple types. Set update_all_types to true to update " + "[fielddata_frequency_filter.max] across all types."); } if (fielddataMinSegmentSize() != otherType.fielddataMinSegmentSize()) { conflicts.add("mapper [" + name() + "] is used by multiple types. Set update_all_types to true to update " + "[fielddata_frequency_filter.min_segment_size] across all types."); } } } public boolean fielddata() { return fielddata; } public void setFielddata(boolean fielddata) { checkIfFrozen(); this.fielddata = fielddata; } public double fielddataMinFrequency() { return fielddataMinFrequency; } public void setFielddataMinFrequency(double fielddataMinFrequency) { checkIfFrozen(); this.fielddataMinFrequency = fielddataMinFrequency; } public double fielddataMaxFrequency() { return fielddataMaxFrequency; } public void setFielddataMaxFrequency(double fielddataMaxFrequency) { checkIfFrozen(); this.fielddataMaxFrequency = fielddataMaxFrequency; } public int fielddataMinSegmentSize() { return fielddataMinSegmentSize; } public void setFielddataMinSegmentSize(int fielddataMinSegmentSize) { checkIfFrozen(); this.fielddataMinSegmentSize = fielddataMinSegmentSize; } @Override public String typeName() { return CONTENT_TYPE; } @Override public Query nullValueQuery() { if (nullValue() == null) { return null; } return termQuery(nullValue(), null); } @Override public IndexFieldData.Builder fielddataBuilder() { if (fielddata == false) { throw new IllegalArgumentException("Fielddata is disabled on text fields by default. Set fielddata=true on [" + name() + "] in order to load fielddata in memory by uninverting the inverted index. Note that this can however " + "use significant memory. Alternatively use a keyword field instead."); } return new PagedBytesIndexFieldData.Builder(fielddataMinFrequency, fielddataMaxFrequency, fielddataMinSegmentSize); } } private Boolean includeInAll; private int positionIncrementGap; protected TextFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, int positionIncrementGap, Boolean includeInAll, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) { super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo); assert fieldType.tokenized(); assert fieldType.hasDocValues() == false; if (fieldType().indexOptions() == IndexOptions.NONE && fieldType().fielddata()) { throw new IllegalArgumentException("Cannot enable fielddata on a [text] field that is not indexed: [" + name() + "]"); } this.positionIncrementGap = positionIncrementGap; this.includeInAll = includeInAll; } @Override protected TextFieldMapper clone() { return (TextFieldMapper) super.clone(); } // pkg-private for testing Boolean includeInAll() { return includeInAll; } public int getPositionIncrementGap() { return this.positionIncrementGap; } @Override protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException { final String value; if (context.externalValueSet()) { value = context.externalValue().toString(); } else { value = context.parser().textOrNull(); } if (value == null) { return; } if (context.includeInAll(includeInAll, this)) { context.allEntries().addText(fieldType().name(), value, fieldType().boost()); } if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) { Field field = new Field(fieldType().name(), value, fieldType()); fields.add(field); } } @Override protected String contentType() { return CONTENT_TYPE; } @Override protected void doMerge(Mapper mergeWith, boolean updateAllTypes) { super.doMerge(mergeWith, updateAllTypes); this.includeInAll = ((TextFieldMapper) mergeWith).includeInAll; } @Override public TextFieldType fieldType() { return (TextFieldType) super.fieldType(); } @Override protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { super.doXContentBody(builder, includeDefaults, params); doXContentAnalyzers(builder, includeDefaults); if (includeInAll != null) { builder.field("include_in_all", includeInAll); } else if (includeDefaults) { builder.field("include_in_all", true); } if (includeDefaults || positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) { builder.field("position_increment_gap", positionIncrementGap); } if (includeDefaults || fieldType().fielddata() != ((TextFieldType) defaultFieldType).fielddata()) { builder.field("fielddata", fieldType().fielddata()); } if (fieldType().fielddata()) { if (includeDefaults || fieldType().fielddataMinFrequency() != Defaults.FIELDDATA_MIN_FREQUENCY || fieldType().fielddataMaxFrequency() != Defaults.FIELDDATA_MAX_FREQUENCY || fieldType().fielddataMinSegmentSize() != Defaults.FIELDDATA_MIN_SEGMENT_SIZE) { builder.startObject("fielddata_frequency_filter"); if (includeDefaults || fieldType().fielddataMinFrequency() != Defaults.FIELDDATA_MIN_FREQUENCY) { builder.field("min", fieldType().fielddataMinFrequency()); } if (includeDefaults || fieldType().fielddataMaxFrequency() != Defaults.FIELDDATA_MAX_FREQUENCY) { builder.field("max", fieldType().fielddataMaxFrequency()); } if (includeDefaults || fieldType().fielddataMinSegmentSize() != Defaults.FIELDDATA_MIN_SEGMENT_SIZE) { builder.field("min_segment_size", fieldType().fielddataMinSegmentSize()); } builder.endObject(); } } } }