package org.molgenis.data.elasticsearch.index;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.molgenis.data.elasticsearch.util.MapperTypeSanitizer;
import org.molgenis.data.meta.AttributeType;
import org.molgenis.data.meta.model.Attribute;
import org.molgenis.data.meta.model.EntityType;
import java.io.IOException;
import static java.lang.String.format;
/**
* Builds mappings for a documentType. For each column a multi_field is created, one analyzed for searching and one
* not_analyzed for sorting
*
* @author erwin
*/
public class MappingsBuilder
{
public static final String FIELD_NOT_ANALYZED = "raw";
public static final String FIELD_NGRAM_ANALYZED = "ngram";
private MappingsBuilder()
{
}
/**
* Creates a Elasticsearch mapping for the given entity meta data
*
* @param jsonBuilder {@link XContentBuilder} to write the mapping to
* @param entityType {@link EntityType} for the entity to map
* @throws IOException writing to JSON builder
*/
public static void buildMapping(XContentBuilder jsonBuilder, EntityType entityType, boolean enableNorms,
boolean createAllIndex) throws IOException
{
String documentType = MapperTypeSanitizer.sanitizeMapperType(entityType.getName());
jsonBuilder.startObject().startObject(documentType);
jsonBuilder.startObject("_source").field("enabled", false).endObject();
jsonBuilder.startObject("properties");
for (Attribute attr : entityType.getAtomicAttributes())
{
createAttributeMapping(attr, enableNorms, createAllIndex, true, true, jsonBuilder);
}
jsonBuilder.endObject();
jsonBuilder.endObject().endObject();
}
// TODO discuss: use null_value for nillable attributes?
private static void createAttributeMapping(Attribute attr, boolean enableNorms, boolean createAllIndex,
boolean nestRefs, boolean enableNgramAnalyzer, XContentBuilder jsonBuilder) throws IOException
{
String attrName = attr.getName();
jsonBuilder.startObject(attrName);
createAttributeMappingContents(attr, enableNorms, createAllIndex, nestRefs, enableNgramAnalyzer, jsonBuilder);
jsonBuilder.endObject();
}
private static void createAttributeMappingContents(Attribute attr, boolean enableNorms, boolean createAllIndex,
boolean nestRefs, boolean enableNgramAnalyzer, XContentBuilder jsonBuilder) throws IOException
{
AttributeType dataType = attr.getDataType();
switch (dataType)
{
case BOOL:
jsonBuilder.field("type", "boolean");
// disable norms for numeric fields
// note: https://github.com/elasticsearch/elasticsearch/issues/5502
jsonBuilder.field("norms").startObject().field("enabled", false).endObject();
break;
case CATEGORICAL:
case CATEGORICAL_MREF:
case FILE:
case MREF:
case ONE_TO_MANY:
case XREF:
EntityType refEntity = attr.getRefEntity();
if (nestRefs)
{
jsonBuilder.field("type", "nested");
jsonBuilder.field("norms").startObject().field("enabled", enableNorms).endObject();
jsonBuilder.startObject("properties");
for (Attribute refAttr : refEntity.getAtomicAttributes())
{
createAttributeMapping(refAttr, enableNorms, createAllIndex, false, true, jsonBuilder);
}
jsonBuilder.endObject();
}
else
{
createAttributeMappingContents(refEntity.getLabelAttribute(), enableNorms, createAllIndex, false,
enableNgramAnalyzer, jsonBuilder);
}
break;
case COMPOUND:
throw new UnsupportedOperationException();
case DATE:
jsonBuilder.field("type", "date").field("format", "date");
// disable norms for numeric fields
jsonBuilder.field("norms").startObject().field("enabled", false).endObject();
// not-analyzed field for aggregation
// note: the include_in_all setting is ignored on any field that is defined in the fields options
// note: the norms settings defaults to false for not_analyzed fields
jsonBuilder.startObject("fields").startObject(FIELD_NOT_ANALYZED).field("type", "string")
.field("index", "not_analyzed").endObject().endObject();
break;
case DATE_TIME:
jsonBuilder.field("type", "date").field("format", "date_time_no_millis");
// disable norms for numeric fields
jsonBuilder.field("norms").startObject().field("enabled", false).endObject();
// not-analyzed field for aggregation
// note: the include_in_all setting is ignored on any field that is defined in the fields options
// note: the norms settings defaults to false for not_analyzed fields
jsonBuilder.startObject("fields").startObject(FIELD_NOT_ANALYZED).field("type", "string")
.field("index", "not_analyzed").endObject().endObject();
break;
case DECIMAL:
jsonBuilder.field("type", "double");
// disable norms for numeric fields
jsonBuilder.field("norms").startObject().field("enabled", false).endObject();
break;
case INT:
jsonBuilder.field("type", "integer");
// Fix sorting by using disk-based "fielddata" instead of in-memory "fielddata"
jsonBuilder.field("doc_values", true);
// disable norms for numeric fields
jsonBuilder.field("norms").startObject().field("enabled", false).endObject();
break;
case LONG:
jsonBuilder.field("type", "long");
// disable norms for numeric fields
jsonBuilder.field("norms").startObject().field("enabled", false).endObject();
break;
case EMAIL:
case ENUM:
case HYPERLINK:
case STRING:
case TEXT:
// enable/disable norms based on given value
jsonBuilder.field("type", "string");
jsonBuilder.field("norms").startObject().field("enabled", enableNorms).endObject();
// not-analyzed field for sorting and wildcard queries
// note: the include_in_all setting is ignored on any field that is defined in the fields options
// note: the norms settings defaults to false for not_analyzed fields
XContentBuilder fieldsObject = jsonBuilder.startObject("fields").startObject(FIELD_NOT_ANALYZED)
.field("type", "string").field("index", "not_analyzed").endObject();
if (enableNgramAnalyzer)
{
// add ngram analyzer (not applied to nested documents)
fieldsObject.startObject(FIELD_NGRAM_ANALYZED).field("type", "string")
.field("analyzer", ElasticsearchIndexCreator.NGRAM_ANALYZER).endObject();
}
fieldsObject.endObject();
break;
case HTML:
case SCRIPT:
// enable/disable norms based on given value
jsonBuilder.field("type", "string");
jsonBuilder.field("norms").startObject().field("enabled", enableNorms).endObject();
// not-analyzed field for sorting and wildcard queries
// note: the include_in_all setting is ignored on any field that is defined in the fields options
// note: the norms settings defaults to false for not_analyzed fields
jsonBuilder.startObject("fields").startObject(FIELD_NOT_ANALYZED).field("type", "string")
.field("index", "not_analyzed").endObject().endObject();
break;
default:
throw new RuntimeException(format("Unknown data type [%s]", dataType.toString()));
}
jsonBuilder.field("include_in_all", createAllIndex && attr.isVisible());
}
}