/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.hadoop.serialization.field;

import java.util.ArrayList;
import java.util.List;

import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.serialization.SettingsAware;
import org.elasticsearch.hadoop.serialization.bulk.RawJson;
import org.elasticsearch.hadoop.util.Assert;
import org.elasticsearch.hadoop.util.ObjectUtils;
import org.elasticsearch.hadoop.util.StringUtils;

/**
 * Base class for extractors that resolve an <code>index/type</code> pattern against a target object.
 * <p>
 * Each half of the pattern may mix literal text with <code>{field}</code> or <code>{field|format}</code>
 * placeholders. Placeholders are turned into {@link FieldExtractor}s (created by the subclass through
 * {@link #createFieldExtractor(String)}); when a format is given, the extracted value is additionally
 * passed through the {@link IndexFormatter} named by the settings.
 */
public abstract class AbstractIndexExtractor implements IndexExtractor, SettingsAware {

    /** Separates the field name from its format inside a placeholder, e.g. <code>{field|format}</code>. */
    private static final String FORMAT_SEPARATOR = "|";

    protected Settings settings;
    /** The raw pattern handed to {@link #compile(String)}; used in error messages. */
    protected String pattern;
    /** Whether the compiled pattern contains both <code>{</code> and <code>}</code>. */
    protected boolean hasPattern = false;
    /** Parsed index half of the pattern: a mix of literal Strings and {@link FieldExtractor}s. */
    protected List<Object> index;
    /** Parsed type half of the pattern: a mix of literal Strings and {@link FieldExtractor}s. */
    protected List<Object> type;

    @Override
    public void setSettings(Settings settings) {
        this.settings = settings;
    }

    /**
     * Splits the given pattern on <code>/</code> into its index and type halves and parses each
     * into a template of literals and field extractors.
     *
     * @param pattern resource pattern; must yield exactly two parts when split on <code>/</code>,
     *                otherwise the {@link Assert} checks below fail
     */
    @Override
    public void compile(String pattern) {
        this.pattern = pattern;
        // break it down into index/type
        String[] split = pattern.split("/");
        Assert.isTrue(!ObjectUtils.isEmpty(split), "invalid pattern given " + pattern);
        Assert.isTrue(split.length == 2, "invalid pattern given " + pattern);

        // check pattern
        hasPattern = pattern.contains("{") && pattern.contains("}");
        index = parse(split[0].trim());
        type = parse(split[1].trim());
    }

    /**
     * Breaks one half of the pattern into an ordered template.
     * <p>
     * For every <code>{...}</code> placeholder the literal text preceding it (possibly empty) is added,
     * followed by a {@link FieldExtractor} for the placeholder. Text remaining after the last
     * placeholder is added only if it contains non-whitespace characters.
     *
     * @param string the (trimmed) index or type portion of the pattern
     * @return list whose elements are either literal Strings or {@link FieldExtractor}s
     */
    protected List<Object> parse(String string) {
        // break it down into fields
        List<Object> template = new ArrayList<Object>();
        while (string.contains("{")) {
            int startPattern = string.indexOf("{");
            template.add(string.substring(0, startPattern));
            int endPattern = string.indexOf("}");
            // rejects empty placeholders ("{}") as well as a '}' that is missing or precedes the '{'
            Assert.isTrue(endPattern > startPattern + 1, "Invalid pattern given " + string);
            String nestedString = string.substring(startPattern + 1, endPattern);
            int separator = nestedString.indexOf(FORMAT_SEPARATOR);
            // a separator at position 0 is not treated as a format; the whole content is the field name
            if (separator > 0) {
                Assert.isTrue(nestedString.length() > separator + 1, "Invalid format given " + nestedString);
                String format = nestedString.substring(separator + 1);
                nestedString = nestedString.substring(0, separator);
                template.add(wrapWithFormatter(format, createFieldExtractor(nestedString)));
            }
            else {
                template.add(createFieldExtractor(nestedString));
            }
            // continue with whatever follows the closing brace
            string = string.substring(endPattern + 1).trim();
        }
        if (StringUtils.hasText(string)) {
            template.add(string);
        }
        return template;
    }

    /**
     * Decorates a field extractor so that its value is run through the configured {@link IndexFormatter}.
     * <p>
     * When the wrapped extractor reports {@code NOT_FOUND}, the sentinel is returned as is (rather than
     * being stringified and formatted) so that callers comparing against {@code NOT_FOUND} - such as
     * {@link #append(StringBuilder, List, Object)} - can detect the missing field.
     *
     * @param format               format specification taken from the placeholder (the part after <code>|</code>)
     * @param createFieldExtractor extractor producing the raw field value
     * @return a {@link FieldExtractor} returning the formatted value, or {@code NOT_FOUND}
     */
    private Object wrapWithFormatter(String format, final FieldExtractor createFieldExtractor) {
        // instantiate the formatter declared in the settings
        final IndexFormatter iformatter = ObjectUtils.instantiate(settings.getMappingIndexFormatterClassName(), settings);
        iformatter.configure(format);
        return new FieldExtractor() {
            @Override
            public Object field(Object target) {
                Object field = createFieldExtractor.field(target);
                // propagate the sentinel untouched; formatting its toString() would hide the missing field
                if (field == NOT_FOUND) {
                    return field;
                }

                String string = field.toString();
                // typically a string in JSON so remove the quotes
                if (string.startsWith("\"")) {
                    string = string.substring(1);
                }
                if (string.endsWith("\"")) {
                    string = string.substring(0, string.length() - 1);
                }
                // hack: an index will always be a primitive so just call toString (instead of doing JSON parsing)
                // the returned value is not formatted as JSON since :
                // 1. there's no need (it will be picked up down the chain),
                // 2. date formatter depends on it
                return iformatter.format(string);
            }
        };
    }

    /**
     * Resolves every element of the given template against the target and appends the
     * JSON-encoded result to the builder.
     *
     * @param sb     destination buffer
     * @param list   template produced by {@link #parse(String)}
     * @param target object handed to each {@link FieldExtractor}
     * @throws EsHadoopIllegalArgumentException if an extractor returns {@code NOT_FOUND}
     */
    private void append(StringBuilder sb, List<Object> list, Object target) {
        for (Object object : list) {
            if (object instanceof FieldExtractor) {
                Object field = ((FieldExtractor) object).field(target);
                if (field == NOT_FOUND) {
                    throw new EsHadoopIllegalArgumentException(String.format("Cannot find match for %s", pattern));
                }
                else {
                    sb.append(StringUtils.jsonEncoding(field.toString()));
                }
            }
            else {
                // literal portion of the pattern
                sb.append(StringUtils.jsonEncoding(object.toString()));
            }
        }
    }

    /**
     * Renders the compiled pattern for the given target.
     *
     * @param target object the placeholders are resolved against
     * @return a {@link RawJson} holding the fragment <code>"_index":"...","_type":"..."</code>
     */
    @Override
    public Object field(Object target) {
        StringBuilder sb = new StringBuilder();
        sb.append("\"_index\":\"");
        append(sb, index, target);
        sb.append("\",");
        sb.append("\"_type\":\"");
        append(sb, type, target);
        sb.append("\"");

        return new RawJson(sb.toString());
    }

    @Override
    public boolean hasPattern() {
        return hasPattern;
    }

    /**
     * Creates the extractor used to resolve a single placeholder.
     *
     * @param fieldName content of the placeholder, without braces and without any format suffix
     * @return extractor for the named field
     */
    protected abstract FieldExtractor createFieldExtractor(String fieldName);
}