/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.hadoop.serialization.bulk;
import java.util.ArrayList;
import java.util.List;
import java.util.Date;
import java.util.concurrent.Callable;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.rest.Resource;
import org.elasticsearch.hadoop.serialization.builder.ValueWriter;
import org.elasticsearch.hadoop.serialization.bulk.MetadataExtractor.Metadata;
import org.elasticsearch.hadoop.serialization.field.ConstantFieldExtractor;
import org.elasticsearch.hadoop.serialization.field.FieldExplainer;
import org.elasticsearch.hadoop.serialization.field.FieldExtractor;
import org.elasticsearch.hadoop.serialization.field.IndexExtractor;
import org.elasticsearch.hadoop.serialization.field.JsonFieldExtractors;
import org.elasticsearch.hadoop.serialization.json.JacksonJsonGenerator;
import org.elasticsearch.hadoop.util.BytesArray;
import org.elasticsearch.hadoop.util.BytesArrayPool;
import org.elasticsearch.hadoop.util.FastByteArrayOutputStream;
import org.elasticsearch.hadoop.util.ObjectUtils;
import org.elasticsearch.hadoop.util.StringUtils;
/**
 * Base {@link BulkFactory} that assembles the action/metadata header and the trailer surrounding each
 * entry and hands them to a template-based {@link BulkCommand}.
 * <p>
 * Metadata values (id, parent, routing, ...) are resolved through {@link FieldExtractor}s. These are either
 * configured from the {@link Settings} (the 'static' case) or supplied per entry by a
 * {@link MetadataExtractor} (the 'dynamic' case); when a {@link MetadataExtractor} is present, header and
 * trailer are rebuilt on demand through {@link DynamicHeaderRef} / {@link DynamicEndRef}.
 * <p>
 * Subclasses provide the operation name through {@link #getOperation()} and may customize the header
 * through {@link #id(List, boolean)}, {@link #otherHeader(List, boolean)} and {@link #writeObjectEnd(List)}.
 */
public abstract class AbstractBulkFactory implements BulkFactory {

    private static final Log log = LogFactory.getLog(AbstractBulkFactory.class);

    protected Settings settings;

    // whether the input is already JSON (values are then written as-is, without quoting/escaping)
    private final boolean jsonInput;
    // true when no MetadataExtractor was supplied, i.e. header/trailer can be computed once up-front
    private final boolean isStatic;

    private final MetadataExtractor metaExtractor;
    // used when specifying an index pattern
    private IndexExtractor indexExtractor;

    // 'static' extractors; each one stays null unless configured in initExtractorsFromSettings
    private FieldExtractor idExtractor;
    // NOTE: not assigned anywhere in this class, so the static fallback for _type is always null
    private FieldExtractor typeExtractor;
    private FieldExtractor parentExtractor;
    private FieldExtractor routingExtractor;
    private FieldExtractor versionExtractor;
    private FieldExtractor ttlExtractor;
    private FieldExtractor timestampExtractor;
    private FieldExtractor paramsExtractor;

    // constant extractor returning the configured version type as raw JSON
    private final FieldExtractor versionTypeExtractor = new FieldExtractor() {

        private Object value;

        @Override
        public Object field(Object target) {
            // lazy init to have the settings in place (this initializer runs before the constructor body)
            if (value == null) {
                value = new RawJson(StringUtils.toJsonString(settings.getMappingVersionType()));
            }
            return value;
        }
    };

    private JsonFieldExtractors jsonExtractors;

    private final ValueWriter valueWriter;

    /**
     * Pairs a {@link FieldExtractor} with a reusable {@link BytesArrayPool} and renders the extracted
     * value(s) of an entity into that pool.
     */
    class FieldWriter {
        final FieldExtractor extractor;
        final BytesArrayPool pool = new BytesArrayPool();

        FieldWriter(FieldExtractor extractor) {
            this.extractor = extractor;
        }

        /**
         * Extracts the field from the given entity and writes it into the (reset) pool.
         * A {@link List} result has each of its elements written individually.
         *
         * @param object entity to extract the value from
         * @return the pool owned by this writer, holding the rendered value(s)
         * @throws EsHadoopIllegalArgumentException if the extractor reports {@link FieldExtractor#NOT_FOUND}
         */
        BytesArrayPool write(Object object) {
            pool.reset();

            Object value = extractor.field(object);
            if (value == FieldExtractor.NOT_FOUND) {
                // describe the entity itself: its own class (not the class of the description string)
                // and a readable rendering, both null-safe so the real error is never masked by an NPE
                String description = (extractor instanceof FieldExplainer
                        ? ((FieldExplainer) extractor).toString(object)
                        : String.valueOf(object));
                Object entityType = (object == null ? null : object.getClass());
                throw new EsHadoopIllegalArgumentException(String.format(
                        "[%s] cannot extract value from entity [%s] | instance [%s]", extractor, entityType, description));
            }

            if (value instanceof List) {
                for (Object val : (List<?>) value) {
                    doWrite(val);
                }
            }
            // if/else to save one collection/iterator instance
            else {
                doWrite(value);
            }

            return pool;
        }

        /**
         * Renders a single value into the next {@link BytesArray} obtained from the pool.
         *
         * @param value value to render; {@code null} is written as the literal {@code null}
         */
        void doWrite(Object value) {
            // common-case - constants or JDK types (with JSON input every value is written through toString())
            if (value instanceof String || jsonInput || value instanceof Number || value instanceof Boolean || value == null) {
                String valueString = (value == null ? "null" : value.toString());
                // plain strings need quoting/escaping; JSON input is passed through untouched
                if (value instanceof String && !jsonInput) {
                    valueString = StringUtils.toJsonString(valueString);
                }
                pool.get().bytes(valueString);
            }
            else if (value instanceof Date) {
                // dates are written as epoch milliseconds
                pool.get().bytes(Long.toString(((Date) value).getTime()));
            }
            else if (value instanceof RawJson) {
                pool.get().bytes(((RawJson) value).json());
            }
            // library specific type - use the value writer (a bit overkill but handles collections/arrays properly)
            else {
                BytesArray ba = pool.get();
                JacksonJsonGenerator generator = new JacksonJsonGenerator(new FastByteArrayOutputStream(ba));
                try {
                    valueWriter.write(value, generator);
                    generator.flush();
                } finally {
                    // always release the generator, even if the value writer fails
                    generator.close();
                }
            }
        }

        @Override
        public String toString() {
            return "FieldWriter for " + extractor;
        }
    }

    /**
     * Marker for content that has to be recomputed on demand instead of being built once.
     */
    interface DynamicContentRef {
        List<Object> getDynamicContent();
    }

    /**
     * Rebuilds and compacts the action/metadata header on every call.
     */
    public class DynamicHeaderRef implements DynamicContentRef {
        final List<Object> header = new ArrayList<Object>();

        @Override
        public List<Object> getDynamicContent() {
            header.clear();
            writeObjectHeader(header);
            return compact(header);
        }
    }

    /**
     * Rebuilds and compacts the entry trailer on every call.
     */
    public class DynamicEndRef implements DynamicContentRef {
        final List<Object> end = new ArrayList<Object>();

        @Override
        public List<Object> getDynamicContent() {
            end.clear();
            writeObjectEnd(end);
            return compact(end);
        }
    }

    /**
     * @param settings      configuration used to instantiate the value writer and the field extractors
     * @param metaExtractor per-entry metadata source; {@code null} means all metadata is static
     */
    AbstractBulkFactory(Settings settings, MetadataExtractor metaExtractor) {
        this.settings = settings;
        this.valueWriter = ObjectUtils.instantiate(settings.getSerializerValueWriterClassName(), settings);
        this.metaExtractor = metaExtractor;

        jsonInput = settings.getInputAsJson();
        isStatic = metaExtractor == null;

        initExtractorsFromSettings(settings);
    }

    /**
     * Initializes the 'static' extractors: from {@link JsonFieldExtractors} for JSON input, otherwise by
     * instantiating the extractor class configured for each mapping that is set.
     * <p>
     * Note that {@link ConstantFieldExtractor#PROPERTY} is overwritten on the given settings right before
     * each extractor is instantiated, so the set/instantiate pairs must stay together.
     */
    private void initExtractorsFromSettings(final Settings settings) {
        if (jsonInput) {
            if (log.isDebugEnabled()) {
                log.debug("JSON input; using internal field extractor for efficient parsing...");
            }

            jsonExtractors = new JsonFieldExtractors(settings);
            indexExtractor = jsonExtractors.indexAndType();

            idExtractor = jsonExtractors.id();
            parentExtractor = jsonExtractors.parent();
            routingExtractor = jsonExtractors.routing();
            versionExtractor = jsonExtractors.version();
            ttlExtractor = jsonExtractors.ttl();
            timestampExtractor = jsonExtractors.timestamp();
            paramsExtractor = jsonExtractors.params();
        }
        else {
            // init extractors (if needed)
            if (settings.getMappingId() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingId());
                idExtractor = ObjectUtils.<FieldExtractor> instantiate(settings.getMappingIdExtractorClassName(),
                        settings);
            }
            if (settings.getMappingParent() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingParent());
                parentExtractor = ObjectUtils.<FieldExtractor> instantiate(
                        settings.getMappingParentExtractorClassName(), settings);
            }
            if (settings.getMappingRouting() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingRouting());
                routingExtractor = ObjectUtils.<FieldExtractor> instantiate(
                        settings.getMappingRoutingExtractorClassName(), settings);
            }
            if (settings.getMappingTtl() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingTtl());
                ttlExtractor = ObjectUtils.<FieldExtractor> instantiate(settings.getMappingTtlExtractorClassName(),
                        settings);
            }
            if (settings.getMappingVersion() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingVersion());
                versionExtractor = ObjectUtils.<FieldExtractor> instantiate(
                        settings.getMappingVersionExtractorClassName(), settings);
            }
            if (settings.getMappingTimestamp() != null) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getMappingTimestamp());
                timestampExtractor = ObjectUtils.<FieldExtractor> instantiate(
                        settings.getMappingTimestampExtractorClassName(), settings);
            }

            // create adapter
            IndexExtractor iformat = ObjectUtils.<IndexExtractor> instantiate(settings.getMappingIndexExtractorClassName(), settings);
            iformat.compile(new Resource(settings, false).toString());

            // the index extractor is only needed when the resource actually contains a pattern
            if (iformat.hasPattern()) {
                indexExtractor = iformat;
            }

            if (settings.hasUpdateScriptParams()) {
                settings.setProperty(ConstantFieldExtractor.PROPERTY, settings.getUpdateScriptParams());
                paramsExtractor = ObjectUtils.instantiate(settings.getMappingParamsExtractorClassName(), settings);
            }

            if (log.isTraceEnabled()) {
                log.trace(String.format("Instantiated value writer [%s]", valueWriter));
                traceExtractor("id", idExtractor);
                traceExtractor("parent", parentExtractor);
                traceExtractor("routing", routingExtractor);
                traceExtractor("ttl", ttlExtractor);
                traceExtractor("version", versionExtractor);
                traceExtractor("timestamp", timestampExtractor);
                traceExtractor("params", paramsExtractor);
            }
        }

        // json params override other extractors
        if (settings.hasUpdateScriptParamsJson()) {
            // read and trim the setting once instead of on every entry
            final String paramsJson = settings.getUpdateScriptParamsJson().trim();
            paramsExtractor = new FieldExtractor() {
                @Override
                public Object field(Object target) {
                    return new RawJson(paramsJson);
                }
            };
        }
    }

    // logs (at trace level) the given extractor, but only if it has been instantiated
    private void traceExtractor(String name, Object extractor) {
        if (extractor != null) {
            log.trace(String.format("Instantiated %s extractor [%s]", name, extractor));
        }
    }

    /**
     * {@link FieldExtractor} that ignores its target and returns the freshly built, compacted header.
     */
    class DynamicFieldExtractor implements FieldExtractor {
        private final List<Object> before = new ArrayList<Object>();

        @Override
        public Object field(Object target) {
            before.clear();
            writeObjectHeader(before);
            return compact(before);
        }
    }

    /**
     * Creates the bulk command matching the input type (JSON or objects) and the operation
     * (script update or not).
     * <p>
     * With static metadata the header/trailer are computed and compacted here, once; otherwise
     * references are passed along so the content is rebuilt on demand.
     */
    @Override
    public BulkCommand createBulk() {
        List<Object> before = new ArrayList<Object>();
        List<Object> after = new ArrayList<Object>();

        if (!isStatic) {
            before.add(new DynamicHeaderRef());
            after.add(new DynamicEndRef());
        }
        else {
            writeObjectHeader(before);
            before = compact(before);
            writeObjectEnd(after);
            // may be null if a subclass writes no trailer (see compact)
            after = compact(after);
        }

        boolean isScriptUpdate = settings.hasUpdateScript();
        // compress pieces
        if (jsonInput) {
            if (isScriptUpdate) {
                return new JsonScriptTemplateBulk(before, after, jsonExtractors, settings);
            }
            return new JsonTemplatedBulk(before, after, jsonExtractors, settings);
        }
        if (isScriptUpdate) {
            return new ScriptTemplateBulk(settings, before, after, valueWriter);
        }
        return new TemplatedBulk(before, after, valueWriter);
    }

    /**
     * Writes the action and metadata header into the given list as a mix of string fragments and
     * extractors; fields without an extractor are skipped and commas are only added between fields.
     *
     * @param list target list, appended to
     */
    protected void writeObjectHeader(List<Object> list) {
        // action
        list.add("{\"" + getOperation() + "\":{");

        // flag indicating whether a comma needs to be added between fields
        boolean commaMightBeNeeded = false;

        // the index extractor is added without any field name prefix
        // NOTE(review): presumably it emits the field name(s) itself - confirm against IndexExtractor
        commaMightBeNeeded = addExtractorOrDynamicValue(list, getExtractorOrDynamicValue(Metadata.INDEX, indexExtractor), "", commaMightBeNeeded);
        commaMightBeNeeded = addExtractorOrDynamicValue(list, getExtractorOrDynamicValue(Metadata.TYPE, typeExtractor), "\"_type\":", commaMightBeNeeded);
        commaMightBeNeeded = id(list, commaMightBeNeeded);
        commaMightBeNeeded = addExtractorOrDynamicValue(list, getExtractorOrDynamicValue(Metadata.PARENT, parentExtractor), "\"_parent\":", commaMightBeNeeded);
        commaMightBeNeeded = addExtractorOrDynamicValue(list, getExtractorOrDynamicValue(Metadata.ROUTING, routingExtractor), "\"_routing\":", commaMightBeNeeded);
        commaMightBeNeeded = addExtractorOrDynamicValue(list, getExtractorOrDynamicValue(Metadata.TTL, ttlExtractor), "\"_ttl\":", commaMightBeNeeded);
        commaMightBeNeeded = addExtractorOrDynamicValue(list, getExtractorOrDynamicValue(Metadata.TIMESTAMP, timestampExtractor), "\"_timestamp\":", commaMightBeNeeded);

        // version & version_type fields
        Object versionField = getExtractorOrDynamicValue(Metadata.VERSION, versionExtractor);
        if (versionField != null) {
            commaMightBeNeeded = addExtractorOrDynamicValue(list, versionField, "\"_version\":", commaMightBeNeeded);
            // version_type - only needed when a version is specified
            commaMightBeNeeded = addExtractorOrDynamicValue(list, getExtractorOrDynamicValue(Metadata.VERSION_TYPE, versionTypeExtractor), "\"_version_type\":", commaMightBeNeeded);
        }

        // useful for update command
        otherHeader(list, commaMightBeNeeded);
        list.add("}}\n");
    }

    /**
     * Adds the {@code _id} field (if an extractor is available); a hook for subclasses.
     *
     * @return whether a comma is needed before the next field
     */
    protected boolean id(List<Object> list, boolean commaMightBeNeeded) {
        return addExtractorOrDynamicValue(list, getExtractorOrDynamicValue(Metadata.ID, idExtractor), "\"_id\":", commaMightBeNeeded);
    }

    // trivial utility that adds a comma before the current field, but only if the extractor is present;
    // returns whether a comma is needed before the next field
    private boolean addExtractorOrDynamicValue(List<Object> list, Object extractor, String header, boolean commaMightBeNeeded) {
        if (extractor != null) {
            if (commaMightBeNeeded) {
                list.add(",");
            }
            list.add(header);
            list.add(extractor);
            return true;
        }
        return commaMightBeNeeded;
    }

    /**
     * Hook for subclasses to append additional header fields; does nothing by default.
     *
     * @param list               header content so far
     * @param commaMightBeNeeded whether a field has already been written (so a comma must precede the next)
     */
    protected void otherHeader(List<Object> list, boolean commaMightBeNeeded) {
        // no-op
    }

    /**
     * Gets the extractor for a given field, trying first the dynamic one, with a fallback on the 'static' one.
     *
     * @return the resolved extractor, possibly {@code null} when neither is available
     */
    protected Object getExtractorOrDynamicValue(Metadata meta, FieldExtractor fallbackExtractor) {
        if (metaExtractor != null) {
            FieldExtractor metaFE = metaExtractor.get(meta);
            if (metaFE != null) {
                return metaFE;
            }
        }
        return fallbackExtractor;
    }

    /**
     * @return the name of the operation, used as the key of the action object in the header
     */
    protected abstract String getOperation();

    /**
     * Writes the content following each entry; a newline by default.
     */
    protected void writeObjectEnd(List<Object> list) {
        list.add("\n");
    }

    // optimization method used when dealing with 'static' extractors
    // concatenates all the strings to minimize the amount of data needed for construction:
    // consecutive non-extractor entries are merged into one BytesArray, extractors are wrapped in FieldWriters
    // NOTE: returns null (not an empty list) for null/empty input
    private List<Object> compact(List<Object> list) {
        if (list == null || list.isEmpty()) {
            return null;
        }

        List<Object> compacted = new ArrayList<Object>();
        StringBuilder stringAccumulator = new StringBuilder();
        for (Object object : list) {
            if (object instanceof FieldExtractor) {
                // flush the text gathered so far before the extractor, to preserve ordering
                if (stringAccumulator.length() > 0) {
                    compacted.add(new BytesArray(stringAccumulator.toString()));
                    stringAccumulator.setLength(0);
                }
                compacted.add(new FieldWriter((FieldExtractor) object));
            }
            else {
                stringAccumulator.append(object.toString());
            }
        }

        if (stringAccumulator.length() > 0) {
            compacted.add(new BytesArray(stringAccumulator.toString()));
        }
        return compacted;
    }

    /**
     * @return the extractor for the update script parameters, or {@code null} if none is configured
     */
    protected FieldExtractor getParamExtractor() {
        return paramsExtractor;
    }
}