/*
* JBoss, Home of Professional Open Source
* Copyright 2012 Red Hat Inc. and/or its affiliates and other contributors
* as indicated by the @authors tag. All rights reserved.
*/
package org.jboss.elasticsearch.tools.content;
import java.util.Map;
import org.elasticsearch.common.settings.SettingsException;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
/**
 * Content preprocessor which trims the value of a source field to the configured maximal length and stores it into
 * another or the same target field. White spaces at the beginning and end are removed too. Example of configuration
 * for this preprocessor:
 *
 * <pre>
 * {
 * "name" : "Short description creator",
 * "class" : "org.jboss.elasticsearch.tools.content.TrimStringValuePreprocessor",
 * "settings" : {
 * "source_field" : "fields.summary",
 * "target_field" : "dcp_description",
 * "max_size" : 300
 * }
 * }
 * </pre>
 *
 * Options are:
 * <ul>
 * <li><code>source_field</code> - source field in input data. Dot notation for nested values can be used here (see
 * {@link XContentMapValues#extractValue(String, Map)}).
 * <li><code>target_field</code> - target field in data to store mapped value into. Can be same as input field. Dot
 * notation can be used here for structure nesting.
 * <li><code>max_size</code> - maximal size of string, must not be negative. Strings longer than this value are
 * trimmed. If the value is greater than 4 then the trimmed string ends with <code>...</code> and its total length
 * (including these three dots) does not exceed <code>max_size</code>. For smaller values the string is simply cut.
 * <li><code>source_bases</code> - list of fields in source data which are used as bases for trimming. If defined then
 * trimming is performed for each of these fields, <code>source_field</code> and <code>target_field</code> are
 * resolved relatively against this base. Base must provide object or list of objects.
 * </ul>
 *
 * @author Vlastimil Elias (velias at redhat dot com)
 * @see StructuredContentPreprocessorFactory
 */
public class TrimStringValuePreprocessor extends StructuredContentPreprocessorWithSourceBasesBase<Object> {

  protected static final String CFG_SOURCE_FIELD = "source_field";
  protected static final String CFG_TARGET_FIELD = "target_field";
  protected static final String CFG_MAX_SIZE = "max_size";

  /** Marker appended to a shortened value to show that something was cut off. */
  private static final String ELLIPSIS = "...";

  /** The ellipsis is used only when <code>max_size</code> is greater than this value. */
  private static final int MAX_SIZE_WITHOUT_ELLIPSIS = 4;

  protected String fieldSource;
  protected String fieldTarget;
  protected int maxSize;

  /**
   * Reads and validates the <code>source_field</code>, <code>target_field</code> and <code>max_size</code> settings.
   *
   * @param settings configuration of this preprocessor
   * @throws SettingsException if the configuration is rejected by the validation helpers or <code>max_size</code> is
   *           negative
   */
  @Override
  public void init(Map<String, Object> settings) throws SettingsException {
    super.init(settings);
    fieldSource = XContentMapValues.nodeStringValue(settings.get(CFG_SOURCE_FIELD), null);
    validateConfigurationStringNotEmpty(fieldSource, CFG_SOURCE_FIELD);
    fieldTarget = XContentMapValues.nodeStringValue(settings.get(CFG_TARGET_FIELD), null);
    validateConfigurationStringNotEmpty(fieldTarget, CFG_TARGET_FIELD);
    maxSize = readMandatoryIntegerConfigValue(settings, CFG_MAX_SIZE);
    // Fail fast: a negative limit would otherwise blow up in substring() for every processed value.
    if (maxSize < 0) {
      throw new SettingsException("'" + CFG_MAX_SIZE + "' configuration value must not be negative but is " + maxSize);
    }
  }

  /**
   * Reads the source field from <code>data</code>, trims and shortens it and stores the result into the target field.
   * Nothing is written when the source value is missing. A non-String source value is reported through
   * <code>addDataWarning</code> and the debug log, and nothing is written either.
   *
   * @param data structure to read the source value from and to write the target value into
   * @param context not used by this preprocessor
   * @param base name of the base the data come from, used only to build the field name in the warning message
   * @param chainContext context the data warning is added to
   */
  @Override
  protected void processOneSourceValue(Map<String, Object> data, Object context, String base,
      PreprocessChainContext chainContext) {
    // Dot notation means a nested value, a plain name is looked up directly.
    Object sourceValue = fieldSource.contains(".") ? XContentMapValues.extractValue(fieldSource, data)
        : data.get(fieldSource);
    if (sourceValue == null) {
      return;
    }
    if (!(sourceValue instanceof String)) {
      String msg = "Value for field '" + getFullFieldName(base, fieldSource)
          + "' is not String, so can't be processed";
      addDataWarning(chainContext, msg);
      logger.debug(msg);
      return;
    }
    putTargetValue(data, shorten(((String) sourceValue).trim(), maxSize));
  }

  /**
   * Shortens the value so it is not longer than the limit. If the limit is greater than 4 then the shortened value
   * ends with three dots which are counted into the limit.
   *
   * @param value value to shorten, must not be null
   * @param limit maximal length of the returned string
   * @return the value itself if it fits into the limit, shortened value otherwise
   */
  private static String shorten(String value, int limit) {
    if (value.length() <= limit) {
      return value;
    }
    boolean useEllipsis = limit > MAX_SIZE_WITHOUT_ELLIPSIS;
    int end = useEllipsis ? limit - ELLIPSIS.length() : limit;
    // Do not cut a surrogate pair in half, a lone high surrogate is not a valid character.
    // charAt(end) is safe here because end <= limit < value.length().
    if (end > 0 && Character.isHighSurrogate(value.charAt(end - 1)) && Character.isLowSurrogate(value.charAt(end))) {
      end--;
    }
    String head = value.substring(0, end);
    return useEllipsis ? head + ELLIPSIS : head;
  }

  /**
   * Stores the processed value into the configured target field.
   *
   * @param data structure to store the value into
   * @param value value to store
   */
  protected void putTargetValue(Map<String, Object> data, String value) {
    StructureUtils.putValueIntoMapOfMaps(data, fieldTarget, value);
  }

  /**
   * This preprocessor keeps no state while processing the data, so no context object is created.
   *
   * @param data processed data, not used
   * @return always null
   */
  @Override
  protected Object createContext(Map<String, Object> data) {
    return null;
  }

  /**
   * @return configured name of the source field
   */
  public String getFieldSource() {
    return fieldSource;
  }

  /**
   * @return configured name of the target field
   */
  public String getFieldTarget() {
    return fieldTarget;
  }

  /**
   * @return configured maximal length of the stored string
   */
  public int getMaxSize() {
    return maxSize;
  }
}