/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.filter.value;
import java.util.ArrayList;
import java.util.List;
import com.addthis.basis.util.Parameter;
import com.addthis.basis.util.LessStrings;
import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.util.ValueUtil;
import com.addthis.bundle.value.ValueArray;
import com.addthis.bundle.value.ValueFactory;
import com.addthis.bundle.value.ValueMap;
import com.addthis.bundle.value.ValueObject;
import com.addthis.hydra.data.util.Tokenizer;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This {@link AbstractValueFilter ValueFilter} <span class="hydra-summary">splits the input into an array or a map</span>.
* <p/>
* <p>The input must be a string. The {@link #split split} field or the
* {@link #fixedLength fixedLength} field is used to divide the input into a sequence
* of elements. If the {@link #keySplit keySplit} field
* is used then the sequence is returned as a map. Otherwise
* the sequence is returned as an array.</p>
* <p>Example:</p>
* <pre>
* // The use of the keySplit field specifies that a map is returned
* {from:"PATH_PARAMS", split {split:"&", keySplit:"="}}
* </pre>
*
* @user-reference
* @exclude-fields once
*/
public class ValueFilterSplit extends AbstractValueFilterContextual {

    private static final Logger log = LoggerFactory.getLogger(ValueFilterSplit.class);

    /**
     * When true, an array input makes {@link #filter(ValueObject, Bundle)} throw instead of only
     * logging a warning. Taken from the "hydra.filter.split.error" parameter, false when unset.
     */
    private static final boolean ERROR_ON_ARRAY = Parameter.boolValue("hydra.filter.split.error", false);

    /** Use this field as a delimiter in between elements in the input string. Default is ",". */
    private final String split;

    /**
     * If this field is non-null,
     * then split the input into a map using
     * this delimiter between keys and values.
     * Default is null.
     */
    private final String keySplit;

    /**
     * An optional filter on elements of the output sequence. Default is null.
     */
    private final ValueFilter filter;

    /**
     * If keySplit is used, then this is an optional
     * filter on keys of the output map. Default is null.
     */
    private final ValueFilter keyFilter;

    /**
     * If this field is a positive integer,
     * then ignore the 'split' field and
     * divide the input string
     * into elements of equal length.
     * Default is -1.
     */
    private final int fixedLength;

    /**
     * Optional tokenizer. When non-null it is used to divide the input and
     * both 'fixedLength' and 'split' are ignored.
     */
    private final Tokenizer tokenizer;

    /**
     * Not thread safe. OK just used for logging purposes.
     */
    private boolean warnedOnArrayInput = false;

    /**
     * Creates the filter from its configuration properties.
     *
     * @param split       delimiter between elements of the input string
     * @param keySplit    delimiter between key and value; non-null switches the output to a map
     * @param filter      optional filter applied to every output element (may be null)
     * @param keyFilter   optional filter applied to every map key (may be null)
     * @param fixedLength when positive, element length used instead of {@code split}
     * @param tokenizer   optional tokenizer; it is initialized here when provided
     */
    @JsonCreator
    public ValueFilterSplit(@JsonProperty("split") String split,
                            @JsonProperty("keySplit") String keySplit,
                            @JsonProperty("filter") ValueFilter filter,
                            @JsonProperty("keyFilter") ValueFilter keyFilter,
                            @JsonProperty("fixedLength") int fixedLength,
                            @JsonProperty("tokenizer") Tokenizer tokenizer) {
        this.split = split;
        this.keySplit = keySplit;
        this.filter = filter;
        this.keyFilter = keyFilter;
        this.fixedLength = fixedLength;
        this.tokenizer = (tokenizer == null) ? null : tokenizer.initialize();
    }

    /**
     * Applies the optional element filter to a single output element.
     *
     * @param value   element produced by the split
     * @param context bundle forwarded to the element filter
     * @return the filtered element, or {@code value} itself when no element filter is configured
     */
    @Override
    public ValueObject filterValue(ValueObject value, Bundle context) {
        return filter != null ? filter.filter(value, context) : value;
    }

    /**
     * Applies the optional key filter to a map key.
     *
     * @param value   raw key text
     * @param context bundle forwarded to the key filter
     * @return the filtered key as a string (possibly null), or {@code value} itself when no key
     *         filter is configured
     */
    private String filterKey(String value, Bundle context) {
        return keyFilter != null ? ValueUtil.asNativeString(keyFilter.filter(ValueFactory.create(value), context)) : value;
    }

    /**
     * Splits the string form of {@code value} into an array, or into a map when 'keySplit' is set.
     *
     * @param value   the input value; an array input triggers a one-time warning
     * @param context bundle forwarded to the element and key filters
     * @return a map when 'keySplit' is non-null, otherwise an array; null when the string form of
     *         the input is null or empty
     * @throws IllegalArgumentException if the input is an array and the
     *                                  "hydra.filter.split.error" parameter is true
     */
    @Override
    public ValueObject filter(ValueObject value, Bundle context) {
        if ((value != null) && (value.getObjectType() == ValueObject.TYPE.ARRAY) && !warnedOnArrayInput) {
            log.warn("Input value to 'split' ValueFilter is an array: {}. It may not be what you intended.", value);
            if (ERROR_ON_ARRAY) {
                // The flag below is deliberately not set on this path, so every array input keeps failing.
                throw new IllegalArgumentException("hydra.filter.split.error set to true and tried to split an array");
            }
            warnedOnArrayInput = true;
        }
        String string = ValueUtil.asNativeString(value);
        if ((string == null) || string.isEmpty()) {
            return null;
        }
        String[] tokens = splitIntoTokens(string, value);
        if (keySplit != null) {
            return tokensToMap(tokens, context);
        }
        return tokensToArray(tokens, context);
    }

    /** Divides the input into raw tokens; precedence is tokenizer, fixedLength, array special case, split. */
    private String[] splitIntoTokens(String string, ValueObject value) {
        if (tokenizer != null) {
            return tokenizer.tokenize(string).toArray(new String[0]);
        }
        if (fixedLength > 0) {
            return splitFixedLength(string, fixedLength);
        }
        if (value.getObjectType() == ValueObject.TYPE.ARRAY && ",".equals(split)) {
            // XXX Make sure applying this filter on an array field still works.
            // DefaultArray had a custom toString that produced a comma delimited string, so splitting
            // an array field on "," would work (albeit that might not be the job writer's
            // intention). The custom toString has been removed in bundle v2.2.8, so the string
            // value has the extra enclosing square brackets: [foo,bar], causing the split filter to
            // produce "[foo" and "bar]". This is special handling to deal with that
            return extractArray(value.asArray());
        }
        return LessStrings.splitArray(string, split);
    }

    /** Builds the map output: each token is cut at the first 'keySplit' occurrence into key and value. */
    private ValueMap tokensToMap(String[] tokens, Bundle context) {
        ValueMap map = ValueFactory.createMap();
        for (String token : tokens) {
            int pos = token.indexOf(keySplit);
            if (pos >= 0) {
                String key = filterKey(token.substring(0, pos), context);
                if (key == null) {
                    // The key filter rejected this key; drop the entry.
                    continue;
                }
                String rest = token.substring(pos + keySplit.length());
                // Forward the context so the element filter sees the same bundle as the key filter.
                map.put(key, filterValue(ValueFactory.create(rest), context));
            } else {
                // No separator in this token: the (filtered) token is used as both key and value.
                String key = filterKey(token, context);
                if (key == null) {
                    continue;
                }
                map.put(key, filterValue(ValueFactory.create(key), context));
            }
        }
        return map;
    }

    /** Builds the array output: one (optionally filtered) element per token, in order. */
    private ValueArray tokensToArray(String[] tokens, Bundle context) {
        ValueArray arr = ValueFactory.createArray(tokens.length);
        for (String token : tokens) {
            // Forward the context so the element filter can consult the bundle.
            arr.add(filterValue(ValueFactory.create(token), context));
        }
        return arr;
    }

    /**
     * Cuts {@code line} into consecutive pieces of {@code length} characters using Guava's
     * fixed-length splitter.
     *
     * @param line   the string to divide
     * @param length the piece length; callers in this class only pass positive values
     * @return the pieces in order
     */
    protected String[] splitFixedLength(String line, int length) {
        return Iterables.toArray(Splitter.fixedLength(length).split(line), String.class);
    }

    /** Converts every element of the array to its {@code toString()} form, preserving order. */
    private String[] extractArray(ValueArray va) {
        int size = va.size();
        String[] arr = new String[size];
        for (int i = 0; i < size; i++) {
            arr[i] = va.get(i).toString();
        }
        return arr;
    }
}