/* * Copyright 2013 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kitesdk.morphline.stdlib; import java.util.Collection; import java.util.Collections; import java.util.regex.Matcher; import org.kitesdk.morphline.api.Command; import org.kitesdk.morphline.api.CommandBuilder; import org.kitesdk.morphline.api.MorphlineCompilationException; import org.kitesdk.morphline.api.MorphlineContext; import org.kitesdk.morphline.api.Record; import org.kitesdk.morphline.base.AbstractCommand; import com.typesafe.config.Config; /** * Command that iterates over the items in a given record input field, interprets each item as a key-value * pair where the key and value are separated by the given separator character, and adds the pair's * value to the record field named after the pair's key. */ public final class SplitKeyValueBuilder implements CommandBuilder { @Override public Collection<String> getNames() { return Collections.singletonList("splitKeyValue"); } @Override public Command build(Config config, Command parent, Command child, MorphlineContext context) { return new SplitKeyValue(this, config, parent, child, context); } /////////////////////////////////////////////////////////////////////////////// // Nested classes: /////////////////////////////////////////////////////////////////////////////// private static final class SplitKeyValue extends AbstractCommand { private final String inputFieldName; private final String outputFieldPrefix; private final String separator; private final Matcher regex; private final boolean addEmptyStrings; private final boolean trim; public SplitKeyValue(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.inputFieldName = getConfigs().getString(config, "inputField"); this.outputFieldPrefix = getConfigs().getString(config, "outputFieldPrefix", ""); this.separator = getConfigs().getString(config, "separator", "="); if (separator.length() == 0) { throw new MorphlineCompilationException("separator must not be the empty string", config); } if (getConfigs().getBoolean(config, "isRegex", false)) { GrokDictionaries dict = new GrokDictionaries(config, getConfigs()); this.regex = dict.compileExpression(separator).pattern().matcher(""); } else { this.regex = null; } this.addEmptyStrings = getConfigs().getBoolean(config, "addEmptyStrings", false); this.trim = getConfigs().getBoolean(config, "trim", true); validateArguments(); } @Override protected boolean doProcess(Record record) { for (Object item : record.get(inputFieldName)) { String str = item.toString(); int start; int end; if (regex != null) { if (regex.reset(str).find()) { start = regex.start(); end = regex.end(); } else { start = -1; end = -1; } } else if (separator.length() == 1) { start = str.indexOf(separator.charAt(0)); end = start + 1; } else { start = str.indexOf(separator); end = start + separator.length(); } String key = str; String value = ""; if (start >= 0) { // found? key = str.substring(0, start); value = str.substring(end, str.length()); value = trim(value); } if (value.length() > 0 || addEmptyStrings) { record.put(concat(outputFieldPrefix, trim(key)), value); } } // pass record to next command in chain: return super.doProcess(record); } private String trim(String str) { return trim ? str.trim() : str; } private String concat(String x, String y) { return x.length() == 0 ? y : x + y; } } }