/* * Copyright (C) 2015 Stratio (http://stratio.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.stratio.morphlines.commons; import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; import com.google.common.collect.ListMultimap; import com.typesafe.config.Config; import org.kitesdk.morphline.api.*; import org.kitesdk.morphline.base.AbstractCommand; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.regex.Pattern; public class FieldFilterBuilder implements CommandBuilder { private static final String COMMAND_NAME = "fieldFilter"; private static final String CONF_EXCLUDE_FIELDS = "excludeFields"; private static final String CONF_INCLUDE_FIELDS = "includeFields"; private static final String CONF_IS_REGEX = "isRegex"; private static final List<String> DEFAULT_EXCLUDE_FIELDS = ImmutableList.of(); private static final List<String> DEFAULT_INCLUDE_FIELDS = ImmutableList.of(); private static final boolean DEFAULT_IS_REGEX = false; public Collection<String> getNames() { return Collections.singleton(COMMAND_NAME); } public Command build(final Config config, final Command parent, final Command child, final MorphlineContext context) { return new FieldFilter(this, config, parent, child, context); } private static final class FieldFilter extends AbstractCommand { private final List<String> excludeFields; private final List<String> includeFields; private final Pattern excludePattern; private final Pattern includePattern; private final boolean isRegex; public FieldFilter(final CommandBuilder builder, final Config config, final Command parent, final Command child, final MorphlineContext context) { super(builder, config, parent, child, context); this.excludeFields = getConfigs().getStringList(config, CONF_EXCLUDE_FIELDS, DEFAULT_EXCLUDE_FIELDS); this.includeFields = getConfigs().getStringList(config, CONF_INCLUDE_FIELDS, DEFAULT_INCLUDE_FIELDS); if (excludeFields.isEmpty() && includeFields.isEmpty()) { throw new MorphlineCompilationException(String.format("Either %s or %s must be set", CONF_EXCLUDE_FIELDS, CONF_INCLUDE_FIELDS), config); } this.isRegex = getConfigs().getBoolean(config, CONF_IS_REGEX, DEFAULT_IS_REGEX); if (this.isRegex) { if (!excludeFields.isEmpty()) { excludePattern = Pattern.compile(String.format("(%s)", Joiner.on('|').join(excludeFields))); } else { excludePattern = Pattern.compile(".*"); } if (!includeFields.isEmpty()) { includePattern = Pattern.compile(String.format("(%s)", Joiner.on('|').join(includeFields))); } else { includePattern = Pattern.compile(".*"); } } else { includePattern = null; excludePattern = null; } } @Override protected boolean doProcess(Record record) { final ListMultimap<String, Object> entries = record.getFields(); List<String> toRemove = new ArrayList<String>(); if (this.isRegex) { for (final String field : entries.keySet()) { if (!includeFields.isEmpty() && includePattern.matcher(field).matches()) { continue; } if (excludePattern.matcher(field).matches()) { toRemove.add(field); } } } else { for (final String field : entries.keySet()) { if (!includeFields.isEmpty() && includeFields.contains(field)) { continue; } if (excludeFields.isEmpty() || excludeFields.contains(field)) { toRemove.add(field); } } } for (final String field : toRemove) { record.removeAll(field); } return super.doProcess(record); } } }