/**
 * Copyright 2015 Otto (GmbH & Co KG)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.ottogroup.bi.spqr.operator.json.filter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ottogroup.bi.spqr.exception.ComponentInitializationFailedException;
import com.ottogroup.bi.spqr.exception.RequiredInputMissingException;
import com.ottogroup.bi.spqr.operator.json.JsonContentType;
import com.ottogroup.bi.spqr.pipeline.component.MicroPipelineComponent;
import com.ottogroup.bi.spqr.pipeline.component.MicroPipelineComponentType;
import com.ottogroup.bi.spqr.pipeline.component.annotation.SPQRComponent;
import com.ottogroup.bi.spqr.pipeline.component.operator.DirectResponseOperator;
import com.ottogroup.bi.spqr.pipeline.message.StreamingDataMessage;

/**
 * Filters the content of incoming {@link StreamingDataMessage} for specific content. All non-matching messages
 * are removed from the pipeline, all matching messages get passed on to the next {@link MicroPipelineComponent}.
 * <br/><br/>
 * To configure a content filter instance the properties must show the following settings: (id = enumeration value starting with value 1)
 * <ul>
 *   <li><i>field.[id].path</i> - dot-separated path to field (eg. data.wt.cs-host)</li>
 *   <li><i>field.[id].expression</i> - regular expression applied on field content
 *       (see <a href="http://en.wikipedia.org/wiki/Regular_expression">regular expression</a> for more information)</li>
 *   <li><i>field.[id].type</i> - string, numerical or boolean (required for content conversion and expression application: type-to-string)</li>
 * </ul>
 * The enumeration is read until the first id without an expression is found. A message passes the filter
 * only if <b>every</b> configured expression matches the (trimmed) string representation of its field. Fields
 * that are missing, JSON <code>null</code> or not a scalar value are treated as empty string.
 *
 * @author mnxfst
 * @since Apr 8, 2015
 */
@SPQRComponent(type=MicroPipelineComponentType.DIRECT_RESPONSE_OPERATOR, name="jsonContentFilter", version="0.0.1", description="Filters arbitrary JSON content")
public class JsonContentFilter implements DirectResponseOperator {

    /** our faithful logging facility .... ;-) */
    private static final Logger logger = Logger.getLogger(JsonContentFilter.class);

    /** empty array of streaming data messages returned whenever a message is held back (not parseable or not matching) */
    private static final StreamingDataMessage[] EMPTY_MESSAGES_ARRAY = new StreamingDataMessage[0];

    /** prefix to all field settings - required: field.1.path, field.1.expression and field.1.type (settings must use continuous enumeration starting with value 1) */
    public static final String CFG_FIELD_PREFIX = "field.";

    /** unique component identifier */
    private String id = null;
    /** number of messages processed since initialization - kept as long to match {@link #getTotalNumOfMessages()} and to avoid integer wrap-around */
    private long totalNumOfMessages = 0;
    /** field settings (path, expression, content type) a message must match in order to pass the filter */
    private final List<JsonContentFilterFieldSetting> fields = new ArrayList<>();
    /** parses inbound message bodies into their json tree representation */
    private final ObjectMapper jsonMapper = new ObjectMapper();

    /**
     * Reads the <i>field.[id].*</i> settings from the provided properties, starting at id 1 and stopping at
     * the first id which shows no expression. Previously configured settings are replaced only if the
     * provided configuration could be read completely.
     *
     * @param properties configuration holding the field settings
     * @throws RequiredInputMissingException if no properties are provided or a field shows an expression but no path
     * @throws ComponentInitializationFailedException if an expression is not a valid regular expression
     * @see com.ottogroup.bi.spqr.pipeline.component.MicroPipelineComponent#initialize(java.util.Properties)
     */
    public void initialize(Properties properties) throws RequiredInputMissingException, ComponentInitializationFailedException {

        if (properties == null) {
            throw new RequiredInputMissingException("Missing required properties");
        }

        // collect into a local list first: a failing configuration must not leave the filter half configured
        final List<JsonContentFilterFieldSetting> configuredFields = new ArrayList<>();

        for (int i = 1; i < Integer.MAX_VALUE; i++) {
            final String settingPrefix = CFG_FIELD_PREFIX + i;

            final String expression = properties.getProperty(settingPrefix + ".expression");
            if (StringUtils.isBlank(expression)) {
                // continuous enumeration: the first missing expression marks the end of the settings
                break;
            }

            final String path = properties.getProperty(settingPrefix + ".path");
            if (StringUtils.isBlank(path)) {
                throw new RequiredInputMissingException("Missing required path for field setting '" + settingPrefix + "'");
            }

            final String valueType = properties.getProperty(settingPrefix + ".type");
            // NOTE(review): every type other than "string" (including the documented "boolean" and a missing
            // type) is mapped to NUMERICAL - confirm against the constants offered by JsonContentType
            final JsonContentType contentType = StringUtils.equalsIgnoreCase("STRING", valueType)
                    ? JsonContentType.STRING : JsonContentType.NUMERICAL;

            try {
                configuredFields.add(new JsonContentFilterFieldSetting(path.split("\\."), Pattern.compile(expression), contentType));
            } catch (PatternSyntaxException e) {
                final String errorMessage = "Failed to parse '" + expression + "' into a valid pattern expression";
                // keep the reason for the failure in the logs as the exception thrown carries the message only
                logger.error(errorMessage, e);
                throw new ComponentInitializationFailedException(errorMessage);
            }
        }

        // re-initialization replaces the former settings instead of appending to them
        this.fields.clear();
        this.fields.addAll(configuredFields);

        if (logger.isDebugEnabled()) {
            logger.debug("json content filter [id="+id+"] initialized");
        }
    }

    /**
     * Nothing to release - always reports success.
     *
     * @return always <code>true</code>
     * @see com.ottogroup.bi.spqr.pipeline.component.MicroPipelineComponent#shutdown()
     */
    public boolean shutdown() {
        return true;
    }

    /**
     * Parses the message body into a json tree and applies all configured expressions on it.
     *
     * @param message inbound message
     * @return array holding the unmodified inbound message if all expressions match, an empty array if the
     *         message is <code>null</code>, shows no body, cannot be parsed or at least one expression does not match
     * @see com.ottogroup.bi.spqr.pipeline.component.operator.DirectResponseOperator#onMessage(com.ottogroup.bi.spqr.pipeline.message.StreamingDataMessage)
     */
    public StreamingDataMessage[] onMessage(StreamingDataMessage message) {

        // increment number of messages processed so far
        this.totalNumOfMessages++;

        // do nothing if either the event or the body is empty
        if (message == null || message.getBody() == null || message.getBody().length < 1) {
            return EMPTY_MESSAGES_ARRAY;
        }

        JsonNode jsonNode = null;
        try {
            jsonNode = jsonMapper.readTree(message.getBody());
        } catch (IOException e) {
            // message only (no stack trace): malformed input may arrive at high rates
            logger.error("Failed to read message body to json node. Ignoring message. Error: " + e.getMessage());
        }

        // hold back the message in case it could not be parsed into an object representation
        if (jsonNode == null) {
            return EMPTY_MESSAGES_ARRAY;
        }

        // step through fields considered to be relevant, extract values and apply filtering function
        for (final JsonContentFilterFieldSetting fieldSettings : fields) {
            // read value into string representation for further investigation
            final String value = getTextFieldValue(jsonNode, fieldSettings.getPath());
            // Pattern#matcher does not accept null - fall back to empty string
            final String candidate = (value != null ? StringUtils.trim(value) : "");
            if (!fieldSettings.getExpression().matcher(candidate).matches()) {
                return EMPTY_MESSAGES_ARRAY;
            }
        }

        return new StreamingDataMessage[]{message};
    }

    /**
     * Walks along the path provided and reads out the leaf value which is returned as string. Scalar
     * non-text values (numbers, booleans) are converted into their string representation.
     *
     * @param jsonNode node to start the walk at
     * @param fieldPath names of the fields to descend into, first element references a field of the start node
     * @return string representation of the value found at the end of the path - never <code>null</code>. An empty
     *         string is returned if the start node or path is <code>null</code>, any element along the path is
     *         missing, or the leaf is JSON <code>null</code> or a container (object, array)
     */
    protected String getTextFieldValue(final JsonNode jsonNode, final String[] fieldPath) {

        if (jsonNode == null || fieldPath == null) {
            return "";
        }

        JsonNode contentNode = jsonNode;
        for (final String fieldName : fieldPath) {
            // JsonNode#get returns null for unknown fields and for nodes that are no objects
            contentNode = contentNode.get(fieldName);
            if (contentNode == null) {
                return "";
            }
        }

        // textValue() yields null for anything but text nodes which breaks the subsequent pattern matching,
        // asText() converts scalar values into their string representation
        if (contentNode.isNull() || !contentNode.isValueNode()) {
            return "";
        }
        return contentNode.asText();
    }

    /**
     * @return always {@link MicroPipelineComponentType#DIRECT_RESPONSE_OPERATOR}
     * @see com.ottogroup.bi.spqr.pipeline.component.MicroPipelineComponent#getType()
     */
    public MicroPipelineComponentType getType() {
        return MicroPipelineComponentType.DIRECT_RESPONSE_OPERATOR;
    }

    /**
     * @return number of times {@link #onMessage(StreamingDataMessage)} has been called, including calls
     *         with empty or unparseable messages
     * @see com.ottogroup.bi.spqr.pipeline.component.operator.Operator#getTotalNumOfMessages()
     */
    public long getTotalNumOfMessages() {
        return this.totalNumOfMessages;
    }

    /**
     * @param id unique component identifier
     * @see com.ottogroup.bi.spqr.pipeline.component.MicroPipelineComponent#setId(java.lang.String)
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @return unique component identifier, <code>null</code> if none has been assigned
     * @see com.ottogroup.bi.spqr.pipeline.component.MicroPipelineComponent#getId()
     */
    public String getId() {
        return this.id;
    }
}