/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.kylin.source.kafka; import java.io.IOException; import java.lang.reflect.Constructor; import java.nio.ByteBuffer; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.util.HashMap; import java.util.TreeMap; import java.util.Collections; import java.util.Arrays; import com.fasterxml.jackson.databind.DeserializationFeature; import org.apache.commons.lang3.StringUtils; import org.apache.kylin.common.util.ByteBufferBackedInputStream; import org.apache.kylin.common.util.StreamingMessageRow; import org.apache.kylin.metadata.model.TblColRef; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.JavaType; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.type.MapType; import com.fasterxml.jackson.databind.type.SimpleType; import com.google.common.collect.Lists; /** * An utility class which parses a JSON streaming message to a list of strings (represent a row in table). * <p> * Each message should have a property whose value represents the message's timestamp, default the column name is "timestamp" * but can be customized by StreamingParser#PROPERTY_TS_PARSER. * <p> * By default it will parse the timestamp col value as Unix time. If the format isn't Unix time, need specify the time parser * with property StreamingParser#PROPERTY_TS_PARSER. * <p> * It also support embedded JSON format; Use a separator (customized by StreamingParser#EMBEDDED_PROPERTY_SEPARATOR) to concat * the property names. */ public final class TimedJsonStreamParser extends StreamingParser { private static final Logger logger = LoggerFactory.getLogger(TimedJsonStreamParser.class); private List<TblColRef> allColumns; private final ObjectMapper mapper; private String tsColName = null; private String tsParser = null; private String separator = null; private final Map<String, Object> root = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); private final Map<String, Object> tempMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); private final Map<String, String[]> nameMap = new HashMap<>(); private final JavaType mapType = MapType.construct(HashMap.class, SimpleType.construct(String.class), SimpleType.construct(Object.class)); private AbstractTimeParser streamTimeParser; public TimedJsonStreamParser(List<TblColRef> allColumns, Map<String, String> properties) { this.allColumns = allColumns; if (properties == null) { properties = StreamingParser.defaultProperties; } tsColName = properties.get(PROPERTY_TS_COLUMN_NAME); tsParser = properties.get(PROPERTY_TS_PARSER); separator = properties.get(EMBEDDED_PROPERTY_SEPARATOR); if (!StringUtils.isEmpty(tsParser)) { try { Class clazz = Class.forName(tsParser); Constructor constructor = clazz.getConstructor(Map.class); streamTimeParser = (AbstractTimeParser) constructor.newInstance(properties); } catch (Exception e) { throw new IllegalStateException("Invalid StreamingConfig, tsParser " + tsParser + ", parserProperties " + properties + ".", e); } } else { throw new IllegalStateException("Invalid StreamingConfig, tsParser " + tsParser + ", parserProperties " + properties + "."); } mapper = new ObjectMapper(); mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES); mapper.disable(DeserializationFeature.FAIL_ON_INVALID_SUBTYPE); mapper.enable(DeserializationFeature.USE_JAVA_ARRAY_FOR_JSON_ARRAY); } @Override public List<StreamingMessageRow> parse(ByteBuffer buffer) { try { Map<String, Object> message = mapper.readValue(new ByteBufferBackedInputStream(buffer), mapType); root.clear(); root.putAll(message); String tsStr = objToString(root.get(tsColName)); long t = streamTimeParser.parseTime(tsStr); ArrayList<String> result = Lists.newArrayList(); for (TblColRef column : allColumns) { final String columnName = column.getName().toLowerCase(); if (populateDerivedTimeColumns(columnName, result, t) == false) { result.add(getValueByKey(columnName, root)); } } StreamingMessageRow streamingMessageRow = new StreamingMessageRow(result, 0, t, Collections.<String, Object>emptyMap()); List<StreamingMessageRow> messageRowList = new ArrayList<StreamingMessageRow>(); messageRowList.add(streamingMessageRow); return messageRowList; } catch (IOException e) { logger.error("error", e); throw new RuntimeException(e); } } @Override public boolean filter(StreamingMessageRow streamingMessageRow) { return true; } protected String getValueByKey(String key, Map<String, Object> rootMap) throws IOException { if (rootMap.containsKey(key)) { return objToString(rootMap.get(key)); } String[] names = nameMap.get(key); if (names == null && key.contains(separator)) { names = key.toLowerCase().split(separator); nameMap.put(key, names); } if (names != null && names.length > 0) { tempMap.clear(); tempMap.putAll(rootMap); for (int i = 0; i < names.length - 1; i++) { Object o = tempMap.get(names[i]); if (o instanceof Map) { tempMap.clear(); tempMap.putAll((Map<String, Object>) o); } else { throw new IOException("Property '" + names[i] + "' is not embedded format"); } } Object finalObject = tempMap.get(names[names.length - 1]); return objToString(finalObject); } return StringUtils.EMPTY; } public static String objToString(Object value) { if (value == null) return StringUtils.EMPTY; if (value.getClass().isArray()) return String.valueOf(Arrays.asList((Object[]) value)); return String.valueOf(value); } }