/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.qubole.presto.kinesis.decoder.json;
import com.facebook.presto.spi.type.BigintType;
import com.facebook.presto.spi.type.BooleanType;
import com.facebook.presto.spi.type.DoubleType;
import com.facebook.presto.spi.type.VarcharType;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.ByteStreams;
import com.qubole.presto.kinesis.KinesisColumnHandle;
import com.qubole.presto.kinesis.KinesisFieldValueProvider;
import com.qubole.presto.kinesis.decoder.KinesisFieldDecoder;
import com.qubole.presto.kinesis.decoder.util.DecoderTestUtil;
import io.airlift.json.ObjectMapperProvider;
import io.airlift.log.Logger;
import org.testng.annotations.Test;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;
public class TestJsonDecoder
{
private static final Logger log = Logger.get(TestJsonDecoder.class);
private static final JsonKinesisFieldDecoder DEFAULT_FIELD_DECODER = new JsonKinesisFieldDecoder();
private static final ObjectMapperProvider PROVIDER = new ObjectMapperProvider();
private static Map<KinesisColumnHandle, KinesisFieldDecoder<?>> buildMap(List<KinesisColumnHandle> columns)
{
ImmutableMap.Builder<KinesisColumnHandle, KinesisFieldDecoder<?>> map = ImmutableMap.builder();
for (KinesisColumnHandle column : columns) {
map.put(column, DEFAULT_FIELD_DECODER);
}
return map.build();
}
@Test
public void testSimple()
throws Exception
{
byte[] json = ByteStreams.toByteArray(TestJsonDecoder.class.getResourceAsStream("/decoder/json/message.json"));
JsonKinesisRowDecoder rowDecoder = new JsonKinesisRowDecoder(PROVIDER.get());
KinesisColumnHandle row1 = new KinesisColumnHandle("", 0, "row1", VarcharType.VARCHAR, "source", null, null, false, false);
KinesisColumnHandle row2 = new KinesisColumnHandle("", 1, "row2", VarcharType.VARCHAR, "user/screen_name", null, null, false, false);
KinesisColumnHandle row3 = new KinesisColumnHandle("", 2, "row3", BigintType.BIGINT, "id", null, null, false, false);
KinesisColumnHandle row4 = new KinesisColumnHandle("", 3, "row4", BigintType.BIGINT, "user/statuses_count", null, null, false, false);
KinesisColumnHandle row5 = new KinesisColumnHandle("", 4, "row5", BooleanType.BOOLEAN, "user/geo_enabled", null, null, false, false);
List<KinesisColumnHandle> columns = ImmutableList.of(row1, row2, row3, row4, row5);
Set<KinesisFieldValueProvider> providers = new HashSet<>();
boolean valid = rowDecoder.decodeRow(json, providers, columns, buildMap(columns));
assertTrue(valid);
assertEquals(providers.size(), columns.size());
DecoderTestUtil.checkValue(providers, row1, "<a href=\"http://twitterfeed.com\" rel=\"nofollow\">twitterfeed</a>");
DecoderTestUtil.checkValue(providers, row2, "EKentuckyNews");
DecoderTestUtil.checkValue(providers, row3, 493857959588286460L);
DecoderTestUtil.checkValue(providers, row4, 7630);
DecoderTestUtil.checkValue(providers, row5, true);
}
@Test
public void testOtherExtracts()
throws Exception
{
// Test other scenarios: deeper dive into object, get JSON constructs as strings, etc.
byte[] json = ByteStreams.toByteArray(TestJsonDecoder.class.getResourceAsStream("/decoder/json/event.json"));
JsonKinesisRowDecoder rowDecoder = new JsonKinesisRowDecoder(PROVIDER.get());
KinesisColumnHandle row1 = new KinesisColumnHandle("", 0, "event_source", VarcharType.VARCHAR, "source", null, null, false, false);
KinesisColumnHandle row2 = new KinesisColumnHandle("", 1, "user", VarcharType.VARCHAR, "user/handle", null, null, false, false);
KinesisColumnHandle row3 = new KinesisColumnHandle("", 2, "user_string", VarcharType.VARCHAR, "user", null, null, false, false);
KinesisColumnHandle row4 = new KinesisColumnHandle("", 3, "timestamp", BigintType.BIGINT, "timestamp", null, null, false, false);
KinesisColumnHandle row5 = new KinesisColumnHandle("", 4, "browser_name", VarcharType.VARCHAR, "environment/browser/name", null, null, false, false);
KinesisColumnHandle row6 = new KinesisColumnHandle("", 5, "tags_array", VarcharType.VARCHAR, "tags", null, null, false, false);
List<KinesisColumnHandle> columns = ImmutableList.of(row1, row2, row3, row4, row5, row6);
Set<KinesisFieldValueProvider> providers = new HashSet<>();
log.info("Decoding row from event JSON file");
boolean valid = rowDecoder.decodeRow(json, providers, columns, buildMap(columns));
assertTrue(valid);
assertEquals(providers.size(), columns.size());
DecoderTestUtil.checkValue(providers, row1, "otherworld");
DecoderTestUtil.checkValue(providers, row2, "joeblow");
DecoderTestUtil.checkValue(providers, row3, "{\"email\":\"joeblow@wherever.com\",\"handle\":\"joeblow\"}");
KinesisFieldValueProvider provider = DecoderTestUtil.findValueProvider(providers, row6);
assertNotNull(provider);
log.info(new String(provider.getSlice().getBytes(), StandardCharsets.UTF_8));
DecoderTestUtil.checkValue(providers, row4, 1450214872847L);
DecoderTestUtil.checkValue(providers, row5, "Chrome");
DecoderTestUtil.checkValue(providers, row6, "[\"tag1\",\"tag2\",\"tag3\"]");
log.info("DONE");
}
@Test
public void testNonExistent()
throws Exception
{
byte[] json = "{}".getBytes(StandardCharsets.UTF_8);
JsonKinesisRowDecoder rowDecoder = new JsonKinesisRowDecoder(PROVIDER.get());
KinesisColumnHandle row1 = new KinesisColumnHandle("", 0, "row1", VarcharType.VARCHAR, "very/deep/varchar", null, null, false, false);
KinesisColumnHandle row2 = new KinesisColumnHandle("", 1, "row2", BigintType.BIGINT, "no_bigint", null, null, false, false);
KinesisColumnHandle row3 = new KinesisColumnHandle("", 2, "row3", DoubleType.DOUBLE, "double/is_missing", null, null, false, false);
KinesisColumnHandle row4 = new KinesisColumnHandle("", 3, "row4", BooleanType.BOOLEAN, "hello", null, null, false, false);
List<KinesisColumnHandle> columns = ImmutableList.of(row1, row2, row3, row4);
Set<KinesisFieldValueProvider> providers = new HashSet<>();
boolean valid = rowDecoder.decodeRow(json, providers, columns, buildMap(columns));
assertTrue(valid);
assertEquals(providers.size(), columns.size());
DecoderTestUtil.checkIsNull(providers, row1);
DecoderTestUtil.checkIsNull(providers, row2);
DecoderTestUtil.checkIsNull(providers, row3);
DecoderTestUtil.checkIsNull(providers, row4);
}
@Test
public void testStringNumber()
throws Exception
{
byte[] json = "{\"a_number\":481516,\"a_string\":\"2342\"}".getBytes(StandardCharsets.UTF_8);
JsonKinesisRowDecoder rowDecoder = new JsonKinesisRowDecoder(PROVIDER.get());
KinesisColumnHandle row1 = new KinesisColumnHandle("", 0, "row1", VarcharType.VARCHAR, "a_number", null, null, false, false);
KinesisColumnHandle row2 = new KinesisColumnHandle("", 1, "row2", BigintType.BIGINT, "a_number", null, null, false, false);
KinesisColumnHandle row3 = new KinesisColumnHandle("", 2, "row3", VarcharType.VARCHAR, "a_string", null, null, false, false);
KinesisColumnHandle row4 = new KinesisColumnHandle("", 3, "row4", BigintType.BIGINT, "a_string", null, null, false, false);
List<KinesisColumnHandle> columns = ImmutableList.of(row1, row2, row3, row4);
Set<KinesisFieldValueProvider> providers = new HashSet<>();
boolean valid = rowDecoder.decodeRow(json, providers, columns, buildMap(columns));
assertTrue(valid);
assertEquals(providers.size(), columns.size());
DecoderTestUtil.checkValue(providers, row1, "481516");
DecoderTestUtil.checkValue(providers, row2, 481516);
DecoderTestUtil.checkValue(providers, row3, "2342");
DecoderTestUtil.checkValue(providers, row4, 2342);
}
}