package org.rakam.collection;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.util.TokenBuffer;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.apache.avro.Schema;
import org.apache.avro.SchemaParseException;
import org.apache.avro.generic.GenericData;
import org.rakam.analysis.ApiKeyService;
import org.rakam.analysis.ConfigManager;
import org.rakam.analysis.metadata.Metastore;
import org.rakam.analysis.metadata.SchemaChecker;
import org.rakam.collection.Event.EventContext;
import org.rakam.collection.FieldDependencyBuilder.FieldDependency;
import org.rakam.config.ProjectConfig;
import org.rakam.util.AvroUtil;
import org.rakam.util.DateTimeUtils;
import org.rakam.util.JsonHelper;
import org.rakam.util.NotExistsException;
import org.rakam.util.ProjectCollection;
import org.rakam.util.RakamException;
import javax.inject.Inject;
import java.io.IOException;
import java.time.LocalTime;
import java.time.temporal.ChronoField;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import static com.fasterxml.jackson.core.JsonToken.END_ARRAY;
import static com.fasterxml.jackson.core.JsonToken.END_OBJECT;
import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
import static com.fasterxml.jackson.core.JsonToken.VALUE_NULL;
import static com.fasterxml.jackson.core.JsonToken.VALUE_NUMBER_INT;
import static com.fasterxml.jackson.core.JsonToken.VALUE_STRING;
import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST;
import static io.netty.handler.codec.http.HttpResponseStatus.FORBIDDEN;
import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR;
import static java.lang.Boolean.TRUE;
import static java.lang.String.format;
import static java.util.stream.Collectors.toList;
import static org.apache.avro.Schema.Type.NULL;
import static org.rakam.analysis.ApiKeyService.AccessKeyType.MASTER_KEY;
import static org.rakam.analysis.ApiKeyService.AccessKeyType.WRITE_KEY;
import static org.rakam.analysis.InternalConfig.FIXED_SCHEMA;
import static org.rakam.analysis.InternalConfig.USER_TYPE;
import static org.rakam.collection.FieldType.ARRAY_STRING;
import static org.rakam.collection.FieldType.MAP_STRING;
import static org.rakam.collection.FieldType.STRING;
import static org.rakam.collection.SchemaField.stripName;
import static org.rakam.util.AvroUtil.convertAvroSchema;
import static org.rakam.util.ValidationUtil.checkCollection;
import static org.rakam.util.ValidationUtil.checkCollectionValid;
public class JsonEventDeserializer
extends JsonDeserializer<Event>
{
private final Map<String, List<SchemaField>> conditionalMagicFields;
private final Metastore metastore;
private final Cache<ProjectCollection, Map.Entry<List<SchemaField>, Schema>> schemaCache =
CacheBuilder
.newBuilder()
.expireAfterWrite(5, TimeUnit.MINUTES).build();
private final Set<SchemaField> constantFields;
private final ApiKeyService apiKeyService;
private final ConfigManager configManager;
private final SchemaChecker schemaChecker;
private final ProjectConfig projectConfig;
@Inject
public JsonEventDeserializer(Metastore metastore,
ApiKeyService apiKeyService,
ConfigManager configManager,
SchemaChecker schemaChecker,
ProjectConfig projectConfig,
FieldDependency fieldDependency)
{
this.metastore = metastore;
this.conditionalMagicFields = fieldDependency.dependentFields;
this.apiKeyService = apiKeyService;
this.schemaChecker = schemaChecker;
this.projectConfig = projectConfig;
this.configManager = configManager;
this.constantFields = fieldDependency.constantFields;
}
@Override
public Event deserialize(JsonParser jp, DeserializationContext ctx)
throws IOException
{
Object project = ctx.getAttribute("project");
Object masterKey = ctx.getAttribute("master_key");
return deserializeWithProject(
jp,
project != null ? project.toString() : null,
null,
Boolean.TRUE.equals(masterKey));
}
public Event deserializeWithProject(JsonParser jp, String project, EventContext mainApi, boolean masterKey)
throws IOException, RakamException
{
Map.Entry<List<SchemaField>, GenericData.Record> properties = null;
String collection = null;
JsonToken t = jp.getCurrentToken();
if (t == JsonToken.START_OBJECT) {
t = jp.nextToken();
}
TokenBuffer propertiesBuffer = null;
EventContext api = null;
for (; t == JsonToken.FIELD_NAME; t = jp.nextToken()) {
String fieldName = jp.getCurrentName();
t = jp.nextToken();
switch (fieldName) {
case "collection":
if (t != VALUE_STRING) {
throw new RakamException("collection parameter must be a string", BAD_REQUEST);
}
collection = checkCollectionValid(jp.getValueAsString().toLowerCase());
break;
case "event_id":
if (t != VALUE_NUMBER_INT) {
throw new RakamException("event_id must be numeric", BAD_REQUEST);
}
break;
case "api":
if (api != null) {
throw new RakamException("api is already set", BAD_REQUEST);
}
api = jp.readValueAs(EventContext.class);
break;
case "properties":
t = jp.getCurrentToken();
if (t != START_OBJECT) {
throw new RakamException("properties must be an object", BAD_REQUEST);
}
if (collection == null) {
propertiesBuffer = jp.readValueAs(TokenBuffer.class);
}
else {
if (project == null) {
if (api == null) {
throw new RakamException("api parameter is required", BAD_REQUEST);
}
if (api.apiKey == null) {
throw new RakamException("api.api_key is required", BAD_REQUEST);
}
try {
project = apiKeyService.getProjectOfApiKey(api.apiKey, WRITE_KEY);
}
catch (RakamException e) {
try {
project = apiKeyService.getProjectOfApiKey(api.apiKey, MASTER_KEY);
}
catch (Exception e1) {
if (e.getStatusCode() == FORBIDDEN) {
throw new RakamException("api_key is invalid", FORBIDDEN);
}
throw e;
}
masterKey = true;
}
}
if (collection == null) {
throw new RakamException("Collection is not set.", BAD_REQUEST);
}
properties = parseProperties(project, collection, jp, masterKey);
t = jp.getCurrentToken();
if (t != END_OBJECT) {
if (t == JsonToken.START_OBJECT) {
throw new RakamException("Nested properties are not supported.", BAD_REQUEST);
}
else {
throw new RakamException("Error while de-serializing event", INTERNAL_SERVER_ERROR);
}
}
}
break;
default:
throw new RakamException(String.format("Unrecognized field '%s'. Should be one of (api, collection, properties)", fieldName), BAD_REQUEST);
}
}
if (properties == null) {
if (propertiesBuffer != null) {
if (project == null) {
try {
project = apiKeyService.getProjectOfApiKey(api.apiKey, WRITE_KEY);
}
catch (RakamException e) {
project = apiKeyService.getProjectOfApiKey(api.apiKey, MASTER_KEY);
masterKey = true;
}
}
JsonParser fakeJp = propertiesBuffer.asParser(jp);
// pass START_OBJECT
fakeJp.nextToken();
properties = parseProperties(project, collection, fakeJp, masterKey);
}
else {
throw new JsonMappingException(jp, "properties is null");
}
}
return new Event(project, collection, api == null ? mainApi : api, properties.getKey(), properties.getValue());
}
public Map.Entry<List<SchemaField>, GenericData.Record> parseProperties(String project, String collection, JsonParser jp, boolean masterKey)
throws IOException, NotExistsException
{
ProjectCollection key = new ProjectCollection(project, collection);
Map.Entry<List<SchemaField>, Schema> schema = schemaCache.getIfPresent(key);
boolean isNew = schema == null;
if (schema == null) {
List<SchemaField> rakamSchema = metastore.getCollection(project, collection);
schema = new SimpleImmutableEntry<>(rakamSchema, convertAvroSchema(
rakamSchema == null ? ImmutableList.copyOf(constantFields) : rakamSchema,
conditionalMagicFields));
schemaCache.put(key, schema);
}
Schema avroSchema = schema.getValue();
List<SchemaField> rakamSchema = schema.getKey();
GenericData.Record record = new GenericData.Record(avroSchema);
List<SchemaField> newFields = null;
JsonToken t = jp.nextToken();
for (; t == JsonToken.FIELD_NAME; t = jp.nextToken()) {
String fieldName = jp.getCurrentName();
Schema.Field field = avroSchema.getField(fieldName);
jp.nextToken();
if (field == null) {
field = avroSchema.getField(stripName(fieldName, "field name"));
if (field == null) {
FieldType type = getTypeForUnknown(jp);
if (type != null) {
if (newFields == null) {
newFields = new ArrayList<>();
}
if (fieldName.equals(projectConfig.getUserColumn())) {
// the type of magic _user field must be consistent between collections
if (type.isArray() || type.isMap()) {
throw new RakamException("_user field must be numeric or string.", BAD_REQUEST);
}
final FieldType eventUserType = type.isNumeric() ? (type != FieldType.INTEGER ? FieldType.LONG : FieldType.INTEGER) :
STRING;
type = configManager.setConfigOnce(project, USER_TYPE.name(), eventUserType);
}
SchemaField newField = new SchemaField(fieldName, type);
newFields.add(newField);
avroSchema = createNewSchema(avroSchema, newField);
field = avroSchema.getField(newField.getName());
GenericData.Record newRecord = new GenericData.Record(avroSchema);
for (Schema.Field f : record.getSchema().getFields()) {
newRecord.put(f.name(), record.get(f.name()));
}
record = newRecord;
if (type.isArray() || type.isMap()) {
// if the type of new field is ARRAY, we already switched to next token
// so current token is not START_ARRAY.
record.put(field.pos(), getValue(jp, type, field, true));
}
else {
record.put(field.pos(), getValue(jp, type, field, false));
}
continue;
}
else {
// the type is null or an empty array
t = jp.getCurrentToken();
continue;
}
}
}
else {
if (field.schema().getType() == NULL) {
// TODO: get rid of this loop.
for (SchemaField schemaField : conditionalMagicFields.get(fieldName)) {
if (avroSchema.getField(schemaField.getName()) == null) {
if (newFields == null) {
newFields = new ArrayList<>();
}
newFields.add(schemaField);
}
}
}
}
FieldType type = field.schema().getType() == NULL ? null :
(field.pos() >= rakamSchema.size() ?
newFields.get(field.pos() - rakamSchema.size()) : rakamSchema.get(field.pos())).getType();
Object value = getValue(jp, type, field, false);
record.put(field.pos(), value);
}
if (newFields != null) {
if (!masterKey && TRUE.equals(configManager.getConfig(project, FIXED_SCHEMA.name(), Boolean.class))) {
throw new RakamException("Schema is invalid", BAD_REQUEST);
}
if (isNew) {
if (!newFields.stream().anyMatch(e -> e.getName().equals("_user"))) {
newFields.add(new SchemaField("_user", configManager.setConfigOnce(project, USER_TYPE.name(), STRING)));
}
}
rakamSchema = metastore.getOrCreateCollectionFieldList(project, collection,
schemaChecker.checkNewFields(collection, ImmutableSet.copyOf(newFields)));
Schema newAvroSchema = convertAvroSchema(rakamSchema, conditionalMagicFields);
schemaCache.put(key, new SimpleImmutableEntry<>(rakamSchema, newAvroSchema));
GenericData.Record newRecord = new GenericData.Record(newAvroSchema);
for (Schema.Field field : record.getSchema().getFields()) {
Object value = record.get(field.name());
newRecord.put(field.name(), value);
}
record = newRecord;
}
return new SimpleImmutableEntry<>(rakamSchema, record);
}
private Schema createNewSchema(Schema currentSchema, SchemaField newField)
{
List<Schema.Field> avroFields = currentSchema.getFields().stream()
.filter(field -> field.schema().getType() != Schema.Type.NULL)
.map(field -> new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultValue()))
.collect(toList());
try {
avroFields.add(AvroUtil.generateAvroField(newField));
}
catch (SchemaParseException e) {
throw new RakamException("Couldn't create new column: " + e.getMessage(), BAD_REQUEST);
}
conditionalMagicFields.keySet().stream()
.filter(s -> !avroFields.stream().anyMatch(af -> af.name().equals(s)))
.map(n -> new Schema.Field(n, Schema.create(NULL), "", null))
.forEach(x -> avroFields.add(x));
Schema avroSchema = Schema.createRecord("collection", null, null, false);
avroSchema.setFields(avroFields);
return avroSchema;
}
public static Object getValueOfMagicField(JsonParser jp)
throws IOException
{
switch (jp.getCurrentToken()) {
case VALUE_TRUE:
return TRUE;
case VALUE_FALSE:
return Boolean.FALSE;
case VALUE_NUMBER_FLOAT:
return jp.getValueAsDouble();
case VALUE_NUMBER_INT:
return jp.getValueAsLong();
case VALUE_STRING:
return jp.getValueAsString();
case VALUE_NULL:
return null;
default:
throw new RakamException("The value of magic field is unknown", BAD_REQUEST);
}
}
private Object getValue(JsonParser jp, FieldType type, Schema.Field field, boolean passInitialToken)
throws IOException
{
if (type == null) {
return getValueOfMagicField(jp);
}
if (jp.getCurrentToken().isScalarValue() && !passInitialToken) {
if (jp.getCurrentToken() == VALUE_NULL) {
return null;
}
switch (type) {
case STRING:
String valueAsString = jp.getValueAsString();
if (valueAsString.length() > 100) {
return valueAsString.substring(0, 100);
}
return valueAsString;
case BOOLEAN:
return jp.getValueAsBoolean();
case LONG:
case DECIMAL:
return jp.getValueAsLong();
case INTEGER:
return jp.getValueAsInt();
case TIME:
try {
return (long) LocalTime.parse(jp.getValueAsString())
.get(ChronoField.MILLI_OF_DAY);
}
catch (Exception e) {
return null;
// throw new RakamException(String.format("Unable to parse TIME value '%s'", jp.getValueAsString()),
// BAD_REQUEST);
}
case DOUBLE:
return jp.getValueAsDouble();
case TIMESTAMP:
if (jp.getValueAsString().isEmpty()) {
return null;
}
if (jp.getCurrentToken().isNumeric()) {
return jp.getValueAsLong();
}
try {
return DateTimeUtils.parseTimestamp(jp.getValueAsString());
}
catch (Exception e) {
if (field.name().equals(projectConfig.getTimeColumn())) {
throw new RakamException(String.format("Unable to parse TIMESTAMP value '%s' in time column", jp.getValueAsString()),
BAD_REQUEST);
}
return null;
}
case DATE:
if (jp.getValueAsString().isEmpty()) {
return null;
}
try {
return DateTimeUtils.parseDate(jp.getValueAsString());
}
catch (Exception e) {
if (field.name().equals(projectConfig.getTimeColumn())) {
throw new RakamException(String.format("Unable to parse DATE value '%s' in time column", jp.getValueAsString()),
BAD_REQUEST);
}
return null;
}
default:
throw new JsonMappingException(jp, format("Scalar value '%s' cannot be cast to %s type for '%s' field.",
jp.getValueAsString(), type.name(), field.name()));
}
}
else {
Schema actualSchema = field.schema().getTypes().get(1);
if (type.isMap()) {
JsonToken t = jp.getCurrentToken();
Map<String, Object> map = new HashMap<>();
if (!passInitialToken) {
if (t != JsonToken.START_OBJECT) {
jp.skipChildren();
return null;
}
else {
t = jp.nextToken();
}
}
else {
// In order to determine the value type of map, getTypeForUnknown method performed an extra
// jp.nextToken() so the cursor should be at VALUE_STRING token.
String key = jp.getCurrentName();
Object value;
if (t.isScalarValue()) {
value = getValue(jp, type.getMapValueType(), null, false);
}
else {
value = JsonHelper.encode(jp.readValueAsTree());
}
map.put(key, value);
t = jp.nextToken();
}
for (; t == JsonToken.FIELD_NAME; t = jp.nextToken()) {
String key = jp.getCurrentName();
Object value;
if (!jp.nextToken().isScalarValue()) {
if (type.getMapValueType() != STRING) {
throw new JsonMappingException(jp, String.format("Nested properties are not supported if the type is not MAP_STRING. ('%s' field)", field.name()));
}
value = JsonHelper.encode(jp.readValueAsTree());
}
else {
value = getValue(jp, type.getMapValueType(), null, false);
}
map.put(key, value);
}
return map;
}
else if (type.isArray()) {
JsonToken t = jp.getCurrentToken();
// if the passStartArrayToken is true, we already performed jp.nextToken
// so there is no need to check if the current token is START_ARRAY
if (!passInitialToken) {
if (t != JsonToken.START_ARRAY) {
return null;
}
else {
t = jp.nextToken();
}
}
List<Object> objects = new ArrayList<>();
for (; t != JsonToken.END_ARRAY; t = jp.nextToken()) {
if (!t.isScalarValue()) {
if (type.getArrayElementType() != STRING) {
throw new JsonMappingException(jp, String.format("Nested properties are not supported if the type is not MAP_STRING. ('%s' field)", field.name()));
}
objects.add(JsonHelper.encode(jp.readValueAsTree()));
}
else {
objects.add(getValue(jp, type.getArrayElementType(), null, false));
}
}
return new GenericData.Array(actualSchema, objects);
}
else {
if (type == STRING) {
return JsonHelper.encode(jp.readValueAs(TokenBuffer.class));
}
else {
jp.skipChildren();
return null;
// throw new JsonMappingException(jp, String.format("Cannot cast object to %s for '%s' field", type.name(), field.name()));
}
}
}
}
private static FieldType getTypeForUnknown(JsonParser jp)
throws IOException
{
switch (jp.getCurrentToken()) {
case VALUE_NULL:
return null;
case VALUE_STRING:
String value = jp.getValueAsString();
try {
DateTimeUtils.parseDate(value);
return FieldType.DATE;
}
catch (Exception e) {
}
try {
DateTimeUtils.parseTimestamp(value);
return FieldType.TIMESTAMP;
}
catch (Exception e) {
}
return STRING;
case VALUE_FALSE:
return FieldType.BOOLEAN;
case VALUE_NUMBER_FLOAT:
case VALUE_NUMBER_INT:
return FieldType.DOUBLE;
case VALUE_TRUE:
return FieldType.BOOLEAN;
case START_ARRAY:
JsonToken t = jp.nextToken();
if (t == JsonToken.END_ARRAY) {
// if the array is null, return null as value.
// TODO: if the key already has a type, return that type instead of null.
return null;
}
FieldType type;
if (t.isScalarValue()) {
type = getTypeForUnknown(jp);
}
else {
type = MAP_STRING;
}
if (type == null) {
// TODO: what if the other values are not null?
while (t != END_ARRAY) {
if (!t.isScalarValue()) {
return ARRAY_STRING;
}
else {
t = jp.nextToken();
}
}
return null;
}
if (type.isArray() || type.isMap()) {
return ARRAY_STRING;
}
return type.convertToArrayType();
case START_OBJECT:
t = jp.nextToken();
if (t == JsonToken.END_OBJECT) {
// if the map is null, return null as value.
// TODO: if the key already has a type, return that type instead of null.
return null;
}
if (t != JsonToken.FIELD_NAME) {
throw new IllegalArgumentException();
}
t = jp.nextToken();
if (!t.isScalarValue()) {
return MAP_STRING;
}
type = getTypeForUnknown(jp);
if (type == null) {
// TODO: what if the other values are not null?
while (t != END_OBJECT) {
if (!t.isScalarValue()) {
return MAP_STRING;
}
else {
t = jp.nextToken();
}
}
jp.nextToken();
return null;
}
if (type.isArray() || type.isMap()) {
return MAP_STRING;
}
return type.convertToMapValueType();
default:
throw new JsonMappingException(jp, format("The type is not supported: %s", jp.getValueAsString()));
}
}
@VisibleForTesting
public void cleanCache()
{
schemaCache.invalidateAll();
}
}