package edu.berkeley.cs.succinct.block.json; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; import com.fasterxml.jackson.databind.node.ValueNode; import edu.berkeley.cs.succinct.DataType; import edu.berkeley.cs.succinct.SerializationException; import edu.berkeley.cs.succinct.block.BlockSerializer; import edu.berkeley.cs.succinct.util.container.IntArrayList; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Iterator; import java.util.Map; public class JsonBlockSerializer implements BlockSerializer<String> { private ObjectMapper objectMapper; private FieldMapping fieldMapping; private byte[] delimiters; private int currentDelimiterIdx; public JsonBlockSerializer(byte[] delimiters) { this.objectMapper = new ObjectMapper(); this.fieldMapping = new FieldMapping(); this.delimiters = delimiters; this.currentDelimiterIdx = 0; } @Override public SerializedData serialize(Iterator<String> data) throws SerializationException { ByteArrayOutputStream out = new ByteArrayOutputStream(); IntArrayList offsets = new IntArrayList(); int currentOffset = 0; while (data.hasNext()) { String json = data.next(); try { offsets.add(currentOffset); byte[] serializedJson = flattenToMap(json); out.write(serializedJson); currentOffset += serializedJson.length; } catch (IOException e) { throw new SerializationException(e.getMessage()); } } return new SerializedData(out.toByteArray(), offsets.toArray(), fieldMapping); } private byte[] flattenToMap(String json) throws SerializationException { ByteArrayOutputStream out = new ByteArrayOutputStream(); try { flattenJsonTree("", objectMapper.readTree(json), out); } catch (IOException e) { throw new SerializationException(e.getMessage()); } return out.toByteArray(); } private void flattenJsonTree(String currentPath, JsonNode jsonNode, ByteArrayOutputStream out) throws SerializationException { if (jsonNode.isObject()) { ObjectNode objectNode = (ObjectNode) jsonNode; Iterator<Map.Entry<String, JsonNode>> iter = objectNode.fields(); String pathPrefix = currentPath.isEmpty() ? "" : currentPath + "."; while (iter.hasNext()) { Map.Entry<String, JsonNode> entry = iter.next(); flattenJsonTree(pathPrefix + entry.getKey(), entry.getValue(), out); } } else if (jsonNode.isArray()) { throw new SerializationException("Arrays in JSON are not supported yet."); } else if (jsonNode.isValueNode()) { ValueNode valueNode = (ValueNode) jsonNode; if (!fieldMapping.containsField(currentPath)) { fieldMapping.put(currentPath, delimiters[currentDelimiterIdx++], getNodeType(jsonNode)); } else { DataType existingType = fieldMapping.getDataType(currentPath); DataType newType = getNodeType(valueNode); if (existingType != newType) { DataType encapsulatingType = DataType.encapsulatingType(existingType, newType); fieldMapping.updateType(currentPath, encapsulatingType); } } try { byte fieldByte = fieldMapping.getDelimiter(currentPath); out.write(fieldByte); out.write(valueNode.asText().getBytes()); out.write(fieldByte); } catch (IOException e) { throw new SerializationException(e.getMessage()); } } } private DataType getNodeType(JsonNode node) { if (node.isTextual()) { return DataType.STRING; } else if (node.isBoolean()) { return DataType.BOOLEAN; } else if (node.isInt()) { return DataType.INT; } else if (node.isLong()) { return DataType.LONG; } else if (node.isFloat()) { return DataType.FLOAT; } else if (node.isDouble()) { return DataType.DOUBLE; } else { throw new UnsupportedOperationException("JSON DataType not supported."); } } }