/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.operator.scalar; import com.facebook.presto.spi.PrestoException; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.core.io.SerializedString; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import io.airlift.slice.DynamicSliceOutput; import io.airlift.slice.Slice; import java.io.IOException; import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; import static com.facebook.presto.util.JsonUtil.createJsonGenerator; import static com.facebook.presto.util.JsonUtil.createJsonParser; import static com.fasterxml.jackson.core.JsonFactory.Feature.CANONICALIZE_FIELD_NAMES; import static com.fasterxml.jackson.core.JsonToken.END_ARRAY; import static com.fasterxml.jackson.core.JsonToken.END_OBJECT; import static com.fasterxml.jackson.core.JsonToken.FIELD_NAME; import static com.fasterxml.jackson.core.JsonToken.START_ARRAY; import static com.fasterxml.jackson.core.JsonToken.START_OBJECT; import static com.fasterxml.jackson.core.JsonToken.VALUE_NULL; import static io.airlift.slice.Slices.utf8Slice; import static java.util.Objects.requireNonNull; /** * Extracts values from JSON * <p/> * Supports the following JSON path primitives: * <pre> * $ : Root object * . or [] : Child operator * [] : Subscript operator for array * </pre> * <p/> * Supported JSON Path Examples: * <pre> * { "store": { * "book": [ * { "category": "reference", * "author": "Nigel Rees", * "title": "Sayings of the Century", * "price": 8.95, * "contributors": [["Adam", "Levine"], ["Bob", "Strong"]] * }, * { "category": "fiction", * "author": "Evelyn Waugh", * "title": "Sword of Honour", * "price": 12.99, * "isbn": "0-553-21311-3", * "last_owner": null * } * ], * "bicycle": { * "color": "red", * "price": 19.95 * } * } * } * </pre> * <p/> * With only scalar values using dot-notation of path: * <pre> * $.store.book[0].author => Nigel Rees * $.store.bicycle.price => 19.95 * $.store.book[0].isbn => NULL (Doesn't exist becomes java null) * $.store.book[1].last_owner => NULL (json null becomes java null) * $.store.book[0].contributors[0][1] => Levine * </pre> * <p/> * With json values using dot-notation of path: * <pre> * $.store.book[0].author => "Nigel Rees" * $.store.bicycle.price => 19.95 * $.store.book[0].isbn => NULL (Doesn't exist becomes java null) * $.store.book[1].last_owner => null (json null becomes the string "null") * $.store.book[0].contributors[0] => ["Adam", "Levine"] * $.store.bicycle => {"color": "red", "price": 19.95} * </pre> * With only scalar values using bracket-notation of path: * <pre> * $["store"]["book"][0]["author"] => Nigel Rees * $["store"]["bicycle"]["price"] => 19.95 * $["store"]["book"][0]["isbn"] => NULL (Doesn't exist becomes java null) * $["store"]["book"][1]["last_owner"] => NULL (json null becomes java null) * $["store"]["book"][0]["contributors"][0][1] => Levine * </pre> * <p/> * With json values using bracket-notation of path: * <pre> * $["store"]["book"][0]["author"] => "Nigel Rees" * $["store"]["bicycle"]["price"] => 19.95 * $["store"]["book"][0]["isbn"] => NULL (Doesn't exist becomes java null) * $["store"]["book"][1]["last_owner"] => null (json null becomes the string "null") * $["store"]["book"][0]["contributors"][0] => ["Adam", "Levine"] * $["store"]["bicycle"] => {"color": "red", "price": 19.95} * </pre> */ public final class JsonExtract { private static final int ESTIMATED_JSON_OUTPUT_SIZE = 512; private static final JsonFactory JSON_FACTORY = new JsonFactory() .disable(CANONICALIZE_FIELD_NAMES); private JsonExtract() {} public static <T> T extract(Slice jsonInput, JsonExtractor<T> jsonExtractor) { requireNonNull(jsonInput, "jsonInput is null"); try { try (JsonParser jsonParser = createJsonParser(JSON_FACTORY, jsonInput)) { // Initialize by advancing to first token and make sure it exists if (jsonParser.nextToken() == null) { return null; } return jsonExtractor.extract(jsonParser); } } catch (JsonParseException e) { // Return null if we failed to parse something return null; } catch (IOException e) { throw Throwables.propagate(e); } } public static <T> JsonExtractor<T> generateExtractor(String path, JsonExtractor<T> rootExtractor) { return generateExtractor(path, rootExtractor, false); } public static <T> JsonExtractor<T> generateExtractor(String path, JsonExtractor<T> rootExtractor, boolean exceptionOnOutOfBounds) { ImmutableList<String> tokens = ImmutableList.copyOf(new JsonPathTokenizer(path)); JsonExtractor<T> jsonExtractor = rootExtractor; for (String token : tokens.reverse()) { jsonExtractor = new ObjectFieldJsonExtractor<>(token, jsonExtractor, exceptionOnOutOfBounds); } return jsonExtractor; } public interface JsonExtractor<T> { /** * Executes the extraction on the existing content of the JsonParser and outputs the match. * <p/> * Notes: * <ul> * <li>JsonParser must be on the FIRST token of the value to be processed when extract is called</li> * <li>INVARIANT: when extract() returns, the current token of the parser will be the LAST token of the value</li> * </ul> * * @return the value, or null if not applicable */ T extract(JsonParser jsonParser) throws IOException; } public static class ObjectFieldJsonExtractor<T> implements JsonExtractor<T> { private final SerializedString fieldName; private final JsonExtractor<? extends T> delegate; private final int index; private final boolean exceptionOnOutOfBounds; public ObjectFieldJsonExtractor(String fieldName, JsonExtractor<? extends T> delegate) { this(fieldName, delegate, false); } public ObjectFieldJsonExtractor(String fieldName, JsonExtractor<? extends T> delegate, boolean exceptionOnOutOfBounds) { this.fieldName = new SerializedString(requireNonNull(fieldName, "fieldName is null")); this.delegate = requireNonNull(delegate, "delegate is null"); this.exceptionOnOutOfBounds = exceptionOnOutOfBounds; this.index = tryParseInt(fieldName, -1); } @Override public T extract(JsonParser jsonParser) throws IOException { if (jsonParser.getCurrentToken() == START_OBJECT) { return processJsonObject(jsonParser); } if (jsonParser.getCurrentToken() == START_ARRAY) { return processJsonArray(jsonParser); } throw new JsonParseException("Expected a JSON object or array", jsonParser.getCurrentLocation()); } public T processJsonObject(JsonParser jsonParser) throws IOException { while (!jsonParser.nextFieldName(fieldName)) { if (!jsonParser.hasCurrentToken()) { throw new JsonParseException("Unexpected end of object", jsonParser.getCurrentLocation()); } if (jsonParser.getCurrentToken() == END_OBJECT) { // Unable to find matching field return null; } jsonParser.skipChildren(); // Skip nested structure if currently at the start of one } jsonParser.nextToken(); // Shift to first token of the value return delegate.extract(jsonParser); } public T processJsonArray(JsonParser jsonParser) throws IOException { int currentIndex = 0; while (true) { JsonToken token = jsonParser.nextToken(); if (token == null) { throw new JsonParseException("Unexpected end of array", jsonParser.getCurrentLocation()); } if (token == END_ARRAY) { // Index out of bounds if (exceptionOnOutOfBounds) { throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Index out of bounds"); } return null; } if (currentIndex == index) { break; } currentIndex++; jsonParser.skipChildren(); // Skip nested structure if currently at the start of one } return delegate.extract(jsonParser); } } public static class ScalarValueJsonExtractor implements JsonExtractor<Slice> { @Override public Slice extract(JsonParser jsonParser) throws IOException { JsonToken token = jsonParser.getCurrentToken(); if (token == null) { throw new JsonParseException("Unexpected end of value", jsonParser.getCurrentLocation()); } if (!token.isScalarValue() || token == VALUE_NULL) { return null; } return utf8Slice(jsonParser.getText()); } } public static class JsonValueJsonExtractor implements JsonExtractor<Slice> { @Override public Slice extract(JsonParser jsonParser) throws IOException { if (!jsonParser.hasCurrentToken()) { throw new JsonParseException("Unexpected end of value", jsonParser.getCurrentLocation()); } DynamicSliceOutput dynamicSliceOutput = new DynamicSliceOutput(ESTIMATED_JSON_OUTPUT_SIZE); try (JsonGenerator jsonGenerator = createJsonGenerator(JSON_FACTORY, dynamicSliceOutput)) { jsonGenerator.copyCurrentStructure(jsonParser); } return dynamicSliceOutput.slice(); } } public static class JsonSizeExtractor implements JsonExtractor<Long> { @Override public Long extract(JsonParser jsonParser) throws IOException { if (!jsonParser.hasCurrentToken()) { throw new JsonParseException("Unexpected end of value", jsonParser.getCurrentLocation()); } if (jsonParser.getCurrentToken() == START_ARRAY) { long length = 0; while (true) { JsonToken token = jsonParser.nextToken(); if (token == null) { return null; } if (token == END_ARRAY) { return length; } jsonParser.skipChildren(); length++; } } if (jsonParser.getCurrentToken() == START_OBJECT) { long length = 0; while (true) { JsonToken token = jsonParser.nextToken(); if (token == null) { return null; } if (token == END_OBJECT) { return length; } if (token == FIELD_NAME) { length++; } else { jsonParser.skipChildren(); } } } return 0L; } } private static int tryParseInt(String fieldName, int defaultValue) { int index = defaultValue; try { index = Integer.parseInt(fieldName); } catch (NumberFormatException ignored) { } return index; } }