/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tajo.datum.protobuf; import com.google.protobuf.*; import com.google.protobuf.Descriptors.Descriptor; import com.google.protobuf.Descriptors.EnumDescriptor; import com.google.protobuf.Descriptors.EnumValueDescriptor; import com.google.protobuf.Descriptors.FieldDescriptor; import java.io.IOException; import java.math.BigInteger; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; public class ProtobufJsonFormat extends AbstractCharBasedFormatter { private final static ProtobufJsonFormat instance; static { instance = new ProtobufJsonFormat(); } public static ProtobufJsonFormat getInstance() { return instance; } private ProtobufJsonFormat() { super(); } /** * Outputs a textual representation of the Protocol Message supplied into the parameter output. * (This representation is the new version of the classic "ProtocolPrinter" output from the * original Protocol Buffer system) */ public void print(final Message message, Appendable output) throws IOException { JsonGenerator generator = new JsonGenerator(output); generator.print("{"); print(message, generator); generator.print("}"); } /** * Outputs a textual representation of {@code fields} to {@code output}. */ public void print(final UnknownFieldSet fields, Appendable output) throws IOException { JsonGenerator generator = new JsonGenerator(output); generator.print("{"); printUnknownFields(fields, generator); generator.print("}"); } protected void print(Message message, JsonGenerator generator) throws IOException { for (Iterator<Map.Entry<FieldDescriptor, Object>> iter = message.getAllFields().entrySet().iterator(); iter.hasNext();) { Map.Entry<FieldDescriptor, Object> field = iter.next(); printField(field.getKey(), field.getValue(), generator); if (iter.hasNext()) { generator.print(","); } } if (message.getUnknownFields().asMap().size() > 0) generator.print(", "); printUnknownFields(message.getUnknownFields(), generator); } public void printField(FieldDescriptor field, Object value, JsonGenerator generator) throws IOException { printSingleField(field, value, generator); } private void printSingleField(FieldDescriptor field, Object value, JsonGenerator generator) throws IOException { if (field.isExtension()) { generator.print("\""); // We special-case MessageSet elements for compatibility with proto1. if (field.getContainingType().getOptions().getMessageSetWireFormat() && (field.getType() == FieldDescriptor.Type.MESSAGE) && (field.isOptional()) // object equality && (field.getExtensionScope() == field.getMessageType())) { generator.print(field.getMessageType().getFullName()); } else { generator.print(field.getFullName()); } generator.print("\""); } else { generator.print("\""); if (field.getType() == FieldDescriptor.Type.GROUP) { // Groups must be serialized with their original capitalization. generator.print(field.getMessageType().getName()); } else { generator.print(field.getName()); } generator.print("\""); } // Done with the name, on to the value if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { generator.print(": "); generator.indent(); } else { generator.print(": "); } if (field.isRepeated()) { // Repeated field. Print each element. generator.print("["); for (Iterator<?> iter = ((List<?>) value).iterator(); iter.hasNext();) { printFieldValue(field, iter.next(), generator); if (iter.hasNext()) { generator.print(","); } } generator.print("]"); } else { printFieldValue(field, value, generator); if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { generator.outdent(); } } } private void printFieldValue(FieldDescriptor field, Object value, JsonGenerator generator) throws IOException { switch (field.getType()) { case INT32: case INT64: case SINT32: case SINT64: case SFIXED32: case SFIXED64: case FLOAT: case DOUBLE: case BOOL: // Good old toString() does what we want for these types. generator.print(value.toString()); break; case UINT32: case FIXED32: generator.print(TextUtils.unsignedToString((Integer) value)); break; case UINT64: case FIXED64: generator.print(TextUtils.unsignedToString((Long) value)); break; case STRING: generator.print("\""); generator.print(escapeText((String) value)); generator.print("\""); break; case BYTES: { generator.print("\""); generator.print(escapeBytes((ByteString) value)); generator.print("\""); break; } case ENUM: { generator.print("\""); generator.print(((EnumValueDescriptor) value).getName()); generator.print("\""); break; } case MESSAGE: case GROUP: generator.print("{"); print((Message) value, generator); generator.print("}"); break; } } protected void printUnknownFields(UnknownFieldSet unknownFields, JsonGenerator generator) throws IOException { boolean firstField = true; for (Map.Entry<Integer, UnknownFieldSet.Field> entry : unknownFields.asMap().entrySet()) { UnknownFieldSet.Field field = entry.getValue(); if (firstField) {firstField = false;} else {generator.print(", ");} generator.print("\""); generator.print(entry.getKey().toString()); generator.print("\""); generator.print(": ["); boolean firstValue = true; for (long value : field.getVarintList()) { if (firstValue) {firstValue = false;} else {generator.print(", ");} generator.print(TextUtils.unsignedToString(value)); } for (int value : field.getFixed32List()) { if (firstValue) {firstValue = false;} else {generator.print(", ");} generator.print(String.format((Locale) null, "0x%08x", value)); } for (long value : field.getFixed64List()) { if (firstValue) {firstValue = false;} else {generator.print(", ");} generator.print(String.format((Locale) null, "0x%016x", value)); } for (ByteString value : field.getLengthDelimitedList()) { if (firstValue) {firstValue = false;} else {generator.print(", ");} generator.print("\""); generator.print(escapeBytes(value)); generator.print("\""); } for (UnknownFieldSet value : field.getGroupList()) { if (firstValue) {firstValue = false;} else {generator.print(", ");} generator.print("{"); printUnknownFields(value, generator); generator.print("}"); } generator.print("]"); } } /** * An inner class for writing text to the output stream. */ protected static class JsonGenerator { Appendable output; boolean atStartOfLine = true; StringBuilder indent = new StringBuilder(); public JsonGenerator(Appendable output) { this.output = output; } /** * Indent text by two spaces. After calling Indent(), two spaces will be inserted at the * beginning of each line of text. Indent() may be called multiple times to produce deeper * indents. */ public void indent() { indent.append(" "); } /** * Reduces the current indent level by two spaces, or crashes if the indent level is zero. */ public void outdent() { int length = indent.length(); if (length == 0) { throw new IllegalArgumentException(" Outdent() without matching Indent()."); } indent.delete(length - 2, length); } /** * Print text to the output stream. */ public void print(CharSequence text) throws IOException { int size = text.length(); int pos = 0; for (int i = 0; i < size; i++) { if (text.charAt(i) == '\n') { write(text.subSequence(pos, size), i - pos + 1); pos = i + 1; atStartOfLine = true; } } write(text.subSequence(pos, size), size - pos); } private void write(CharSequence data, int size) throws IOException { if (size == 0) { return; } if (atStartOfLine) { atStartOfLine = false; output.append(indent); } output.append(data); } } // ================================================================= // Parsing /** * Represents a stream of tokens parsed from a {@code String}. * <p/> * <p> * The Java standard library provides many classes that you might think would be useful for * implementing this, but aren't. For example: * <p/> * <ul> * <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, at least, something * that would get us close to what we want -- except for one fatal flaw: It automatically * un-escapes strings using Java escape sequences, which do not include all the escape sequences * we need to support (e.g. '\x'). * <li>{@code java.util.Scanner}: This seems like a great way at least to parse regular * expressions out of a stream (so we wouldn't have to load the entire input into a single * string before parsing). Sadly, {@code Scanner} requires that tokens be delimited with some * delimiter. Thus, although the text "foo:" should parse to two tokens ("foo" and ":"), {@code * Scanner} would recognize it only as a single token. Furthermore, {@code Scanner} provides no * way to inspect the contents of delimiters, making it impossible to keep track of line and * column numbers. * </ul> * <p/> * <p> * Luckily, Java's regular expression support does manage to be useful to us. (Barely: We need * {@code Matcher.usePattern()}, which is new in Java 1.5.) So, we can use that, at least. * Unfortunately, this implies that we need to have the entire input in one contiguous string. */ protected static class Tokenizer { private final CharSequence text; private final Matcher matcher; private String currentToken; // The character index within this.text at which the current token begins. private int pos = 0; // The line and column numbers of the current token. private int line = 0; private int column = 0; // The line and column numbers of the previous token (allows throwing // errors *after* consuming). private int previousLine = 0; private int previousColumn = 0; // We use possesive quantifiers (*+ and ++) because otherwise the Java // regex matcher has stack overflows on large inputs. private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE); private static final Pattern TOKEN = Pattern.compile( "[a-zA-Z_][0-9a-zA-Z_+-]*+|" + // an identifier "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" + // a number "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" + // a double-quoted string "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string Pattern.MULTILINE); private static final Pattern DOUBLE_INFINITY = Pattern.compile( "-?inf(inity)?", Pattern.CASE_INSENSITIVE); private static final Pattern FLOAT_INFINITY = Pattern.compile( "-?inf(inity)?f?", Pattern.CASE_INSENSITIVE); private static final Pattern FLOAT_NAN = Pattern.compile( "nanf?", Pattern.CASE_INSENSITIVE); /** * Construct a tokenizer that parses tokens from the given text. */ public Tokenizer(CharSequence text) { this.text = text; matcher = WHITESPACE.matcher(text); skipWhitespace(); nextToken(); } /** * Are we at the end of the input? */ public boolean atEnd() { return currentToken.length() == 0; } /** * Advance to the next token. */ public void nextToken() { previousLine = line; previousColumn = column; // Advance the line counter to the current position. while (pos < matcher.regionStart()) { if (text.charAt(pos) == '\n') { ++line; column = 0; } else { ++column; } ++pos; } // Match the next token. if (matcher.regionStart() == matcher.regionEnd()) { // EOF currentToken = ""; } else { matcher.usePattern(TOKEN); if (matcher.lookingAt()) { currentToken = matcher.group(); matcher.region(matcher.end(), matcher.regionEnd()); } else { // Take one character. currentToken = String.valueOf(text.charAt(pos)); matcher.region(pos + 1, matcher.regionEnd()); } skipWhitespace(); } } /** * Skip over any whitespace so that the matcher region starts at the next token. */ private void skipWhitespace() { matcher.usePattern(WHITESPACE); if (matcher.lookingAt()) { matcher.region(matcher.end(), matcher.regionEnd()); } } /** * If the next token exactly matches {@code token}, consume it and return {@code true}. * Otherwise, return {@code false} without doing anything. */ public boolean tryConsume(String token) { if (currentToken.equals(token)) { nextToken(); return true; } else { return false; } } /** * If the next token exactly matches {@code token}, consume it. Otherwise, throw a * {@link ParseException}. */ public void consume(String token) throws ParseException { if (!tryConsume(token)) { throw parseException("Expected \"" + token + "\"."); } } /** * Returns {@code true} if the next token is an integer, but does not consume it. */ public boolean lookingAtInteger() { if (currentToken.length() == 0) { return false; } char c = currentToken.charAt(0); return (('0' <= c) && (c <= '9')) || (c == '-') || (c == '+'); } /** * Returns {@code true} if the next token is a boolean (true/false), but does not consume it. */ public boolean lookingAtBoolean() { if (currentToken.length() == 0) { return false; } return ("true".equals(currentToken) || "false".equals(currentToken)); } /** * @return currentToken to which the Tokenizer is pointing. */ public String currentToken() { return currentToken; } /** * If the next token is an identifier, consume it and return its value. Otherwise, throw a * {@link ParseException}. */ public String consumeIdentifier() throws ParseException { for (int i = 0; i < currentToken.length(); i++) { char c = currentToken.charAt(i); if ((('a' <= c) && (c <= 'z')) || (('A' <= c) && (c <= 'Z')) || (('0' <= c) && (c <= '9')) || (c == '_') || (c == '.') || (c == '"')) { // OK } else { throw parseException("Expected identifier. -" + c); } } String result = currentToken; // Need to clean-up result to remove quotes of any kind result = result.replaceAll("\"|'", ""); nextToken(); return result; } /** * If the next token is a 32-bit signed integer, consume it and return its value. Otherwise, * throw a {@link ParseException}. */ public int consumeInt32() throws ParseException { try { int result = parseInt32(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw integerParseException(e); } } /** * If the next token is a 32-bit unsigned integer, consume it and return its value. * Otherwise, throw a {@link ParseException}. */ public int consumeUInt32() throws ParseException { try { int result = parseUInt32(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw integerParseException(e); } } /** * If the next token is a 64-bit signed integer, consume it and return its value. Otherwise, * throw a {@link ParseException}. */ public long consumeInt64() throws ParseException { try { long result = parseInt64(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw integerParseException(e); } } /** * If the next token is a 64-bit unsigned integer, consume it and return its value. * Otherwise, throw a {@link ParseException}. */ public long consumeUInt64() throws ParseException { try { long result = parseUInt64(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw integerParseException(e); } } /** * If the next token is a double, consume it and return its value. Otherwise, throw a * {@link ParseException}. */ public double consumeDouble() throws ParseException { // We need to parse infinity and nan separately because // Double.parseDouble() does not accept "inf", "infinity", or "nan". if (DOUBLE_INFINITY.matcher(currentToken).matches()) { boolean negative = currentToken.startsWith("-"); nextToken(); return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; } if (currentToken.equalsIgnoreCase("nan")) { nextToken(); return Double.NaN; } try { double result = Double.parseDouble(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw floatParseException(e); } } /** * If the next token is a float, consume it and return its value. Otherwise, throw a * {@link ParseException}. */ public float consumeFloat() throws ParseException { // We need to parse infinity and nan separately because // Float.parseFloat() does not accept "inf", "infinity", or "nan". if (FLOAT_INFINITY.matcher(currentToken).matches()) { boolean negative = currentToken.startsWith("-"); nextToken(); return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; } if (FLOAT_NAN.matcher(currentToken).matches()) { nextToken(); return Float.NaN; } try { float result = Float.parseFloat(currentToken); nextToken(); return result; } catch (NumberFormatException e) { throw floatParseException(e); } } /** * If the next token is a boolean, consume it and return its value. Otherwise, throw a * {@link ParseException}. */ public boolean consumeBoolean() throws ParseException { if (currentToken.equals("true")) { nextToken(); return true; } else if (currentToken.equals("false")) { nextToken(); return false; } else { throw parseException("Expected \"true\" or \"false\"."); } } /** * If the next token is a string, consume it and return its (unescaped) value. Otherwise, * throw a {@link ParseException}. */ public String consumeString() throws ParseException { char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0'; if ((quote != '\"') && (quote != '\'')) { throw parseException("Expected string."); } if ((currentToken.length() < 2) || (currentToken.charAt(currentToken.length() - 1) != quote)) { throw parseException("String missing ending quote."); } try { String escaped = currentToken.substring(1, currentToken.length() - 1); String result = unescapeText(escaped); nextToken(); return result; } catch (InvalidEscapeSequence e) { throw parseException(e.getMessage()); } } /** * If the next token is a string, consume it, unescape it as a * {@link com.googlecode.protobuf.format.ByteString}, and return it. Otherwise, throw a * {@link ParseException}. */ public ByteString consumeByteString() throws ParseException { char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0'; if ((quote != '\"') && (quote != '\'')) { throw parseException("Expected string."); } if ((currentToken.length() < 2) || (currentToken.charAt(currentToken.length() - 1) != quote)) { throw parseException("String missing ending quote."); } try { String escaped = currentToken.substring(1, currentToken.length() - 1); ByteString result = unescapeBytes(escaped); nextToken(); return result; } catch (InvalidEscapeSequence e) { throw parseException(e.getMessage()); } } /** * Returns a {@link ParseException} with the current line and column numbers in the * description, suitable for throwing. */ public ParseException parseException(String description) { // Note: People generally prefer one-based line and column numbers. return new ParseException((line + 1) + ":" + (column + 1) + ": " + description); } /** * Returns a {@link ParseException} with the line and column numbers of the previous token * in the description, suitable for throwing. */ public ParseException parseExceptionPreviousToken(String description) { // Note: People generally prefer one-based line and column numbers. return new ParseException((previousLine + 1) + ":" + (previousColumn + 1) + ": " + description); } /** * Constructs an appropriate {@link ParseException} for the given {@code * NumberFormatException} when trying to parse an integer. */ private ParseException integerParseException(NumberFormatException e) { return parseException("Couldn't parse integer: " + e.getMessage()); } /** * Constructs an appropriate {@link ParseException} for the given {@code * NumberFormatException} when trying to parse a float or double. */ private ParseException floatParseException(NumberFormatException e) { return parseException("Couldn't parse number: " + e.getMessage()); } } /** * Thrown when parsing an invalid text format message. */ public static class ParseException extends IOException { private static final long serialVersionUID = 1L; public ParseException(String message) { super(message); } } /** * Parse a text-format message from {@code input} and merge the contents into {@code builder}. * Extensions will be recognized if they are registered in {@code extensionRegistry}. */ public void merge(CharSequence input, ExtensionRegistry extensionRegistry, Message.Builder builder) throws ParseException { Tokenizer tokenizer = new Tokenizer(input); // Based on the state machine @ http://json.org/ tokenizer.consume("{"); // Needs to happen when the object starts. while (!tokenizer.tryConsume("}")) { // Continue till the object is done mergeField(tokenizer, extensionRegistry, builder); } // Test to make sure the tokenizer has reached the end of the stream. if (!tokenizer.atEnd()) { throw tokenizer.parseException("Expecting the end of the stream, but there seems to be more data! Check the input for a valid JSON format."); } } /** * Parse a single field from {@code tokenizer} and merge it into {@code builder}. If a ',' is * detected after the field ends, the next field will be parsed automatically */ protected void mergeField(Tokenizer tokenizer, ExtensionRegistry extensionRegistry, Message.Builder builder) throws ParseException { FieldDescriptor field; Descriptor type = builder.getDescriptorForType(); ExtensionRegistry.ExtensionInfo extension = null; boolean unknown = false; String name = tokenizer.consumeIdentifier(); field = type.findFieldByName(name); // Group names are expected to be capitalized as they appear in the // .proto file, which actually matches their type names, not their field // names. if (field == null) { // Explicitly specify US locale so that this code does not break when // executing in Turkey. String lowerName = name.toLowerCase(Locale.US); field = type.findFieldByName(lowerName); // If the case-insensitive match worked but the field is NOT a group, if ((field != null) && (field.getType() != FieldDescriptor.Type.GROUP)) { field = null; } } // Again, special-case group names as described above. if ((field != null) && (field.getType() == FieldDescriptor.Type.GROUP) && !field.getMessageType().getName().equals(name)) { field = null; } // Last try to lookup by field-index if 'name' is numeric, // which indicates a possible unknown field if (field == null && TextUtils.isDigits(name)) { field = type.findFieldByNumber(Integer.parseInt(name)); unknown = true; } // Finally, look for extensions extension = extensionRegistry.findExtensionByName(name); if (extension != null) { if (extension.descriptor.getContainingType() != type) { throw tokenizer.parseExceptionPreviousToken("Extension \"" + name + "\" does not extend message type \"" + type.getFullName() + "\"."); } field = extension.descriptor; } // Disabled throwing exception if field not found, since it could be a different version. if (field == null) { handleMissingField(tokenizer, extensionRegistry, builder); //throw tokenizer.parseExceptionPreviousToken("Message type \"" + type.getFullName() // + "\" has no field named \"" + name // + "\"."); } if (field != null) { tokenizer.consume(":"); boolean array = tokenizer.tryConsume("["); if (array) { while (!tokenizer.tryConsume("]")) { handleValue(tokenizer, extensionRegistry, builder, field, extension, unknown); tokenizer.tryConsume(","); } } else { handleValue(tokenizer, extensionRegistry, builder, field, extension, unknown); } } if (tokenizer.tryConsume(",")) { // Continue with the next field mergeField(tokenizer, extensionRegistry, builder); } } private void handleMissingField(Tokenizer tokenizer, ExtensionRegistry extensionRegistry, Message.Builder builder) throws ParseException { tokenizer.tryConsume(":"); if ("{".equals(tokenizer.currentToken())) { // Message structure tokenizer.consume("{"); do { tokenizer.consumeIdentifier(); handleMissingField(tokenizer, extensionRegistry, builder); } while (tokenizer.tryConsume(",")); tokenizer.consume("}"); } else if ("[".equals(tokenizer.currentToken())) { // Collection tokenizer.consume("["); do { handleMissingField(tokenizer, extensionRegistry, builder); } while (tokenizer.tryConsume(",")); tokenizer.consume("]"); } else { //if (!",".equals(tokenizer.currentToken)){ // Primitive value if ("null".equals(tokenizer.currentToken())) { tokenizer.consume("null"); } else if (tokenizer.lookingAtInteger()) { tokenizer.consumeInt64(); } else if (tokenizer.lookingAtBoolean()) { tokenizer.consumeBoolean(); } else { tokenizer.consumeString(); } } } private void handleValue(Tokenizer tokenizer, ExtensionRegistry extensionRegistry, Message.Builder builder, FieldDescriptor field, ExtensionRegistry.ExtensionInfo extension, boolean unknown) throws ParseException { Object value = null; if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { value = handleObject(tokenizer, extensionRegistry, builder, field, extension, unknown); } else { value = handlePrimitive(tokenizer, field); } if (value != null) { if (field.isRepeated()) { builder.addRepeatedField(field, value); } else { builder.setField(field, value); } } } private Object handlePrimitive(Tokenizer tokenizer, FieldDescriptor field) throws ParseException { Object value = null; if ("null".equals(tokenizer.currentToken())) { tokenizer.consume("null"); return value; } switch (field.getType()) { case INT32: case SINT32: case SFIXED32: value = tokenizer.consumeInt32(); break; case INT64: case SINT64: case SFIXED64: value = tokenizer.consumeInt64(); break; case UINT32: case FIXED32: value = tokenizer.consumeUInt32(); break; case UINT64: case FIXED64: value = tokenizer.consumeUInt64(); break; case FLOAT: value = tokenizer.consumeFloat(); break; case DOUBLE: value = tokenizer.consumeDouble(); break; case BOOL: value = tokenizer.consumeBoolean(); break; case STRING: value = tokenizer.consumeString(); break; case BYTES: value = tokenizer.consumeByteString(); break; case ENUM: { EnumDescriptor enumType = field.getEnumType(); if (tokenizer.lookingAtInteger()) { int number = tokenizer.consumeInt32(); value = enumType.findValueByNumber(number); if (value == null) { throw tokenizer.parseExceptionPreviousToken("Enum type \"" + enumType.getFullName() + "\" has no value with number " + number + "."); } } else { String id = tokenizer.consumeIdentifier(); value = enumType.findValueByName(id); if (value == null) { throw tokenizer.parseExceptionPreviousToken("Enum type \"" + enumType.getFullName() + "\" has no value named \"" + id + "\"."); } } break; } case MESSAGE: case GROUP: throw new RuntimeException("Can't get here."); } return value; } private Object handleObject(Tokenizer tokenizer, ExtensionRegistry extensionRegistry, Message.Builder builder, FieldDescriptor field, ExtensionRegistry.ExtensionInfo extension, boolean unknown) throws ParseException { Message.Builder subBuilder; if (extension == null) { subBuilder = builder.newBuilderForField(field); } else { subBuilder = extension.defaultInstance.newBuilderForType(); } if (unknown) { ByteString data = tokenizer.consumeByteString(); try { subBuilder.mergeFrom(data); return subBuilder.build(); } catch (InvalidProtocolBufferException e) { throw tokenizer.parseException("Failed to build " + field.getFullName() + " from " + data); } } tokenizer.consume("{"); String endToken = "}"; while (!tokenizer.tryConsume(endToken)) { if (tokenizer.atEnd()) { throw tokenizer.parseException("Expected \"" + endToken + "\"."); } mergeField(tokenizer, extensionRegistry, subBuilder); if (tokenizer.tryConsume(",")) { // there are more fields in the object, so continue continue; } } return subBuilder.build(); } // ================================================================= // Utility functions // // Some of these methods are package-private because Descriptors.java uses // them. /** * Escapes bytes in the format used in protocol buffer text format, which is the same as the * format used for C string literals. All bytes that are not printable 7-bit ASCII characters * are escaped, as well as backslash, single-quote, and double-quote characters. Characters for * which no defined short-hand escape sequence is defined will be escaped using 3-digit octal * sequences. */ static String escapeBytes(ByteString input) { StringBuilder builder = new StringBuilder(input.size()); for (int i = 0; i < input.size(); i++) { byte b = input.byteAt(i); switch (b) { // Java does not recognize \a or \v, apparently. case 0x07: builder.append("\\a"); break; case '\b': builder.append("\\b"); break; case '\f': builder.append("\\f"); break; case '\n': builder.append("\\n"); break; case '\r': builder.append("\\r"); break; case '\t': builder.append("\\t"); break; case 0x0b: builder.append("\\v"); break; case '\\': builder.append("\\\\"); break; case '\'': builder.append("\\\'"); break; case '"': builder.append("\\\""); break; default: if (b >= 0x20) { builder.append((char) b); } else { final String unicodeString = unicodeEscaped((char) b); builder.append(unicodeString); } break; } } return builder.toString(); } static String unicodeEscaped(char ch) { if (ch < 0x10) { return "\\u000" + Integer.toHexString(ch); } else if (ch < 0x100) { return "\\u00" + Integer.toHexString(ch); } else if (ch < 0x1000) { return "\\u0" + Integer.toHexString(ch); } return "\\u" + Integer.toHexString(ch); } /** * Un-escape a byte sequence as escaped using * {@link #escapeBytes(com.googlecode.protobuf.format.ByteString)}. Two-digit hex escapes (starting with * "\x") are also recognized. */ static ByteString unescapeBytes(CharSequence input) throws InvalidEscapeSequence { byte[] result = new byte[input.length()]; int pos = 0; for (int i = 0; i < input.length(); i++) { char c = input.charAt(i); if (c == '\\') { if (i + 1 < input.length()) { ++i; c = input.charAt(i); if (TextUtils.isOctal(c)) { // Octal escape. int code = TextUtils.digitValue(c); if ((i + 1 < input.length()) && TextUtils.isOctal(input.charAt(i + 1))) { ++i; code = code * 8 + TextUtils.digitValue(input.charAt(i)); } if ((i + 1 < input.length()) && TextUtils.isOctal(input.charAt(i + 1))) { ++i; code = code * 8 + TextUtils.digitValue(input.charAt(i)); } result[pos++] = (byte) code; } else { switch (c) { case 'a': result[pos++] = 0x07; break; case 'b': result[pos++] = '\b'; break; case 'f': result[pos++] = '\f'; break; case 'n': result[pos++] = '\n'; break; case 'r': result[pos++] = '\r'; break; case 't': result[pos++] = '\t'; break; case 'v': result[pos++] = 0x0b; break; case '\\': result[pos++] = '\\'; break; case '\'': result[pos++] = '\''; break; case '"': result[pos++] = '\"'; break; case 'x': // hex escape int code = 0; if ((i + 1 < input.length()) && TextUtils.isHex(input.charAt(i + 1))) { ++i; code = TextUtils.digitValue(input.charAt(i)); } else { throw new InvalidEscapeSequence("Invalid escape sequence: '\\x' with no digits"); } if ((i + 1 < input.length()) && TextUtils.isHex(input.charAt(i + 1))) { ++i; code = code * 16 + TextUtils.digitValue(input.charAt(i)); } result[pos++] = (byte) code; break; case 'u': // UTF8 escape code = (16 * 3 * TextUtils.digitValue(input.charAt(i+1))) + (16 * 2 * TextUtils.digitValue(input.charAt(i+2))) + (16 * TextUtils.digitValue(input.charAt(i+3))) + TextUtils.digitValue(input.charAt(i+4)); i = i+4; result[pos++] = (byte) code; break; default: throw new InvalidEscapeSequence("Invalid escape sequence: '\\" + c + "'"); } } } else { throw new InvalidEscapeSequence("Invalid escape sequence: '\\' at end of string."); } } else { result[pos++] = (byte) c; } } return ByteString.copyFrom(result, 0, pos); } /** * Thrown by {@link ProtobufJsonFormat#unescapeBytes} and {@link ProtobufJsonFormat#unescapeText} when an * invalid escape sequence is seen. */ static class InvalidEscapeSequence extends IOException { private static final long serialVersionUID = 1L; public InvalidEscapeSequence(String description) { super(description); } } /** * Implements JSON string escaping as specified <a href="http://www.ietf.org/rfc/rfc4627.txt">here</a>. * <ul> * <li>The following characters are escaped by prefixing them with a '\' : \b,\f,\n,\r,\t,\,"</li> * <li>Other control characters in the range 0x0000-0x001F are escaped using the \\uXXXX notation</li> * <li>UTF-16 surrogate pairs are encoded using the \\uXXXX\\uXXXX notation</li> * <li>any other character is printed as-is</li> * </ul> */ static String escapeText(String input) { StringBuilder builder = new StringBuilder(input.length()); CharacterIterator iter = new StringCharacterIterator(input); for(char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) { switch(c) { case '\b': builder.append("\\b"); break; case '\f': builder.append("\\f"); break; case '\n': builder.append("\\n"); break; case '\r': builder.append("\\r"); break; case '\t': builder.append("\\t"); break; case '\\': builder.append("\\\\"); break; case '"': builder.append("\\\""); break; default: // Check for other control characters if(c >= 0x0000 && c <= 0x001F) { appendEscapedUnicode(builder, c); } else if(Character.isHighSurrogate(c)) { // Encode the surrogate pair using 2 six-character sequence (\\uXXXX\\uXXXX) appendEscapedUnicode(builder, c); c = iter.next(); if(c == CharacterIterator.DONE) throw new IllegalArgumentException("invalid unicode string: unexpected high surrogate pair value without corresponding low value."); appendEscapedUnicode(builder, c); } else { // Anything else can be printed as-is builder.append(c); } break; } } return builder.toString(); } static void appendEscapedUnicode(StringBuilder builder, char ch) { String prefix = "\\u"; if(ch < 0x10) { prefix = "\\u000"; } else if(ch < 0x100) { prefix = "\\u00"; } else if(ch < 0x1000) { prefix = "\\u0"; } builder.append(prefix).append(Integer.toHexString(ch)); } /** * Un-escape a text string as escaped using {@link #escapeText(String)}. */ static String unescapeText(String input) throws InvalidEscapeSequence { StringBuilder builder = new StringBuilder(); char[] array = input.toCharArray(); for(int i = 0; i < array.length; i++) { char c = array[i]; if(c == '\\') { if(i + 1 < array.length) { ++i; c = array[i]; switch(c) { case 'b': builder.append('\b'); break; case 'f': builder.append('\f'); break; case 'n': builder.append('\n'); break; case 'r': builder.append('\r'); break; case 't': builder.append('\t'); break; case '\\': builder.append('\\'); break; case '"': builder.append('\"'); break; case '\'': builder.append('\''); break; case 'u': // read the next 4 chars if(i + 4 < array.length) { ++i; int code = Integer.parseInt(new String(array, i, 4), 16); // this cast is safe because we know how many chars we read builder.append((char)code); i += 3; } else { throw new InvalidEscapeSequence("Invalid escape sequence: '\\u' at end of string."); } break; default: throw new InvalidEscapeSequence("Invalid escape sequence: '\\" + c + "'"); } } else { throw new InvalidEscapeSequence("Invalid escape sequence: '\\' at end of string."); } } else { builder.append(c); } } return builder.toString(); } /** * Parse a 32-bit signed integer from the text. Unlike the Java standard {@code * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify * hexidecimal and octal numbers, respectively. */ static int parseInt32(String text) throws NumberFormatException { return (int) parseInteger(text, true, false); } /** * Parse a 32-bit unsigned integer from the text. Unlike the Java standard {@code * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify * hexidecimal and octal numbers, respectively. The result is coerced to a (signed) {@code int} * when returned since Java has no unsigned integer type. */ static int parseUInt32(String text) throws NumberFormatException { return (int) parseInteger(text, false, false); } /** * Parse a 64-bit signed integer from the text. Unlike the Java standard {@code * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify * hexidecimal and octal numbers, respectively. */ static long parseInt64(String text) throws NumberFormatException { return parseInteger(text, true, true); } /** * Parse a 64-bit unsigned integer from the text. Unlike the Java standard {@code * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify * hexidecimal and octal numbers, respectively. The result is coerced to a (signed) {@code long} * when returned since Java has no unsigned long type. */ static long parseUInt64(String text) throws NumberFormatException { return parseInteger(text, false, true); } private static long parseInteger(String text, boolean isSigned, boolean isLong) throws NumberFormatException { int pos = 0; boolean negative = false; if (text.startsWith("-", pos)) { if (!isSigned) { throw new NumberFormatException("Number must be positive: " + text); } ++pos; negative = true; } int radix = 10; if (text.startsWith("0x", pos)) { pos += 2; radix = 16; } else if (text.startsWith("0", pos)) { radix = 8; } String numberText = text.substring(pos); long result = 0; if (numberText.length() < 16) { // Can safely assume no overflow. result = Long.parseLong(numberText, radix); if (negative) { result = -result; } // Check bounds. // No need to check for 64-bit numbers since they'd have to be 16 chars // or longer to overflow. if (!isLong) { if (isSigned) { if ((result > Integer.MAX_VALUE) || (result < Integer.MIN_VALUE)) { throw new NumberFormatException("Number out of range for 32-bit signed integer: " + text); } } else { if ((result >= (1L << 32)) || (result < 0)) { throw new NumberFormatException("Number out of range for 32-bit unsigned integer: " + text); } } } } else { BigInteger bigValue = new BigInteger(numberText, radix); if (negative) { bigValue = bigValue.negate(); } // Check bounds. if (!isLong) { if (isSigned) { if (bigValue.bitLength() > 31) { throw new NumberFormatException("Number out of range for 32-bit signed integer: " + text); } } else { if (bigValue.bitLength() > 32) { throw new NumberFormatException("Number out of range for 32-bit unsigned integer: " + text); } } } else { if (isSigned) { if (bigValue.bitLength() > 63) { throw new NumberFormatException("Number out of range for 64-bit signed integer: " + text); } } else { if (bigValue.bitLength() > 64) { throw new NumberFormatException("Number out of range for 64-bit unsigned integer: " + text); } } } result = bigValue.longValue(); } return result; } }