/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.camel.jsonpath; import java.io.CharConversionException; import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; import java.util.Arrays; /** * Special stream for JSON streams. Determines from the first 4 bytes the JSON * encoding according to JSON specification RFC-4627 or newer. In addition BOMs * are taken into account. * <p> * This class is not thread safe. */ public class JsonStream extends FilterInputStream { private static final byte[] BOM_UTF_32BE = new byte[] {0x00, 0x00, (byte) 0xFE, (byte) 0xFF }; private static final byte[] BOM_UTF_32LE = new byte[] {(byte) 0xFF, (byte) 0xFE, 0x00, 0x00 }; private static final byte[] BOM_UTF_32_2143 = new byte[] {0x00, 0x00, (byte) 0xFF, (byte) 0xFE }; private static final byte[] BOM_UTF_32_3412 = new byte[] {(byte) 0xFE, (byte) 0xFF, 0x00, 0x00 }; private static final byte[] BOM_UTF_16BE = new byte[] {(byte) 0xFE, (byte) 0xFF }; private static final byte[] BOM_UTF_16LE = new byte[] {(byte) 0xFF, (byte) 0xFE }; private static final byte[] BOM_UTF_8 = new byte[] {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF }; private final byte[] fourByteBuffer = new byte[4]; /* input index of the four byte buffer (BOMs are skipped) */ private int inputIndex; /* read bytes into the buffer */ private int inputEnd; private final Charset encoding; /** * Constructor. Determines the encoding during the instantiation according * to JSON specification RFC-4627 or newer. In addition BOMs are taken into * account. * * @param in * input stream must contain a JSON content * @throws IOException * if an error occurs during the determination of the encoding * @throws CharConversionException * if the UCS4 endianess 2143 or 3412 is used * @throws IllegalArgumentException * if the input stream is <code>null</code> */ public JsonStream(InputStream in) throws IOException { super(in); if (in == null) { throw new IllegalArgumentException("input stream is null"); } inputEnd = inputIndex = 0; Charset enc = null; if (loadAtLeast(4)) { enc = getEncodingFromBOM(); if (enc == null) { // no BOM enc = getUTF32EncodingFromNullPattern(); if (enc == null) { enc = getUTF16EncodingFromNullPattern(); } } } else if (loadAtLeast(2)) { enc = getUTF16EncodingFromNullPattern(); } if (enc == null) { // not found; as per specification, this means it must be UTF-8. enc = Charset.forName("UTF-8"); } encoding = enc; } public Charset getEncoding() { return encoding; } private boolean loadAtLeast(int minimum) throws IOException { int received = inputEnd - inputIndex; while (received < minimum) { int count = in.read(fourByteBuffer, inputEnd, fourByteBuffer.length - inputEnd); if (count < 1) { return false; } inputEnd += count; received += count; } return true; } private Charset getEncodingFromBOM() throws IOException { // 32-bit encoding BOMs if (Arrays.equals(fourByteBuffer, BOM_UTF_32BE)) { inputIndex = 4; return Charset.forName("UTF-32BE"); } else if (Arrays.equals(fourByteBuffer, BOM_UTF_32LE)) { inputIndex = 4; return Charset.forName("UTF-32LE"); } else if (Arrays.equals(fourByteBuffer, BOM_UTF_32_2143)) { throw getExceptionUnsupportedUCS4("2143"); } else if (Arrays.equals(fourByteBuffer, BOM_UTF_32_3412)) { throw getExceptionUnsupportedUCS4("3412"); } byte[] firstTwoBytes = Arrays.copyOf(fourByteBuffer, 2); // 16-bit encoding BOMs if (Arrays.equals(firstTwoBytes, BOM_UTF_16BE)) { inputIndex = 2; return Charset.forName("UTF-16BE"); } if (Arrays.equals(firstTwoBytes, BOM_UTF_16LE)) { inputIndex = 2; return Charset.forName("UTF-16LE"); } byte[] firstThreeBytes = Arrays.copyOf(fourByteBuffer, 3); // UTF-8 BOM? if (Arrays.equals(firstThreeBytes, BOM_UTF_8)) { inputIndex = 3; return Charset.forName("UTF-8"); } return null; } private Charset getUTF32EncodingFromNullPattern() throws IOException { //content without BOM if (fourByteBuffer[0] == 0 && fourByteBuffer[1] == 0 && fourByteBuffer[2] == 0) { // 00 00 00 xx return Charset.forName("UTF-32BE"); } else if (fourByteBuffer[1] == 0 && fourByteBuffer[2] == 0 && fourByteBuffer[3] == 0) { // xx 00 00 00 return Charset.forName("UTF-32LE"); } else if (fourByteBuffer[0] == 0 && fourByteBuffer[2] == 0 && fourByteBuffer[3] == 0) { // 00 xx 00 00 throw getExceptionUnsupportedUCS4("3412"); } else if (fourByteBuffer[0] == 0 && fourByteBuffer[1] == 0 && fourByteBuffer[3] == 0) { //00 00 xx 00 throw getExceptionUnsupportedUCS4("2143"); } else { // Cannot be valid UTF-32 encoded JSON... return null; } } private Charset getUTF16EncodingFromNullPattern() { if (fourByteBuffer[0] == 0) { return Charset.forName("UTF-16BE"); } else if (fourByteBuffer[1] == 0) { return Charset.forName("UTF-16LE"); } else { // not UTF-16 return null; } } private CharConversionException getExceptionUnsupportedUCS4(String type) throws IOException { return new CharConversionException("Unsupported UCS-4 endianness (" + type + ") detected"); } @Override public int read() throws IOException { if (inputIndex < inputEnd) { int result = fourByteBuffer[inputIndex]; inputIndex++; return result; } try { return in.read(); } catch (java.io.EOFException ex) { return -1; } } @Override public int read(byte b[]) throws IOException { if (inputIndex < inputEnd) { int minimum = Math.min(b.length, inputEnd - inputIndex); for (int i = 0; i < minimum; i++) { b[i] = fourByteBuffer[inputIndex]; inputIndex++; } int rest = b.length - minimum; if (rest == 0) { return minimum; } try { int additionalRead = in.read(b, minimum, rest); if (additionalRead < 0) { return minimum; } else { return minimum + additionalRead; } } catch (java.io.EOFException ex) { return minimum; } } else { return read(b, 0, b.length); } } @Override public int read(byte b[], int off, int len) throws IOException { if (inputIndex < inputEnd) { int minimum = Math.min(b.length, inputEnd - inputIndex); for (int i = 0; i < minimum; i++) { b[off + i] = fourByteBuffer[inputIndex]; inputIndex++; } int rest = b.length - minimum; if (rest == 0) { return minimum; } try { int additionalRead = in.read(b, minimum + off, rest); if (additionalRead < 0) { return minimum; } else { return minimum + additionalRead; } } catch (java.io.EOFException ex) { return minimum; } } else { try { return in.read(b, off, len); } catch (java.io.EOFException ex) { return -1; } } } @Override public long skip(long n) throws IOException { if (inputIndex < inputEnd) { long minimum = Math.min(n, inputEnd - inputIndex); for (int i = 0; i < minimum; i++) { inputIndex++; } long rest = n - minimum; if (rest == 0) { return minimum; } long additionalSkipped = in.skip(rest); return additionalSkipped + minimum; } else { return in.skip(n); } } @Override public synchronized void reset() throws IOException { throw new IOException("reset not supported"); } @Override public boolean markSupported() { return false; } }