/*
 * #!
 * Ontopia Engine
 * #-
 * Copyright (C) 2001 - 2013 The Ontopia Project
 * #-
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * !#
 */

package net.ontopia.topicmaps.utils.jtm;

import java.io.IOException;
import java.io.PushbackInputStream;

import net.ontopia.topicmaps.impl.utils.EncodingSnifferIF;

/**
 * INTERNAL: An encoding sniffer for JTM.
 *
 * <p>The sniffer inspects the zero-byte pattern of the first four bytes of
 * the stream to tell unmarked UTF-16 (big or little endian) apart from the
 * default, UTF-8. The inspected bytes are pushed back onto the stream before
 * returning.</p>
 */
public class JTMEncodingSniffer implements EncodingSnifferIF {

  /** Number of leading bytes inspected to determine the encoding. */
  private static final int PROBE_LENGTH = 4;

  /**
   * Guesses the character encoding of a JTM (JSON) byte stream.
   *
   * <p>http://www.ietf.org/rfc/rfc4627.txt requires that a JSON data stream
   * has to be in UTF notation. This method checks which specific UTF format
   * is being used, based on where zero bytes occur in the first four
   * bytes:</p>
   *
   * <pre>
   *   00 00 00 xx   UTF-32BE
   *   00 xx 00 xx   UTF-16BE
   *   xx 00 00 00   UTF-32LE
   *   xx 00 xx 00   UTF-16LE
   *   xx xx xx xx   UTF-8
   * </pre>
   *
   * <p>Only the two UTF-16 patterns are recognised. The UTF-32 patterns match
   * neither check and therefore fall through to the default; the original
   * author noted that UTF-32 was not supported by Java
   * (http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html).</p>
   *
   * @param stream the stream to inspect; every byte read here is pushed back
   *        before returning, so the stream's pushback buffer must have room
   *        for at least four bytes
   * @return <code>"UnicodeBigUnmarked"</code>,
   *         <code>"UnicodeLittleUnmarked"</code>, or <code>"UTF8"</code> when
   *         neither UTF-16 pattern matches or fewer than four bytes are
   *         available
   * @throws IOException if reading from or pushing back onto the stream fails
   */
  public String guessEncoding(PushbackInputStream stream) throws IOException {
    String encoding = "UTF8";

    byte[] buffer = new byte[PROBE_LENGTH];

    // A single read() may legally return fewer bytes than requested even
    // though more are still to come, so keep reading until the probe is full
    // or the stream ends.
    int bytesread = 0;
    while (bytesread < PROBE_LENGTH) {
      int count = stream.read(buffer, bytesread, PROBE_LENGTH - bytesread);
      if (count <= 0) {
        break; // end of stream (or a stream that makes no progress)
      }
      bytesread += count;
    }

    if (bytesread == PROBE_LENGTH) {
      boolean zero0 = buffer[0] == (byte) 0x00;
      boolean zero1 = buffer[1] == (byte) 0x00;
      boolean zero2 = buffer[2] == (byte) 0x00;
      boolean zero3 = buffer[3] == (byte) 0x00;

      if (zero0 && !zero1 && zero2 && !zero3) {
        // 00 xx 00 xx
        encoding = "UnicodeBigUnmarked";
      } else if (!zero0 && zero1 && !zero2 && zero3) {
        // xx 00 xx 00
        encoding = "UnicodeLittleUnmarked";
      }
    }

    // Only push back what was actually read. On an empty stream read()
    // reports -1, and a negative length must never be passed to unread().
    if (bytesread > 0) {
      stream.unread(buffer, 0, bytesread);
    }

    return encoding;
  }
}