/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.shindig.gadgets.encoding; import static org.easymock.EasyMock.expect; import static org.easymock.EasyMock.replay; import static org.easymock.EasyMock.verify; import static org.junit.Assert.assertEquals; import org.easymock.EasyMock; import org.junit.Test; import java.nio.charset.Charset; public class EncodingDetectorTest { private EncodingDetector.FallbackEncodingDetector newMockFallbackEncoding(byte[] input, String charset) { EncodingDetector.FallbackEncodingDetector detector = EasyMock.createNiceMock(EncodingDetector.FallbackEncodingDetector.class); expect(detector.detectEncoding(input)).andReturn(Charset.forName(charset)).once(); replay(detector); return detector; } @Test public void asciiAssumesUtf8() throws Exception { byte[] data = "Hello, world".getBytes("US-ASCII"); assertEquals("UTF-8", EncodingDetector.detectEncoding(data, true, null).name()); } @Test public void detectedUtf8WithByteOrderMark() { byte[] data = { (byte)0xEF, (byte)0xBB, (byte)0xBF, 'h', 'e', 'l', 'l', 'o' }; assertEquals("UTF-8", EncodingDetector.detectEncoding(data, true, null).name()); } @Test public void assumeLatin1OnInvalidUtf8() throws Exception { byte[] data = "\u4F60\u597D".getBytes("BIG5"); assertEquals("ISO-8859-1", EncodingDetector.detectEncoding(data, true, null).name()); } @Test public void badStreamEnd() throws Exception { byte[] data = { 'd', 'u', (byte)0xC0 }; assertEquals("ISO-8859-1", EncodingDetector.detectEncoding(data, true, null).name()); } @Test public void testFallbackDetectorIsUsed() throws Exception { byte[] data = ("\u6211\u662F\u4E00\u4E2A\u4E0D\u5584\u4E8E\u8BB2\u8BDD\u7684\u4EBA\uFF0C" + "\u552F\u5176\u4E0D\u5584\u4E8E\u8BB2\u8BDD\uFF0C\u6709\u601D\u60F3\u8868" + "\u8FBE\u4E0D\u51FA\uFF0C\u6709\u611F\u60C5\u65E0\u6CD5\u503E\u5410") .getBytes("GB18030"); EncodingDetector.FallbackEncodingDetector detector = newMockFallbackEncoding(data, "GB18030"); assertEquals("GB18030", EncodingDetector.detectEncoding(data, false, detector).name()); verify(detector); } // Test the fallback detector: @Test public void doNotAssumeLatin1OnInvalidUtf8() throws Exception { byte[] data = ("\u6211\u662F\u4E00\u4E2A\u4E0D\u5584\u4E8E\u8BB2\u8BDD\u7684\u4EBA\uFF0C" + "\u552F\u5176\u4E0D\u5584\u4E8E\u8BB2\u8BDD\uFF0C\u6709\u601D\u60F3\u8868" + "\u8FBE\u4E0D\u51FA\uFF0C\u6709\u611F\u60C5\u65E0\u6CD5\u503E\u5410") .getBytes("GB18030"); EncodingDetector.FallbackEncodingDetector detector = new EncodingDetector.FallbackEncodingDetector(); assertEquals("GB18030", EncodingDetector.detectEncoding(data, false, detector).name()); } @Test public void longUtf8StringIsUtf8() throws Exception { byte[] data = ("\u6211\u662F\u4E00\u4E2A\u4E0D\u5584\u4E8E\u8BB2\u8BDD\u7684\u4EBA\uFF0C" + "\u552F\u5176\u4E0D\u5584\u4E8E\u8BB2\u8BDD\uFF0C\u6709\u601D\u60F3\u8868" + "\u8FBE\u4E0D\u51FA\uFF0C\u6709\u611F\u60C5\u65E0\u6CD5\u503E\u5410") .getBytes("UTF-8"); EncodingDetector.FallbackEncodingDetector detector = new EncodingDetector.FallbackEncodingDetector(); assertEquals("UTF-8", detector.detectEncoding(data).name()); } @Test public void shortUtf8StringIsUtf8() throws Exception { byte[] data = "Games, HQ, Mang\u00E1, Anime e tudo que um bom nerd ama".getBytes("UTF-8"); EncodingDetector.FallbackEncodingDetector detector = new EncodingDetector.FallbackEncodingDetector(); assertEquals("UTF-8", detector.detectEncoding(data).name()); } @Test(expected=NullPointerException.class) public void nullCustomDetector() throws Exception { byte[] data = "\u4F60\u597D".getBytes("BIG5"); // expect a NPE assertEquals("ISO-8859-1", EncodingDetector.detectEncoding(data, false, null).name()); } }