/* * Copyright (C) 2010 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package libcore.java.lang; import java.lang.reflect.Field; import java.lang.reflect.Modifier; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.ReadOnlyBufferException; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; import java.util.Arrays; import java.util.Locale; import junit.framework.TestCase; public class StringTest extends TestCase { public void testIsEmpty() { assertTrue("".isEmpty()); assertFalse("x".isEmpty()); } // The evil decoder keeps hold of the CharBuffer it wrote to. private static final class EvilCharsetDecoder extends CharsetDecoder { private static char[] chars; public EvilCharsetDecoder(Charset cs) { super(cs, 1.0f, 1.0f); } protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { chars = out.array(); int inLength = in.remaining(); for (int i = 0; i < inLength; ++i) { in.put((byte) 'X'); out.put('Y'); } return CoderResult.UNDERFLOW; } public static void corrupt() { for (int i = 0; i < chars.length; ++i) { chars[i] = '$'; } } } // The evil encoder tries to write to the CharBuffer it was given to // read from. private static final class EvilCharsetEncoder extends CharsetEncoder { public EvilCharsetEncoder(Charset cs) { super(cs, 1.0f, 1.0f); } protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { int inLength = in.remaining(); for (int i = 0; i < inLength; ++i) { in.put('x'); out.put((byte) 'y'); } return CoderResult.UNDERFLOW; } } private static final Charset EVIL_CHARSET = new Charset("evil", null) { public boolean contains(Charset charset) { return false; } public CharsetEncoder newEncoder() { return new EvilCharsetEncoder(this); } public CharsetDecoder newDecoder() { return new EvilCharsetDecoder(this); } }; public void testGetBytes_MaliciousCharset() { try { String s = "hi"; // Check that our encoder can't write to the input CharBuffer // it was given. s.getBytes(EVIL_CHARSET); fail(); // We shouldn't have got here! } catch (ReadOnlyBufferException expected) { // We caught you trying to be naughty! } } public void testString_BII() throws Exception { byte[] bytes = "xa\u0666bx".getBytes("UTF-8"); assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2)); } public void testString_BIIString() throws Exception { byte[] bytes = "xa\u0666bx".getBytes("UTF-8"); assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, "UTF-8")); } public void testString_BIICharset() throws Exception { byte[] bytes = "xa\u0666bx".getBytes("UTF-8"); assertEquals("a\u0666b", new String(bytes, 1, bytes.length - 2, Charset.forName("UTF-8"))); } public void testString_BCharset() throws Exception { byte[] bytes = "a\u0666b".getBytes("UTF-8"); assertEquals("a\u0666b", new String(bytes, Charset.forName("UTF-8"))); } public void testStringFromCharset_MaliciousCharset() { Charset cs = EVIL_CHARSET; byte[] bytes = new byte[] {(byte) 'h', (byte) 'i'}; final String result = new String(bytes, cs); assertEquals("YY", result); // (Our decoder always outputs 'Y's.) // Check that even if the decoder messes with the output CharBuffer // after we've created a string from it, it doesn't affect the string. EvilCharsetDecoder.corrupt(); assertEquals("YY", result); } public void test_getBytes_bad() throws Exception { // Check that we use '?' as the replacement byte for invalid characters. assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes("US-ASCII"))); assertEquals("[97, 63, 98]", Arrays.toString("a\u0666b".getBytes(Charset.forName("US-ASCII")))); } public void test_getBytes_UTF_8() { // We have a fast path implementation of String.getBytes for UTF-8. Charset cs = Charset.forName("UTF-8"); // Test the empty string. assertEquals("[]", Arrays.toString("".getBytes(cs))); // Test one-byte characters. assertEquals("[0]", Arrays.toString("\u0000".getBytes(cs))); assertEquals("[127]", Arrays.toString("\u007f".getBytes(cs))); assertEquals("[104, 105]", Arrays.toString("hi".getBytes(cs))); // Test two-byte characters. assertEquals("[-62, -128]", Arrays.toString("\u0080".getBytes(cs))); assertEquals("[-39, -90]", Arrays.toString("\u0666".getBytes(cs))); assertEquals("[-33, -65]", Arrays.toString("\u07ff".getBytes(cs))); assertEquals("[104, -39, -90, 105]", Arrays.toString("h\u0666i".getBytes(cs))); // Test three-byte characters. assertEquals("[-32, -96, -128]", Arrays.toString("\u0800".getBytes(cs))); assertEquals("[-31, -120, -76]", Arrays.toString("\u1234".getBytes(cs))); assertEquals("[-17, -65, -65]", Arrays.toString("\uffff".getBytes(cs))); assertEquals("[104, -31, -120, -76, 105]", Arrays.toString("h\u1234i".getBytes(cs))); // Test supplementary characters. // Minimum supplementary character: U+10000 assertEquals("[-16, -112, -128, -128]", Arrays.toString("\ud800\udc00".getBytes(cs))); // Random supplementary character: U+10381 Ugaritic letter beta assertEquals("[-16, -112, -114, -127]", Arrays.toString("\ud800\udf81".getBytes(cs))); // Maximum supplementary character: U+10FFFF assertEquals("[-12, -113, -65, -65]", Arrays.toString("\udbff\udfff".getBytes(cs))); // A high surrogate at end of string is an error replaced with '?'. assertEquals("[104, 63]", Arrays.toString("h\ud800".getBytes(cs))); // A high surrogate not followed by a low surrogate is an error replaced with '?'. assertEquals("[104, 63, 105]", Arrays.toString("h\ud800i".getBytes(cs))); } public void test_new_String_bad() throws Exception { // Check that we use U+FFFD as the replacement string for invalid bytes. assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, "US-ASCII")); assertEquals("a\ufffdb", new String(new byte[] { 97, -2, 98 }, Charset.forName("US-ASCII"))); } /** * Tests a widely assumed performance characteristic of String.substring(): * that it reuses the original's backing array. Although behavior should be * correct even if this test fails, many applications may suffer * significant performance degradation. */ public void testSubstringSharesBackingArray() throws IllegalAccessException { String abcdefghij = "ABCDEFGHIJ"; String cdefg = abcdefghij.substring(2, 7); assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg)); } /** * Tests a widely assumed performance characteristic of string's copy * constructor: that it ensures the backing array is the same length as the * string. Although behavior should be correct even if this test fails, * many applications may suffer significant performance degradation. */ public void testStringCopiesAvoidHeapRetention() throws IllegalAccessException { String abcdefghij = "ABCDEFGHIJ"; assertSame(getBackingArray(abcdefghij), getBackingArray(new String(abcdefghij))); String cdefg = abcdefghij.substring(2, 7); assertSame(getBackingArray(abcdefghij), getBackingArray(cdefg)); assertEquals(5, getBackingArray(new String(cdefg)).length); } /** * Uses reflection to return the char[] backing the given string. This * returns the actual backing array; which must not be modified. */ private char[] getBackingArray(String string) throws IllegalAccessException { for (Field f : String.class.getDeclaredFields()) { if (!Modifier.isStatic(f.getModifiers()) && f.getType() == char[].class) { f.setAccessible(true); return (char[]) f.get(string); } } throw new UnsupportedOperationException("No chars[] field on String!"); } /** * Test that strings interned manually and then later loaded as literals * maintain reference equality. http://b/3098960 */ public void testInternBeforeLiteralIsLoaded() throws Exception{ String programmatic = Arrays.asList("5058", "9962", "1563", "5744").toString().intern(); String literal = (String) Class.forName("libcore.java.lang.StringTest$HasLiteral") .getDeclaredField("literal").get(null); assertEquals(System.identityHashCode(programmatic), System.identityHashCode(literal)); assertSame(programmatic, literal); } static class HasLiteral { static String literal = "[5058, 9962, 1563, 5744]"; } private static final String COMBINING_DOT_ABOVE = "\u0307"; private static final String LATIN_CAPITAL_I = "I"; private static final String LATIN_CAPITAL_I_WITH_DOT_ABOVE = "\u0130"; private static final String LATIN_SMALL_I = "i"; private static final String LATIN_SMALL_DOTLESS_I = "\u0131"; private static final String[] LATIN_I_VARIANTS = { LATIN_SMALL_I, LATIN_SMALL_DOTLESS_I, LATIN_CAPITAL_I, LATIN_CAPITAL_I_WITH_DOT_ABOVE, }; public void testCaseMapping_tr_TR() { Locale tr_TR = new Locale("tr", "TR"); assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(tr_TR)); assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(tr_TR)); assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(tr_TR)); assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(tr_TR)); assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(tr_TR)); assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_SMALL_I.toUpperCase(tr_TR)); assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(tr_TR)); assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_CAPITAL_I.toLowerCase(tr_TR)); } public void testCaseMapping_en_US() { Locale en_US = new Locale("en", "US"); assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_I.toUpperCase(en_US)); assertEquals(LATIN_CAPITAL_I, LATIN_CAPITAL_I.toUpperCase(en_US)); assertEquals(LATIN_CAPITAL_I_WITH_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toUpperCase(en_US)); assertEquals(LATIN_SMALL_I, LATIN_SMALL_I.toLowerCase(en_US)); assertEquals(LATIN_SMALL_I, LATIN_CAPITAL_I.toLowerCase(en_US)); assertEquals(LATIN_SMALL_DOTLESS_I, LATIN_SMALL_DOTLESS_I.toLowerCase(en_US)); assertEquals(LATIN_CAPITAL_I, LATIN_SMALL_DOTLESS_I.toUpperCase(en_US)); // http://b/3325799: the RI fails this because it's using an obsolete version of the Unicode rules. // Android correctly preserves canonical equivalence. (See the separate test for tr_TR.) assertEquals(LATIN_SMALL_I + COMBINING_DOT_ABOVE, LATIN_CAPITAL_I_WITH_DOT_ABOVE.toLowerCase(en_US)); } public void testCaseMapping_el() { Locale el_GR = new Locale("el", "GR"); assertEquals("ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ OΣ ΟΣ Σ ΕΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(el_GR)); assertEquals("ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ OΣ ΟΣ Σ ΕΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(el_GR)); assertEquals("ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ OΣ ΟΣ Σ ΕΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(el_GR)); Locale en_US = new Locale("en", "US"); assertEquals("ΟΔΌΣ ΟΔΌΣ ΣΟ ΣΟ OΣ ΟΣ Σ ἝΞ", "ΟΔΌΣ Οδός Σο ΣΟ oΣ ΟΣ σ ἕξ".toUpperCase(en_US)); } public void testEqualsIgnoreCase_tr_TR() { testEqualsIgnoreCase(new Locale("tr", "TR")); } public void testEqualsIgnoreCase_en_US() { testEqualsIgnoreCase(new Locale("en", "US")); } /** * String.equalsIgnoreCase should not depend on the locale. */ private void testEqualsIgnoreCase(Locale locale) { Locale defaultLocale = Locale.getDefault(); Locale.setDefault(locale); try { for (String a : LATIN_I_VARIANTS) { for (String b : LATIN_I_VARIANTS) { if (!a.equalsIgnoreCase(b)) { fail("Expected " + a + " to equal " + b + " in " + locale); } } } } finally { Locale.setDefault(defaultLocale); } } public void testRegionMatches_ignoreCase_en_US() { testRegionMatches_ignoreCase(new Locale("en", "US")); } public void testRegionMatches_ignoreCase_tr_TR() { testRegionMatches_ignoreCase(new Locale("tr", "TR")); } private void testRegionMatches_ignoreCase(Locale locale) { Locale defaultLocale = Locale.getDefault(); Locale.setDefault(locale); try { for (String a : LATIN_I_VARIANTS) { for (String b : LATIN_I_VARIANTS) { if (!a.regionMatches(true, 0, b, 0, b.length())) { fail("Expected " + a + " to equal " + b + " in " + locale); } } } } finally { Locale.setDefault(defaultLocale); } } // http://code.google.com/p/android/issues/detail?id=15266 public void test_replaceAll() throws Exception { assertEquals("project_Id", "projectId".replaceAll("(?!^)(\\p{Upper})(?!$)", "_$1")); } // https://code.google.com/p/android/issues/detail?id=23831 public void test_23831() throws Exception { byte[] bytes = { (byte) 0xf5, (byte) 0xa9, (byte) 0xea, (byte) 0x21 }; String expected = "\ufffd\ufffd\u0021"; // Since we use icu4c for CharsetDecoder... CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPLACE); assertEquals(expected, decoder.decode(ByteBuffer.wrap(bytes)).toString()); // Our fast-path code in String should behave the same... assertEquals(expected, new String(bytes, "UTF-8")); } // https://code.google.com/p/android/issues/detail?id=55129 public void test_55129() throws Exception { assertEquals("-h-e-l-l-o- -w-o-r-l-d-", "hello world".replace("", "-")); assertEquals("-w-o-r-l-d-", "hello world".substring(6).replace("", "-")); assertEquals("-*-w-*-o-*-r-*-l-*-d-*-", "hello world".substring(6).replace("", "-*-")); } public void test_String_getBytes() throws Exception { // http://b/11571917 assertEquals("[-126, -96]", Arrays.toString("あ".getBytes("Shift_JIS"))); assertEquals("[-126, -87]", Arrays.toString("か".getBytes("Shift_JIS"))); assertEquals("[-105, 67]", Arrays.toString("佑".getBytes("Shift_JIS"))); assertEquals("[36]", Arrays.toString("$".getBytes("Shift_JIS"))); assertEquals("[-29, -127, -117]", Arrays.toString("か".getBytes("UTF-8"))); // http://b/11639117 assertEquals("[-79, -72, -70, -48]", Arrays.toString("구분".getBytes("EUC-KR"))); // https://code.google.com/p/android/issues/detail?id=63188 assertEquals("[-77, -10, -64, -76, -63, -53]", Arrays.toString("出来了".getBytes("gbk"))); assertEquals("[-77, -10, -64, -76]", Arrays.toString("出来".getBytes("gbk"))); assertEquals("[-77, -10]", Arrays.toString("出".getBytes("gbk"))); } public void test_compareTo() throws Exception { // For strings where a character differs, the result is // the difference between the characters. assertEquals(-1, "a".compareTo("b")); assertEquals(-2, "a".compareTo("c")); assertEquals(1, "b".compareTo("a")); assertEquals(2, "c".compareTo("a")); // For strings where the characters match up to the length of the shorter, // the result is the difference between the strings' lengths. assertEquals(0, "a".compareTo("a")); assertEquals(-1, "a".compareTo("aa")); assertEquals(-1, "a".compareTo("az")); assertEquals(-2, "a".compareTo("aaa")); assertEquals(-2, "a".compareTo("azz")); assertEquals(-3, "a".compareTo("aaaa")); assertEquals(-3, "a".compareTo("azzz")); assertEquals(0, "a".compareTo("a")); assertEquals(1, "aa".compareTo("a")); assertEquals(1, "az".compareTo("a")); assertEquals(2, "aaa".compareTo("a")); assertEquals(2, "azz".compareTo("a")); assertEquals(3, "aaaa".compareTo("a")); assertEquals(3, "azzz".compareTo("a")); } public void test_compareToIgnoreCase() throws Exception { // For strings where a character differs, the result is // the difference between the characters. assertEquals(-1, "a".compareToIgnoreCase("b")); assertEquals(-1, "a".compareToIgnoreCase("B")); assertEquals(-2, "a".compareToIgnoreCase("c")); assertEquals(-2, "a".compareToIgnoreCase("C")); assertEquals(1, "b".compareToIgnoreCase("a")); assertEquals(1, "B".compareToIgnoreCase("a")); assertEquals(2, "c".compareToIgnoreCase("a")); assertEquals(2, "C".compareToIgnoreCase("a")); // For strings where the characters match up to the length of the shorter, // the result is the difference between the strings' lengths. assertEquals(0, "a".compareToIgnoreCase("a")); assertEquals(0, "a".compareToIgnoreCase("A")); assertEquals(0, "A".compareToIgnoreCase("a")); assertEquals(0, "A".compareToIgnoreCase("A")); assertEquals(-1, "a".compareToIgnoreCase("aa")); assertEquals(-1, "a".compareToIgnoreCase("aA")); assertEquals(-1, "a".compareToIgnoreCase("Aa")); assertEquals(-1, "a".compareToIgnoreCase("az")); assertEquals(-1, "a".compareToIgnoreCase("aZ")); assertEquals(-2, "a".compareToIgnoreCase("aaa")); assertEquals(-2, "a".compareToIgnoreCase("AAA")); assertEquals(-2, "a".compareToIgnoreCase("azz")); assertEquals(-2, "a".compareToIgnoreCase("AZZ")); assertEquals(-3, "a".compareToIgnoreCase("aaaa")); assertEquals(-3, "a".compareToIgnoreCase("AAAA")); assertEquals(-3, "a".compareToIgnoreCase("azzz")); assertEquals(-3, "a".compareToIgnoreCase("AZZZ")); assertEquals(1, "aa".compareToIgnoreCase("a")); assertEquals(1, "aA".compareToIgnoreCase("a")); assertEquals(1, "Aa".compareToIgnoreCase("a")); assertEquals(1, "az".compareToIgnoreCase("a")); assertEquals(2, "aaa".compareToIgnoreCase("a")); assertEquals(2, "azz".compareToIgnoreCase("a")); assertEquals(3, "aaaa".compareToIgnoreCase("a")); assertEquals(3, "azzz".compareToIgnoreCase("a")); } }