/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ package freenet.test; import junit.framework.TestCase; /** * Utility class used throught test cases classes * * @author Alberto Bacchelli <sback@freenetproject.org> */ public final class UTFUtil extends TestCase { public void testFake() { } /** * Contains all unicode characters except the low and high surrogates (they are no valid characters and constructing strings with them will cause * the JRE to replace them with the default replacement character). Even 0x0000 is included. */ public static final char[] ALL_CHARACTERS; static { ALL_CHARACTERS = new char[Character.MAX_VALUE - Character.MIN_VALUE + 1]; for(int i = 0; i <= (Character.MAX_VALUE - Character.MIN_VALUE); ++i) { int characterValue = (Character.MIN_VALUE + i); // The low and high surrogates are no valid unicode characters. if(characterValue >= Character.MIN_LOW_SURROGATE && characterValue <= Character.MAX_LOW_SURROGATE) ALL_CHARACTERS[i] = ' '; else if(characterValue >= Character.MIN_HIGH_SURROGATE && characterValue <= Character.MAX_HIGH_SURROGATE) ALL_CHARACTERS[i] = ' '; else ALL_CHARACTERS[i] = (char)characterValue; } } //printable ascii symbols public static final char PRINTABLE_ASCII[] = { ' ','!','@','#','$','%','^','&','(',')','+','=','{','}','[',']',':',';','\\','\"','\'', ',','<','>','.','?','~','`'}; //stressed UTF chars values public static final char STRESSED_UTF[] = { //ÉâûĔĭņşÊãüĕĮŇŠËäýĖįňšÌåþėİʼnŢÍæÿĘıŊţÎçĀęIJŋŤÏèāĚijŌťÐéĂěĴōŦÑêăĜĵŎŧ '\u00c9','\u00e2','\u00fb','\u0114','\u012d','\u0146','\u015f','\u00ca','\u00e3','\u00fc', '\u0115','\u012e','\u0147','\u0160','\u00cb','\u00e4','\u00fd','\u0116','\u012f','\u0148', '\u0161','\u00cc','\u00e5','\u00fe','\u0117','\u0130','\u0149','\u0162','\u00cd','\u00e6', '\u00ff','\u0118','\u0131','\u014a','\u0163','\u00ce','\u00e7','\u0100','\u0119','\u0132', '\u014b','\u0164','\u00cf','\u00e8','\u0101','\u011a','\u0133','\u014c','\u0165','\u00d0', '\u00e9','\u0102','\u011b','\u0134','\u014d','\u0166','\u00d1','\u00ea','\u0103','\u011c', '\u0135','\u014e','\u0167', //ÒëĄĝĶŏŨÓìąĞķŐũÔíĆğĸőŪÕîćĠĹŒūÖïĈġĺœŬ×ðĉĢĻŔŭØñĊģļŕŮÙòċĤĽŖůÚóČĥľŗŰ '\u00d2','\u00eb','\u0104','\u011d','\u0136','\u014f','\u0168','\u00d3','\u00ec','\u0105', '\u011e','\u0137','\u0150','\u0169','\u00d4','\u00ed','\u0106','\u011f','\u0138','\u0151', '\u016a','\u00d5','\u00ee','\u0107','\u0120','\u0139','\u0152','\u016b','\u00d6','\u00ef', '\u0108','\u0121','\u013a','\u0153','\u016c','\u00d7','\u00f0','\u0109','\u0122','\u013b', '\u0154','\u016d','\u00d8','\u00f1','\u010a','\u0123','\u013c','\u0155','\u016e','\u00d9', '\u00f2','\u010b','\u0124','\u013d','\u0156','\u016f','\u00da','\u00f3','\u010c','\u0125', '\u013e','\u0157','\u0170', //ÛôčĦĿŘűÜõĎħŀřŲÝöďĨŁŚųÞ÷ĐĩłśŴßøđĪŃŜŵàùĒīńŝŶáúēĬŅŞŷ '\u00db','\u00f4','\u010d','\u0126','\u013f','\u0158','\u0171','\u00dc','\u00f5','\u010e', '\u0127','\u0140','\u0159','\u0172','\u00dd','\u00f6','\u010f','\u0128','\u0141','\u015a', '\u0173','\u00de','\u00f7','\u0110','\u0129','\u0142','\u015b','\u0174','\u00df','\u00f8', '\u0111','\u012a','\u0143','\u015c','\u0175','\u00e0','\u00f9','\u0112','\u012b','\u0144', '\u015d','\u0176','\u00e1','\u00fa','\u0113','\u012c','\u0145','\u015e','\u0177'}; /* * HTML entities ISO-88591 * see for reference http://www.w3.org/TR/html4/sgml/entities.html#iso-88591 */ public static final String HTML_ENTITIES_UTF[][] = { //ISO 8859-1 Symbol Entities {"\u00a1","¡"},{"\u00a2","¢"},{"\u00a3","£"},{"\u00a4","¤"}, {"\u00a5","¥"},{"\u00a6","¦"},{"\u00a7","§"},{"\u00a8","¨"}, {"\u00a9","©"},{"\u00aa","ª"},{"\u00ab","«"},{"\u00ac","¬"}, {"\u00ad","­"},{"\u00ae","®"},{"\u00af","¯"}, {"\u00b0","°"},{"\u00b1","±"},{"\u00b2","²"},{"\u00b3","³"}, {"\u00b4","´"},{"\u00b5","µ"},{"\u00b6","¶"},{"\u00b7","·"}, {"\u00b8","¸"},{"\u00b9","¹"},{"\u00ba","º"},{"\u00bb","»"}, {"\u00bc","¼"},{"\u00bd","½"},{"\u00be","¾"},{"\u00bf","¿"}, //ISO 8859-1 Character Entities {"\u00c0","À"},{"\u00c1","Á"},{"\u00c2","Â"},{"\u00c3","Ã"}, {"\u00c4","Ä"},{"\u00c5","Å"},{"\u00c6","Æ"},{"\u00c7","Ç"}, {"\u00c8","È"},{"\u00c9","É"},{"\u00ca","Ê"},{"\u00cb","Ë"}, {"\u00cc","Ì"},{"\u00cd","Í"},{"\u00ce","Î"},{"\u00cf","Ï"}, {"\u00d0","Ð"},{"\u00d1","Ñ"},{"\u00d2","Ò"},{"\u00d3","Ó"}, {"\u00d4","Ô"},{"\u00d5","Õ"},{"\u00d6","Ö"},{"\u00d7","×"}, {"\u00d8","Ø"},{"\u00d9","Ù"},{"\u00da","Ú"},{"\u00db","Û"}, {"\u00dc","Ü"},{"\u00dd","Ý"},{"\u00de","Þ"},{"\u00df","ß"}, {"\u00e0","à"},{"\u00e1","á"},{"\u00e2","â"},{"\u00e3","ã"}, {"\u00e4","ä"},{"\u00e5","å"},{"\u00e6","æ"},{"\u00e7","ç"}, {"\u00e8","è"},{"\u00e9","é"},{"\u00ea","ê"},{"\u00eb","ë"}, {"\u00ec","ì"},{"\u00ed","í"},{"\u00ee","î"},{"\u00ef","ï"}, {"\u00f0","ð"},{"\u00f1","ñ"}, {"\u00f2","ò"},{"\u00f3","ó"},{"\u00f4","ô"},{"\u00f5","õ"}, {"\u00f6","ö"},{"\u00f7","÷"},{"\u00f8","ø"}, {"\u00f9","ù"},{"\u00fa","ú"},{"\u00fb","û"},{"\u00fc","ü"}, {"\u00fd","ý"},{"\u00fe","þ"},{"\u00ff","ÿ"}, //Greek {"\u0391","Α"},{"\u0392","Β"},{"\u0393","Γ"},{"\u0394","Δ"}, {"\u0395","Ε"},{"\u0396","Ζ"},{"\u0397","Η"},{"\u0398","Θ"}, {"\u0399","Ι"},{"\u039a","Κ"},{"\u039b","Λ"},{"\u039c","Μ"}, {"\u039d","Ν"},{"\u039e","Ξ"},{"\u039f","Ο"},{"\u03a0","Π"}, {"\u03a1","Ρ"},{"\u03a3","Σ"},{"\u03a4","Τ"},{"\u03a5","Υ"}, {"\u03a6","Φ"},{"\u03a7","Χ"},{"\u03a8","Ψ"},{"\u03a9","Ω"}, {"\u03b1","α"},{"\u03b2","β"},{"\u03b3","γ"},{"\u03b4","δ"}, {"\u03b5","ε"},{"\u03b6","ζ"},{"\u03b7","η"},{"\u03b8","θ"}, {"\u03b9","ι"},{"\u03ba","κ"},{"\u03bb","λ"},{"\u03bc","μ"}, {"\u03bd","ν"},{"\u03be","ξ"},{"\u03bf","ο"},{"\u03c0","π"}, {"\u03c1","ρ"},{"\u03c2","ς"},{"\u03c3","σ"},{"\u03c4","τ"}, {"\u03c5","υ"},{"\u03c6","φ"},{"\u03c7","χ"},{"\u03c8","ψ"}, {"\u03c9","ω"},{"\u03d1","ϑ"},{"\u03d2","ϒ"},{"\u03d6","ϖ"}, //General Punctuation {"\u2022","•"},{"\u2026","…"},{"\u2032","′"},{"\u2033","″"}, {"\u203e","‾"},{"\u2044","⁄"}, //Letterlike Symbols {"\u2118","℘"},{"\u2111","ℑ"},{"\u211c","ℜ"},{"\u2122","™"}, {"\u2135","ℵ"}, //Arrows {"\u2190","←"},{"\u2191","↑"},{"\u2192","→"},{"\u2193","↓"}, {"\u2194","↔"},{"\u21b5","↵"},{"\u21d0","⇐"},{"\u21d1","⇑"}, {"\u21d2","⇒"},{"\u21d3","⇓"},{"\u21d4","⇔"}, //Mathematical Operators {"\u2200","∀"},{"\u2202","∂"},{"\u2203","∃"},{"\u2205","∅"}, {"\u2207","∇"},{"\u2208","∈"},{"\u2209","∉"},{"\u220b","∋"}, {"\u220f","∏"},{"\u2211","∑"},{"\u2212","−"},{"\u2217","∗"}, {"\u221a","√"},{"\u221d","∝"},{"\u221e","∞"},{"\u2220","∠"}, {"\u2227","∧"},{"\u2228","∨"},{"\u2229","∩"},{"\u222a","∪"}, {"\u222b","∫"},{"\u2234","∴"},{"\u223c","∼"},{"\u2245","≅"}, {"\u2248","≈"},{"\u2260","≠"},{"\u2261","≡"},{"\u2264","≤"}, {"\u2265","≥"},{"\u2282","⊂"},{"\u2283","⊃"},{"\u2284","⊄"}, {"\u2286","⊆"},{"\u2287","⊇"},{"\u2295","⊕"},{"\u2297","⊗"}, {"\u22a5","⊥"},{"\u22c5","⋅"}, //Miscellaneous Technical {"\u2308","⌈"},{"\u2309","⌉"},{"\u230a","⌊"},{"\u230b","⌋"}, {"\u2329","⟨"},{"\u232a","⟩"}, //Geometric Shapes {"\u25ca","◊"},{"\u2660","♠"},{"\u2663","♣"},{"\u2665","♥"}, {"\u2666","♦"}, //Latin Extended-A {"\u0152","Œ"},{"\u0153","œ"},{"\u0160","Š"},{"\u0161","š"}, {"\u0178","Ÿ"}, //Spacing Modifier Letters {"\u02c6","ˆ"},{"\u02dc","˜"}, //General Punctuation {"\u2002"," "},{"\u2003"," "},{"\u2009"," "},{"\u200c","‌"}, {"\u200d","‍"},{"\u200e","‎"},{"\u200f","‏"},{"\u2013","–"}, {"\u2014","—"},{"\u2018","‘"},{"\u2019","’"},{"\u201a","‚"}, {"\u201c","“"},{"\u201d","”"},{"\u201e","„"},{"\u2020","†"}, {"\u2021","‡"},{"\u2030","‰"},{"\u2039","‹"},{"\u203a","›"}, {"\u20ac","€"} }; }