package com.m.common.utils;
/**
* <p>Title: PunycodeUtil.java</p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2004</p>
* <p>Company: TEC 3rd of CNNIC </p>
* @author Orsen Leo
* @version 1.0
*/
import java.io.*;
import java.util.*;
/**
 * Converts domain names between their Unicode form and their Punycode
 * (RFC 3492 Bootstring) form, one dot-separated label at a time.
 *
 * <p>Only the Punycode transformation is performed: no case folding,
 * normalization or other IDNA preparation is applied to the input, and basic
 * (ASCII) code points are copied through with their case unchanged.</p>
 *
 * <p>Encoded labels carry the {@code xn--} prefix. Labels that consist solely
 * of ASCII letters, digits and hyphens are passed through unchanged in both
 * directions. Empty labels (leading, trailing or consecutive dots) are
 * preserved.</p>
 */
public class PunycodeUtil {

    /* Error messages carried by PunyException. */
    private static final String INPUT_STRING_NULL = "Translate Error : input string is a null";
    private static final String PUNYSTR_BAD_INPUT = "Exception in change punycode to chinese,Input_Str is not punycode";
    private static final String TOO_LONG_OUTPUT = "Output Error, Output would exceed the space provided(256 as default)";
    private static final String INTEGER_OVERFLOW = "Input needs wider integers to process";

    /* Bootstring parameters for Punycode. */
    private static final int BASE = 36;
    private static final int TMIN = 1;
    private static final int TMAX = 26;
    private static final int SKEW = 38;
    private static final int DAMP = 700;
    private static final int INITIAL_BIAS = 72;
    private static final int INITIAL_N = 0x80;
    private static final char DELIMITER = '-';

    /** Maximum number of code points a decoded label may contain. */
    private static final int UNICODE_MAX_LENGTH = 256;
    /** Maximum number of characters an encoded label may contain, prefix excluded. */
    private static final int ACE_MAX_LENGTH = 256;

    private static final String PUNY_PREFIX = "xn--";
    private static final char LABEL_SEPARATOR = '.';

    /**
     * Encodes every label of a domain name that needs it into Punycode.
     *
     * <p>A label made only of ASCII letters, digits and '-' is returned as is.
     * Any other label is encoded and prefixed with {@code xn--}; the generated
     * digits are always lower-case.</p>
     *
     * @param chstr the domain name to encode; must not be null or empty
     * @return the domain name with each label converted where necessary
     * @throws PunyException if {@code chstr} is null or empty, or if an
     *         encoded label would exceed 256 characters
     */
    public static String chinese2punycode(String chstr) throws PunyException {
        if (chstr == null || chstr.length() == 0) {
            throw new PunyException(INPUT_STRING_NULL);
        }
        return convertLabels(chstr, true);
    }

    /**
     * Decodes every {@code xn--} label of a domain name back to Unicode.
     *
     * <p>A label made only of ASCII letters, digits and '-' that does not start
     * with {@code xn--} (compared case-insensitively) is returned as is. Every
     * other label, including the last one, must be a well-formed Punycode
     * label, otherwise the call fails.</p>
     *
     * @param punystr the domain name to decode; must not be null or empty
     * @return the domain name with each Punycode label decoded
     * @throws PunyException if {@code punystr} is null or empty, if a label is
     *         not valid Punycode, or if a decoded label would exceed 256 code
     *         points
     */
    public static String punycode2chinese(String punystr) throws PunyException {
        if (punystr == null || punystr.length() == 0) {
            throw new PunyException(PUNYSTR_BAD_INPUT);
        }
        return convertLabels(punystr, false);
    }

    /**
     * Splits {@code domain} on '.', converts each non-empty label and joins
     * the results again. Empty labels contribute nothing but their dot, so the
     * number and position of dots is preserved.
     *
     * @param domain the non-null domain name
     * @param encode {@code true} to encode labels, {@code false} to decode them
     */
    private static String convertLabels(String domain, boolean encode) throws PunyException {
        StringBuilder result = new StringBuilder(domain.length() + 16);
        int start = 0;
        while (true) {
            int dot = domain.indexOf(LABEL_SEPARATOR, start);
            String label = dot < 0 ? domain.substring(start) : domain.substring(start, dot);
            if (label.length() > 0) {
                result.append(encode ? encodeLabel(label) : decodeLabel(label));
            }
            if (dot < 0) {
                break;
            }
            result.append(LABEL_SEPARATOR);
            start = dot + 1;
        }
        return result.toString();
    }

    /** Returns true when the label contains only ASCII letters, digits and '-'. */
    private static boolean isLdh(String label) {
        for (int i = 0; i < label.length(); i++) {
            char c = label.charAt(i);
            boolean ldh = (c >= '0' && c <= '9')
                    || (c >= 'A' && c <= 'Z')
                    || (c >= 'a' && c <= 'z')
                    || c == '-';
            if (!ldh) {
                return false;
            }
        }
        return true;
    }

    /** Returns true when the label starts with {@code xn--}, ignoring case. */
    private static boolean hasPrefix(String label) {
        return label.regionMatches(true, 0, PUNY_PREFIX, 0, PUNY_PREFIX.length());
    }

    /** Expands a string into its Unicode code points (surrogate pairs are combined). */
    private static int[] toCodePoints(String text) {
        int[] codePoints = new int[text.codePointCount(0, text.length())];
        int charIndex = 0;
        for (int k = 0; k < codePoints.length; k++) {
            int cp = text.codePointAt(charIndex);
            codePoints[k] = cp;
            charIndex += Character.charCount(cp);
        }
        return codePoints;
    }

    /** Maps a digit value 0..35 to 'a'..'z' (0..25) or '0'..'9' (26..35). */
    private static char encodeDigit(int digit) {
        return (char) (digit < 26 ? 'a' + digit : '0' + (digit - 26));
    }

    /**
     * Maps a character to its digit value, accepting letters in either case.
     *
     * @return 0..35 for a valid digit, or {@code BASE} for any other character
     */
    private static int decodeDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0' + 26;
        }
        if (c >= 'A' && c <= 'Z') {
            return c - 'A';
        }
        if (c >= 'a' && c <= 'z') {
            return c - 'a';
        }
        return BASE;
    }

    /** Threshold t(k) of the generalized variable-length integer for the given bias. */
    private static int threshold(int k, int bias) {
        if (k <= bias) {
            return TMIN;
        }
        if (k >= bias + TMAX) {
            return TMAX;
        }
        return k - bias;
    }

    /**
     * Bias adaptation function of the Bootstring algorithm.
     *
     * @param delta the non-negative delta that was just encoded or decoded
     * @param numPoints number of code points handled so far, including this one
     * @param firstTime whether this is the first delta of the label
     * @return the bias to use for the next delta
     */
    private static int adapt(int delta, int numPoints, boolean firstTime) {
        int scaled = firstTime ? delta / DAMP : delta / 2;
        scaled += scaled / numPoints;
        int k = 0;
        while (scaled > ((BASE - TMIN) * TMAX) / 2) {
            scaled /= BASE - TMIN;
            k += BASE;
        }
        return k + (BASE - TMIN + 1) * scaled / (scaled + SKEW);
    }

    /**
     * Encodes a single non-empty label.
     *
     * @return the label itself when it is pure letters/digits/hyphen, otherwise
     *         {@code xn--} followed by its Punycode encoding
     * @throws PunyException if the encoding would exceed ACE_MAX_LENGTH
     *         characters or the arithmetic would overflow
     */
    private static String encodeLabel(String label) throws PunyException {
        if (isLdh(label)) {
            return label;
        }
        int[] input = toCodePoints(label);
        StringBuilder output = new StringBuilder(label.length() + 8);

        /* Copy the basic code points through unchanged. */
        for (int j = 0; j < input.length; j++) {
            if (input[j] < 0x80) {
                /* Leave room for the delimiter that follows the basic code points. */
                if (ACE_MAX_LENGTH - output.length() < 2) {
                    throw new PunyException(TOO_LONG_OUTPUT);
                }
                output.append((char) input[j]);
            }
        }
        int basicCount = output.length();
        int handled = basicCount;
        if (basicCount > 0) {
            output.append(DELIMITER);
        }

        int n = INITIAL_N;
        int delta = 0;
        int bias = INITIAL_BIAS;
        while (handled < input.length) {
            /* All code points below n are handled; find the next larger one. */
            int next = Integer.MAX_VALUE;
            for (int j = 0; j < input.length; j++) {
                if (input[j] >= n && input[j] < next) {
                    next = input[j];
                }
            }
            /* Advance the decoder state <n,i> to <next,0>, guarding overflow. */
            if (next - n > (Integer.MAX_VALUE - delta) / (handled + 1)) {
                throw new PunyException(INTEGER_OVERFLOW);
            }
            delta += (next - n) * (handled + 1);
            n = next;

            for (int j = 0; j < input.length; j++) {
                if (input[j] < n) {
                    if (delta == Integer.MAX_VALUE) {
                        throw new PunyException(INTEGER_OVERFLOW);
                    }
                    delta++;
                }
                if (input[j] == n) {
                    /* Emit delta as a generalized variable-length integer. */
                    int q = delta;
                    for (int k = BASE;; k += BASE) {
                        if (output.length() >= ACE_MAX_LENGTH) {
                            throw new PunyException(TOO_LONG_OUTPUT);
                        }
                        int t = threshold(k, bias);
                        if (q < t) {
                            break;
                        }
                        output.append(encodeDigit(t + (q - t) % (BASE - t)));
                        q = (q - t) / (BASE - t);
                    }
                    output.append(encodeDigit(q));
                    bias = adapt(delta, handled + 1, handled == basicCount);
                    delta = 0;
                    handled++;
                }
            }
            delta++;
            n++;
        }
        return PUNY_PREFIX + output;
    }

    /**
     * Decodes a single non-empty label.
     *
     * @return the label itself when it is pure letters/digits/hyphen without the
     *         {@code xn--} prefix, otherwise the decoded Unicode text
     * @throws PunyException if the label is not well-formed Punycode, if the
     *         result would exceed UNICODE_MAX_LENGTH code points, or if the
     *         arithmetic would overflow
     */
    private static String decodeLabel(String label) throws PunyException {
        boolean prefixed = hasPrefix(label);
        if (!prefixed && isLdh(label)) {
            return label;
        }
        /* Anything else must be "xn--" followed by at least one character. */
        if (!prefixed || label.length() <= PUNY_PREFIX.length()) {
            throw new PunyException(PUNYSTR_BAD_INPUT);
        }
        String body = label.substring(PUNY_PREFIX.length());
        int length = body.length();
        for (int j = 0; j < length; j++) {
            char c = body.charAt(j);
            /* Only space and printable ASCII are accepted after the prefix. */
            if (c < 0x20 || c > 0x7E) {
                throw new PunyException(PUNYSTR_BAD_INPUT);
            }
        }

        /*
         * Everything before the last delimiter is a literal basic code point.
         * A delimiter at index 0 does not count: it is read as a digit and
         * rejected below.
         */
        int basicCount = Math.max(body.lastIndexOf(DELIMITER), 0);
        if (basicCount > UNICODE_MAX_LENGTH) {
            throw new PunyException(TOO_LONG_OUTPUT);
        }
        int[] output = new int[UNICODE_MAX_LENGTH];
        int out = 0;
        for (int j = 0; j < basicCount; j++) {
            output[out++] = body.charAt(j);
        }

        int n = INITIAL_N;
        int i = 0;
        int bias = INITIAL_BIAS;
        for (int in = basicCount > 0 ? basicCount + 1 : 0; in < length; out++) {
            /*
             * Decode one generalized variable-length integer into i. i keeps
             * growing across iterations; subtracting its starting value gives
             * the delta.
             */
            int oldI = i;
            int weight = 1;
            for (int k = BASE;; k += BASE) {
                if (in >= length) {
                    throw new PunyException(PUNYSTR_BAD_INPUT);
                }
                int digit = decodeDigit(body.charAt(in++));
                if (digit >= BASE) {
                    throw new PunyException(PUNYSTR_BAD_INPUT);
                }
                if (digit > (Integer.MAX_VALUE - i) / weight) {
                    throw new PunyException(INTEGER_OVERFLOW);
                }
                i += digit * weight;
                int t = threshold(k, bias);
                if (digit < t) {
                    break;
                }
                if (weight > Integer.MAX_VALUE / (BASE - t)) {
                    throw new PunyException(INTEGER_OVERFLOW);
                }
                weight *= BASE - t;
            }
            bias = adapt(i - oldI, out + 1, oldI == 0);

            /* i wraps around from out+1 to 0, incrementing n each time. */
            if (i / (out + 1) > Integer.MAX_VALUE - n) {
                throw new PunyException(INTEGER_OVERFLOW);
            }
            n += i / (out + 1);
            i %= out + 1;
            if (out >= UNICODE_MAX_LENGTH) {
                throw new PunyException(TOO_LONG_OUTPUT);
            }
            if (n > Character.MAX_CODE_POINT) {
                throw new PunyException(PUNYSTR_BAD_INPUT);
            }
            /* Insert n at position i, shifting the tail one slot to the right. */
            System.arraycopy(output, i, output, i + 1, out - i);
            output[i++] = n;
        }
        return new String(output, 0, out);
    }

    /**
     * Signals that a string could not be converted to or from Punycode; the
     * message describes the reason.
     */
    public static class PunyException extends java.lang.Exception implements java.io.Serializable {
        /** Creates an exception without a detail message. */
        public PunyException() {
            super();
        }

        /**
         * Creates an exception with the given detail message.
         *
         * @param msg description of the conversion failure
         */
        public PunyException(String msg) {
            super(msg);
        }
    }
}