package edu.fudan.nlp.cn;
import java.lang.Character.UnicodeBlock;
/**
 * Coarse two-way language detector: a text is reported as Chinese
 * ({@code "cn"}) as soon as it contains one character from a CJK-related
 * Unicode block, and as English ({@code "en"}) otherwise.
 *
 * <p>NOTE(review): the block list contains {@code GENERAL_PUNCTUATION}
 * (U+2000..U+206F), which also holds typographic quotes and dashes used in
 * Western text, so such strings are reported as {@code "cn"}. It is kept
 * unchanged for backward compatibility -- confirm whether that is intended.
 */
public class LangDetection {

    /**
     * Classifies a string as Chinese or English.
     *
     * @param str text to classify; {@code null} is treated like an empty string
     * @return {@code "cn"} if {@code str} contains at least one character
     *         accepted by {@link #isChinese(char[])}, otherwise {@code "en"}
     */
    public static String detect(String str) {
        if (str == null) {
            return "en";
        }
        return isChinese(str.toCharArray()) ? "cn" : "en";
    }

    /**
     * Tells whether the given UTF-16 sequence contains at least one character
     * from the Unicode blocks this class treats as Chinese.
     *
     * <p>The array is scanned by code point, not by {@code char}, so that
     * ideographs outside the Basic Multilingual Plane (encoded as a surrogate
     * pair) are recognised too.
     *
     * @param ch UTF-16 code units to inspect; {@code null} yields {@code false}
     * @return {@code true} on the first matching code point, {@code false} if
     *         none matches or the array is empty
     */
    public static boolean isChinese(char[] ch) {
        if (ch == null) {
            return false;
        }
        int i = 0;
        while (i < ch.length) {
            // An unpaired surrogate is returned as-is and maps to a surrogate
            // block, which is not in the accepted list.
            int codePoint = Character.codePointAt(ch, i);
            if (isChineseCodePoint(codePoint)) {
                return true;
            }
            // Advance by 2 over a surrogate pair, by 1 otherwise.
            i += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Checks a single code point against the accepted Unicode blocks.
     *
     * @param codePoint a valid Unicode code point
     * @return {@code true} if the code point lies in one of the accepted blocks
     */
    private static boolean isChineseCodePoint(int codePoint) {
        // UnicodeBlock.of returns null for code points outside any block; the
        // identity comparisons below are then simply all false.
        UnicodeBlock ub = UnicodeBlock.of(codePoint);
        return ub == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                // Supplementary-plane ideographs, reachable only by code point.
                || ub == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || ub == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
                || ub == UnicodeBlock.GENERAL_PUNCTUATION
                || ub == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Small demo: prints the detected language, a colon, a tab and the sample
     * text for a handful of inputs.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Non-ASCII samples are written as Unicode escapes so the file compiles
        // identically whatever source encoding javac assumes.
        String[] samples = {
            ".",
            "you and me",
            "()",
            "\u3002",            // ideographic full stop
            "\u6211\u4eec",      // two CJK unified ideographs
            "\u6211\u4eecand",   // ideographs followed by Latin letters
            "\u300aand",         // left double angle bracket + Latin letters
        };
        for (String str : samples) {
            System.out.println(LangDetection.detect(str) + ":\t" + str);
        }
    }
}