package guang.crawler.extension.urlExtractor;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* 该类没有在系统中使用,而是自己在查看JSON数据的时候,发现网页获取的JSON数据实在是比较乱,就想给他格式化一下,另外,
* JSON中的中文都变成了unicode字符,不好看,结果都给转化成UTF-8字符了,这样就清晰多了.
*
* @author sun
*
*/
public class JSONParser {
/**
* 将JSON报文格式化一下,有缩进
*
* @param file
* @throws FileNotFoundException
* @throws IOException
*/
private static void formatJSON(final File file)
throws FileNotFoundException, IOException {
StringBuilder sb = new StringBuilder();
BufferedReader reader = new BufferedReader(new InputStreamReader(
new FileInputStream(file)));
try {
int ch = -1;
int depth = 0;
while ((ch = reader.read()) != -1) {
if (('{' == ch) || ('[' == ch)) {
sb.append("\n");
for (int i = 0; i < depth; i++) {
sb.append(" ");
}
sb.append((char) ch);
depth++;
sb.append("\n");
for (int i = 0; i < depth; i++) {
sb.append(" ");
}
} else if (('}' == ch) || (']' == ch)) {
depth--;
sb.append("\n");
for (int i = 0; i < depth; i++) {
sb.append(" ");
}
sb.append((char) ch);
} else if (',' == ch) {
sb.append((char) ch);
sb.append("\n");
for (int i = 0; i < depth; i++) {
sb.append(" ");
}
} else if (('\n' == ch) || (' ' == ch)) {
// 吃掉换行
} else {
sb.append((char) ch);
}
}
} finally {
reader.close();
}
BufferedOutputStream fileout = new BufferedOutputStream(
new FileOutputStream(file));
try {
fileout.write(sb.toString()
.getBytes());
} finally {
fileout.close();
}
}
public static void main(final String[] args) throws IOException {
File file = new File("/home/sun/desktop/QQComment");
JSONParser.formatJSON(file);
JSONParser.transferUnicode(file);
}
/**
* 将报文中的unicode编码转化为UTF-8编码.
*
* @param file
* @throws FileNotFoundException
* @throws IOException
*/
private static void transferUnicode(final File file)
throws FileNotFoundException, IOException {
StringBuffer sb = new StringBuffer();
BufferedReader reader = new BufferedReader(new InputStreamReader(
new FileInputStream(file)));
try {
String line;
Pattern pattern = Pattern.compile("\\\\u([0-9a-f]{4})");
while ((line = reader.readLine()) != null) {
Matcher matcher = pattern.matcher(line);
while (matcher.find()) {
String unicode = matcher.group(1);
char data = (char) Integer.parseInt(unicode, 16);
matcher.appendReplacement(sb, String.valueOf(data));
}
matcher.appendTail(sb);
sb.append("\n");
}
} finally {
reader.close();
}
BufferedOutputStream fileout = new BufferedOutputStream(
new FileOutputStream(file));
try {
fileout.write(sb.toString()
.getBytes());
} finally {
fileout.close();
}
}
}