/*
* Copyright (c) 2002-2012 Alibaba Group Holding Limited.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alibaba.citrus.util.i18n.tool;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
/**
 * Dumps the characters that a charset can encode into a text file, so that the
 * file can afterwards be converted to another encoding with a third-party tool.
 * <p>
 * The code points in <code>[startChar, endChar)</code> are scanned; each run of
 * consecutive encodable characters forms one block. For every block the dump
 * contains the block's string form followed by the characters of the block, and
 * a short summary is appended at the end of the file.
 * </p>
 *
 * @author Michael Zhou
 */
public class CharsetDumper extends CharsetTool {
    private final Charset charset;
    private final CharsetEncoder encoder;
    private final int startChar;
    private final int endChar;

    /**
     * Dumps the default range of the GBK charset into <code>gbk.txt</code>.
     *
     * @param args ignored
     * @throws Exception if the dump fails
     */
    public static void main(String[] args) throws Exception {
        CharsetDumper dumper = new CharsetDumper();

        dumper.dump("gbk.txt");
    }

    /**
     * Creates a dumper for GBK covering <code>[0x4E00, 0xFA30)</code>.
     */
    public CharsetDumper() {
        this("GBK", 0x4E00, 0xFA30);
    }

    /**
     * Creates a dumper for the given charset and character range.
     *
     * @param charsetName name of the charset, resolved with {@link Charset#forName(String)}
     * @param startChar first character to examine (inclusive); raised to
     *            <code>MIN_CHAR</code> when smaller
     * @param endChar end of the range (exclusive); lowered to <code>MAX_CHAR</code>
     *            when larger
     */
    public CharsetDumper(String charsetName, int startChar, int endChar) {
        this.charset = Charset.forName(charsetName);
        this.encoder = charset.newEncoder();
        this.startChar = Math.max(startChar, MIN_CHAR);
        this.endChar = Math.min(endChar, MAX_CHAR);
    }

    /**
     * Writes all blocks of encodable characters, followed by a summary, to a file.
     *
     * @param dumpfile name of the dump file, resolved through <code>getFile</code>
     * @throws IOException if the file cannot be opened or written
     */
    public void dump(String dumpfile) throws IOException {
        File destfile = getFile(dumpfile);

        System.out.println("Dump to " + destfile.getAbsolutePath());

        // The stream is opened on its own so that it can still be closed when
        // wrapping it into a writer fails (e.g. unsupported output encoding).
        FileOutputStream stream = new FileOutputStream(destfile);
        Writer out = null;

        try {
            out = new BufferedWriter(new OutputStreamWriter(stream, OUTPUT_CHARSET));

            int blocks = 0;
            int chars = 0;
            int maxBlockSize = 0;
            int next = startChar;
            CharsetBlock block;

            while ((block = nextBlock(next)) != null) {
                int length = block.getLength();

                chars += length;
                maxBlockSize = Math.max(maxBlockSize, length);
                blocks++;

                out.write(block + NEW_LINE);

                // The following search resumes where this block ended.
                next = block.getEnd();

                int last = block.getEnd() - 1;
                int count = 0;

                for (int j = block.getStart(); j <= last; j++, count++) {
                    out.write((char) j);
                    out.write(SEP_CHAR);

                    // Break the line after LINE_LENGTH characters and after the
                    // final character of the block.
                    if (count % LINE_LENGTH == LINE_LENGTH - 1 || j == last) {
                        out.write(NEW_LINE);
                    }
                }

                out.write(NEW_LINE);
            }

            out.write("- Charset: " + charset.name() + NEW_LINE);
            out.write("- Blocks: " + blocks + NEW_LINE);
            out.write("- Max block size: " + maxBlockSize + NEW_LINE);
            out.write("- Total chars: " + chars + NEW_LINE);

            out.flush();
        } finally {
            try {
                if (out != null) {
                    out.close();
                } else {
                    stream.close();
                }
            } catch (IOException ignored) {
                // Best-effort close: on success the data has already been flushed
                // above, and on failure the exception that is propagating out of
                // the try block must not be replaced by a close error.
            }
        }
    }

    /**
     * Finds the next run of consecutive encodable characters.
     *
     * @param i position at which the search starts
     * @return a block whose start is the first encodable character at or after
     *         <code>i</code> and whose end is the position just past the run, or
     *         <code>null</code> if no encodable character exists before
     *         <code>endChar</code>
     */
    private CharsetBlock nextBlock(int i) {
        // Skip everything the charset cannot encode.
        while (i < endChar && !encoder.canEncode((char) i)) {
            i++;
        }

        if (i >= endChar) {
            return null;
        }

        CharsetBlock block = new CharsetBlock();

        block.setStart(i);

        // Position i is known to be encodable; extend the run as far as possible.
        do {
            i++;
        } while (i < endChar && encoder.canEncode((char) i));

        block.setEnd(i);

        return block;
    }
}