package edu.fudan.nlp.corpus;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

/**
 * Re-encodes text files from one character encoding to another, line by line.
 *
 * <p>Run as a program, it converts every regular file directly inside a directory and writes
 * each result next to its source as {@code <source path>.<target encoding>}.
 */
public class CharEnc {

    /** Directory converted when no directory is given on the command line. */
    private static final String DEFAULT_INPUT_DIR = "D:/Datasets/sighan2006/processed";

    /** Encoding the input files are read with unless overridden on the command line. */
    private static final String DEFAULT_SOURCE_ENCODING = "utf8";

    /** Encoding the output files are written with unless overridden on the command line. */
    private static final String DEFAULT_TARGET_ENCODING = "gbk";

    /**
     * Converts every regular file directly inside the input directory, then prints {@code Done}.
     * Nothing is converted when the path is not a directory or cannot be listed.
     *
     * @param args optional overrides, in order: input directory, source encoding, target
     *             encoding; any omitted argument falls back to its built-in default
     * @throws Exception if a file cannot be read or written, or an encoding name is unsupported
     */
    public static void main(String[] args) throws Exception {
        String inputDir = argOrDefault(args, 0, DEFAULT_INPUT_DIR);
        String sourceEncoding = argOrDefault(args, 1, DEFAULT_SOURCE_ENCODING);
        String targetEncoding = argOrDefault(args, 2, DEFAULT_TARGET_ENCODING);

        File dir = new File(inputDir);
        if (dir.isDirectory()) {
            // listFiles() returns null when the directory cannot be read.
            File[] entries = dir.listFiles();
            if (entries != null) {
                String convertedSuffix = "." + targetEncoding;
                for (File entry : entries) {
                    // Subdirectories cannot be opened as files. Outputs of an earlier run live
                    // in this same directory and must not be decoded again as source data.
                    if (!entry.isFile() || entry.getName().endsWith(convertedSuffix)) {
                        continue;
                    }
                    processLabeledData(entry.toString(), sourceEncoding, targetEncoding);
                }
            }
        }
        System.out.println("Done");
    }

    /**
     * Copies the text file {@code input} to {@code input + "." + enc2}, decoding it with
     * {@code enc1} and encoding the copy with {@code enc2}.
     *
     * <p>Every line of the output, including the last one, is terminated with a single
     * {@code '\n'} regardless of the line terminators used in the source. A byte-order mark at
     * the start of the source is dropped. An existing output file is overwritten.
     *
     * @param input path of the file to convert
     * @param enc1  name of the character encoding of {@code input}
     * @param enc2  name of the character encoding of the output; also used as the suffix of
     *              the output file name
     * @throws Exception if the input cannot be read, the output cannot be written, or either
     *                   encoding name is not supported
     */
    public static void processLabeledData(String input, String enc1, String enc2) throws Exception {
        // try-with-resources closes every stream even when reading or writing fails midway.
        try (FileInputStream in = new FileInputStream(input);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, enc1));
             FileOutputStream out = new FileOutputStream(input + "." + enc2);
             OutputStreamWriter writer = new OutputStreamWriter(out, enc2)) {
            boolean firstLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // The UTF-8 decoder keeps a leading BOM as U+FEFF; it has no mapping in
                    // legacy encodings and would be written as a stray replacement character.
                    if (line.startsWith("\uFEFF")) {
                        line = line.substring(1);
                    }
                }
                writer.write(line);
                writer.write('\n');
            }
        }
    }

    /** Returns {@code args[index]} when that argument was supplied, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }
}