/* * Copyright (c) 2015 University of Illinois Board of Trustees, All rights reserved. * Developed at GSLIS/ the iSchool, by Dr. Jana Diesner, Amirhossein Aleyasen, * Chieh-Li Chin, Shubhanshu Mishra, Kiumars Soltani, and Liang Tao. * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation; either version 2 of the License, or any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along with * this program; if not, see <http://www.gnu.org/licenses>. * */ package context.core.util; /** * * @author Aale */ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; //import org.mozilla.universalchardet.UniversalDetector; import org.openide.util.Exceptions; /** * * @author Aale */ public class CharsetDetector { /** * * @param filename * @return */ public static String detectCharset(String filename) { return "UTF8"; //TODO: fix charset detection using new library /* java.io.FileInputStream fis = null; try { byte[] buf = new byte[4096]; fis = new java.io.FileInputStream(filename); // (1) UniversalDetector detector = new UniversalDetector(null); // (2) int nread; while ((nread = fis.read(buf)) > 0 && !detector.isDone()) { detector.handleData(buf, 0, nread); } // (3) detector.dataEnd(); // (4) String encoding = detector.getDetectedCharset(); // (5) detector.reset(); if (encoding != null) { return encoding; } else { return null; } } catch (FileNotFoundException ex) { Exceptions.printStackTrace(ex); } catch (IOException ex) { Exceptions.printStackTrace(ex); } finally { try { fis.close(); } catch (IOException ex) { Exceptions.printStackTrace(ex); } } return null; */ } /** * * @param args */ public static void main(String[] args) { String file = "C:\\Users\\Aale\\DBBACuP2\\Development\\Java-ws\\Context-FX\\data\\test-arabic\\test-arabic\\TEST\\arabictest.txt"; // System.out.println(detectCharset(file)); final String output = readFile(file); System.out.println("output=" + output); } /** * * @param filepath * @return */ public static String readFile(String filepath) { try { File fileDir = new File(filepath); String encoding = detectCharset(filepath); if (encoding == null) { encoding = "UTF8"; } BufferedReader in = new BufferedReader( new InputStreamReader( new FileInputStream(fileDir), encoding)); String str; StringBuilder strbf = new StringBuilder(); while ((str = in.readLine()) != null) { // System.out.println(str); strbf.append(str).append("\n"); } in.close(); return strbf.toString(); } catch (UnsupportedEncodingException e) { System.out.println(e.getMessage()); } catch (IOException e) { System.out.println(e.getMessage()); } return null; } }