package com.orgzly.android.util;
import org.mozilla.universalchardet.UniversalDetector;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
public class EncodingDetect {
public enum Method {
// ICU, // Too big. Maybe strip it?
// JCHARDET, // Did not detect encoding for worg/org-blog-articles.org
JUNIVERSALCHARDET
}
public static final Method USED_METHOD = Method.JUNIVERSALCHARDET;
private InputStream fileInputStream;
private boolean hasRan = false;
private String detectedCharset;
public static EncodingDetect getInstance(InputStream fileInputStream) {
EncodingDetect detect = new EncodingDetect();
detect.fileInputStream = fileInputStream;
return detect;
}
public String getEncoding() {
detect();
return detectedCharset;
}
public boolean isDetected() {
detect();
return detectedCharset != null;
}
/**
* Sets charset to detected value.
*/
private void detect() {
if (! hasRan) {
switch (USED_METHOD) {
// case ICU:
// icuDetect();
// break;
// case JCHARDET:
// nsDetect();
// break;
case JUNIVERSALCHARDET:
universalDetect();
break;
}
}
hasRan = true;
}
/**
* Detect charset using ICU.
* International Components for Unicode (http://site.icu-project.org/)
*
* Source code available at
* http://source.icu-project.org/repos/icu/icu4j/trunk/
* (it's a SVN repo - it can be checked out)
*/
// private void icuDetect() {
// BufferedInputStream imp = null;
//
// try {
// imp = new BufferedInputStream(fileInputStream);
//
// CharsetMatch match = new CharsetDetector().setText(imp).detect();
//
// System.out.println(name + ": " + match.getLanguage() + " " + match.getName() + " " + match.getConfidence());
//
// detectedCharset = match.getName();
//
// } catch (Exception e) {
// e.printStackTrace();
//
// } finally {
// if (imp != null) {
// try {
// imp.close();
// } catch (IOException e) {
// e.printStackTrace();
// }
// }
// }
// }
/**
* Detect charset using Java port of Mozilla charset detector
* (http://sourceforge.net/projects/jchardet/?source=typ_redirect)
*/
// private void nsDetect() {
// detectedCharset = null;
//
// // Initialize the nsDetector() ;
// nsDetector det = new nsDetector();
//
// // Set an observer...
// // The Notify() will be called when a matching charset is found.
// det.Init(new nsICharsetDetectionObserver() {
// public void Notify(String charset) {
// System.out.println(name + ": " + charset);
// detectedCharset = charset;
// }
// });
//
//
// byte[] buf = new byte[1024];
// int len;
// boolean done = false;
// boolean isAscii = true;
// BufferedInputStream imp = null;
//
// try {
// imp = new BufferedInputStream(fileInputStream);
//
// while ((len = imp.read(buf, 0, buf.length)) != -1) {
//
// // Check if the stream is only ascii.
// if (isAscii) {
// isAscii = det.isAscii(buf, len);
// }
//
// // DoIt if non-ascii and not done yet.
// if (!isAscii && !done) {
// done = det.DoIt(buf, len, false);
// }
//
// if (detectedCharset != null) {
// return;
// }
// }
//
// } catch (IOException e) {
// e.printStackTrace();
// return;
//
// } finally {
// if (imp != null) {
// try {
// imp.close();
// } catch (IOException e) {
// e.printStackTrace();
// }
// }
// }
//
// det.DataEnd();
//
// if (isAscii) {
// System.out.println(name + ": ASCII");
// detectedCharset = "ASCII";
// }
// }
/**
* juniversalchardet is a Java port of 'universalchardet',
* that is the encoding detector library of Mozilla.
* (https://code.google.com/p/juniversalchardet/)
*/
private void universalDetect() {
byte[] buf = new byte[4096];
// (1)
UniversalDetector detector = new UniversalDetector(null);
// (2)
int n;
BufferedInputStream imp = null;
try {
imp = new BufferedInputStream(fileInputStream);
while ((n = imp.read(buf)) > 0 && !detector.isDone()) {
detector.handleData(buf, 0, n);
}
} catch (IOException e) {
e.printStackTrace();
return;
} finally {
if (imp != null) {
try {
imp.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
// (3)
detector.dataEnd();
// (4)
String charset = detector.getDetectedCharset();
if (charset != null) {
// System.out.println(name + ": " + charset);
detectedCharset = charset;
}
// (5)
detector.reset();
}
}