package com.knowgate.dfs.chardet; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.FileInputStream; import com.knowgate.dfs.chardet.nsICharsetDetectionObserver; public class CharacterSetDetector implements nsICharsetDetectionObserver { private boolean bDetectedCharset; private String sDetectedCharset; private nsDetector oDetector; public CharacterSetDetector() { bDetectedCharset = false; sDetectedCharset = null; oDetector = new nsDetector(nsPSMDetector.ALL); } public void Notify(String sCharSet) { sDetectedCharset = sCharSet; bDetectedCharset = true ; } public String detect(InputStream oInStrm, String sDefaultCharset) throws IOException { byte[] aBytes = new byte[1024] ; int iLen; boolean bDone = false ; boolean bIsAscii = true ; oDetector.Init(this); while( (iLen=oInStrm.read(aBytes,0,aBytes.length)) != -1) { // Check if the stream is only ascii. if (bIsAscii) bIsAscii = oDetector.isAscii(aBytes,iLen); // DoIt if non-ascii and not done yet. if (!bIsAscii && !bDone) bDone = oDetector.DoIt(aBytes, iLen, false); } // wend oDetector.DataEnd(); if (bIsAscii) { bDetectedCharset = true; sDetectedCharset = "ASCII"; } if (!bDetectedCharset) { if (sDefaultCharset==null) sDetectedCharset = oDetector.getProbableCharsets()[0]; else sDetectedCharset = sDefaultCharset; } // fi return sDetectedCharset; } // detect public String detect(File oFile, String sDefaultCharset) throws IOException { FileInputStream oInStrm = new FileInputStream(oFile); String sRetVal = detect(oInStrm, sDefaultCharset); oInStrm.close(); return sRetVal; } public String detect(String sFile, String sDefaultCharset) throws IOException { return detect(new File(sFile), sDefaultCharset); } // detect }