/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * The contents of this file are subject to the Netscape Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1998 Netscape Communications Corporation. All * Rights Reserved. * * Contributor(s): */ /* * DO NOT EDIT THIS DOCUMENT MANUALLY !!! * THIS FILE IS AUTOMATICALLY GENERATED BY THE TOOLS UNDER * AutoDetect/tools/ */ package bboss.org.mozilla.intl.chardet; import java.io.*; import java.net.*; import java.util.*; import bboss.org.mozilla.intl.chardet.*; public class HtmlCharsetDetector { public static boolean found = false; public static void main(String argv[]) throws Exception { if (argv.length != 1 && argv.length != 2) { System.out .println("Usage: HtmlCharsetDetector <url> [<languageHint>]"); System.out.println(""); System.out.println("Where <url> is http://..."); System.out.println("For optional <languageHint>. Use following..."); System.out.println(" 1 => Japanese"); System.out.println(" 2 => Chinese"); System.out.println(" 3 => Simplified Chinese"); System.out.println(" 4 => Traditional Chinese"); System.out.println(" 5 => Korean"); System.out.println(" 6 => Dont know (default)"); return; } // Initalize the nsDetector() ; int lang = (argv.length == 2) ? Integer.parseInt(argv[1]) : nsPSMDetector.ALL; nsDetector det = new nsDetector(lang); // Set an observer... // The Notify() will be called when a matching charset is found. det.Init(new nsICharsetDetectionObserver() { public void Notify(String charset) { HtmlCharsetDetector.found = true; System.out.println("CHARSET = " + charset); } }); URL url = new URL(argv[0]); BufferedInputStream imp = new BufferedInputStream(url.openStream()); byte[] buf = new byte[1024]; int len; boolean done = false; boolean isAscii = true; while ((len = imp.read(buf, 0, buf.length)) != -1) { // Check if the stream is only ascii. if (isAscii) isAscii = det.isAscii(buf, len); // DoIt if non-ascii and not done yet. if (!isAscii && !done) done = det.DoIt(buf, len, false); } det.DataEnd(); if (isAscii) { System.out.println("CHARSET = ASCII"); found = true; } if (!found) { String prob[] = det.getProbableCharsets(); for (int i = 0; i < prob.length; i++) { System.out.println("Probable Charset = " + prob[i]); } } } }