/* This code is part of Freenet. It is distributed under the GNU General
* Public License, version 2 (or at your option any later version). See
* http://www.gnu.org/ for further details of the GPL. */
package freenet.client.filter;
import java.io.IOException;
/**
* For a specific text/-based MIME type, extracts the charset if
* possible.
*/
public interface CharsetExtractor {
String getCharset(byte[] input, int length, String parseCharset) throws DataFilterException, IOException;
/** Inspect the first few bytes of the file for any obvious but
* type-specific BOM. Don't try too hard, if we don't find anything we
* will call getCharset() with some specific charset families to try.
* @param input The data.
* @return The BOM-detected charset family, this is essentially a guess
* which will have to be fed to getCharset().
* (A true BOM would give an exact match, but the caller will have
* already tested for true BOMs by this point; we are looking for
* "@charset \"" encoded with the given format)
* @throws DataFilterException
* @throws IOException
*/
BOMDetection getCharsetByBOM(byte[] input, int length) throws DataFilterException, IOException;
/**How many bytes must be fed into the CharsetExtractor to figure
* out the charset
*/
public int getCharsetBufferSize();
public class BOMDetection {
/** The charset, guessed from the first few characters. */
final String charset;
/** If this is true, getCharset() must return a charset, if it does
* not, we ignore the whole stylesheet. See CSS 2.1 section 4.4, at
* the end, "as specified" rule. */
final boolean mustHaveCharset;
BOMDetection(String charset, boolean mustHaveCharset) {
this.charset = charset;
this.mustHaveCharset = mustHaveCharset;
}
}
}