// CSSReadFilter.java example
//
// Explorer
// fred-master
/* This code is part of Freenet. It is distributed under the GNU General
 * Public License, version 2 (or at your option any later version). See
 * http://www.gnu.org/ for further details of the GPL. */
package freenet.client.filter;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.HashMap;

import freenet.support.HexUtil;
import freenet.support.LogThresholdCallback;
import freenet.support.Logger;
import freenet.support.Logger.LogLevel;
import freenet.support.io.Closer;
import freenet.support.io.NullWriter;

/**
 * Content filter and charset extractor for CSS style sheets.
 *
 * <p>Filtering and charset detection are both delegated to {@link CSSParser};
 * this class wires up the readers/writers, recognises the byte patterns that
 * an <code>@charset "</code> prefix produces in various encodings (CSS 2.1
 * section 4.4), and offers a helper for sanitising media lists.</p>
 */
public class CSSReadFilter implements ContentDataFilter, CharsetExtractor {

	private static volatile boolean logDEBUG;
	private static volatile boolean logMINOR;
	static {
		// Keep the cached log-level flags in sync with the logger configuration.
		Logger.registerLogThresholdCallback(new LogThresholdCallback(){
			@Override
			public void shouldUpdate(){
				logDEBUG = Logger.shouldLog(LogLevel.DEBUG, this);
				logMINOR = Logger.shouldLog(LogLevel.MINOR, this);
			}
		});
	}

	/** Buffer size, in chars, used for the buffered readers and writers. */
	private static final int BUFFER_SIZE = 32768;

	/**
	 * Runs the CSS parser over <code>input</code>, writing the filtered style
	 * sheet to <code>output</code>. Both streams are decoded/encoded with
	 * <code>charset</code>. The streams are flushed but not closed here.
	 *
	 * @param input source of the unfiltered style sheet
	 * @param output destination for the filtered style sheet
	 * @param charset name of the charset used for both reading and writing
	 * @param otherParams not used by this filter
	 * @param cb callback handed to the parser
	 * @throws DataFilterException declared for the parser; an
	 *         UnknownCharsetException is created here when <code>charset</code>
	 *         is not supported by the JVM
	 * @throws IOException if reading, writing or flushing fails
	 */
	@Override
	public void readFilter(InputStream input, OutputStream output, String charset, HashMap<String, String> otherParams,
			FilterCallback cb) throws DataFilterException, IOException {
		if (logDEBUG)
			Logger.debug(
				this,
				"running "
					+ this
					+ "with charset"+charset);
		Reader r = null;
		Writer w = null;
		try {
			try {
				InputStreamReader isr = new InputStreamReader(input, charset);
				OutputStreamWriter osw = new OutputStreamWriter(output, charset);
				r = new BufferedReader(isr, BUFFER_SIZE);
				w = new BufferedWriter(osw, BUFFER_SIZE);
			} catch(UnsupportedEncodingException e) {
				throw UnknownCharsetException.create(e, charset);
			}
			CSSParser parser = new CSSParser(r, w, false, cb, charset, false, false);
			parser.parse();
		}
		finally {
			// w is still null when the charset was rejected above. Flushing
			// unconditionally would replace the UnknownCharsetException with
			// a NullPointerException.
			if(w != null)
				w.flush();
		}
	}

	/**
	 * Not supported for CSS.
	 *
	 * @throws UnsupportedOperationException always
	 */
	@Override
	public void writeFilter(InputStream input, OutputStream output, String charset, HashMap<String, String> otherParams,
	        FilterCallback cb) throws DataFilterException, IOException {
		throw new UnsupportedOperationException();
	}

	/**
	 * Parses the first <code>length</code> bytes of <code>input</code>,
	 * decoded with <code>charset</code>, and returns whatever charset the
	 * parser reports. The parser output is discarded.
	 *
	 * @param input buffer holding the start of the style sheet
	 * @param length number of bytes of <code>input</code> to examine
	 * @param charset charset used to decode the bytes for this attempt
	 * @return the value of {@link CSSParser#detectedCharset()} after parsing
	 * @throws DataFilterException declared for the parser; an
	 *         UnknownCharsetException is created here when <code>charset</code>
	 *         is not supported by the JVM
	 * @throws IOException if reading fails
	 */
	@Override
	public String getCharset(byte [] input, int length, String charset) throws DataFilterException, IOException {
		if(logDEBUG)
			Logger.debug(this, "Fetching charset for CSS with initial charset "+charset);
		if(input.length > getCharsetBufferSize() && logMINOR) {
			Logger.minor(this, "More data than was strictly needed was passed to the charset extractor for extraction");
		}
		InputStream strm = new ByteArrayInputStream(input, 0, length);
		NullWriter w = new NullWriter();
		BufferedReader r = null;
		try {
			try {
				r = new BufferedReader(new InputStreamReader(strm, charset), BUFFER_SIZE);
			} catch(UnsupportedEncodingException e) {
				throw UnknownCharsetException.create(e, charset);
			}
			// NOTE(review): the sixth constructor argument is true only here;
			// presumably it puts the parser in charset-detection mode - confirm
			// against CSSParser.
			CSSParser parser = new CSSParser(r, w, false, new NullFilterCallback(), null, true, false);
			parser.parse();
			// Close explicitly so that a failure to close is reported; the
			// finally block then has nothing left to do for the reader.
			r.close();
			r = null;
			return parser.detectedCharset();
		}
		finally {
			Closer.close(strm);
			Closer.close(r);
			Closer.close(w);
		}
	}

	// CSS 2.1 section 4.4.
	// Each pattern below is the text  @charset "  as encoded in the named charset.
	// In all cases these will be confirmed by calling getCharset().
	// We do not use all of the BOMs suggested.
	// Also, we do not use true BOMs.

	// We do check for ascii, even though it's the first one to check for
	// anyway, because of the "as specified" rule: if it starts with @charset
	// in ascii, it MUST have a valid charset, or we ignore the whole sheet,
	// as per the spec.
	static final byte[] ascii = parse("40 63 68 61 72 73 65 74 20 22");
	static final byte[] utf16be = parse("00 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00 20 00 22");
	static final byte[] utf16le = parse("40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00 20 00 22 00");
	static final byte[] utf32_le = parse("40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00 00 00 22 00 00 00");
	static final byte[] utf32_be = parse("00 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00 00 00 22");
	static final byte[] ebcdic = parse("7C 83 88 81 99 A2 85 A3 40 7F");
	static final byte[] ibm1026 = parse("AE 83 88 81 99 A2 85 A3 40 FC");

	// Not supported.
	static final byte[] utf32_2143 = parse("00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00 00 00 22 00");
	static final byte[] utf32_3412 = parse("00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00 00 00 22 00 00");
	static final byte[] gsm = parse("00 63 68 61 72 73 65 74 20 22");

	/** Length in bytes of the longest pattern declared above. */
	static final int maxBOMLength = longest(ascii, utf16be, utf16le, utf32_le, utf32_be, ebcdic, ibm1026,
			utf32_2143, utf32_3412, gsm);

	/** Returns the length of the longest of the given byte arrays. */
	private static int longest(byte[]... patterns) {
		int max = 0;
		for(byte[] pattern : patterns)
			max = Math.max(max, pattern.length);
		return max;
	}

	/**
	 * Converts a string of space-separated hex byte values to a byte array.
	 *
	 * @param s hex digits, optionally separated by spaces
	 * @return the decoded bytes
	 */
	static byte[] parse(String s) {
		s = s.replaceAll(" ", "");
		return HexUtil.hexToBytes(s);
	}

	/**
	 * Guesses the charset from the leading bytes of a style sheet by matching
	 * them against the known encodings of <code>@charset "</code>.
	 *
	 * @param input buffer holding the start of the style sheet
	 * @param length number of valid bytes in <code>input</code>
	 * @return the detection result for the first supported pattern that
	 *         matches, or null if no pattern matches
	 * @throws UnsupportedCharsetInFilterException if the data matches one of
	 *         the recognised but unsupported patterns (UTF-32-2143,
	 *         UTF-32-3412, GSM 03.38)
	 */
	@Override
	public BOMDetection getCharsetByBOM(byte[] input, int length) throws DataFilterException, IOException {
		if(ContentFilter.startsWith(input, ascii, length))
			return new BOMDetection("UTF-8", true);
		if(ContentFilter.startsWith(input, utf16be, length))
			return new BOMDetection("UTF-16BE", true);
		if(ContentFilter.startsWith(input, utf16le, length))
			return new BOMDetection("UTF-16LE", true);
		if(ContentFilter.startsWith(input, utf32_be, length))
			return new BOMDetection("UTF-32BE", true);
		if(ContentFilter.startsWith(input, utf32_le, length))
			return new BOMDetection("UTF-32LE", true);
		if(ContentFilter.startsWith(input, ebcdic, length))
			return new BOMDetection("IBM01140", true);
		if(ContentFilter.startsWith(input, ibm1026, length))
			return new BOMDetection("IBM1026", true);

		// Unsupported BOMs

		if(ContentFilter.startsWith(input, utf32_2143, length))
			throw new UnsupportedCharsetInFilterException("UTF-32-2143");
		if(ContentFilter.startsWith(input, utf32_3412, length))
			throw new UnsupportedCharsetInFilterException("UTF-32-3412");
		if(ContentFilter.startsWith(input, gsm, length))
			throw new UnsupportedCharsetInFilterException("GSM 03.38");
		return null;
	}

	/**
	 * Sanitises a comma-separated media list.
	 *
	 * <p>Each entry is trimmed and then truncated just before the first
	 * character that is not an ASCII letter, digit or hyphen. Entries that
	 * {@link FilterUtils#isMedia(String)} accepts are kept, in order, joined
	 * with <code>", "</code>.</p>
	 *
	 * @param media the raw media list; null is treated as containing no
	 *        usable entries
	 * @return the filtered list, or null if no entry was accepted
	 */
	public static String filterMediaList(String media) {
		if(media == null) return null;
		String[] split = media.split(",");
		boolean first = true;
		StringBuilder sb = new StringBuilder();
		for(String m : split) {
			m = m.trim();
			int i;
			for(i=0;i<m.length();i++) {
				// The original test negated only the lowercase range, so
				// upper-case letters, digits and hyphens ended the entry too.
				if(!isMediaChar(m.charAt(i)))
					break;
			}
			m = m.substring(0, i);
			if(FilterUtils.isMedia(m)) {
				if(!first) sb.append(", ");
				sb.append(m);
				first = false;
			}
		}
		if(sb.length() != 0) return sb.toString();
		else return null;
	}

	/** Returns true for the characters allowed in a media name: [a-zA-Z0-9-]. */
	private static boolean isMediaChar(char c) {
		return ('a' <= c && c <= 'z')
			|| ('A' <= c && c <= 'Z')
			|| ('0' <= c && c <= '9')
			|| c == '-';
	}

	/**
	 * @return the number of bytes that should be supplied for charset
	 *         extraction
	 */
	@Override
	public int getCharsetBufferSize() {
		return 64; //This should be a reasonable number of bytes to read in
	}

}