/* This code is part of Freenet. It is distributed under the GNU General * Public License, version 2 (or at your option any later version). See * http://www.gnu.org/ for further details of the GPL. */ package freenet.client.filter; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Reader; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.util.HashMap; import freenet.support.HexUtil; import freenet.support.LogThresholdCallback; import freenet.support.Logger; import freenet.support.Logger.LogLevel; import freenet.support.io.Closer; import freenet.support.io.NullWriter; public class CSSReadFilter implements ContentDataFilter, CharsetExtractor { private static volatile boolean logDEBUG; private static volatile boolean logMINOR; static { Logger.registerLogThresholdCallback(new LogThresholdCallback(){ @Override public void shouldUpdate(){ logDEBUG = Logger.shouldLog(LogLevel.DEBUG, this); logMINOR = Logger.shouldLog(LogLevel.MINOR, this); } }); } @Override public void readFilter(InputStream input, OutputStream output, String charset, HashMap<String, String> otherParams, FilterCallback cb) throws DataFilterException, IOException { if (logDEBUG) Logger.debug( this, "running " + this + "with charset"+charset); Reader r = null; Writer w = null; try { try { InputStreamReader isr = new InputStreamReader(input, charset); OutputStreamWriter osw = new OutputStreamWriter(output, charset); r = new BufferedReader(isr, 32768); w = new BufferedWriter(osw, 32768); } catch(UnsupportedEncodingException e) { throw UnknownCharsetException.create(e, charset); } CSSParser parser = new CSSParser(r, w, false, cb, charset, false, false); parser.parse(); } finally { w.flush(); } } @Override public void writeFilter(InputStream input, OutputStream output, String charset, HashMap<String, String> otherParams, FilterCallback cb) throws DataFilterException, IOException { throw new UnsupportedOperationException(); } @Override public String getCharset(byte [] input, int length, String charset) throws DataFilterException, IOException { if(logDEBUG) Logger.debug(this, "Fetching charset for CSS with initial charset "+charset); if(input.length > getCharsetBufferSize() && logMINOR) { Logger.minor(this, "More data than was strictly needed was passed to the charset extractor for extraction"); } InputStream strm = new ByteArrayInputStream(input, 0, length); NullWriter w = new NullWriter(); InputStreamReader isr; BufferedReader r = null; try { try { isr = new InputStreamReader(strm, charset); r = new BufferedReader(isr, 32768); } catch(UnsupportedEncodingException e) { throw UnknownCharsetException.create(e, charset); } CSSParser parser = new CSSParser(r, w, false, new NullFilterCallback(), null, true, false); parser.parse(); r.close(); r = null; return parser.detectedCharset(); } finally { Closer.close(strm); Closer.close(r); Closer.close(w); } } // CSS 2.1 section 4.4. // In all cases these will be confirmed by calling getCharset(). // We do not use all of the BOMs suggested. // Also, we do not use true BOMs. // We do check for ascii, even though it's the first one to check for anyway, because of the "as specified" rule: if it starts with @charset in ascii, it MUST have a valid charset, or we ignore the whole sheet, as per the spec. static final byte[] ascii = parse("40 63 68 61 72 73 65 74 20 22"); static final byte[] utf16be = parse("00 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00 20 00 22"); static final byte[] utf16le = parse("40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00 20 00 22 00"); static final byte[] utf32_le = parse("40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00 00 00 22 00 00 00"); static final byte[] utf32_be = parse("00 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00 00 00 22"); static final byte[] ebcdic = parse("7C 83 88 81 99 A2 85 A3 40 7F"); static final byte[] ibm1026 = parse("AE 83 88 81 99 A2 85 A3 40 FC"); // Not supported. static final byte[] utf32_2143 = parse("00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00 00 00 22 00"); static final byte[] utf32_3412 = parse("00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00 00 00 22 00 00"); static final byte[] gsm = parse("00 63 68 61 72 73 65 74 20 22"); static final int maxBOMLength = Math.max(utf16be.length, Math.max(utf16le.length, Math.max(utf32_le.length, Math.max(utf32_be.length, Math.max(ebcdic.length, Math.max(ibm1026.length, Math.max(utf32_2143.length, Math.max(utf32_3412.length, gsm.length)))))))); static byte[] parse(String s) { s = s.replaceAll(" ", ""); return HexUtil.hexToBytes(s); } @Override public BOMDetection getCharsetByBOM(byte[] input, int length) throws DataFilterException, IOException { if(ContentFilter.startsWith(input, ascii, length)) return new BOMDetection("UTF-8", true); if(ContentFilter.startsWith(input, utf16be, length)) return new BOMDetection("UTF-16BE", true); if(ContentFilter.startsWith(input, utf16le, length)) return new BOMDetection("UTF-16LE", true); if(ContentFilter.startsWith(input, utf32_be, length)) return new BOMDetection("UTF-32BE", true); if(ContentFilter.startsWith(input, utf32_le, length)) return new BOMDetection("UTF-32LE", true); if(ContentFilter.startsWith(input, ebcdic, length)) return new BOMDetection("IBM01140", true); if(ContentFilter.startsWith(input, ibm1026, length)) return new BOMDetection("IBM1026", true); // Unsupported BOMs if(ContentFilter.startsWith(input, utf32_2143, length)) throw new UnsupportedCharsetInFilterException("UTF-32-2143"); if(ContentFilter.startsWith(input, utf32_3412, length)) throw new UnsupportedCharsetInFilterException("UTF-32-3412"); if(ContentFilter.startsWith(input, gsm, length)) throw new UnsupportedCharsetInFilterException("GSM 03.38"); return null; } public static String filterMediaList(String media) { String[] split = media.split(","); boolean first = true; StringBuffer sb = new StringBuffer(); for(String m : split) { m = m.trim(); int i; for(i=0;i<m.length();i++) { char c = m.charAt(i); if(!('a' <= c && 'z' >= c) || ('A' <= c && 'Z' >= c) || ('0' <= c && '9' >= c) || c == '-') break; } m = m.substring(0, i); if(FilterUtils.isMedia(m)) { if(!first) sb.append(", "); sb.append(m); first = false; } } if(sb.length() != 0) return sb.toString(); else return null; } @Override public int getCharsetBufferSize() { return 64; //This should be a reasonable number of bytes to read in } }