// HTMLFilter.java example
//
// Explorer
// fred-master
/* -*- Mode: java; c-basic-indent: 4; tab-width: 4 -*- */

package freenet.client.filter;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.MalformedInputException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
import java.util.StringTokenizer;

import freenet.clients.http.ToadletContextImpl;
import freenet.l10n.NodeL10n;
import freenet.support.HTMLDecoder;
import freenet.support.HTMLEncoder;
import freenet.support.Logger;
import freenet.support.Logger.LogLevel;
import freenet.support.URLDecoder;
import freenet.support.URLEncodedFormatException;
import freenet.support.io.NullWriter;

public class HTMLFilter implements ContentDataFilter, CharsetExtractor {

	// Cached log-level switches. They are refreshed from Logger.shouldLog() at the
	// start of readFilter() (both) and getCharset() (logMINOR only), and are shared
	// by every instance because they are static.
	private static boolean logMINOR;
	private static boolean logDEBUG;

	// When true, ParsedTag.sanitize() silently drops any tag that has no registered verifier.
	private static final boolean deleteWierdStuff = true;
	// Only consulted when deleteWierdStuff is false: if this is also false, an unknown tag
	// raises a filter exception instead of being dropped.
	private static final boolean deleteErrors = true;
	/** If true, allow documents that don't have an <html> tag or have other tags before it.
	 * In all cases we disallow text before the first valid tag. This is because if we don't,
	 * charset detection can be ambiguous, potentially resulting in attacks. */
	private static final boolean allowNoHTMLTag = true;

	// FIXME make these configurable on a per-document level.
	// Maybe by merging with TagReplacerCallback???
	// For now they're just global.
	// NOTE(review): the unit of both intervals is not visible in this part of the file
	// (presumably seconds, as in a meta refresh) - confirm against the code that reads them.
	/** Minimum interval for a meta refresh to the same page; -1 means don't allow it. */
	public static int metaRefreshSamePageMinInterval = 1;
	/** Minimum interval for a meta refresh that redirects elsewhere; -1 means don't allow it. */
	public static int metaRefreshRedirectMinInterval = 30;
	
	/**
	 * Runs the HTML filter over {@code input} and writes the filtered document to {@code output}.
	 * Both streams are wrapped in 4096-character buffers using {@code charset}. The writer is
	 * flushed before returning; neither stream is closed here.
	 *
	 * @param input raw bytes of the document to filter
	 * @param output destination for the filtered document
	 * @param charset name of the charset used for both decoding and encoding
	 * @param otherParams not used by this method
	 * @param cb callback notified about tags and text; a NullFilterCallback is substituted when null
	 * @throws DataFilterException if the parser rejects the document
	 * @throws IOException on read or write failure
	 */
	@Override
	public void readFilter(InputStream input, OutputStream output, String charset, HashMap<String, String> otherParams,
	        FilterCallback cb) throws DataFilterException, IOException {
		FilterCallback callback = cb;
		if(callback == null) {
			callback = new NullFilterCallback();
		}
		logMINOR = Logger.shouldLog(LogLevel.MINOR, this);
		logDEBUG = Logger.shouldLog(LogLevel.DEBUG, this);
		if(logMINOR) {
			Logger.minor(this, "readFilter(): charset="+charset);
		}

		Reader reader;
		Writer writer;
		try {
			reader = new BufferedReader(new InputStreamReader(input, charset), 4096);
			writer = new BufferedWriter(new OutputStreamWriter(output, charset), 4096);
		} catch(UnsupportedEncodingException e) {
			// The JVM does not know this charset name: report it via the dedicated exception factory.
			throw UnknownCharsetException.create(e, charset);
		}

		HTMLParseContext context = new HTMLParseContext(reader, writer, charset, callback, false);
		context.run();
		writer.flush();
	}
	
	/**
	 * Write-side filtering is not implemented for HTML; none of the arguments are inspected.
	 *
	 * @throws UnsupportedOperationException always
	 */
	@Override
	public void writeFilter(InputStream input, OutputStream output, String charset, HashMap<String, String> otherParams,
			FilterCallback cb) throws DataFilterException, IOException {
		throw new UnsupportedOperationException();
	}
	
	/**
	 * Tries to find the charset a document declares for itself, by parsing the first
	 * {@code length} bytes of {@code input} as HTML decoded with {@code parseCharset}.
	 * Nothing is written anywhere: the parser runs in charset-detection mode against a
	 * NullWriter.
	 *
	 * @param input buffer holding the start of the document
	 * @param length number of valid bytes in {@code input}
	 * @param parseCharset charset used to decode the bytes for this detection attempt
	 * @return the charset recorded by the parser, or null if none was found or if the
	 *         parser reported malformed input for {@code parseCharset}
	 * @throws UnsupportedEncodingException (an IOException) if {@code parseCharset} is unknown to the JVM
	 * @throws IOException on any other I/O failure while parsing or closing the reader
	 */
	@Override
	public String getCharset(byte[] input, int length, String parseCharset) throws DataFilterException, IOException {
		logMINOR = Logger.shouldLog(LogLevel.MINOR, this);
		if(logMINOR) Logger.minor(this, "getCharset(): default="+parseCharset);
		if(length > getCharsetBufferSize() && logMINOR) {
			Logger.minor(this, "More data than was strictly needed was passed to the charset extractor for extraction");
		}
		ByteArrayInputStream strm = new ByteArrayInputStream(input, 0, length);
		Writer w = new NullWriter();
		Reader r;
		try {
			r = new BufferedReader(new InputStreamReader(strm, parseCharset), 4096);
		} catch (UnsupportedEncodingException e) {
			strm.close();
			throw e;
		}
		HTMLParseContext pc = new HTMLParseContext(r, w, null, new NullFilterCallback(), true);
		try {
			pc.run();
		} catch (MalformedInputException e) {
			// Not this charset
			return null;
		} catch (IOException e) {
			throw e;
		} catch (Throwable t) {
			// Detection is best-effort: any other failure (including filter exceptions)
			// is ignored and whatever was detected so far is returned.
			if(logMINOR) Logger.minor(this, "Caught "+t+" trying to detect charset with "+parseCharset);
		} finally {
			// Close on every exit path, including the early return and the rethrow above.
			try {
				r.close();
			} catch (IOException e) {
				throw e;
			} catch (Throwable t) {
				if(logMINOR) Logger.minor(this, "Caught "+t+" closing stream after trying to detect charset with "+parseCharset);
			}
		}
		if(logMINOR) Logger.minor(this, "Returning charset "+pc.detectedCharset);
		return pc.detectedCharset;
	}

	/**
	 * State of one tokenizing/filtering pass over a single HTML document.
	 * {@link #run()} reads characters from {@link #r}, splits them into text, tags and
	 * comments, and hands each piece to the enclosing filter's saveText(), processTag()
	 * and saveComment(), which write to {@link #w}. Most fields are package-visible
	 * because those methods and the tag verifiers read and modify them directly.
	 */
	class HTMLParseContext {
		/** Character source (already decoded from bytes by the creator). */
		Reader r;
		/** Destination of the filtered output. */
		Writer w;
		/** Charset name supplied by the creator; getCharset() passes null. */
		String charset;
		/** Charset found while parsing; not assigned anywhere in this class. */
		String detectedCharset;
		final FilterCallback cb;
		/** True when the pass only looks for a charset declaration; run() then stops early. */
		final boolean onlyDetectingCharset;
		boolean isXHTML=false;
		/** Elements currently considered open, innermost on top. */
		Stack<String> openElements;
		/** Set when charset detection should give up (e.g. the head section has ended). */
		boolean failedDetectCharset;
		
		/** If <head> is found, then it is true. It is needed that if <title> or <meta> is found outside <head> or if a <body> is found first, then insert a <head> too*/
		boolean wasHeadElementFound=false;
		/** We can only have <head> once, and <meta>/<title> can't be outside it. This helps with robustness against charset attacks and allows us to stop looking for <meta> as soon as we see </head> when detecting charset. */
		boolean headEnded=false;
	
		HTMLParseContext(Reader r, Writer w, String charset, FilterCallback cb, boolean onlyDetectingCharset) {
			this.r = r;
			this.w = w;
			this.charset = charset;
			this.cb = cb;
			this.onlyDetectingCharset = onlyDetectingCharset;
			openElements=new Stack<String>();
		}
		
		public void setisXHTML(boolean value) {
			isXHTML=value;
		}
		
		public boolean getisXHTML() {
			return isXHTML;
		}
		
		/** Pushes an element name onto the open-element stack. */
		public void pushElementInStack(String element) {
			openElements.push(element);
		}
		
		/** @return the innermost open element, removed from the stack, or null if none is open */
		public String popElementFromStack() {
			if(openElements.size()>0)
				return openElements.pop();
			else
				return null;
		}
		
		/** @return the innermost open element without removing it, or null if none is open */
		public String peekTopElement() {
			if(openElements.isEmpty()) return null;
			return openElements.peek();
		}

		/**
		 * Emits the text collected so far, or rejects it if text is not allowed yet.
		 * While no tag has been accepted, only whitespace may precede the first tag;
		 * anything else raises a filter exception ("textBeforeHTML").
		 */
		private void flushPendingText(StringBuilder text, String currentTag, boolean textAllowed)
			throws IOException, DataFilterException {
			if(textAllowed) {
				saveText(text, currentTag, w, this);
			} else {
				if(!text.toString().trim().equals(""))
					throwFilterException(l10n("textBeforeHTML"));
			}
		}

		/**
		 * Tokenizes the whole input and filters it.
		 *
		 * <p>Tokenizer modes and their transitions:</p>
		 * <ul>
		 * <li>INTEXT: '<' flushes the text and enters INTAG.</li>
		 * <li>INTAG: whitespace ends the current word (INTAGWHITESPACE); '"' and '\''
		 * enter INTAGQUOTES / INTAGSQUOTES; "!--" at the start enters INTAGCOMMENT;
		 * '>' processes the whole tag and returns to INTEXT.</li>
		 * <li>INTAGQUOTES / INTAGSQUOTES: the matching quote returns to INTAG; '<' and
		 * '>' are escaped, everything else is kept.</li>
		 * <li>INTAGCOMMENT: "--" enters INTAGCOMMENTCLOSING; everything is collected.</li>
		 * <li>INTAGCOMMENTCLOSING: '>' saves the comment and returns to INTEXT; any
		 * character other than '-' falls back to INTAGCOMMENT.</li>
		 * <li>INTAGWHITESPACE: quotes enter the quote modes; '>' processes the tag;
		 * further whitespace is skipped; anything else starts a new word (INTAG).</li>
		 * </ul>
		 *
		 * @throws IOException on read/write failure; a MalformedInputException is thrown when
		 *         only detecting the charset and "head" is still open at end of input
		 * @throws DataFilterException if the document is rejected
		 */
		void run() throws IOException, DataFilterException {
			// Current word (text, tag name, attribute, or comment body).
			StringBuilder b = new StringBuilder(100);
			// Raw characters seen since the last '<'; only its first character is inspected.
			StringBuilder balt = new StringBuilder(4000);
			// Whitespace-separated words of the tag being read.
			List<String> splitTag = new ArrayList<String>();
			String currentTag = null;
			char pprevC = 0;
			char prevC = 0;
			char c = 0;
			mode = INTEXT;
			
			// No text before <html>
			boolean textAllowed = false;
			
			boolean firstChar = true;

			while (true) {
				// If detecting charset, stop after </head> even if haven't found <meta> charset tag.
				if(onlyDetectingCharset && failedDetectCharset)
					return;
				// If detecting charset, and found it, stop afterwards.
				if(onlyDetectingCharset && detectedCharset != null)
					return;
				int x;
				
				/*
				 * libgcj up to at least 4.2.2 has a bug: InputStreamReader.refill() throws
				 * CharConversionException when BufferedInputReader.refill() returns false for EOF. See:
				 * line 299 at InputStreamReader.java (in refill()): http://www.koders.com/java/fidD8F7E2EB1E4C22DA90EBE0130306AE30F876AB00.aspx?s=refill#L279
				 * line 355 at BufferedInputStream.java (in refill()): http://www.koders.com/java/fid1949641524FAC0083432D79793F554CD85F46759.aspx?s=refill#L355
				 * TODO: remove this when the gcj bug is fixed and the affected gcj versions are outdated.
				 */
				try {
					x = r.read();
				}
				catch(java.io.CharConversionException cce) {
					if(freenet.node.Node.checkForGCJCharConversionBug()) /* only ignore the exception on affected libgcj */
						x = -1; 
					else
						throw cce;
				}
				
				if (x == -1) {
					// End of input: flush pending text, or note what was cut off.
					switch (mode) {
						case INTEXT :
							flushPendingText(b, currentTag, textAllowed);
							break;
						case INTAG:
							w.write("<!-- truncated page: last tag not unfinished -->");
							break;
						case INTAGQUOTES:
							w.write("<!-- truncated page: deleted unfinished tag: still in quotes -->");
							break;
						case INTAGSQUOTES:
							w.write("<!-- truncated page: deleted unfinished tag: still in single quotes -->");
							break;
						case INTAGWHITESPACE:
							w.write("<!-- truncated page: deleted unfinished tag: still in whitespace -->");
							break;
						case INTAGCOMMENT:
							w.write("<!-- truncated page: deleted unfinished comment -->");
							break;
						case INTAGCOMMENTCLOSING:
							w.write("<!-- truncated page: deleted unfinished comment, might be closing -->");
							break;
						default:
							// Dump unfinished tag
							break;
					}
					break;
				} else {
					pprevC = prevC;
					prevC = c;
					c = (char) x;
					if(c == 0xFEFF) {
						if(firstChar) {
							// BOM
							if(w != null)
								w.write(c);
						} else {
							// Null character (zero width non breaking space). Get rid.
						}
						continue;
					}
					if(c == 0) {
						// Delete nulls. They can cause all sorts of problems and also can result from messing around with charsets.
						continue;
					}
					firstChar = false;
					switch (mode) {
						case INTEXT :
							if (c == '<') {
								flushPendingText(b, currentTag, textAllowed);
								b.setLength(0);
								balt.setLength(0);
								mode = INTAG;
							} else {
								b.append(c);
							}
							break;
						case INTAG :
							balt.append(c);
							if (HTMLDecoder.isWhitespace(c)) {
								splitTag.add(b.toString());
								mode = INTAGWHITESPACE;
								b.setLength(0);
							} else if ((c == '<') && Character.isWhitespace(balt.charAt(0))) {
								// Previous was an un-escaped < in a script.
								flushPendingText(b, currentTag, textAllowed);

								balt.setLength(0);
								b.setLength(0);
								splitTag.clear();
							} else if (c == '>') {
								splitTag.add(b.toString());
								b.setLength(0);
								String s = processTag(splitTag, w, this);
								currentTag = s;
								splitTag.clear();
								balt.setLength(0);
								mode = INTEXT;
								if(s != null && (allowNoHTMLTag || (s.equals("html") || (!isXHTML) && s.equalsIgnoreCase("html"))))
									textAllowed = true;
							} else if (
								(b.length() == 2)
									&& (c == '-')
									&& (prevC == '-')
									&& (pprevC == '!')) {
								mode = INTAGCOMMENT;
								b.append(c);
							} else if (c == '"') {
								mode = INTAGQUOTES;
								b.append(c);
							} else if (c == '\'') {
								mode = INTAGSQUOTES;
								b.append(c);
							} else if (c == '/') { /* Probable end tag */
								currentTag = null; /* We didn't remember what was the last tag, so ... */
								b.append(c);
							} else {
								b.append(c);
							}
							break;
						case INTAGQUOTES :
							// Inside double-quotes, single quotes are just another character, perfectly legal in a URL.
							if (c == '"') {
								mode = INTAG;
								b.append(c); // Part of the element
							} else if (c == '>') {
								// Markup characters inside a quoted value are escaped so they cannot end the tag.
								b.append("&gt;");
							} else if (c == '<') {
								b.append("&lt;");
							} else if (c== '\u00A0') {
								// A no-break space is replaced by an ordinary space.
								b.append(" ");
							}
							else {
								// Everything else, including '&', is kept as-is.
								b.append(c);
							}
							break;
						case INTAGSQUOTES :
							if (c == '\'') {
								mode = INTAG;
								b.append(c); // Part of the element
							} else if (c == '<') {
								// Same escaping as for double-quoted values.
								b.append("&lt;");
							} else if (c == '>') {
								b.append("&gt;");
							} else if (c== '\u00A0') {
								b.append(" ");
							} 
							else {
								b.append(c);
							}
							break;
							/*
							 * Comments are often used to temporarily disable
							 * markup; I shall allow it. (avian) White space is
							 * not permitted between the markup declaration
							 * open delimiter ("<!") and the comment open
							 * delimiter ("--"), but is permitted between the
							 * comment close delimiter ("--") and the markup
							 * declaration close delimiter (">"). A common error
							 * is to include a string of hyphens ("---") within
							 * a comment. Authors should avoid putting two or
							 * more adjacent hyphens inside comments. However,
							 * the only browser that actually gets it right is
							 * IE (others either don't allow it or allow other
							 * chars as well). The only safe course of action
							 * is to allow any and all chars, but eat them.
							 * (avian)
							 */
						case INTAGCOMMENT :
							if ((b.length() >= 4) && (c == '-') && (prevC == '-')) {
								b.append(c);
								mode = INTAGCOMMENTCLOSING;
							} else
								b.append(c);
							break;
						case INTAGCOMMENTCLOSING :
							if (c == '>') {
								saveComment(b, w, this);
								b.setLength(0);
								mode = INTEXT;
							} else {
								b.append(c);
								if(c != '-')
									mode = INTAGCOMMENT;
							}
							break;
						case INTAGWHITESPACE :
							if (c == '"') {
								mode = INTAGQUOTES;
								b.append(c);
							} else if (c == '\'') {
								// e.g. <div align = 'center'> (avian)
								// This will be converted automatically to double quotes \"
								// Note that SINGLE QUOTES ARE LEGAL IN URLS ...
								// If we have single quotes inside single quotes, we could get into a major mess here... but that's really malformed code, and it will still be safe, it will just be unreadable.
								mode = INTAGSQUOTES;
								b.append(c);
							} else if (c == '>') {
								if (!killTag)
									currentTag = processTag(splitTag, w, this);
								else
									currentTag = null;
								killTag = false;
								splitTag.clear();
								b.setLength(0);
								balt.setLength(0);
								mode = INTEXT;
								if(currentTag != null && (allowNoHTMLTag || (currentTag.equals("html") || (!isXHTML) && currentTag.equalsIgnoreCase("html"))))
									textAllowed = true;
							} else if ((c == '<') && Character.isWhitespace(balt.charAt(0))) {
								// Previous was an un-escaped < in a script.
								flushPendingText(b, currentTag, textAllowed);
								balt.setLength(0);
								b.setLength(0);
								splitTag.clear();
								mode = INTAG;
							} else if (HTMLDecoder.isWhitespace(c)) {
								// More whitespace, what fun
							} else {
								mode = INTAG;
								b.append(c);
							}
					}
				}
			}
			/**While detecting the charset, if head is not closed inside
			 * the interval which we are examining, something is wrong, and it's
			 * possible that the file has been given a freakishly large head, so
			 * that we'll miss a charset declaration.*/
			if(onlyDetectingCharset && openElements.contains("head")) {
				throw new MalformedInputException(1024*64);
			}
			//Writing the remaining tags for XHTML if any
			if(getisXHTML())
			{
				while(openElements.size()>0)
					w.write("</"+openElements.pop()+">");
			}
			w.flush();
			return;
		}
		/** Current tokenizer mode; one of the IN* constants below. */
		int mode;
		static final int INTEXT = 0;
		static final int INTAG = 1;
		static final int INTAGQUOTES = 2;
		static final int INTAGSQUOTES = 3;
		static final int INTAGCOMMENT = 4;
		static final int INTAGCOMMENTCLOSING = 5;
		static final int INTAGWHITESPACE = 6;
		boolean killTag = false; // just this one
		boolean writeStyleScriptWithTag = false; // just this one
		boolean expectingBadComment = false;
		// has to be set on or off explicitly by tags
		boolean inStyle = false; // has to be set on or off explicitly by tags
		boolean inScript = false; // has to be set on or off explicitly by tags
		boolean killText = false; // has to be set on or off explicitly by tags
		boolean killStyle = false;
		int styleScriptRecurseCount = 0;
		/** Text and comments collected while inStyle or inScript is set. */
		String currentStyleScriptChunk = "";
		/** Output to emit right after the current tag has been written. */
		StringBuilder writeAfterTag = new StringBuilder(1024);

		/**
		 * Writes the closing tag for {@code element}, first closing every element opened
		 * inside it. Does nothing if the element is not on the open-element stack.
		 *
		 * @param element name of the element being closed
		 * @param w writer receiving the closing tags
		 */
		public void closeXHTMLTag(String element, Writer w) throws IOException {
			// Assume that missing closes are way more common than extra closes.
			if(openElements.isEmpty()) return;
			if(element.equals(openElements.peek())) {
				w.write("</"+openElements.pop()+">");
			}
			else {
				if(openElements.contains(element)) {
					while(true) {
						String top = openElements.pop();
						w.write("</"+top+">");
						if(top.equals(element)) return;
					}
				} // Else it has already been closed.
			}
		}
	}


	/**
	 * Sanitizes a run of document text and emits it.
	 * Does nothing while only detecting the charset or while {@code pc.killText} is set.
	 * Control characters other than tab, LF and CR are removed. Inside a style or script
	 * section the text is appended to {@code pc.currentStyleScriptChunk} instead of being
	 * written; otherwise a stray '<' is escaped, the callback is notified with the
	 * entity-decoded text, and the text is written to {@code w}.
	 *
	 * @param s the collected text
	 * @param tagName name of the enclosing tag as tracked by the tokenizer (may be null);
	 *        passed to the callback as the type of the text
	 * @param w destination writer
	 * @param pc parser state
	 * @throws IOException if writing fails
	 */
	void saveText(StringBuilder s, String tagName, Writer w, HTMLParseContext pc)
		throws IOException {
		
		if(pc.onlyDetectingCharset) return;

		if(logDEBUG) Logger.debug(this, "Saving text: "+s.toString());
		if (pc.killText) {
			return;
		}
		
		StringBuilder out = new StringBuilder(s.length()*2);
		
		for(int i=0;i<s.length();i++) {
			char c = s.charAt(i);
			if(c == '<' && !(pc.inStyle || pc.inScript)) {
				// Escape it so it cannot open a tag in the output.
				//Scripts and styles parsed elsewhere
				out.append("&lt;");
			}
			else if((c < 32) && (c != '\t') && (c != '\n') && (c != '\r')) {
				// Not a real character
				// STRONGLY suggests somebody is using a bogus charset.
				// This could be in order to break the filter.
				if(logDEBUG) Logger.debug(this, "Removing '"+c+"' from the output stream");
				continue;
			}
			else {
				out.append(c);
			}
		}
		String sout = out.toString();
		
		if (pc.inStyle || pc.inScript) {
			pc.currentStyleScriptChunk += sout;
			return; // is parsed and written elsewhere
		}
		if(pc.cb != null)
			pc.cb.onText(HTMLDecoder.decode(sout), tagName); /* Tag name is given as type for the text */
		
		w.write(sout);
	}

	/**
	 * Parses, sanitizes and emits one tag, keeping the head/body bookkeeping in {@code pc}
	 * up to date (inserting or closing a head section where the document omits it).
	 *
	 * @param splitTag the whitespace-separated words of the tag; the first is the element name
	 * @param w destination writer
	 * @param pc parser state
	 * @return the element name for an accepted opening tag; for a rejected, closing or
	 *         self-closing tag the innermost open element, or null if none is open;
	 *         null when the tag was killed via {@code pc.killTag}
	 * @throws IOException if writing fails
	 * @throws DataFilterException if the tag makes the document unacceptable
	 */
	String processTag(List<String> splitTag, Writer w, HTMLParseContext pc)
		throws IOException, DataFilterException {
		if(logDEBUG) {
			int index = 0;
			for(String part : splitTag) {
				Logger.debug(this, "Tag["+index+"]="+part);
				index++;
			}
		}
		ParsedTag tag = new ParsedTag(splitTag);

		// A killed tag is dropped; the one-shot flags are reset for the next tag.
		if (pc.killTag) {
			pc.killTag = false;
			pc.writeStyleScriptWithTag = false;
			return null;
		}

		tag = tag.sanitize(pc);
		if (tag != null) {
			// The head bookkeeping runs even when only detecting the charset,
			// so that detection can stop at the end of the head section.
			final boolean isHead = tag.element.equals("head");
			final boolean isMetaOrTitle = tag.element.equals("meta") || tag.element.equals("title");
			final boolean isBody = tag.element.equals("body");

			if(isHead && !tag.startSlash) {
				pc.wasHeadElementFound = true;
			} else if(isHead) {
				// Closing </head>.
				pc.headEnded = true;
				if(pc.onlyDetectingCharset) pc.failedDetectCharset = true;
			} else if(isMetaOrTitle && !pc.wasHeadElementFound) {
				// <meta>/<title> before any <head>: open a head section for them.
				pc.openElements.push("head");
				pc.wasHeadElementFound = true;
				String insertedHead = pc.cb.processTag(new ParsedTag("head", new HashMap<String, String>()));
				if(insertedHead != null && !pc.onlyDetectingCharset) {
					w.write(insertedHead);
				}
			} else if(isMetaOrTitle && pc.headEnded) {
				throwFilterException(l10n("metaOutsideHead"));
			} else if(isBody && pc.openElements.contains("head")) {
				// <body> while the head section is still open: close it first.
				if(!pc.onlyDetectingCharset) w.write("</head>");
				pc.headEnded = true;
				if(pc.onlyDetectingCharset) pc.failedDetectCharset = true;
				pc.openElements.pop();
			} else if(isBody && !pc.wasHeadElementFound) {
				// <body> without any head section: insert a complete one.
				pc.wasHeadElementFound = true;
				String insertedHead = pc.cb.processTag(new ParsedTag("head", new HashMap<String, String>()));
				if(insertedHead != null) {
					if(!pc.onlyDetectingCharset) w.write(insertedHead+"</head>");
					pc.headEnded = true;
					if(pc.onlyDetectingCharset) pc.failedDetectCharset = true;
				}
			}

			if(pc.onlyDetectingCharset) {
				pc.writeStyleScriptWithTag = false;
			} else {
				// The callback may substitute its own markup for the tag.
				String replacement = pc.cb.processTag(tag);
				if(replacement == null) {
					if (pc.writeStyleScriptWithTag) {
						pc.writeStyleScriptWithTag = false;
						String chunk = pc.currentStyleScriptChunk;
						if ((chunk == null) || (chunk.length() == 0))
							pc.writeAfterTag.append("<!-- "+l10n("deletedUnknownStyle")+" -->");
						else
							w.write(chunk);
						pc.currentStyleScriptChunk = "";
					}

					tag.write(w,pc);
					if (pc.writeAfterTag.length() > 0) {
						w.write(pc.writeAfterTag.toString());
						pc.writeAfterTag = new StringBuilder(1024);
					}
				} else {
					w.write(replacement);
					if(!tag.endSlash) {
						pc.openElements.push(tag.element);
					}
				}
			}
		}

		// An accepted opening tag becomes the current tag.
		if(tag != null && !tag.startSlash && !tag.endSlash) {
			return tag.element;
		}
		// Otherwise fall back to the innermost element that is still open, if any.
		if(!pc.openElements.isEmpty()) {
			return pc.openElements.peek();
		}
		if (pc.writeAfterTag.length() > 0) {
			w.write(pc.writeAfterTag.toString());
			pc.writeAfterTag = new StringBuilder(1024);
		}
		return null;
	}

	/**
	 * Emits a comment collected by the tokenizer.
	 * The leading "!--" and up to two trailing hyphens are stripped from {@code s}
	 * (the builder is modified in place). Nothing is written while only detecting the
	 * charset, while a bad comment is expected, or when the pending tag is marked to be
	 * killed. Inside a style or script section the comment body is appended to
	 * {@code pc.currentStyleScriptChunk}. Otherwise '<' and '>' are escaped and the body is
	 * written as {@code <!-- body -->}.
	 *
	 * @param s the raw comment as collected, starting with "!--"
	 * @param w destination writer
	 * @param pc parser state
	 * @throws IOException if writing fails
	 */
	void saveComment(StringBuilder s, Writer w, HTMLParseContext pc)
		throws IOException {
		if(pc.onlyDetectingCharset) return;
		if((s.length() > 3) && (s.charAt(0) == '!') && (s.charAt(1) == '-') && (s.charAt(2) == '-')) {
			s.delete(0, 3);
			// Strip at most two trailing hyphens; the length check keeps this safe
			// even if the comment body consists of hyphens only.
			for(int stripped=0; stripped<2 && s.length() > 0 && s.charAt(s.length()-1) == '-'; stripped++)
				s.setLength(s.length()-1);
		}
		if(logDEBUG) Logger.debug(this, "Saving comment: "+s.toString());
		if (pc.expectingBadComment)
			return; // ignore it

		if (pc.inStyle || pc.inScript) {
			pc.currentStyleScriptChunk += s;
			return; // </style> handler should write
		}
		if (pc.killTag) {
			pc.killTag = false;
			return;
		}
		// Escape markup characters so the comment body cannot introduce tags.
		StringBuilder sb = new StringBuilder(s.length() + 16);
		for(int i=0;i<s.length();i++) {
			char c = s.charAt(i);
			if(c == '<') {
				sb.append("&lt;");
			} else if(c == '>') {
				sb.append("&gt;");
			} else {
				sb.append(c);
			}
		}
		w.write("<!-- ");
		w.write(sb.toString());
		w.write(" -->");
	}

	/**
	 * Always throws a DataFilterException describing a parse failure.
	 * The localized "failedToParseLabel" string is passed as both the first and the
	 * second constructor argument, and {@code msg} as the third.
	 *
	 * @param msg message describing the specific problem
	 * @throws DataFilterException always
	 */
	static void throwFilterException(String msg) throws DataFilterException {
		// FIXME
		String longer = l10n("failedToParseLabel");
		throw new DataFilterException(longer, longer, msg);
	}

	/**
	 * One tag split into its element name, its raw attribute strings, and flags for a
	 * leading slash (closing tag) and a trailing slash (self-closing tag).
	 */
	public static class ParsedTag {
		/** Element name without any leading slash; null only when built from an empty list. */
		public final String element;
		/** Attributes exactly as they will be written, one entry per attribute. */
		public final String[] unparsedAttrs;
		/** True for a closing tag such as {@code </p>}. */
		final boolean startSlash;
		/** True for a self-closing tag such as {@code <br />}. */
		final boolean endSlash;

		/**
		 * Renders each map entry as {@code name="value"}.
		 * Values are inserted verbatim; no escaping is applied here.
		 */
		private static String[] formatAttributes(Map<String,String> attributes) {
			String[] attrs=new String[attributes.size()];
			int pos=0;
			for(Entry<String,String> entry:attributes.entrySet()){
				attrs[pos++]=entry.getKey()+"=\""+entry.getValue()+"\"";
			}
			return attrs;
		}
		
		/**
		 * Builds a self-closing tag (no leading slash, trailing slash set) from an element
		 * name and an attribute map.
		 */
		public ParsedTag(String elementName,Map<String,String> attributes){
			this.element=elementName;
			startSlash=false;
			endSlash=true;
			this.unparsedAttrs = formatAttributes(attributes);
		}
		
		/** Copies {@code t} but replaces its attributes with {@code outAttrs} (stored as given). */
		public ParsedTag(ParsedTag t, String[] outAttrs) {
			this.element = t.element;
			this.unparsedAttrs = outAttrs;
			this.startSlash = t.startSlash;
			this.endSlash = t.endSlash;
		}
		
		/** Copies {@code t} but replaces its attributes with the entries of {@code attributes}. */
		public ParsedTag(ParsedTag t, Map<String,String> attributes){
			this.unparsedAttrs = formatAttributes(attributes);
			this.element = t.element;
			this.startSlash = t.startSlash;
			this.endSlash = t.endSlash;
		}

		/**
		 * Builds a tag from the tokenizer's word list: the first word is the element name
		 * (optionally prefixed with '/'), the rest are attributes, and a trailing '/' on the
		 * last word marks a self-closing tag. The slashes are stripped from the entries of
		 * {@code v} in place.
		 */
		public ParsedTag(List<String> v) {
			int len = v.size();
			if (len == 0) {
				element = null;
				unparsedAttrs = new String[0];
				startSlash = endSlash = false;
				return;
			}
			String s = v.get(len - 1);
			// A lone "/" as the only word is not treated as a trailing slash.
			if (((len - 1 != 0) || (s.length() > 1)) && s.endsWith("/")) {
				s = s.substring(0, s.length() - 1);
				v.set(len - 1, s);
				// If the last word was just "/", it is not an attribute.
				if (s.length() == 0)
					len--;
				endSlash = true;
			} else endSlash = false;
			s = v.get(0);
			if ((s.length() > 1) && s.startsWith("/")) {
				s = s.substring(1);
				v.set(0, s);
				startSlash = true;
			} else startSlash = false;
			element = v.get(0);
			if (len > 1) {
				unparsedAttrs = new String[len - 1];
				for (int x = 1; x < len; x++)
					unparsedAttrs[x - 1] = v.get(x);
			} else
				unparsedAttrs = new String[0];
			if(logDEBUG) Logger.debug(this, "Element = "+element);
		}

		/**
		 * Runs the verifier registered for this element.
		 *
		 * @return the verifier's result, or null if no verifier is registered for the element
		 * @throws DataFilterException if the tag is rejected outright
		 */
		public ParsedTag sanitize(HTMLParseContext pc) throws DataFilterException {
			// NOTE(review): toLowerCase() uses the default locale, so the lookup key for
			// names containing 'I' depends on the platform locale - consider Locale.ROOT.
			TagVerifier tv =
				allowedTagsVerifiers.get(element.toLowerCase());
			if(logDEBUG) Logger.debug(this, "Got verifier: "+tv+" for "+element);
			if (tv == null) {
				if (deleteWierdStuff) {
					return null;
				} else {
					String err = "<!-- "+HTMLEncoder.encode(l10n("unknownTag", "tag", element))+ " -->";
					if (!deleteErrors)
						throwFilterException(l10n("unknownTagLabel") + ' ' + err);
					return null;
				}
			}
			return tv.sanitize(this, pc);
		}

		/** @return the tag as markup, or the empty string if there is no element name */
		@Override
		public String toString() {
			if (element == null)
				return "";
			StringBuilder sb = new StringBuilder("<");
			if (startSlash)
				sb.append('/');
			sb.append(element);
			if (unparsedAttrs != null) {
				int n = unparsedAttrs.length;
				for (int i = 0; i < n; i++) {
					sb.append(' ').append(unparsedAttrs[i]);
				}
			}
			if (endSlash)
				sb.append(" /");
			sb.append('>');
			return sb.toString();
		}
		
		/**
		 * Splits each attribute at its first '=' into a name and a value.
		 * A value wrapped in a matching pair of double or single quotes has the quotes
		 * removed; any other value is returned as written. An attribute without '=' maps
		 * to the empty string, and entries without a name are skipped.
		 *
		 * @return a new map from attribute name to value; empty if there are no attributes
		 */
		public Map<String,String> getAttributesAsMap(){
			Map<String,String> map=new HashMap<String, String>();
			if(unparsedAttrs == null)
				return map;
			for(String attr: unparsedAttrs) {
				if(attr == null || attr.length() == 0)
					continue;
				int eq = attr.indexOf('=');
				if(eq == 0)
					continue; // nothing before the '=': no usable name
				if(eq < 0) {
					// Valueless attribute such as "compact".
					map.put(attr, "");
					continue;
				}
				String name=attr.substring(0,eq);
				String value=attr.substring(eq+1);
				int last = value.length()-1;
				if(last >= 1) {
					char quote = value.charAt(0);
					if((quote == '"' || quote == '\'') && value.charAt(last) == quote)
						value = value.substring(1, last);
				}
				map.put(name, value);
			}
			return map;
		}

		/**
		 * Writes the tag as markup. In XHTML mode a void element that is not already
		 * self-closing is written with a trailing " />".
		 */
		public void htmlwrite(Writer w,HTMLParseContext pc) throws IOException {
			String s = toString();
			if (s == null)
				return;
			if(pc.getisXHTML())
			{
				// The length check protects the charAt() call when there is no element name.
				if(ElementInfo.isVoidElement(element) && s.length() >= 2 && s.charAt(s.length()-2)!='/')
				{
					s=s.substring(0,s.length()-1)+" />";
				}
			}
			w.write(s);
		}

		/**
		 * Writes the tag and keeps the open-element stack in step. For an opening tag, an
		 * identical auto-closable element on top of the stack is closed first, and in XHTML
		 * mode a non-void element is pushed. For a closing tag, XHTML mode closes via the
		 * stack; otherwise the tag is written as-is.
		 */
		public void write(Writer w,HTMLParseContext pc) throws IOException {
			if(!startSlash)
			{
				if(ElementInfo.tryAutoClose(element) && element.equals(pc.peekTopElement()))
					pc.closeXHTMLTag(element, w);
				if(pc.getisXHTML() &&  !ElementInfo.isVoidElement(element))
					pc.pushElementInStack(element);
				htmlwrite(w,pc);
			}
			else
			{
				if(pc.getisXHTML())
				{
					pc.closeXHTMLTag(element, w);
				}
				else
				{
					htmlwrite(w,pc);
				}
			}
		}
	}

	/**
	 * @return a read-only view of the set of allowed HTML tag names; the view reflects
	 *         later changes to the underlying set
	 */
	public static Set<String> getAllowedHTMLTags() {
		final Set<String> readOnlyView = Collections.unmodifiableSet(allowedHTMLTags);
		return readOnlyView;
	}

	/** Backing set for getAllowedHTMLTags(). */
	private static final Set<String> allowedHTMLTags = new HashSet<String>();
	/**
	 * Shared empty array for verifier arguments. It must be declared before
	 * allowedTagsVerifiers: static initializers run in textual order, and
	 * getAllowedTagVerifiers() reads this field while building that map, so declaring it
	 * later would hand null to every verifier instead of an empty array.
	 */
	private static final String[] emptyStringArray = new String[0];
	/** Read-only map from lower-case element name to the verifier that sanitizes it. */
	static final Map<String, TagVerifier> allowedTagsVerifiers =
		Collections.unmodifiableMap(getAllowedTagVerifiers());

	/**
	 * Builds the table of tags this filter knows how to sanitize, keyed by
	 * tag name as written in the literals below (all lower case).
	 *
	 * Each value is a {@link TagVerifier} (or subclass) constructed with the
	 * attribute names it should accept. For the base TagVerifier the
	 * constructor arguments are (tag, allowedAttrs, uriAttrs, inlineURIAttrs).
	 * The other verifier classes are declared elsewhere; presumably their
	 * leading arguments follow the same order and a trailing array holds
	 * event-handler attribute names (e.g. "onload") - confirm against the
	 * individual constructors.
	 *
	 * @return a new, mutable map; the caller wraps it to make it read-only
	 */
	private static Map<String, TagVerifier> getAllowedTagVerifiers()
	{
		// NB: this local deliberately has the same name as the static field
		// it is used to initialise, and shadows it inside this method.
		Map<String, TagVerifier> allowedTagsVerifiers = new HashMap<String, TagVerifier>();
		
		// Document prologue, <html>, <head> and its children
		allowedTagsVerifiers.put("?xml", new XmlTagVerifier());
		allowedTagsVerifiers.put(
			"!doctype",
			new DocTypeTagVerifier("!doctype"));
		allowedTagsVerifiers.put("html", new HtmlTagVerifier());
		allowedTagsVerifiers.put(
			"head",
			new TagVerifier(
				"head",
				new String[] { "id" },
				// Don't support profiles.
				// We don't know what format they might be in, whether they will be parsed even though they have bogus MIME types (which seems likely), etc.
				new String[] { /*"profile"*/ },
				null));
		allowedTagsVerifiers.put(
			"title",
			new TagVerifier("title", new String[] { "id" }));
		allowedTagsVerifiers.put("meta", new MetaTagVerifier());
		allowedTagsVerifiers.put(
			"body",
			new CoreTagVerifier(
				"body",
				new String[] { "bgcolor", "text", "link", "vlink", "alink" },
				null,
				new String[] { "background" },
				new String[] { "onload", "onunload" }));
		// Headings, paragraphs and similar containers: only "align" is listed here
		String[] group =
			{ "div", "h1", "h2", "h3", "h4", "h5", "h6", "p", "caption" };
		for (String x: group)
			allowedTagsVerifiers.put(
				x,
				new CoreTagVerifier(
					x,
					new String[] { "align" },
					emptyStringArray,
					emptyStringArray,
					emptyStringArray));
		// Tags registered with no tag-specific attributes at all
		String[] group2 =
			{
				"span",
				"address",
				"em",
				"strong",
				"dfn",
				"code",
				"samp",
				"kbd",
				"var",
				"cite",
				"abbr",
				"acronym",
				"sub",
				"sup",
				"dt",
				"dd",
				"tt",
				"i",
				"b",
				"big",
				"small",
				"strike",
				"s",
				"u",
				"noframes",
				"fieldset",
// Delete <noscript> / </noscript>. So we can at least see the non-scripting code.
//				"noscript",
				"xmp",
				"listing",
				"plaintext",
				"center",
				"bdo",
				"aside",
				"header",
				"nav",
				"footer",
				"article",
				"section",
				"hgroup"};
		for (String x: group2)
			allowedTagsVerifiers.put(
				x,
				new CoreTagVerifier(
					x,
					emptyStringArray,
					emptyStringArray,
					emptyStringArray,
					emptyStringArray));
		// Quotations and edits: "cite" is passed in the URI-attribute position
		allowedTagsVerifiers.put(
			"blockquote",
			new CoreTagVerifier(
				"blockquote",
				emptyStringArray,
				new String[] { "cite" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"q",
			new CoreTagVerifier(
				"q",
				emptyStringArray,
				new String[] { "cite" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"br",
			new BaseCoreTagVerifier(
				"br",
				new String[] { "clear" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"pre",
			new CoreTagVerifier(
				"pre",
				new String[] { "width", "xml:space" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"ins",
			new CoreTagVerifier(
				"ins",
				new String[] { "datetime" },
				new String[] { "cite" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"del",
			new CoreTagVerifier(
				"del",
				new String[] { "datetime" },
				new String[] { "cite" },
				emptyStringArray,
				emptyStringArray));
		// Lists
		allowedTagsVerifiers.put(
			"ul",
			new CoreTagVerifier(
				"ul",
				new String[] { "type", "compact" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"ol",
			new CoreTagVerifier(
				"ol",
				new String[] { "type", "compact", "start" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"li",
			new CoreTagVerifier(
				"li",
				new String[] { "type", "value" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"dl",
			new CoreTagVerifier(
				"dl",
				new String[] { "compact" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"dir",
			new CoreTagVerifier(
				"dir",
				new String[] { "compact" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"menu",
			new CoreTagVerifier(
				"menu",
				new String[] { "compact" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		// Tables
		allowedTagsVerifiers.put(
			"table",
			new CoreTagVerifier(
				"table",
				new String[] {
					"summary",
					"width",
					"border",
					"frame",
					"rules",
					"cellspacing",
					"cellpadding",
					"align",
					"bgcolor" },
				emptyStringArray,
				new String[] { "background" },
				emptyStringArray));
		allowedTagsVerifiers.put(
			"thead",
			new CoreTagVerifier(
				"thead",
				new String[] { "align", "char", "charoff", "valign" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"tfoot",
			new CoreTagVerifier(
				"tfoot",
				new String[] { "align", "char", "charoff", "valign" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"tbody",
			new CoreTagVerifier(
				"tbody",
				new String[] { "align", "char", "charoff", "valign" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"colgroup",
			new CoreTagVerifier(
				"colgroup",
				new String[] {
					"span",
					"width",
					"align",
					"char",
					"charoff",
					"valign" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"col",
			new CoreTagVerifier(
				"col",
				new String[] {
					"span",
					"width",
					"align",
					"char",
					"charoff",
					"valign" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"tr",
			new CoreTagVerifier(
				"tr",
				new String[] {
					"align",
					"char",
					"charoff",
					"valign",
					"bgcolor" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"th",
			new CoreTagVerifier(
				"th",
				new String[] {
					"abbr",
					"axis",
					"headers",
					"scope",
					"rowspan",
					"colspan",
					"align",
					"char",
					"charoff",
					"valign",
					"nowrap",
					"bgcolor",
					"width",
					"height" },
				emptyStringArray,
				new String[] { "background" },
				emptyStringArray));
		allowedTagsVerifiers.put(
			"td",
			new CoreTagVerifier(
				"td",
				new String[] {
					"abbr",
					"axis",
					"headers",
					"scope",
					"rowspan",
					"colspan",
					"align",
					"char",
					"charoff",
					"valign",
					"nowrap",
					"bgcolor",
					"width",
					"height" },
				emptyStringArray,
				new String[] { "background" },
				emptyStringArray));
		// Links, <base>, images and image maps
		allowedTagsVerifiers.put(
			"a",
			new LinkTagVerifier(
				"a",
				new String[] {
					"accesskey",
					"tabindex",
					"name",
					"shape",
					"coords",
					"target" },
				emptyStringArray,
				emptyStringArray,
				new String[] { "onfocus", "onblur" }));
		allowedTagsVerifiers.put(
			"link",
			new LinkTagVerifier(
				"link",
				new String[] { "media", "target" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"base",
			new BaseHrefTagVerifier(
				"base",
				new String[] { "id", "target" },
				new String[] { /* explicitly sanitized by class */ }));
		allowedTagsVerifiers.put(
			"img",
			new CoreTagVerifier(
				"img",
				new String[] {
					"alt",
					"name",
					"height",
					"width",
					"ismap",
					"align",
					"border",
					"hspace",
					"vspace" },
				new String[] { "longdesc", "usemap" },
				new String[] { "src" },
				emptyStringArray));
		// FIXME: object tag -
		// http://www.w3.org/TR/html4/struct/objects.html#h-13.3
		// FIXME: param tag -
		// http://www.w3.org/TR/html4/struct/objects.html#h-13.3.2
		// applet tag PROHIBITED - we do not support applets (FIXME?)
		allowedTagsVerifiers.put(
			"map",
			new CoreTagVerifier(
				"map",
				new String[] { "name" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"area",
			new CoreTagVerifier(
				"area",
				new String[] {
					"accesskey",
					"tabindex",
					"shape",
					"coords",
					"nohref",
					"alt",
					"target" },
				new String[] { "href" },
				emptyStringArray,
				new String[] { "onfocus", "onblur" }));
		allowedTagsVerifiers.put(
			"audio", // currently just minimal support
			new MediaTagVerifier(
				"audio",
				new String[] { // allowed tags
					"preload",
					"controls"},
				emptyStringArray, // uris
				new String[] { "src" }, // inline uris
				emptyStringArray));
		// Styling and presentational tags
		allowedTagsVerifiers.put("style", new StyleTagVerifier());
		allowedTagsVerifiers.put(
			"font",
			new BaseCoreTagVerifier(
				"font",
				new String[] { "size", "color", "face" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"basefont",
			new BaseCoreTagVerifier(
				"basefont",
				new String[] { "size", "color", "face" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"hr",
			new CoreTagVerifier(
				"hr",
				new String[] { "align", "noshade", "size", "width" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		// Frames
		allowedTagsVerifiers.put(
			"frameset",
			new CoreTagVerifier(
				"frameset",
				new String[] { "rows", "cols" },
				emptyStringArray,
				emptyStringArray,
				new String[] { "onload", "onunload" },
				false));
		allowedTagsVerifiers.put(
			"frame",
			new BaseCoreTagVerifier(
				"frame",
				new String[] {
					"name",
					"frameborder",
					"marginwidth",
					"marginheight",
					"noresize",
					"scrolling" },
				new String[]  { "longdesc" },
				new String[] { "src" }));
		allowedTagsVerifiers.put(
			"iframe",
			new BaseCoreTagVerifier(
				"iframe",
				new String[] {
					"name",
					"frameborder",
					"marginwidth",
					"marginheight",
					"scrolling",
					"align",
					"height",
					"width" },
				new String[] { "longdesc"}, 
				new String[] { "src" }));
		
		// Forms and form controls
		allowedTagsVerifiers.put(
			"form",
			new FormTagVerifier(
				"form",
				new String[] {
					"name" }, // FIXME add a whitelist filter for accept
					// All other attributes are handled by FormTagVerifier.
				new String[] { },
				new String[] { "onsubmit", "onreset" }));
		allowedTagsVerifiers.put(
			"input",
			new InputTagVerifier(
				"input",
				new String[] {
					"accesskey",
					"tabindex",
					"type",
					"name",
					"value",
					"checked",
					"disabled",
					"readonly",
					"size",
					"maxlength",
					"alt",
					"ismap",
					"accept",
					"align" },
				new String[] { "usemap" },
				new String[] { "src" },
				new String[] { "onfocus", "onblur", "onselect", "onchange" }));
		allowedTagsVerifiers.put(
			"button",
			new CoreTagVerifier(
				"button",
				new String[] {
					"accesskey",
					"tabindex",
					"name",
					"value",
					"type",
					"disabled" },
				emptyStringArray,
				emptyStringArray,
				new String[] { "onfocus", "onblur" }));
		allowedTagsVerifiers.put(
			"select",
			new CoreTagVerifier(
				"select",
				new String[] {
					"name",
					"size",
					"multiple",
					"disabled",
					"tabindex" },
				emptyStringArray,
				emptyStringArray,
				new String[] { "onfocus", "onblur", "onchange" }));
		allowedTagsVerifiers.put(
			"optgroup",
			new CoreTagVerifier(
				"optgroup",
				new String[] { "disabled", "label" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"option",
			new CoreTagVerifier(
				"option",
				new String[] { "selected", "disabled", "label", "value" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"textarea",
			new CoreTagVerifier(
				"textarea",
				new String[] {
					"accesskey",
					"tabindex",
					"name",
					"rows",
					"cols",
					"disabled",
					"readonly" },
				emptyStringArray,
				emptyStringArray,
				new String[] { "onfocus", "onblur", "onselect", "onchange" }));
		allowedTagsVerifiers.put(
			"isindex",
			new BaseCoreTagVerifier(
				"isindex",
				new String[] { "prompt" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"label",
			new CoreTagVerifier(
				"label",
				new String[] { "for", "accesskey" },
				emptyStringArray,
				emptyStringArray,
				new String[] { "onfocus", "onblur" }));
		allowedTagsVerifiers.put(
			"legend",
			new CoreTagVerifier(
				"legend",
				new String[] { "accesskey", "align" },
				emptyStringArray,
				emptyStringArray,
				emptyStringArray));
		// <script> is registered so that it gets a dedicated verifier
		// rather than falling through as an unknown tag.
		allowedTagsVerifiers.put("script", new ScriptTagVerifier());
		/* MathML 3.0 support for presentation markup, deprecated attributes 
		 * not included so don't try using them. xref not supported as it is 
		 * mainly used to link presentation and content in parallel markup.
		 *  
		 * Content markup not supported as it is larger and presumably not 
		 * used that much, and **HAS SECURITY ISSUES**: Content markup uses 
		 * Content Dictionaries, which by default are loaded from a default 
		 * URL on the web. 
		 * See attributes: cdgroup, definitionURL, cd.
		 * Elements: csymbol, annotation, annotation-xml. */
		allowedTagsVerifiers.put(
			"math",
			new CoreTagVerifier(
				"math",
				new String[] {
					"accent",
					"accentunder",
					"align",
					"alignmentscope",
					"altimg-height",
					"altimg-valign",
					"altimg-width",
					"alttext",
					"bevelled",
					"charalign",
					"charspacing",
					"close",
					"columnalign",
					"columnlines",
					"columnspacing",
					"columnspan",
					"columnwidth",
					"crossout",
					"decimalpoint",
					"depth",
					"denomalign",
					"dir",
					"display",
					"displaystyle",
					"edge",
					"equalcolumns",
					"equalrows",
					"fence",
					"form",
					"frame",
					"framespacing",
					"groupalign",
					"height",
					"indentalign",
					"indentalignfirst",
					"indentalignlast",
					"indentshift",
					"indentshiftfirst",
					"indentshiftlast",
					"indenttarget",
					"infixlinebreakstyle",
					"largeop",
					"leftoverhang",
					"length",
					"linebreak",
					"linebreakmultchar",
					"linebreakstyle",
					"lineleading",
					"location",
					"lquote",
					"lspace",
					"linethickness",
					"longdivstyle",
					"mathbackground",
					"mathcolor",
					"mathsize",
					"mathvariant",
					"maxsize",
					"maxwidth",
					"minlabelspacing",
					"minsize",
					"movablelimits",
					"mslinethickness",
					"notation",
					"numalign",
					"open",
					"overflow",
					"position",
					"rightoverhang",
					"rowalign",
					"rowlines",
					"rowspacing",
					"rowspan",
					"rquote",
					"rspace",
					"scriptlevel",
					"scriptminsize",
					"scriptsizemultiplier",
					"separator",
					"separators",
					"shift",
					"side",
					"stackalign",
					"stretchy",
					"subscriptshift",
					"superscriptshift",
					"symmetric",
					"voffset",
					"width" },
				new String[] { "href" },
				new String[] { "altimg" },
				emptyStringArray));
		//MathML Presentation tags follow
		String[] mathmlempty =
			{
				"mprescripts",
				"none"};
		for (String x: mathmlempty)
			allowedTagsVerifiers.put(
				x,
				new CoreTagVerifier(
					x,
					emptyStringArray,
					emptyStringArray,
					emptyStringArray,
					emptyStringArray));
		String[] mathmlpresent =
			{
				"merror",
				"mphantom",
				"mroot",
				"msqrt"};
		for (String x: mathmlpresent)
			allowedTagsVerifiers.put(
				x,
				new CoreTagVerifier(
					x,
					new String[] { "mathbackground", "mathcolor" },
					new String[] { "href" },
					emptyStringArray,
					emptyStringArray));
		allowedTagsVerifiers.put(
			"msub",
			new CoreTagVerifier(
				"msub",
				new String[] { "mathbackground", "mathcolor", "subscriptshift" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"msup",
			new CoreTagVerifier(
				"msup",
				new String[] { "mathbackground", "mathcolor", "superscriptshift" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		String[] mathmlscripts =
			{
				"msubsup",
				"mmultiscripts"};
		for (String x: mathmlscripts)
			allowedTagsVerifiers.put(
				x,
				new CoreTagVerifier(
					x,
					new String[] { "mathbackground", "mathcolor", "subscriptshift", "superscriptshift" },
					new String[] { "href" },
					emptyStringArray,
					emptyStringArray));
		allowedTagsVerifiers.put(
		    "msrow",
			new CoreTagVerifier(
				"msrow",
				new String[] { "mathbackground", "mathcolor", "position" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"msgroup",
			new CoreTagVerifier(
				"msgroup",
				new String[] { "mathbackground", "mathcolor", "position", "shift" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"menclose",
			new CoreTagVerifier(
				"menclose",
				new String[] { "mathbackground", "mathcolor", "notation" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"msline",
			new CoreTagVerifier(
				"msline",
				new String[] { "leftoverhang", "length", "mathbackground", "mathcolor", "mslinethickness", "position", "rightoverhang" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"maligngroup",
			new CoreTagVerifier(
				"maligngroup",
				new String[] { "groupalign", "mathbackground", "mathcolor" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"malignmark",
			new CoreTagVerifier(
				"malignmark",
				new String[] { "edge", "mathbackground", "mathcolor" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mrow",
			new CoreTagVerifier(
				"mrow",
				new String[] { "dir", "mathbackground", "mathcolor" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		String[] mathmlitem =
			{
				"mi",
				"mn",
				"mtext"};
		for (String x: mathmlitem)
			allowedTagsVerifiers.put(
				x,
				new CoreTagVerifier(
					x,
					new String[] { "dir", "mathbackground", "mathcolor", "mathsize", "mathvariant" },
					new String[] { "href" },
					emptyStringArray,
					emptyStringArray));
	    allowedTagsVerifiers.put(
			"ms",
			new CoreTagVerifier(
				"ms",
				new String[] { "dir", "lquote", "mathbackground", "mathcolor", "mathsize", "mathvariant", "rquote" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mpadded",
			new CoreTagVerifier(
				"mpadded",
				new String[] { "depth", "height", "lspace", "mathbackground", "mathcolor", "voffset", "width" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mspace",
			new CoreTagVerifier(
				"mspace",
				new String[] {
					"depth",
					"dir",
					"height",
					"indentalign",
					"indentalignfirst",
					"indentalignlast",
					"indentshift",
					"indentshiftfirst",
					"indentshiftlast",
					"indenttarget",
					"linebreak",
					"mathbackground",
					"mathcolor",
					"mathsize",
					"mathvariant",
					"width" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mscarry",
			new CoreTagVerifier(
				"mscarry",
				new String[] { "crossout", "location", "mathbackground", "mathcolor" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mscarries",
			new CoreTagVerifier(
				"mscarries",
				new String[] { "crossout", "location", "mathbackground", "mathcolor", "position", "scriptsizemultiplier" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		String[] mathmltr =
			{
				"mtr",
				"mlabeledtr"};
		for (String x: mathmltr)
			allowedTagsVerifiers.put(
				x,
				new CoreTagVerifier(
					x,
					new String[] { "columnalign", "groupalign", "mathbackground", "mathcolor", "rowalign" },
					new String[] { "href" },
					emptyStringArray,
					emptyStringArray));
		allowedTagsVerifiers.put(
			"mtd",
			new CoreTagVerifier(
				"mtd",
				new String[] { "columnalign", "columnspan", "groupalign", "mathbackground", "mathcolor", "rowalign", "rowspan" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mfenced",
			new CoreTagVerifier(
				"mfenced",
				new String[] { "close", "mathbackground", "mathcolor", "open", "separators" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mfrac",
			new CoreTagVerifier(
				"mfrac",
				new String[] { "bevelled", "denomalign", "linethickness", "mathbackground", "mathcolor", "numalign" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mglyph",
			new CoreTagVerifier(
				"mglyph",
				new String[] { "alt", "height", "mathbackground", "mathcolor", "valign", "width" },
				new String[] { "href" },
				new String[] { "src" },
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mstack",
			new CoreTagVerifier(
				"mstack",
				new String[] { "align", "charalign", "charspacing", "mathbackground", "mathcolor", "stackalign" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mlongdiv",
			new CoreTagVerifier(
				"mlongdiv",
				new String[] { "align", "charalign", "charspacing", "longdivstyle", "mathbackground", "mathcolor", "stackalign" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
		    "mtable",
			new CoreTagVerifier(
				"mtable",
				new String[] {
					"align",
					"alignmentscope",
					"columnalign",
					"columnlines",
					"columnspacing",
					"columnwidth",
					"displaystyle",
					"equalcolumns",
					"equalrows",
					"frame",
					"framespacing",
					"groupalign",
					"mathbackground",
					"mathcolor",
					"minlabelspacing",
					"rowalign",
					"rowlines",
					"rowspacing",
					"side",
					"width" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"munder",
			new CoreTagVerifier(
				"munder",
				new String[] { "accentunder", "align", "mathbackground", "mathcolor" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mo",
			new CoreTagVerifier(
				"mo",
				new String[] {
					"accent",
					"dir",
					"fence",
					"form",
					"indentalign",
					"indentalignfirst",
					"indentalignlast",
					"indentshift",
					"indentshiftfirst",
					"indentshiftlast",
					"indenttarget",
					"largeop",
					"linebreak",
					"linebreakmultchar",
					"linebreakstyle",
					"lineleading",
					"lspace",
					"mathbackground",
					"mathcolor",
					"mathsize",
					"mathvariant",
					"maxsize",
					"minsize",
					"movablelimits",
					"rspace",
					"separator",
					"stretchy",
					"symmetric" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mover",
			new CoreTagVerifier(
				"mover",
				new String[] { "accent", "align", "mathbackground", "mathcolor" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"munderover",
			new CoreTagVerifier(
				"munderover",
				new String[] { "accent", "accentunder", "align", "mathbackground", "mathcolor" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		allowedTagsVerifiers.put(
			"mstyle",
			new CoreTagVerifier(
				"mstyle",
				new String[] {
					"accent",
					"accentunder",
					"align",
					"alignmentscope",
					"bevelled",
					"charalign",
					"charspacing",
					"close",
					"columnalign",
					"columnlines",
					"columnspacing",
					"columnspan",
					"columnwidth",
					"crossout",
					"decimalpoint",
					"depth",
					"denomalign",
					"dir",
					"displaystyle",
					"edge",
					"equalcolumns",
					"equalrows",
					"fence",
					"form",
					"frame",
					"framespacing",
					"groupalign",
					"height",
					"indentalign",
					"indentalignfirst",
					"indentalignlast",
					"indentshift",
					"indentshiftfirst",
					"indentshiftlast",
					"indenttarget",
					"infixlinebreakstyle",
					"largeop",
					"leftoverhang",
					"length",
					"linebreak",
					"linebreakmultchar",
					"linebreakstyle",
					"lineleading",
					"location",
					"lquote",
					"lspace",
					"linethickness",
					"longdivstyle",
					"mathbackground",
					"mathcolor",
					"mathsize",
					"mathvariant",
					"maxsize",
					"minlabelspacing",
					"minsize",
					"movablelimits",
					"mslinethickness",
					"notation",
					"numalign",
					"open",
					"position",
					"rightoverhang",
					"rowalign",
					"rowlines",
					"rowspacing",
					"rowspan",
					"rquote",
					"rspace",
					"scriptlevel",
					"scriptminsize",
					"scriptsizemultiplier",
					"separator",
					"separators",
					"shift",
					"side",
					"stackalign",
					"stretchy",
					"subscriptshift",
					"superscriptshift",
					"symmetric",
					"voffset",
					"width" },
				new String[] { "href" },
				emptyStringArray,
				emptyStringArray));
		// <maction> would go here though it seems a bit pointless and may require extra filtering
		// MathML content tags would go here if anyone used them
		
		return allowedTagsVerifiers;
	}

	/**
	 * Whitelist-based sanitizer for the attributes of one kind of tag.
	 *
	 * A verifier knows four sets of attribute names: attributes passed through
	 * untouched, attributes a subclass will parse itself, attributes holding a
	 * (non-inline) URI and attributes holding an inline URI. Anything else is
	 * dropped, apart from lang / xml:lang and a well-formed dir.
	 */
	static class TagVerifier {
		private final String tag;
		//Attributes which need no sanitation
		private final HashSet<String> allowedAttrs;
		//Attributes which will be sanitized by child classes
		protected final HashSet<String> parsedAttrs;
		private final HashSet<String> uriAttrs;
		private final HashSet<String> inlineURIAttrs;

		/**
		 * Creates a verifier that has no URI attributes.
		 * @param tag the tag name this verifier is for
		 * @param allowedAttrs attributes passed through as-is; may be null
		 */
		TagVerifier(String tag, String[] allowedAttrs) {
			this(tag, allowedAttrs, null, null);
		}

		/**
		 * @param tag the tag name this verifier is for
		 * @param allowedAttrs attributes passed through as-is; may be null
		 * @param uriAttrs attributes whose value is sanitized as a URI; may be null
		 * @param inlineURIAttrs attributes whose value is sanitized as an inline URI; may be null
		 */
		TagVerifier(String tag, String[] allowedAttrs, String[] uriAttrs, String[] inlineURIAttrs) {
			this.tag = tag;
			this.allowedAttrs = attrSet(allowedAttrs);
			this.parsedAttrs = new HashSet<String>();
			this.uriAttrs = attrSet(uriAttrs);
			this.inlineURIAttrs = attrSet(inlineURIAttrs);
		}

		/** Copies an attribute-name array into a fresh set, treating null as empty. */
		private static HashSet<String> attrSet(String[] names) {
			HashSet<String> set = new HashSet<String>();
			if (names != null)
				Collections.addAll(set, names);
			return set;
		}

		/**
		 * Lower-cases an attribute name independently of the JVM's default
		 * locale. The no-argument String.toLowerCase() would, under a Turkish
		 * default locale, turn "ID" into "\u0131d" and so miss the whitelist.
		 */
		private static String lowerCaseName(String name) {
			return name.toLowerCase(java.util.Locale.ENGLISH);
		}

		/**
		 * Turns the raw attribute tokens of a tag into an ordered
		 * name -&gt; value map. Handles "name=value", "name=" followed by a
		 * separate value token, "name" followed by "=value" or by "=" and a
		 * value, and valueless attributes. Names are always lower-cased;
		 * surrounding quotes are stripped from values. A valueless attribute
		 * is stored with a plain Object as its value.
		 */
		private static Map<String, Object> parseAttributes(ParsedTag t) {
			Map<String, Object> h = new LinkedHashMap<String, Object>();
			if (t.unparsedAttrs == null)
				return h;
			// True when the previous token ended in '=' and so the next token is its value
			boolean equals = false;
			// Name of the most recently seen attribute, for "=value" continuation tokens
			String prevX = "";
			for (String s: t.unparsedAttrs) {
				if (equals) {
					equals = false;
					// remove-then-put moves a repeated attribute to the end of the ordering
					h.remove(prevX);
					h.put(prevX, stripQuotes(s));
					prevX = "";
					continue;
				}
				if (s.length() == 0)
					continue; // nothing to parse; previously this threw from substring(0, -1)
				int idx = s.indexOf('=');
				if (idx == -1) {
					// Valueless attribute. Lower-case the name like every other
					// branch so "CHECKED" and "checked" are treated alike.
					String x = lowerCaseName(s);
					h.remove(x);
					h.put(x, new Object());
					prevX = x;
				} else if (idx == s.length() - 1) {
					// "name=" (value follows in the next token) or a lone "="
					equals = true;
					if (idx != 0)
						prevX = lowerCaseName(s.substring(0, idx));
					// else: lone "=", prevX already holds the name
				} else {
					// "name=value", or "=value" continuing the previous name
					String x = s.substring(0, idx);
					if (x.length() == 0)
						x = prevX;
					x = lowerCaseName(x);
					String y = stripQuotes(s.substring(idx + 1));
					h.remove(x);
					h.put(x, y);
					prevX = x;
				}
			}
			return h;
		}

		/**
		 * Sanitizes one tag.
		 *
		 * @param t the tag as parsed from the input
		 * @param pc the current parse context
		 * @return a new tag carrying only the attributes that survived
		 *         sanitizing, or null if the tag should be dropped
		 * @throws DataFilterException if sanitizing the attributes fails
		 */
		ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) throws DataFilterException {
			/* Map contains the attributes, in order. The key is always the name
			 * of the attribute, but the value can be a raw Object if it has no value.
			 * "src" is different to "src=". Arguably we should probably use null in
			 * the first case and "" in the second case ... FIXME */
			Map<String, Object> h = parseAttributes(t);
			h = sanitizeHash(h, t, pc);
			if (h == null) return null;
			//Remove any blank entries: unfilled placeholders always, and
			//empty-string values only when producing XHTML
			for (Iterator<Entry<String, Object>> it = h.entrySet().iterator(); it.hasNext();) {
				Object value = it.next().getValue();
				if (value == null || (value.equals("") && pc.isXHTML)) {
					it.remove();
				}
			}
			//If the tag has no attributes, and this is not allowable, remove it
			if (h.isEmpty() && expungeTagIfNoAttributes()) return null;
			// Closing tags carry no attributes. This check deliberately comes after
			// sanitizeHash() so that subclasses still see the closing tag.
			if (t.startSlash)
				return new ParsedTag(t, (String[])null);
			String[] outAttrs = new String[h.size()];
			int i = 0;
			for (Map.Entry<String, Object> entry : h.entrySet()) {
				Object o = entry.getValue();
				StringBuilder out = new StringBuilder(entry.getKey());
				// Only String values are written; a valueless attribute is emitted as the bare name
				if (o instanceof String)
					out.append( "=\"" ).append( (String) o ).append( '"' );
				outAttrs[i++] = out.toString();
			}
			return new ParsedTag(t, outAttrs);
		}

		/**
		 * Filters a parsed attribute map down to the whitelisted attributes.
		 *
		 * @param h the attributes as parsed, in document order
		 * @param p the tag being sanitized
		 * @param pc the current parse context
		 * @return a new ordered map with the surviving attributes; attributes
		 *         in parsedAttrs are present with a null placeholder value
		 * @throws DataFilterException if URI sanitizing fails
		 */
		Map<String, Object> sanitizeHash(Map<String, Object> h,
			ParsedTag p,
			HTMLParseContext pc) throws DataFilterException {
			Map<String, Object> hn = new LinkedHashMap<String, Object>();
			for (Map.Entry<String, Object> entry : h.entrySet()) {
				if(logDEBUG) Logger.debug(this, "HTML Filter is sanitizing: "+entry.getKey()+" = "+entry.getValue());
				String x = entry.getKey();
				Object o = entry.getValue();

				boolean inline = inlineURIAttrs.contains(x);

				//URI attributes require additional processing
				if (inline || uriAttrs.contains(x)) {
					if(!inline) {
						if(logMINOR) Logger.minor(this, "Non-inline URI attribute: "+x);
					} else {
						if(logMINOR) Logger.minor(this, "Inline URI attribute: "+x);
					}
					// URI
					if (o instanceof String) {
						// Java's URL handling doesn't seem suitable
						String uri = (String) o;
						uri = HTMLDecoder.decode(uri);
						uri = htmlSanitizeURI(uri, null, null, null, pc.cb, pc, inline);
						if (uri == null) {
							// Rejected URI: drop the attribute entirely
							continue;
						}
						uri = HTMLEncoder.encode(uri);
						o = uri;
					}
					// FIXME: rewrite absolute URLs, handle ?date= etc
					if(logDEBUG) Logger.debug(this, "HTML Filter is putting "+(inline?"inline":"")+" uri attribute: "+x+" =  "+o);
					hn.put(x, o);
					continue;
				}

				/*We create a placeholder for each parsed attribute in the
				 * sanitized output. This ensures the order of the attributes.
				 * Subclasses will take care of parsing and replacing these values.
				 * If they don't, we'll remove the placeholder later.*/
				if(parsedAttrs.contains(x)) {
					hn.put(x, null);
					continue;
				}

				/*If the attribute is to be passed through without sanitation*/
				if(allowedAttrs.contains(x)) {
					hn.put(x, o);
					continue;
				}

				// lang, xml:lang and dir can go on anything
				// lang or xml:lang = language [ "-" country [ "-" variant ] ]
				// The variant can be just about anything; no way to test (avian)
				if (x.equals("xml:lang") ||x.equals("lang") || (x.equals("dir") && (o instanceof String) && (((String)o).equalsIgnoreCase("ltr") || ((String)o).equalsIgnoreCase("rtl")))) {
					if(logDEBUG) Logger.debug(this, "HTML Filter is putting attribute: "+x+" =  "+o);
					hn.put(x, o);
				}
			}
			return hn;
		}

		/*If this function returns true, this tag will be removed from
		 * the sanitized output if it has no attributes*/
		protected boolean expungeTagIfNoAttributes() {
			return false;
		}
	}

	/**
	 * Removes one pair of matching surrounding quotes from a string.
	 *
	 * Only a single layer is removed, and only when the first and last
	 * characters are the same quote character (either " or '). Strings
	 * shorter than two characters are returned unchanged.
	 *
	 * @param s the possibly-quoted string; must not be null
	 * @return s without its surrounding quotes, or s itself if it was not quoted
	 */
	static String stripQuotes(String s) {
		int len = s.length();
		if (len < 2)
			return s;
		char first = s.charAt(0);
		boolean isQuote = first == '"' || first == '\'';
		if (isQuote && s.charAt(len - 1) == first)
			return s.substring(1, len - 1);
		return s;
	}

	//	static String[] titleString = new String[] {"title"};

	/**
	 * Common handling for tags whose body is style or script text.
	 *
	 * Tracks nesting of such tags in the parse context, refuses nested
	 * occurrences, and on the closing tag either hands the collected body to
	 * processStyle() or discards it if the opening tag was rejected.
	 */
	static abstract class ScriptStyleTagVerifier extends TagVerifier {
		ScriptStyleTagVerifier(String tag, String[] allowedAttrs, String[] uriAttrs) {
			super(tag, allowedAttrs, uriAttrs, null);
		}

		/** Records in the parse context whether we are inside this kind of tag. */
		abstract void setStyle(boolean b, HTMLParseContext pc);

		/** Reports the flag maintained by setStyle(). */
		abstract boolean getStyle(HTMLParseContext pc);

		/** Processes the body text collected in the parse context. */
		abstract void processStyle(HTMLParseContext pc);

		/**
		 * Runs the normal attribute filtering, then dispatches to finish()
		 * for a closing tag or start() for an opening tag.
		 */
		@Override
		Map<String, Object> sanitizeHash(Map<String, Object> h,
			ParsedTag p,
			HTMLParseContext pc) throws DataFilterException {
			Map<String, Object> hn = super.sanitizeHash(h, p, pc);
			return p.startSlash ? finish(h, hn, pc) : start(h, hn, pc);
		}

		/**
		 * Reports unbalanced or nested style/script tags: either as an HTML
		 * comment written after the tag, or as a filter exception, depending
		 * on deleteErrors.
		 */
		private static void reportBadNesting(HTMLParseContext pc) throws DataFilterException {
			if (deleteErrors)
				pc.writeAfterTag.append("<!-- " + l10n("tooManyNestedStyleOrScriptTags") + " -->");
			else
				throwFilterException(l10n("tooManyNestedStyleOrScriptTagsLong"));
		}

		/**
		 * Handles the closing tag.
		 * @return the filtered attributes, or null to drop the tag when there
		 *         was no matching opening tag
		 */
		Map<String, Object> finish(Map<String, Object> h, Map<String, Object> hn,
			HTMLParseContext pc) throws DataFilterException {
			if(logDEBUG) Logger.debug(this, "Finishing script/style");
			setStyle(false, pc);
			if (--pc.styleScriptRecurseCount < 0) {
				reportBadNesting(pc);
				return null;
			}
			if (pc.killStyle) {
				// The opening tag was rejected: throw the body away
				pc.killStyle = false;
				pc.currentStyleScriptChunk = "";
			} else {
				processStyle(pc);
				pc.writeStyleScriptWithTag = true;
			}
			pc.expectingBadComment = false;
			// The closing tag is passed on; it has no parameters
			return hn;
		}

		/**
		 * Handles the opening tag.
		 * @return the filtered attributes, or null to drop the tag when it is
		 *         nested or its type is anything other than text/css
		 */
		Map<String, Object> start(Map<String, Object> h, Map<String, Object> hn, HTMLParseContext pc)
		        throws DataFilterException {
			if(logDEBUG) Logger.debug(this, "Starting script/style");
			if (++pc.styleScriptRecurseCount > 1) {
				reportBadNesting(pc);
				return null;
			}
			setStyle(true, pc);
			String type = getHashString(h, "type");
			if (type == null)
				return hn;
			if (!"text/css".equalsIgnoreCase(type) /* FIXME */) {
				pc.killStyle = true;
				pc.expectingBadComment = true;
				return null; // kill the tag
			}
			hn.put("type", "text/css");
			return hn;
		}
	}

	/**
	 * Verifier for the <style> element: tracks {@code pc.inStyle} and runs
	 * the collected chunk through {@code sanitizeStyle}.
	 */
	static class StyleTagVerifier extends ScriptStyleTagVerifier {
		StyleTagVerifier() {
			super("style", new String[] { "id", "media", "title", "xml:space" }, emptyStringArray);
		}

		@Override
		void setStyle(boolean b, HTMLParseContext pc) {
			pc.inStyle = b;
		}

		@Override
		boolean getStyle(HTMLParseContext pc) {
			return pc.inStyle;
		}

		/**
		 * Replaces the collected chunk with its sanitised form; if the
		 * sanitiser throws, the error is logged and the chunk is emptied.
		 */
		@Override
		void processStyle(HTMLParseContext pc) {
			String result;
			try {
				result = sanitizeStyle(pc.currentStyleScriptChunk, pc.cb, pc, false);
			} catch (DataFilterException e) {
				Logger.error(this, "Error parsing style: "+e, e);
				result = "";
			}
			pc.currentStyleScriptChunk = result;
		}
	}

	/**
	 * Verifier for the <script> element. The tags are always dropped; the
	 * parent logic is still run so that the parse-context state for the
	 * script body is maintained.
	 */
	static class ScriptTagVerifier extends ScriptStyleTagVerifier {
		ScriptTagVerifier() {
			super(
				"script",
				new String[] { "id", "charset", "type", "language", "defer", "xml:space" },
				new String[] { "src" });
			/*
			 * FIXME: src is not supported and type is ignored (we will need
			 * to check this if/when we support scripts); charset is ignored.
			 */
		}

		@Override
		Map<String, Object> sanitizeHash(Map<String, Object> hn, ParsedTag p, HTMLParseContext pc)
		        throws DataFilterException {
			// Run the parent for its side effects on the parse context only;
			// its result is discarded so the tag itself is never emitted.
			super.sanitizeHash(hn, p, pc);
			return null;
		}

		@Override
		void setStyle(boolean b, HTMLParseContext pc) {
			pc.inScript = b;
		}

		@Override
		boolean getStyle(HTMLParseContext pc) {
			return pc.inScript;
		}

		/** Replaces the collected chunk with the result of {@code sanitizeScripting}. */
		@Override
		void processStyle(HTMLParseContext pc) {
			String cleaned = sanitizeScripting(pc.currentStyleScriptChunk);
			pc.currentStyleScriptChunk = cleaned;
		}
	}

	/**
	 * Verifier that handles the core attributes {@code id}, {@code class},
	 * {@code style} and {@code title} itself. Constructing one also registers
	 * the tag in {@code allowedHTMLTags}.
	 */
	static class BaseCoreTagVerifier extends TagVerifier {
		private static final String[] locallyVerifiedAttrs = new String[] {
			"id",
			"class",
			"style"
		};

		BaseCoreTagVerifier(
			String tag,
			String[] allowedAttrs,
			String[] uriAttrs,
			String[] inlineURIAttrs) {
			super(tag, allowedAttrs, uriAttrs, inlineURIAttrs);
			allowedHTMLTags.add(tag);
			for (int i = 0; i < locallyVerifiedAttrs.length; i++) {
				this.parsedAttrs.add(locallyVerifiedAttrs[i]);
			}
		}

		/** Copies a string-valued attribute from {@code from} to {@code to} when it is present. */
		private static void copyIfPresent(Map<String, Object> from, Map<String, Object> to, String key) {
			String value = getHashString(from, key);
			if (value != null) {
				to.put(key, value);
			}
		}

		@Override
		Map<String, Object> sanitizeHash(Map<String, Object> h,
			ParsedTag p,
			HTMLParseContext pc) throws DataFilterException {
			Map<String, Object> hn = super.sanitizeHash(h, p, pc);
			// %i18n dealt with by TagVerifier
			// %coreattrs
			// id and class are passed through as-is; hopefully nobody will be
			// stupid enough to encode URLs into them... :)
			copyIfPresent(h, hn, "id");
			copyIfPresent(h, hn, "class");

			// Inline style goes through the CSS sanitiser, then has its
			// double quotes escaped.
			String rawStyle = getHashString(h, "style");
			if (rawStyle != null) {
				String cleanStyle = sanitizeStyle(rawStyle, pc.cb, pc, true);
				if (cleanStyle != null) {
					hn.put("style", escapeQuotes(cleanStyle));
				}
			}

			// PARANOIA: title is PLAIN TEXT, right? In all user agents? :)
			copyIfPresent(h, hn, "title");
			return hn;
		}
	}

	/**
	 * Core verifier that additionally knows a set of event-handler
	 * attributes. Each event attribute present on a tag is passed through
	 * {@code sanitizeScripting} and kept only if that returns non-null.
	 */
	static class CoreTagVerifier extends BaseCoreTagVerifier {
		private final HashSet<String> eventAttrs;
		private static final String[] stdEvents =
			new String[] {
				"onclick",
				"ondblclick",
				"onmousedown",
				"onmouseup",
				"onmouseover",
				"onmousemove",
				"onmouseout",
				"onkeypress",
				"onkeydown",
				"onkeyup",
				"onload",
				"onfocus",
				"onblur",
				"oncontextmenu",
				"onresize",
				"onscroll",
				"onunload",
				"onmouseenter",
				"onchange",
				"onreset",
				"onselect",
				"onsubmit",
				"onerror",
			};

		/** Same as the six-argument constructor with the standard events included. */
		CoreTagVerifier(
			String tag,
			String[] allowedAttrs,
			String[] uriAttrs,
			String[] inlineURIAttrs,
			String[] eventAttrs) {
			this(tag, allowedAttrs, uriAttrs, inlineURIAttrs, eventAttrs, true);
		}

		/**
		 * @param eventAttrs extra event attributes for this tag; may be null.
		 * @param addStdEvents whether to also register the {@code stdEvents} list.
		 */
		CoreTagVerifier(
			String tag,
			String[] allowedAttrs,
			String[] uriAttrs,
			String[] inlineURIAttrs,
			String[] eventAttrs,
			boolean addStdEvents) {
			super(tag, allowedAttrs, uriAttrs, inlineURIAttrs);
			this.eventAttrs = new HashSet<String>();
			if (eventAttrs != null) {
				registerEvents(eventAttrs);
			}
			if (addStdEvents) {
				registerEvents(stdEvents);
			}
		}

		/** Adds each name to both the event set and the parsed-attribute collection. */
		private void registerEvents(String[] names) {
			for (int i = 0; i < names.length; i++) {
				this.eventAttrs.add(names[i]);
				this.parsedAttrs.add(names[i]);
			}
		}

		@Override
		Map<String, Object> sanitizeHash(Map<String, Object> h,
			ParsedTag p,
			HTMLParseContext pc) throws DataFilterException {
			Map<String, Object> hn = super.sanitizeHash(h, p, pc);
			// events (default and added)
			for (String eventName : eventAttrs) {
				String handler = getHashString(h, eventName);
				if (handler == null) {
					continue;
				}
				String cleaned = sanitizeScripting(handler);
				if (cleaned != null) {
					hn.put(eventName, cleaned);
				}
			}
			return hn;
		}
	}

	/**
	 * Verifier for link-like tags. It handles {@code type}, {@code charset},
	 * {@code rel}, {@code rev}, {@code media}, {@code hreflang} and
	 * {@code href} itself: link types are restricted to a known list, style
	 * sheets are forced to {@code text/css}, and the target URI is passed
	 * through the filter callback together with the type/charset information.
	 */
	static class LinkTagVerifier extends CoreTagVerifier {
		private static final String[] locallyVerifiedAttrs = new String[] {
			"type",
			"charset",
			"rel",
			"rev",
			"media",
			"hreflang",
			"href"
		};

		LinkTagVerifier(
			String tag,
			String[] allowedAttrs,
			String[] uriAttrs,
			String[] inlineURIAttrs,
			String[] eventAttrs) {
			super(tag, allowedAttrs, uriAttrs, inlineURIAttrs, eventAttrs);
			for(String attr : locallyVerifiedAttrs) {
				this.parsedAttrs.add(attr);
			}
		}

		/**
		 * Charset.isSupported() throws an unchecked IllegalCharsetNameException
		 * (an IllegalArgumentException) for syntactically illegal names, e.g.
		 * an empty string or a name containing spaces. The name comes straight
		 * from the document, so treat such names as "not supported" instead of
		 * letting the exception escape.
		 */
		private static boolean isSupportedCharset(String name) {
			try {
				return Charset.isSupported(name);
			} catch (IllegalArgumentException e) {
				return false;
			}
		}

		/**
		 * @return the sanitised attribute map, or null if the whole tag must
		 * be dropped (misplaced "stylesheet" link type, or a style sheet type
		 * that does not match the link's role).
		 */
		@Override
		Map<String, Object> sanitizeHash(Map<String, Object> h,
			ParsedTag p,
			HTMLParseContext pc) throws DataFilterException {
			Map<String, Object> hn = super.sanitizeHash(h, p, pc);
			String hreflang = getHashString(h, "hreflang");
			String charset = null;
			String maybecharset = null;
			String type = getHashString(h, "type");
			if (type != null) {
				String[] typesplit = splitType(type);
				type = typesplit[0];
				if ((typesplit[1] != null) && (typesplit[1].length() > 0))
					charset = typesplit[1];
				if(logDEBUG)
					Logger.debug(
							this,
							"Processing link tag, type="
							+ type
							+ ", charset="
							+ charset);
			}
			// An explicit charset attribute wins over one embedded in type.
			String c = getHashString(h, "charset");
			if (c != null)
				charset = c;
			if(charset != null) {
				try {
					charset = URLDecoder.decode(charset, false);
				} catch (URLEncodedFormatException e) {
					charset = null;
				}
			}
			if(charset != null && charset.indexOf('&') != -1)
				charset = null;
			if(charset != null && !isSupportedCharset(charset))
				charset = null;

			// Is it a style sheet?
			// Also, sanitise rel type

			String rel = getHashString(h, "rel");

			String parsedRel = "", parsedRev = "";
			boolean isStylesheet = false;
			boolean isIcon = false;

			if(rel != null) {

				// Locale.ROOT: with the default locale, e.g. a Turkish
				// locale would map 'I' to a dotless i and break the matching
				// against the ASCII link type names.
				rel = rel.toLowerCase(java.util.Locale.ROOT);

				StringTokenizer tok = new StringTokenizer(rel, " ");
				// Number of tokens accepted so far (unknown tokens are not counted).
				int i=0;
				String prevToken = null;
				StringBuilder sb = new StringBuilder(rel.length());
				while (tok.hasMoreTokens()) {
					String token = tok.nextToken();
					if(token.equals("stylesheet")) {
						isStylesheet = true;
						// "stylesheet" must be the first accepted token, or
						// directly follow "alternate" as the second one.
						boolean isFirst = (i == 0);
						boolean followsAlternate = (i == 1 && "alternate".equals(prevToken));
						if(!(isFirst || followsAlternate))
							return null;
						if(tok.hasMoreTokens())
							return null; // Disallow extra tokens after "stylesheet"
					} else if (token.equals("icon")) {
						isIcon = true;
					} else if(!isStandardLinkType(token)) continue;

					i++;
					if(sb.length() != 0)
						sb.append(' ');
					sb.append(token);
					prevToken = token;
				}

				parsedRel = sb.toString();
			}

			String rev = getHashString(h, "rev");
			if(rev != null) {

				rev = rev.toLowerCase(java.util.Locale.ROOT);

				StringTokenizer tok = new StringTokenizer(rev, " ");
				StringBuilder sb = new StringBuilder(rev.length());

				while (tok.hasMoreTokens()) {
					String token = tok.nextToken();
					if(!isStandardLinkType(token)) continue;
					if(sb.length() != 0)
						sb.append(' ');
					sb.append(token);
				}

				parsedRev = sb.toString();

			}

			// Allow no rel or rev, even on <link>, as per HTML spec.

			if(parsedRel.length() != 0)
				hn.put("rel", parsedRel);
			if(parsedRev.length() != 0)
				hn.put("rev", parsedRev);

			if(rel != null) {
				if(rel.equals("stylesheet") || rel.equals("alternate stylesheet"))
					isStylesheet = true;
			} else {
				// No rel at all, so not a stylesheet.
				if(type != null && type.startsWith("text/css"))
					return null; // Not a stylesheet, so can't take a stylesheet type.
			}

			if(isStylesheet) {
				if(charset == null) {
					// Browser will use the referring document's charset if there
					// is no BOM and we don't specify one in HTTP.
					// So we need to pass this information to the filter.
					// We cannot force the mime type with the charset, because if
					// we do that, we might be wrong - if there is a BOM or @charset
					// we want to use that. E.g. chinese pages might have the
					// page in GB18030 and the borrowed CSS in ISO-8859-1 or UTF-8.
					maybecharset = pc.charset;
				}
				String media = getHashString(h, "media");
				if(media != null)
					media = CSSReadFilter.filterMediaList(media);
				if(media != null)
					hn.put("media", media);
				if(type != null && !type.startsWith("text/css"))
					return null; // Different style language e.g. XSL, not supported.
				type = "text/css";
			}
			String href = getHashString(h, "href");
			if (href != null) {
				href = HTMLDecoder.decode(href);
				if (isIcon) {
					// Icons never carry charset information.
					href = htmlSanitizeURI(href, type, null, null, pc.cb, pc, false);
				} else {
					href = htmlSanitizeURI(href, type, charset, maybecharset, pc.cb, pc, false);
				}
				if (href != null) {
					href = HTMLEncoder.encode(href);
					hn.put("href", href);
					if (type != null)
						hn.put("type", type);
					if (charset != null)
						hn.put("charset", charset);
					// NOTE(review): hreflang is only kept when a charset is
					// present; unclear whether that coupling is intended.
					if ((charset != null) && (hreflang != null))
						hn.put("hreflang", hreflang);
				}
			}
			// FIXME: allow these if the charset and encoding are encoded into
			// the URL
			return hn;
		}

		// Does not include stylesheet
		private static final HashSet<String> standardRelTypes = new HashSet<String>();
		static {
			for(String s : new String[] {
					"alternate",
					"start",
					"next",
					"prev",
					"contents",
					"index",
					"glossary",
					"copyright",
					"chapter",
					"section",
					"subsection",
					"appendix",
					"help",
					"bookmark"
			}) standardRelTypes.add(s);
		}

		/** @return true if the token (compared case-insensitively) is one of the known link types. */
		private boolean isStandardLinkType(String token) {
			return standardRelTypes.contains(token.toLowerCase(java.util.Locale.ROOT));
		}
	}

    /** Verify media tags (audio and video). These need their own
     * verifier because, unlike images, browsers use content
     * sniffing to decide whether to display a resource as media
     * content. Using text/plain as the content type would allow
     * exploiting this to run unfiltered files as media files. We fix
     * this by encoding the MIME type into the URI. */
	static class MediaTagVerifier extends CoreTagVerifier {
		private static final String[] locallyVerifiedAttrs = new String[] {
			"src"
		};

		MediaTagVerifier(
			String tag,
			String[] allowedAttrs,
			String[] uriAttrs,
			String[] inlineURIAttrs,
			String[] eventAttrs) {
			super(tag, allowedAttrs, uriAttrs, inlineURIAttrs, eventAttrs);
			for(String attr : locallyVerifiedAttrs) {
				this.parsedAttrs.add(attr);
			}
		}

		/**
		 * Sanitises the {@code src} attribute, always requesting the type
		 * {@code audio/mpeg} for the target. If the URI is rejected, the
		 * attribute is simply left out.
		 */
		@Override
		Map<String, Object> sanitizeHash(Map<String, Object> h,
			ParsedTag p,
			HTMLParseContext pc) throws DataFilterException {
			Map<String, Object> hn = super.sanitizeHash(h, p, pc);
			/* TODO: get the type from the filename. Currently we only
			 * have a filter for mp3, so this is the simplest possible
			 * solution. */
			String type = "audio/mpeg";

			String src = getHashString(h, "src");
			if (src != null) {
				src = HTMLDecoder.decode(src);
				src = htmlSanitizeURI(src, type, null, null, pc.cb, pc, false);
				if (src != null) {
					src = HTMLEncoder.encode(src);
					hn.put("src", src);
				}
			}
			return hn;
		}
	}

	// We do not allow forms to act anywhere else than on / 
	/**
	 * Verifier for form tags. The method and action are handed to the filter
	 * callback's {@code processForm}; enctype and accept-charset are always
	 * overwritten with fixed values.
	 */
	static class FormTagVerifier extends CoreTagVerifier{
		private static final String[] locallyVerifiedAttrs = new String[] {
			"method",
			"action",
			"enctype",
			"accept-charset"
		};

		FormTagVerifier(
			String tag,
			String[] allowedAttrs,
			String[] uriAttrs,
			String[] eventAttrs) {
			super(tag, allowedAttrs, uriAttrs, null, eventAttrs);
			for (int i = 0; i < locallyVerifiedAttrs.length; i++) {
				this.parsedAttrs.add(locallyVerifiedAttrs[i]);
			}
		}

		/**
		 * @return the sanitised attributes, or null to drop the tag when the
		 * callback rejects the form (by exception or by returning null).
		 */
		@Override
		Map<String, Object> sanitizeHash(Map<String, Object> h,
			ParsedTag p,
			HTMLParseContext pc) throws DataFilterException {
			Map<String, Object> hn = super.sanitizeHash(h, p, pc);
			if (p.startSlash) {
				// Closing tag: allow, but only with standard elements
				return hn;
			}
			String formMethod = getHashString(h, "method");
			String formAction = getHashString(h, "action");
			String rewrittenAction;
			try {
				rewrittenAction = pc.cb.processForm(formMethod, formAction);
			} catch (CommentException e) {
				// Explain the removal in a comment after the tag.
				String reason = HTMLEncoder.encode(e.toString());
				pc.writeAfterTag.append("<!-- ").append(reason).append(" -->");
				return null;
			}
			if (rewrittenAction == null) {
				return null;
			}
			hn.put("method", formMethod);
			hn.put("action", rewrittenAction);
			// Force enctype and accept-charset to acceptable values.
			hn.put("enctype", "multipart/form-data");
			hn.put("accept-charset", "UTF-8");
			return hn;
		}
	}
	
	/**
	 * Verifier for input tags: the whole tag is dropped unless its sanitised
	 * {@code type} attribute is one of a fixed list of allowed values.
	 */
	static class InputTagVerifier extends CoreTagVerifier{
		/** Allowed values of the type attribute. Shared constant; never modified. */
		private static final String[] types = new String[]{
			"text",
			"password",
			"checkbox",
			"radio",
			"submit",
			"reset",
			// no ! file
			"hidden",
			"image",
			"button"
		};
		private final HashSet<String> allowedTypes;

		InputTagVerifier(
			String tag,
			String[] allowedAttrs,
			String[] uriAttrs,
			String[] inlineURIAttrs,
			String[] eventAttrs) {
			super(tag, allowedAttrs, uriAttrs, inlineURIAttrs, eventAttrs);
			this.allowedTypes = new HashSet<String>();
			for (String type: types) {
				this.allowedTypes.add(type);
			}
		}

		/**
		 * @return the sanitised attributes, or null to drop the tag when the
		 * type is missing from the sanitised map or not in the allowed list.
		 */
		@Override
		Map<String, Object> sanitizeHash(Map<String, Object> h,
			ParsedTag p,
			HTMLParseContext pc) throws DataFilterException {
			Map<String, Object> hn = super.sanitizeHash(h, p, pc);

			// We drop the whole <input> if type isn't allowed
			if(!allowedTypes.contains(hn.get("type"))){
				return null;
			}

			return hn;
		}
	}
	
	/**
	 * Verifier for the meta tag. Only a small whitelist of {@code name} and
	 * {@code http-equiv} declarations is passed on. Content-Type and the
	 * HTML5 {@code charset} attribute are also used to record the charset
	 * declared by the document in {@code pc.detectedCharset}.
	 */
	static class MetaTagVerifier extends TagVerifier {
		private static final String[] allowedContentTypes = ContentFilter.HTML_MIME_TYPES;
		private static final String[] locallyVerifiedAttrs = {
			"http-equiv",
			"name",
			"content"
		};

		MetaTagVerifier() {
			super("meta", new String[] { "id" });
			for(String attr : locallyVerifiedAttrs) {
				this.parsedAttrs.add(attr);
			}
		}

		/**
		 * @return the sanitised attributes, or null to drop the tag.
		 * @throws DataFilterException on a Content-Type with a disallowed
		 * type or a conflicting charset, or on conflicting charset
		 * declarations.
		 */
		@Override
		Map<String, Object> sanitizeHash(Map<String, Object> h,
			ParsedTag p,
			HTMLParseContext pc) throws DataFilterException {
			Map<String, Object> hn = super.sanitizeHash(h, p, pc);
			/*
			 * Several possibilities: a) meta http-equiv=X content=Y b) meta
			 * name=X content=Y
			 */
			String http_equiv = getHashString(h, "http-equiv");
			String name = getHashString(h, "name");
			String content = getHashString(h, "content");
			String scheme = getHashString(h, "scheme");
			if(logMINOR) Logger.minor(this, "meta: name="+name+", content="+content+", http-equiv="+http_equiv+", scheme="+scheme);
			if (content != null) {
				if ((name != null) && (http_equiv == null)) {
					if (name.equalsIgnoreCase("Author")
							|| name.equalsIgnoreCase("Keywords")
							|| name.equalsIgnoreCase("Description")) {
						hn.put("name", name);
						hn.put("content", content);
					}
				} else if ((http_equiv != null) && (name == null)) {
					if (http_equiv.equalsIgnoreCase("Expires")) {
						try {
							ToadletContextImpl.parseHTTPDate(content);
							hn.put("http-equiv", http_equiv);
							hn.put("content", content);
						} catch (ParseException e) {
							// Delete it.
							return null;
						}
					} else if (
						http_equiv.equalsIgnoreCase("Content-Script-Type")) {
						// We don't support script at this time.
					} else if (
						http_equiv.equalsIgnoreCase("Content-Style-Type")) {
						// FIXME: charsets
						if (content.equalsIgnoreCase("text/css")) {
							// FIXME: selectable style languages - only matters
							// when we have implemented more than one
							// FIXME: if we ever do allow it... the spec
							// http://www.w3.org/TR/html4/present/styles.html#h-14.2.1
							// says only the last definition counts...
							//        but it only counts if it's in the HEAD section,
							// so we DONT need to parse the whole doc
							hn.put("http-equiv", http_equiv);
							hn.put("content", content);
						}
						// FIXME: add some more headers - Dublin Core?
					} else if (http_equiv.equalsIgnoreCase("Content-Type")) {
						if(logMINOR) Logger.minor(this, "Found http-equiv content-type="+content);
						String[] typesplit = splitType(content);
						if(logDEBUG) {
							for(int i=0;i<typesplit.length;i++)
								Logger.debug(this, "["+i+"] = "+typesplit[i]);
						}
						boolean detected = false;
						for (String allowedContentType: allowedContentTypes) {
							if (typesplit[0].equalsIgnoreCase(allowedContentType)) {
								if((typesplit[1] == null) || (pc.charset != null && typesplit[1]
								        .equalsIgnoreCase(pc.charset))) {
									// No charset, or the one we are already using: pass on.
									hn.put("http-equiv", http_equiv);
									hn.put("content", typesplit[0]
									    + (typesplit[1] != null ? "; charset="
										+ typesplit[1] : ""));
								} else if(typesplit[1] != null && pc.charset != null && !typesplit[1].equalsIgnoreCase(pc.charset)) {
									throwFilterException(l10n("wrongCharsetInMeta"));
								} else if(typesplit[1] != null) {
									// No charset known yet: record the declared one.
									if(pc.detectedCharset != null)
										throwFilterException(l10n("multipleCharsetsInMeta"));
									pc.detectedCharset = typesplit[1].trim();
								}
								detected = true;
								break;
							}
						}
						if(!detected)
							throwFilterException(l10n("invalidMetaType"));
					} else if (
						http_equiv.equalsIgnoreCase("Content-Language")) {
						if(content.matches("((?>[a-zA-Z0-9]*)(?>-[A-Za-z0-9]*)*(?>,\\s*)?)*") && (!content.trim().equals(""))) {
							hn.put("http-equiv", "Content-Language");
							hn.put("content", content);
						}
					} else if (http_equiv.equalsIgnoreCase("refresh")) {
						int idx = content.indexOf(';');
						if(idx == -1) {
							// Plain "N": refresh of the same page. When same-page
							// refresh is disabled nothing is emitted. This case
							// must not fall through to the redirect branch, which
							// needs a valid idx for its substring() calls.
							if(metaRefreshSamePageMinInterval >= 0) {
								try {
									int seconds = Integer.parseInt(content);
									if(seconds < 0) return null;
									if(seconds < metaRefreshSamePageMinInterval)
										seconds = metaRefreshSamePageMinInterval;
									hn.put("http-equiv", "refresh");
									hn.put("content", Integer.toString(seconds));
								} catch (NumberFormatException e) {
									// Delete.
									pc.writeAfterTag.append("<!-- doesn't parse as number in meta refresh -->");
									return null;
								}
							}
						} else if(metaRefreshRedirectMinInterval >= 0) {
							// "N; url=..." : redirect after N seconds.
							int seconds;
							String before = content.substring(0, idx);
							String after = content.substring(idx+1).trim();
							try {
								seconds = Integer.parseInt(before);
								if(seconds < 0) return null;
								if(seconds < metaRefreshRedirectMinInterval) seconds = metaRefreshRedirectMinInterval;
								if(!after.toLowerCase().startsWith("url=")) {
									pc.writeAfterTag.append("<!-- no url but doesn't parse as number in meta refresh -->");
									return null;
								}
								after = after.substring("url=".length()).trim();
								try {
									String url = sanitizeURI(after, null, null, null, pc.cb, false);
									hn.put("http-equiv", "refresh");
									hn.put("content", ""+seconds+"; url="+HTMLEncoder.encode(url));
								} catch (CommentException e) {
									// Encode the message so it cannot terminate the
									// comment it is embedded in.
									pc.writeAfterTag.append("<!-- ")
										.append(HTMLEncoder.encode(String.valueOf(e.getMessage())))
										.append("-->");
									// Delete
									return null;
								}
							} catch (NumberFormatException e) {
								pc.writeAfterTag.append("<!-- doesn't parse as number in meta refresh possibly with url -->");
								// Delete.
								return null;
							}
						}
					}
				}
			}

			/* try HTML5 meta charset declaration. */
			String charset = getHashString(h, "charset");
			if (charset != null) {
				if ((pc.detectedCharset != null) && !charset.equals(pc.detectedCharset)) {
					throwFilterException(l10n("multipleCharsetsInMeta"));
				}
				pc.detectedCharset = charset;
			}

			return hn;
		}

		@Override
		protected boolean expungeTagIfNoAttributes() {
			return true;
		}
	}

	/**
	 * Verifier for the doctype declaration. Accepted forms are the HTML5
	 * {@code html} doctype, {@code html SYSTEM "about:legacy-compat"}, and
	 * {@code html PUBLIC} with one of the public identifiers in
	 * {@link #DTDs}, optionally followed by the matching system identifier.
	 */
	static class DocTypeTagVerifier extends TagVerifier {
		DocTypeTagVerifier(String tag) {
			super(tag, null);
		}

		/**
		 * Known public identifiers. The value is the expected system
		 * identifier, or a non-String placeholder when none is checked.
		 */
		private static final Map<String, Object> DTDs = new HashMap<String, Object>();

		static {
			final String[][] known = {
				{ "-//W3C//DTD XHTML 1.0 Strict//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" },
				{ "-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" },
				{ "-//W3C//DTD XHTML 1.0 Frameset//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd" },
				{ "-//W3C//DTD XHTML 1.1//EN", "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" },
				{ "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd" },
				{ "-//W3C//DTD HTML 4.01 Transitional//EN", "http://www.w3.org/TR/html4/loose.dtd" },
				{ "-//W3C//DTD HTML 4.01 Frameset//EN", "http://www.w3.org/TR/html4/frameset.dtd" },
			};
			for (String[] entry : known) {
				DTDs.put(entry[0], entry[1]);
			}
			DTDs.put("-//W3C//DTD HTML 3.2 Final//EN", new Object());
		}

		/** @return the tag unchanged if it is an accepted doctype, otherwise null. */
		@Override
		ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) {
			final String[] attrs = t.unparsedAttrs;
			final int count = attrs.length;

			// HTML5 is just <!doctype html>
			if (count == 1) {
				return attrs[0].equalsIgnoreCase("html") ? t : null;
			}
			if (count != 3 && count != 4) {
				return null;
			}
			if (!attrs[0].equalsIgnoreCase("html")) {
				return null;
			}
			if (attrs[1].equalsIgnoreCase("system") && count == 3) {
				// HTML5 allows <!DOCTYPE html SYSTEM "about:legacy-compat"> (either kind of quotes)
				String systemId = stripQuotes(attrs[2]);
				return systemId.equals("about:legacy-compat") ? t : null;
			}
			if (!attrs[1].equalsIgnoreCase("public")) {
				return null;
			}
			String publicId = stripQuotes(attrs[2]);
			if (!DTDs.containsKey(publicId)) {
				return null;
			}
			if (count == 4) {
				// A system identifier was given: it must match the expected
				// one, if we have one on record.
				String givenSystemId = stripQuotes(attrs[3]);
				String expectedSystemId = getHashString(DTDs, publicId);
				if ((expectedSystemId != null) && !expectedSystemId.equals(givenSystemId)) {
					return null;
				}
			}
			return t;
		}
	}

	/**
	 * Verifier for the XML declaration. The declaration is kept only if it
	 * has version 1.0 and a quoted encoding that is compatible with the
	 * charset in use; otherwise it is deleted (null is returned).
	 */
	static class XmlTagVerifier extends TagVerifier {
		XmlTagVerifier() {
			super("?xml", null);
		}

		/**
		 * Extracts the encoding name from {@code encoding="..."} or
		 * {@code encoding='...'}.
		 *
		 * @return the name between matching quotes, or null if the attribute
		 * is malformed or the name is empty (this includes a lone
		 * {@code encoding="}, which has no room for a closing quote).
		 */
		private static String extractEncoding(String attr) {
			final String prefix = "encoding=";
			// Need at least: opening quote, one character, closing quote.
			if (!attr.startsWith(prefix) || attr.length() < prefix.length() + 3)
				return null;
			char quote = attr.charAt(prefix.length());
			if (quote != '"' && quote != '\'')
				return null;
			if (attr.charAt(attr.length() - 1) != quote)
				return null;
			return attr.substring(prefix.length() + 1, attr.length() - 1);
		}

		/**
		 * @return the tag unchanged if the declaration is acceptable, null to
		 * delete it.
		 * @throws DataFilterException if a different charset was already
		 * detected in the document.
		 */
		@Override
		ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) throws DataFilterException {
			if (t.unparsedAttrs.length != 2 && t.unparsedAttrs.length != 3) {
				if (logMINOR) Logger.minor(this, "Deleting xml declaration, invalid length");
				return null;
			}
			if (t.unparsedAttrs.length == 3 && !t.unparsedAttrs[2].equals("?")) {
				if (logMINOR) Logger.minor(this, "Deleting xml declaration, invalid ending (length 3)");
				return null;
			}
			if (t.unparsedAttrs.length == 2 && !t.unparsedAttrs[1].endsWith("?")) {
				if (logMINOR) Logger.minor(this, "Deleting xml declaration, invalid ending (length 2)");
				return null;
			}
			if (!(t.unparsedAttrs[0].equals("version=\"1.0\"") || t.unparsedAttrs[0].equals("version='1.0'"))) {
				if (logMINOR) Logger.minor(this, "Deleting xml declaration, invalid version");
				return null;
			}
			// NOTE(review): in the two-attribute form this attribute ends with
			// "?", so the closing-quote check below always rejects it.
			String charset = extractEncoding(t.unparsedAttrs[1]);
			if (charset == null) {
				if (logMINOR) Logger.minor(this, "Deleting xml declaration, invalid encoding");
				return null;
			}

			if (!charset.equalsIgnoreCase(pc.charset)) {
				if(pc.charset != null) {
					// We are already decoding with a different charset.
					if (logMINOR) Logger.minor(this, "Deleting xml declaration (invalid charset "
							+ charset + " should be "+pc.charset + ")");
					return null;
				} else if(pc.detectedCharset != null) {
					throwFilterException(l10n("multipleCharsetsInMeta"));
				} else {
					pc.detectedCharset = charset;
				}
			}
			return t;
		}
	}

	/**
	 * Verifier for the html tag. An {@code xmlns} attribute is kept only when
	 * it is the XHTML namespace, in which case the parse context is switched
	 * to XHTML.
	 */
	static class HtmlTagVerifier extends TagVerifier {
		private static final String[] locallyVerifiedAttrs = new String[] { "xmlns" };
		HtmlTagVerifier() {
			super("html", new String[] { "id", "version" });
			for (int i = 0; i < locallyVerifiedAttrs.length; i++) {
				parsedAttrs.add(locallyVerifiedAttrs[i]);
			}
		}

		@Override
		Map<String, Object> sanitizeHash(Map<String, Object> h,
			ParsedTag p,
			HTMLParseContext pc) throws DataFilterException {
			Map<String, Object> hn = super.sanitizeHash(h, p, pc);
			String namespace = getHashString(h, "xmlns");
			boolean isXhtmlNamespace = "http://www.w3.org/1999/xhtml".equals(namespace);
			if (isXhtmlNamespace) {
				hn.put("xmlns", namespace);
				pc.setisXHTML(true);
			}
			return hn;
		}
	}

	/**
	 * Verifier for a tag whose {@code href} is a base href. The value is
	 * handed to the filter callback's {@code onBaseHref}; the tag is dropped
	 * when there is no href or the callback returns null.
	 */
	static class BaseHrefTagVerifier extends TagVerifier {
		private static final String[] locallyVerifiedAttrs = new String[] {
			"href"};

		BaseHrefTagVerifier(String tag, String[] allowedAttrs, String[] uriAttrs) {
			super(tag, allowedAttrs, uriAttrs, null);
			for (int i = 0; i < locallyVerifiedAttrs.length; i++) {
				this.parsedAttrs.add(locallyVerifiedAttrs[i]);
			}
		}

		/** @return the sanitised attributes with the rewritten href, or null to drop the tag. */
		@Override
		Map<String, Object> sanitizeHash(Map<String, Object> h,
				ParsedTag p,
				HTMLParseContext pc) throws DataFilterException {
			Map<String, Object> hn = super.sanitizeHash(h, p, pc);
			String rawHref = getHashString(h, "href");
			String accepted = null;
			if (rawHref != null) {
				// Decode here and encode below, for the same reason we do in sanitizeHash().
				accepted = pc.cb.onBaseHref(HTMLDecoder.decode(rawHref));
			}
			if (accepted == null) {
				pc.writeAfterTag.append("<!-- deleted invalid base href -->");
				return null;
			}
			hn.put("href", HTMLEncoder.encode(accepted));
			return hn;
		}

	}
	
	/**
	 * Runs a piece of CSS through {@code CSSParser} and returns the filtered
	 * result.
	 *
	 * @param style the CSS text; may be null.
	 * @param cb filter callback handed to the CSS parser.
	 * @param hpc parse context; supplies the charset and the
	 * "only detecting charset" flag.
	 * @param isInline passed on to the CSS parser.
	 * @return the filtered CSS; a CSS comment if the parser could not match
	 * the input; or null if {@code style} is null, we are only detecting the
	 * charset, or the filtered output is empty.
	 */
	static String sanitizeStyle(String style, FilterCallback cb, HTMLParseContext hpc, boolean isInline) throws DataFilterException {
		if(style == null) return null;
		if(hpc.onlyDetectingCharset) return null;
		Reader r = new StringReader(style);
		Writer w = new StringWriter();
		// Only the log message is trimmed; the parser sees the original text.
		if(logMINOR) Logger.minor(HTMLFilter.class, "Sanitizing style: " + style.trim());
		CSSParser pc = new CSSParser(r, w, false, cb, hpc.charset, false, isInline);
		try {
			pc.parse();
		} catch (IOException e) {
			// Best effort: log (with the cause) and use whatever was written so far.
			Logger.error(
				HTMLFilter.class,
				"IOException parsing inline CSS!",
				e);
		} catch (Error e) {
			// Constant-first comparison: an Error without a message (e.g.
			// StackOverflowError) must be rethrown, not turned into an NPE.
			if ("Error: could not match input".equals(e.getMessage())) {
				// this sucks, it should be a proper exception
				Logger.normal(
					HTMLFilter.class,
					"CSS Parse Error!",
					e);
				return "/* "+l10n("couldNotParseStyle")+" */";
			}
			throw e;
		}
		String s = w.toString();
		if (s.length() == 0)
			return null;
		if(logMINOR) Logger.minor(HTMLFilter.class, "Style finally: " + s);
		return s;
	}

	static String escapeQuotes(String s) {
		StringBuilder buf = new StringBuilder(s.length());
		for (int x = 0; x < s.length(); x++) {
			char c = s.charAt(x);
			if (c == '\"') {
				buf.append(""");
			} else {
				buf.append(c);
			}
		}
		return buf.toString();
	}

	/**
	 * Scripting is not supported: whatever is passed in, the answer is null,
	 * i.e. "remove it".
	 *
	 * @param script the script text (ignored).
	 * @return always null.
	 */
	static String sanitizeScripting(String script) {
		// FIXME: at some point we may want to allow certain recipes.
		return null;
	}

	/**
	 * Convenience overload: sanitises a URI with no type override and no
	 * charset information.
	 */
	static String sanitizeURI(String uri, FilterCallback cb, boolean inline) throws CommentException {
		final String noType = null;
		final String noCharset = null;
		return sanitizeURI(uri, noType, noCharset, noCharset, cb, inline);
	}

	/*
	 * While we're only interested in the type and the charset, the format is a
	 * lot more flexible than that. (avian) TEXT/PLAIN; format=flowed;
	 * charset=US-ASCII IMAGE/JPEG; name=test.jpeg; x-unix-mode=0644
	 */
	/**
	 * Splits a MIME type with parameters into the bare type and the charset.
	 * Fields are separated by ';'. The first field is the type; of the
	 * remaining {@code name=value} fields only {@code charset} is used, and
	 * the last occurrence wins. Parameter names are matched
	 * case-insensitively, as MIME requires.
	 *
	 * @param type the full type string; must not be null.
	 * @return a two-element array: the trimmed type, and the trimmed charset
	 * value or null if no charset parameter is present.
	 */
	public static String[] splitType(String type) {
		StringFieldParser sfp = new StringFieldParser(type, ';');
		String bareType = sfp.nextField().trim();
		String charset = null;
		while (sfp.hasMoreFields()) {
			String param = sfp.nextField();
			int x = param.indexOf('=');
			if (x == -1)
				continue; // not a name=value parameter
			String name = param.substring(0, x).trim();
			String value = param.substring(x + 1).trim();
			if (name.equalsIgnoreCase("charset"))
				charset = value;
		}
		return new String[] { bareType, charset };
	}

	// A simple string splitter
	// StringTokenizer doesn't work well for our purpose. (avian)
	/**
	 * Splits a string into fields on a single separator character. Empty
	 * fields are preserved, including a trailing one after a final
	 * separator, and an empty string yields exactly one empty field.
	 */
	static class StringFieldParser {
		private String str;
		private int maxPos, curPos;
		private char c;

		/** Splits on the tab character. */
		public StringFieldParser(String str) {
			this(str, '\t');
		}

		/**
		 * @param str the string to split; must not be null.
		 * @param c the separator character.
		 */
		public StringFieldParser(String str, char c) {
			this.str = str;
			this.c = c;
			this.curPos = 0;
			this.maxPos = str.length();
		}

		/** @return true while {@link #nextField()} would return a field. */
		public boolean hasMoreFields() {
			return !(curPos > maxPos);
		}

		/** @return the next field (possibly empty), or null when exhausted. */
		public String nextField() {
			if (!hasMoreFields())
				return null;
			final int start = curPos;
			final int separatorAt = str.indexOf(c, start);
			// No further separator: the field runs to the end of the string.
			final int end = (separatorAt == -1) ? maxPos : separatorAt;
			// Step past the separator (or past the end, which marks exhaustion).
			curPos = end + 1;
			return str.substring(start, end);
		}
	}

	/**
	 * Like the six-argument {@code sanitizeURI}, but never throws
	 * {@code CommentException}: the exception text is written (HTML-encoded)
	 * as a comment after the current tag, and null is returned instead.
	 */
	static String htmlSanitizeURI(
			String suri,
			String overrideType,
			String overrideCharset,
			String maybeCharset,
			FilterCallback cb,
			HTMLParseContext pc,
			boolean inline) {
		String sanitized;
		try {
			sanitized = sanitizeURI(suri, overrideType, overrideCharset, maybeCharset, cb, inline);
		} catch (CommentException e) {
			String reason = HTMLEncoder.encode(e.toString());
			pc.writeAfterTag.append("<!-- ").append(reason).append(" -->");
			sanitized = null;
		}
		return sanitized;
	}
	
	/**
	 * Passes a URI through the filter callback's {@code processURI},
	 * optionally forcing a MIME type (with charset) or hinting at a charset.
	 *
	 * @param suri the URI to sanitise.
	 * @param overrideType MIME type to force, or null.
	 * @param overrideCharset charset appended to {@code overrideType} as
	 * {@code "; charset=..."}; ignored when null or empty, and also when
	 * there is no type to attach it to.
	 * @param maybeCharset if no override charset is given, appended to the
	 * resulting URI as a {@code maybecharset} query parameter; may be null.
	 * @param cb the filter callback.
	 * @param inline passed on to the callback.
	 * @return whatever the callback returned, plus the optional
	 * {@code maybecharset} parameter.
	 */
	static String sanitizeURI(
		String suri,
		String overrideType,
		String overrideCharset,
		String maybeCharset,
		FilterCallback cb, boolean inline) throws CommentException {
		if(logMINOR)
			Logger.minor(HTMLFilter.class, "Sanitizing URI: "+suri+" ( override type "+overrideType +" override charset "+overrideCharset+" ) inline="+inline, new Exception("debug"));
		boolean hasOverrideCharset = (overrideCharset != null) && (overrideCharset.length() > 0);
		boolean addMaybe = !hasOverrideCharset && (maybeCharset != null);
		// Without a type there is nothing to attach the charset to; string
		// concatenation would otherwise produce the bogus type "null; charset=...".
		if(hasOverrideCharset && overrideType != null)
			overrideType += "; charset="+overrideCharset;
		String retval = cb.processURI(suri, overrideType, false, inline);
		// NOTE(review): guards against a null result from the callback; callers
		// of htmlSanitizeURI already treat null as "drop the URI".
		if(addMaybe && retval != null) {
			String separator = (retval.indexOf('?') != -1) ? "&" : "?";
			retval += separator+"maybecharset="+maybeCharset;
		}
		return retval;
	}

	/**
	 * Looks up {@code key} and returns the value only if it is a String.
	 *
	 * @return the String value, or null if the key is absent, mapped to null,
	 * or mapped to a non-String object.
	 */
	static String getHashString(Map<String, Object> h, String key) {
		Object value = h.get(key);
		// instanceof is false for null, so this also covers a missing key.
		return (value instanceof String) ? (String) value : null;
	}

	/** Looks up the localised string for {@code "HTMLFilter." + key}. */
	private static String l10n(String key) {
		final String fullKey = "HTMLFilter."+key;
		return NodeL10n.getBase().getString(fullKey);
	}

	/**
	 * Looks up the localised string for {@code "HTMLFilter." + key}, handing
	 * {@code pattern} and {@code value} on to the localisation layer.
	 */
	private static String l10n(String key, String pattern, String value) {
		final String fullKey = "HTMLFilter."+key;
		return NodeL10n.getBase().getString(fullKey, pattern, value);
	}

	/**
	 * This filter does not recognise any byte order marks of its own.
	 *
	 * @return always null.
	 */
	@Override
	public BOMDetection getCharsetByBOM(byte[] input, int length) throws DataFilterException {
		// No enhanced BOMs.
		// FIXME XML BOMs???
		final BOMDetection none = null;
		return none;
	}

	/**
	 * @return the number of bytes to read when looking for the charset:
	 * 64 kilobytes, because the charset could be defined anywhere in the
	 * head section.
	 */
	@Override
	public int getCharsetBufferSize() {
		final int kilobyte = 1024;
		return 64 * kilobyte;
	}
}