/**
 *
 */
package ch.panter.edu.parser;

import java.io.Reader;
import java.io.Writer;

/**
 * Minimal character-driven HTML parser that copies the text directly
 * following a body start tag to the output.
 *
 * <p>The parser is a two-state machine:
 * <ul>
 *   <li>{@code UNKNOWN}: characters outside a tag are ignored; characters
 *       inside a tag are collected so the tag can be inspected once its
 *       closing {@code '>'} arrives.</li>
 *   <li>{@code IN_BODY}: every character is written to the output until the
 *       next {@code '<'} is read.</li>
 * </ul>
 *
 * <p>Note that <em>any</em> tag encountered while in {@code IN_BODY} switches
 * the parser back to {@code UNKNOWN}; text after such a tag is only emitted
 * again once another body start tag has been read.
 *
 * <p>Instances keep the state of the running parse in fields and are
 * therefore not safe for concurrent use.
 *
 * @author seb
 */
public class HtmlParser01 implements HtmlParser {

    /** States of the parser's state machine. */
    enum STATE { UNKNOWN, IN_BODY }

    /** Name of the tag whose content is copied to the output. */
    private static final String BODY_TAG = "body";

    /** Current state of the state machine. */
    private STATE state = STATE.UNKNOWN;

    /** Collects the content of the tag being read; {@code null} while outside a tag. */
    private StringBuilder buf = null;

    /** Character currently being processed. */
    private char c;

    /** Number of characters read so far by the current parse (diagnostics only). */
    private int cnt = 0;

    Reader input;
    Writer output;

    /**
     * Reads {@code input} character by character until end of stream and
     * writes the text that follows a body start tag (up to the next
     * {@code '<'}) to {@code output}.
     *
     * <p>All per-parse state is reset on entry, so one instance can be used
     * for several consecutive parses without leftovers of an earlier input
     * (for example an unterminated tag) leaking into the next one.
     *
     * @param input  source of the HTML characters
     * @param output sink receiving the extracted text; it is neither flushed
     *               nor closed by this method
     * @throws Exception if reading from {@code input} or writing to
     *                   {@code output} fails
     * @see ch.panter.edu.parser.HtmlParser#parse(java.io.Reader, java.io.Writer)
     */
    public void parse(Reader input, Writer output) throws Exception {
        this.input = input;
        this.output = output;

        // Start every parse from a clean slate.
        this.state = STATE.UNKNOWN;
        this.buf = null;
        this.cnt = 0;

        int i;
        while (-1 != (i = input.read())) {
            this.c = (char) i;
            this.cnt++;
            switch (this.state) {
                case IN_BODY:
                    this.handleInBody();
                    break;
                default:
                    this.handleUnknown();
                    break;
            }
        }
    }

    /**
     * Handles the current character while the parser is not copying body text.
     *
     * @throws Exception declared for symmetry with the other handlers
     */
    private void handleUnknown() throws Exception {
        if ('<' == c && null == buf) {
            // start of a tag
            this.buf = new StringBuilder();
        } else if ('>' == c && null != buf) {
            // end of a tag: the parser has to detect body tags
            if (isBodyTag(buf)) {
                this.state = STATE.IN_BODY;
            }
            this.buf = null;
        } else if (null != buf) {
            // we are inside a tag, fill buffer with the tag content
            this.buf.append(this.c);
        }
        // otherwise: outside a tag, ignore the character
    }

    /**
     * Handles the current character while the parser is copying body text.
     *
     * @throws Exception if writing to the output fails
     */
    private void handleInBody() throws Exception {
        if ('<' == this.c) {
            // A tag starts: leave body mode and let the tag handling see the '<'.
            this.state = STATE.UNKNOWN;
            handleUnknown();
            return;
        }
        output.append(this.c);
    }

    /**
     * Tells whether the collected tag content denotes a body start tag.
     *
     * <p>Only the tag name (the characters up to the first whitespace) is
     * compared, ignoring case, so {@code <BODY>} and
     * {@code <body class="x">} are recognised as well as {@code <body>}.
     *
     * @param tag everything read between {@code '<'} and {@code '>'}
     * @return {@code true} if the tag name is "body"
     */
    private static boolean isBodyTag(CharSequence tag) {
        int end = 0;
        while (end < tag.length() && !Character.isWhitespace(tag.charAt(end))) {
            end++;
        }
        return BODY_TAG.equalsIgnoreCase(tag.subSequence(0, end).toString());
    }

    /**
     * Throws an exception describing the current parser position and state.
     * Not called by the parsing code in this class at present.
     *
     * @throws Exception always
     */
    private void raiseException() throws Exception {
        throw new Exception("Parse Error: cnt -> " + cnt + " c -> " + this.c
                + " state -> " + this.state + " buffer -> " + this.buf);
    }

    //////// getter & setter ////////////////////////////////////////

    /**
     * Returns the reader set by the last call to {@code parse} or {@code setInput}.
     *
     * @see ch.panter.edu.parser.HtmlParser#getInput()
     */
    public Reader getInput() {
        return input;
    }

    /**
     * Stores the given reader; {@code parse} replaces it with its own argument.
     *
     * @see ch.panter.edu.parser.HtmlParser#setInput(java.io.Reader)
     */
    public void setInput(Reader input) {
        this.input = input;
    }

    /**
     * Returns the writer set by the last call to {@code parse} or {@code setOutput}.
     *
     * @see ch.panter.edu.parser.HtmlParser#getOutput()
     */
    public Writer getOutput() {
        return output;
    }

    /**
     * Stores the given writer; {@code parse} replaces it with its own argument.
     *
     * @see ch.panter.edu.parser.HtmlParser#setOutput(java.io.Writer)
     */
    public void setOutput(Writer output) {
        this.output = output;
    }
}