/**
*
*/
package ch.panter.edu.parser;
import java.io.Reader;
import java.io.Writer;
/**
 * Character-driven tag outliner: reads markup one character at a time and
 * writes the content of every opening tag on its own line, indented by one
 * space per nesting level.
 *
 * <p>Known limitations (by design of this simple state machine): text between
 * tags is discarded, a {@code '>'} inside a quoted attribute value ends the
 * tag early, and comments/doctype declarations are treated like ordinary
 * opening tags.
 *
 * <p>Instances hold mutable parsing state and are therefore not meant to be
 * shared between concurrent {@link #parse(Reader, Writer)} calls.
 *
 * @author seb
 */
public class HtmlParser02 implements HtmlParser {

    /** Parser states: outside of any tag, or between {@code '<'} and {@code '>'}. */
    enum STATE {
        UNKNOWN, IN_TAG
    }

    /** Current state of the state machine. */
    private STATE state = STATE.UNKNOWN;
    /** Collects the characters of the tag currently being read. */
    private StringBuilder buf = null;
    /** Character currently being processed. */
    private char c;
    /** Number of characters consumed so far (used for error reporting). */
    private int cnt = 0;
    /** Current nesting depth; determines the indentation of the next tag. */
    private int stack = 0;

    Reader input;
    Writer output;

    /* (non-Javadoc)
     * @see ch.panter.edu.parser.HtmlParser#parse(java.io.Reader, java.io.Writer)
     */
    /**
     * Reads {@code input} until end of stream and writes one line per opening
     * tag to {@code output}.
     *
     * <p>All parsing state is reset on entry, so the same instance can be used
     * for several documents without the nesting depth of a previous run
     * leaking into the next one. The writer is neither flushed nor closed.
     *
     * @param input  source of the markup, read character by character
     * @param output destination of the indented tag outline
     * @throws Exception if reading from {@code input} or writing to
     *                   {@code output} fails
     */
    public void parse(Reader input, Writer output) throws Exception {
        this.input = input;
        this.output = output;

        // Start from a clean slate: a previous (possibly aborted) run must not
        // influence state, depth or the character counter of this one.
        this.state = STATE.UNKNOWN;
        this.buf = null;
        this.cnt = 0;
        this.stack = 0;

        int i;
        while (-1 != (i = input.read())) {
            this.c = (char) i;
            this.cnt++;
            switch (this.state) {
                case IN_TAG:
                    this.handleInTag();
                    break;
                default:
                    this.handleUnknown();
                    break;
            }
        }
    }

    /**
     * Handles one character while inside a tag (after {@code '<'}).
     *
     * @throws Exception if writing to the output fails
     */
    private void handleInTag() throws Exception {
        if ('/' == this.c && this.buf.length() == 0) {
            // A slash directly after '<' marks a closing tag. A slash anywhere
            // else (attribute values such as URLs, or the "/>" of a
            // self-closing tag) is ordinary tag content and handled below.
            if (this.stack > 0) {
                // Never go below zero: an unmatched closing tag must not
                // corrupt the indentation of the tags that follow.
                this.stack--;
            }
            // The remainder of the closing tag ("name>") is skipped by the
            // UNKNOWN state, which ignores everything except '<'.
            this.state = STATE.UNKNOWN;
        } else if ('>' == this.c) {
            // End of an opening tag: print it at the current depth.
            int len = this.buf.length();
            boolean selfClosing = len > 0 && '/' == this.buf.charAt(len - 1);

            for (int i = 0; i < this.stack; i++) {
                this.output.write(" ");
            }
            this.output.write(this.buf.toString());
            this.output.write('\n');

            // A self-closing tag ("<br/>") has no matching closing tag, so it
            // must not open a new nesting level.
            if (!selfClosing) {
                this.stack++;
            }
            this.state = STATE.UNKNOWN;
        } else {
            // We are inside a tag: fill the buffer with the tag name and
            // attributes. buf is always non-null here because it is allocated
            // when the IN_TAG state is entered.
            this.buf.append(this.c);
        }
    }

    /**
     * Handles one character while outside of a tag: everything except the
     * start of a tag is ignored.
     *
     * @throws Exception declared for symmetry with the other handlers
     */
    private void handleUnknown() throws Exception {
        // detect start of tag
        if ('<' == this.c) {
            this.state = STATE.IN_TAG;
            this.buf = new StringBuilder();
        }
    }

    /**
     * Throws an exception describing the current parser position and state.
     * Not called from within this class at the moment.
     *
     * @throws Exception always
     */
    private void raiseException() throws Exception {
        throw new Exception("Parse Error: cnt -> " + cnt + " c -> " + this.c
                + " state -> " + this.state + " buffer -> " + this.buf);
    }

    //////// getter & setter ////////////////////////////////////////

    /* (non-Javadoc)
     * @see ch.panter.edu.parser.HtmlParser#getInput()
     */
    /** @return the reader set via {@link #setInput} or the last {@link #parse} call */
    public Reader getInput() {
        return input;
    }

    /* (non-Javadoc)
     * @see ch.panter.edu.parser.HtmlParser#setInput(java.io.Reader)
     */
    /** @param input the reader to store; replaced by the argument of the next {@link #parse} call */
    public void setInput(Reader input) {
        this.input = input;
    }

    /* (non-Javadoc)
     * @see ch.panter.edu.parser.HtmlParser#getOutput()
     */
    /** @return the writer set via {@link #setOutput} or the last {@link #parse} call */
    public Writer getOutput() {
        return output;
    }

    /* (non-Javadoc)
     * @see ch.panter.edu.parser.HtmlParser#setOutput(java.io.Writer)
     */
    /** @param output the writer to store; replaced by the argument of the next {@link #parse} call */
    public void setOutput(Writer output) {
        this.output = output;
    }
}