package test02.litestruts.sax;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Objects;

import test02.litestruts.Action;

/**
 * A minimal, character-driven SAX-style XML parser.
 *
 * <p>The parser reads a file one character at a time, runs a small state
 * machine over it and reports events (start/end of document, start/end of
 * element, inner text) to the registered {@link SAXParserHandler}.
 *
 * <p>Known limitations of the state machine (unchanged by design):
 * <ul>
 *   <li>Line breaks ({@code '\n'} and {@code '\r'}) are dropped everywhere,
 *       including inside text and attribute values.</li>
 *   <li>Only a plain space separates a tag name from its attributes.</li>
 *   <li>Attribute values must be enclosed in double quotes.</li>
 *   <li>Self-closing tags ({@code <a/>}) are not recognised.</li>
 *   <li>Characters other than a space or {@code '<'} that directly follow an
 *       end tag or a {@code <? ... >} / {@code <! ... >} section are
 *       discarded.</li>
 * </ul>
 *
 * <p>The parser is a singleton and keeps its working state in static fields,
 * so it must not be used from several threads at once.
 */
public class SAXParser {

    /** The one and only parser instance (singleton pattern). */
    private static final SAXParser parserInstance = new SAXParser();

    /** Receiver of the parse events; must be set before {@link #parse(String)}. */
    private static SAXParserHandler parserHandler;

    /** Current state of the state machine. */
    private static SAXParserState state = SAXParserState.OUT_OF_TAG;

    /** Singleton pattern: instances are only created inside this class. */
    private SAXParser() {
    }

    /**
     * Returns the shared parser instance.
     *
     * @return the singleton parser
     */
    public static SAXParser getInstance() {
        return parserInstance;
    }

    /**
     * Parses the file at {@code path} and feeds the resulting events to the
     * registered handler.
     *
     * <p>I/O problems are reported on standard error and do not abort the
     * call; whatever the handler has collected so far is still returned.
     * When the file cannot be opened at all, {@code startDocument()} is not
     * invoked but {@code endDocument()} still is.
     *
     * @param path path of the XML file to read
     * @return the value produced by the handler's {@code endDocument()} callback
     * @throws NullPointerException if no handler has been registered through
     *         {@link #setHandler(SAXParserHandler)}
     */
    public Action parse(String path) {
        // Fail fast, before touching the file system, with a message that
        // tells the caller what is actually missing.
        Objects.requireNonNull(parserHandler,
                "No SAXParserHandler registered; call setHandler() before parse()");

        // The working state is static: start every document from a clean
        // slate so a previous (possibly aborted) parse cannot leak into this one.
        resetParserState();

        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that this matches the encoding of the parsed files.
        try (BufferedReader br = new BufferedReader(new FileReader(path))) {
            // callback start document
            parserHandler.startDocument();

            int currentCharCode;
            while ((currentCharCode = br.read()) != -1) {
                handleParser((char) currentCharCode);
                if (state == SAXParserState.SYNTAX_ERROR) {
                    // No further events can be produced once the state
                    // machine is in the error state: report once and stop.
                    System.err.println("SAXParser: syntax error in " + path
                            + ": unexpected '>' outside of a tag; parsing aborted");
                    break;
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return parserHandler.endDocument();
    }

    /**
     * Registers the handler that receives the parse events.
     *
     * @param handler the event receiver used by subsequent {@link #parse(String)} calls
     */
    public void setHandler(SAXParserHandler handler) {
        parserHandler = handler;
    }

    /** Puts the state machine and all scratch buffers back to their initial values. */
    private static void resetParserState() {
        state = SAXParserState.OUT_OF_TAG;
        SAXParsedData.clear();
    }

    /**
     * Reports the start tag collected so far to the handler and prepares the
     * scratch buffers for the content that follows it.
     */
    private static void fireStartElement() {
        // Hand out a copy: the shared attribute map is cleared right below,
        // which would otherwise empty a map the handler may have kept.
        parserHandler.startElement(SAXParsedData.tagName.toString(),
                new HashMap<String, String>(SAXParsedData.attributes));
        SAXParsedData.clear();
        state = SAXParserState.CLOSE_START_TAG;
    }

    /**
     * Advances the state machine by one input character.
     *
     * @param c the next character of the document
     */
    private static void handleParser(char c) {
        // This SAX Parser will ignore any line wrap ('\r' as well, so that
        // CRLF files do not leak carriage returns into names and keys).
        if (c == '\n' || c == '\r') {
            return;
        }

        switch (state) {
            case OUT_OF_TAG: {
                if (c == '<') {
                    // Text made up only of whitespace is not reported.
                    String text = SAXParsedData.innerText.toString();
                    if (text.trim().length() != 0) {
                        parserHandler.innerText(text);
                    }
                    SAXParsedData.innerText.setLength(0);
                    SAXParsedData.tagName.setLength(0);
                    state = SAXParserState.BEGIN_START_OR_END_TAG;
                } else if (c == '>') {
                    state = SAXParserState.SYNTAX_ERROR;
                } else {
                    SAXParsedData.innerText.append(c);
                }
                break;
            }
            case BEGIN_START_OR_END_TAG: {
                if (c == '/') {
                    SAXParsedData.tagName.setLength(0);
                    state = SAXParserState.IN_END_TAG;
                } else if (c == '?' || c == '!') {
                    state = SAXParserState.METADATA;
                } else {
                    SAXParsedData.tagName.append(c);
                    state = SAXParserState.IN_START_TAG;
                }
                break;
            }
            case IN_START_TAG: {
                if (c == ' ') {
                    state = SAXParserState.SPACE_IN_START_TAG;
                } else if (c == '>') {
                    // callback startElement event
                    fireStartElement();
                } else {
                    SAXParsedData.tagName.append(c);
                }
                break;
            }
            case SPACE_IN_START_TAG: {
                // This state is only reachable from IN_START_TAG, so the tag
                // name is never empty here.
                if (c == '>') {
                    // Trailing space before '>' (e.g. <a b="c" >): the tag is
                    // complete, do not mistake '>' for an attribute key.
                    fireStartElement();
                } else if (c != ' ') {
                    SAXParsedData.attribKey.append(c);
                    state = SAXParserState.IN_ATTRIB_KEY;
                }
                break;
            }
            case IN_ATTRIB_KEY: {
                if (c == '=') {
                    state = SAXParserState.IN_ATTRIB_EQUAL;
                } else {
                    SAXParsedData.attribKey.append(c);
                }
                break;
            }
            case IN_ATTRIB_EQUAL: {
                // Everything up to the opening quote is skipped.
                if (c == '"') {
                    state = SAXParserState.IN_ATTRIB_VALUE;
                }
                break;
            }
            case IN_ATTRIB_VALUE: {
                if (c == '"') {
                    SAXParsedData.newAttribute();
                    state = SAXParserState.IN_START_TAG;
                } else {
                    SAXParsedData.attribValue.append(c);
                }
                break;
            }
            case CLOSE_START_TAG: {
                if (c == '<') {
                    state = SAXParserState.BEGIN_START_OR_END_TAG;
                } else {
                    SAXParsedData.innerText.append(c);
                    state = SAXParserState.OUT_OF_TAG;
                }
                break;
            }
            case IN_END_TAG: {
                if (c == '>') {
                    // callback endElement event
                    parserHandler.endElement(SAXParsedData.tagName.toString());
                    state = SAXParserState.CLOSE_END_TAG;
                } else {
                    SAXParsedData.tagName.append(c);
                }
                break;
            }
            case CLOSE_END_TAG: {
                if (c == ' ') {
                    state = SAXParserState.OUT_OF_TAG;
                } else if (c == '<') {
                    SAXParsedData.tagName.setLength(0);
                    state = SAXParserState.BEGIN_START_OR_END_TAG;
                }
                // Any other character right after an end tag is discarded.
                break;
            }
            case METADATA: {
                // Content of <? ... > and <! ... > is skipped up to the first '>'.
                if (c == '>') {
                    state = SAXParserState.CLOSE_END_TAG;
                }
                break;
            }
            case SYNTAX_ERROR: {
                // Terminal state: parse() reports the error and stops reading,
                // so there is nothing left to do for further characters.
                break;
            }
        }
    }

    /** States of the character-driven state machine. */
    private enum SAXParserState {
        // The state when parser meets "<". This is a pending state.
        // If the next char is "/", the state will be IN_END_TAG,
        // if it is "?" or "!", the state will be METADATA,
        // otherwise, the state will be IN_START_TAG.
        BEGIN_START_OR_END_TAG,

        // The state when parser is reading the name inside a start tag (<...>).
        // When parser meets ">", callback "startElement" event.
        IN_START_TAG,

        // The state when parser is reading inside an end tag (</...>).
        // When parser meets ">", callback "endElement" event.
        IN_END_TAG,

        // The state after parser meets " " while in IN_START_TAG.
        // Further spaces are skipped; ">" finishes the start tag and any
        // other char begins an attribute key.
        SPACE_IN_START_TAG,

        // Reading an attribute key, waiting for the opening quote after "=",
        // and reading the quoted attribute value, respectively.
        IN_ATTRIB_KEY, IN_ATTRIB_EQUAL, IN_ATTRIB_VALUE,

        // The states right after the ">" of a start tag / of an end tag.
        CLOSE_START_TAG, CLOSE_END_TAG,

        // The state when parser is reading any char at the outside of <XXX>,
        // or between two <XXX>. This is a pending state.
        // Contents between <XXX> will be recorded, but if the contents consist
        // only of spaces, the content will be discarded.
        // Otherwise, callback "innerText" event.
        OUT_OF_TAG,

        // Inside a <? ... > or <! ... > section, which is skipped.
        METADATA,

        // A ">" was met outside of any tag.
        SYNTAX_ERROR
    }

    /** Scratch buffers holding the pieces of the element currently being read. */
    private static class SAXParsedData {
        private static final StringBuilder tagName = new StringBuilder();
        private static final StringBuilder attribKey = new StringBuilder();
        private static final StringBuilder attribValue = new StringBuilder();
        private static final StringBuilder innerText = new StringBuilder();
        private static final HashMap<String, String> attributes = new HashMap<>();

        /** Empties every buffer and the attribute map. */
        private static void clear() {
            tagName.setLength(0);
            attribKey.setLength(0);
            attribValue.setLength(0);
            innerText.setLength(0);
            attributes.clear();
        }

        /** Stores the key/value pair read so far and resets both buffers. */
        private static void newAttribute() {
            attributes.put(attribKey.toString(), attribValue.toString());
            attribKey.setLength(0);
            attribValue.setLength(0);
        }
    }
}