package com.deepnighttwo.resourceresolver.douban.resolver.utils; import java.io.InputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; /** * * SAXP parser working with XMLSearchUnit. * * @author mzang */ public class DoubanSearchParser extends DefaultHandler { // create and initial search units public static final XMLSearchUnit DETAILS_LINK_API_PATH = new XMLSearchUnit( "/feed/entry/id"); public static final XMLSearchUnit DETAILS_CONTENT_PATH = new XMLSearchUnit( "/entry/summary"); public static final XMLSearchUnit DETAILS_TITLE_PATH = new XMLSearchUnit( "/entry/title"); public static final XMLSearchUnit DETAILS_CHINESE_NAME_PATH = new XMLSearchUnit( "/entry/db:attribute"); public static final XMLSearchUnit DETAILS_RATINGE_PATH = new XMLSearchUnit( "/entry/gd:rating"); public static final XMLSearchUnit DETAILS_RATINGE_RATER_COUNT_PATH = new XMLSearchUnit( "/entry/gd:rating"); public static final XMLSearchUnit DETAILS_LINK_URL_PATH = new XMLSearchUnit( "/feed/entry/link"); static { DETAILS_LINK_URL_PATH.addAttributeValidation("rel", "alternate"); DETAILS_LINK_URL_PATH.setExpectedAttr("href"); DETAILS_CHINESE_NAME_PATH.addAttributeValidation("lang", "zh_CN"); DETAILS_CHINESE_NAME_PATH.addAttributeValidation("name", "aka"); DETAILS_RATINGE_PATH.setExpectedAttr("average"); DETAILS_RATINGE_RATER_COUNT_PATH.setExpectedAttr("numRaters"); } // a map to store the XMLSearchUnit and value private Map<XMLSearchUnit, String> results = new HashMap<XMLSearchUnit, String>(); // a counter of search unit. if it is 0, then all search unit finds a match // value and the result of the XML will be skipped. private int count = 0; private StringBuilder path = new StringBuilder(); private static final String pathSeparater = "/"; private XMLSearchUnit[] searchUnits; List<XMLSearchUnit> foundItems = new ArrayList<XMLSearchUnit>(); /** * constructor, accept XML input stream, 0 or more search unit instances. * * @param input * @param expectedPath * @return */ public Map<XMLSearchUnit, String> parseResults(InputStream input, XMLSearchUnit... expectedPath) { for (XMLSearchUnit search : expectedPath) { results.put(search, null); } searchUnits = expectedPath; count = expectedPath.length; XMLReader xmlReader = null; try { SAXParserFactory spfactory = SAXParserFactory.newInstance(); spfactory.setValidating(false); SAXParser saxParser = spfactory.newSAXParser(); xmlReader = saxParser.getXMLReader(); xmlReader.setContentHandler(this); xmlReader.parse(new InputSource(input)); } catch (Exception e) { System.err.println(e); System.exit(1); } return results; } private void addToPath(String addPath) { path.append(pathSeparater).append(addPath.toLowerCase()); } private void popPath() { int index = path.lastIndexOf(pathSeparater); // String removedPath = path.substring(index); path.delete(index, path.length()); } @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { foundItems.clear(); if (count == 0) { return; } // update path addToPath(qName); List<XMLSearchUnit> foundAttrItems = null; // check if current node matches search units. if it is a node value // search, then store it in a member variable named foundItems because // the value of the node is known only when reaches the end of the // node.but for attribute search, it value is known here. So then are // put in a local variable list named foundAttrItems. for (XMLSearchUnit unit : searchUnits) { if (unit.match(path.toString(), attributes) == true) { if (unit.getExpectedAttr() == null) { foundItems.add(unit); } else { if (foundAttrItems == null) { foundAttrItems = new ArrayList<XMLSearchUnit>(); } foundAttrItems.add(unit); } } } // if no attribute match, return. if (foundAttrItems == null) { return; } // fill search unit value using attribute value. update count. for (XMLSearchUnit attrUnit : foundAttrItems) { String attrValue = attributes.getValue(attrUnit.getExpectedAttr()); if (results.get(attrUnit) == null) { count--; } results.put(attrUnit, attrValue); count--; } } /** * if current node matches, the the node value is useful, store it. */ @Override public void characters(char[] ch, int start, int length) throws SAXException { if (count == 0) { return; } if (foundItems.size() == 0) { return; } for (XMLSearchUnit unit : foundItems) { String content = new String(ch, start, length); if (results.get(unit) == null) { count--; } results.put(unit, content); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { foundItems.clear(); if (count == 0) { return; } popPath(); } }