/**
 * (The MIT License)
 *
 * Copyright (c) 2008 - 2011:
 *
 * * {Aaron Patterson}[http://tenderlovemaking.com]
 * * {Mike Dalessio}[http://mike.daless.io]
 * * {Charles Nutter}[http://blog.headius.com]
 * * {Sergio Arbeo}[http://www.serabe.com]
 * * {Patrick Mahoney}[http://polycrystal.org]
 * * {Yoko Harada}[http://yokolet.blogspot.com]
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * 'Software'), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package nokogiri.internals;

import static nokogiri.internals.NokogiriHelpers.getLocalPart;
import static nokogiri.internals.NokogiriHelpers.getPrefix;
import static nokogiri.internals.NokogiriHelpers.isNamespace;
import static nokogiri.internals.NokogiriHelpers.stringOrNil;

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;

import nokogiri.XmlSyntaxError;

import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyClass;
import org.jruby.RubyObject;
import org.jruby.javasupport.util.RuntimeHelpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.ext.DefaultHandler2;

/**
 * A handler for SAX parsing.
 *
 * <p>Each SAX callback is forwarded as a method call on the object stored in
 * the <code>@document</code> instance variable of the Ruby object given to
 * the constructor.
 *
 * @author sergio
 * @author Yoko Harada <yokolet@gmail.com>
 */
public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler {

    /** Ruby class name of the parser for which valueless attributes get special handling. */
    private static final String HTML_PARSER_NAME = "Nokogiri::HTML::SAX::Parser";

    /**
     * Attribute names that are built without a value (three constructor
     * arguments instead of four) when the empty-attribute check is enabled.
     */
    private static final Set<String> EMPTY_ATTRS = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    "checked", "compact", "declare", "defer", "disabled", "ismap",
                    "multiple", "noresize", "nohref", "noshade", "nowrap",
                    "readonly", "selected")));

    /** Collects CDATA text; non-null only between startCDATA() and endCDATA(). */
    private StringBuilder buffer;

    private final Ruby ruby;
    private final RubyClass attrClass;
    private final IRubyObject object;

    /**
     * Stores parse errors with the most-recent error last.
     *
     * TODO: should these be stored in the document 'errors' array?
     * Currently only string messages are stored there.
     */
    private final LinkedList<XmlSyntaxError> errors = new LinkedList<XmlSyntaxError>();

    /** Set by setDocumentLocator(); null until the parser supplies one. */
    private Locator locator;

    /** True when the receiver's class name equals {@link #HTML_PARSER_NAME}. */
    private final boolean needEmptyAttrCheck;

    /**
     * Creates a handler bound to a Ruby parser object.
     *
     * @param runtime the JRuby runtime used to create Ruby values
     * @param object  the Ruby object whose <code>@document</code> receives the callbacks
     */
    public NokogiriHandler(Ruby runtime, IRubyObject object) {
        this.ruby = runtime;
        this.attrClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SAX::Parser::Attribute");
        this.object = object;
        String objectName = object.getMetaClass().getName();
        this.needEmptyAttrCheck = HTML_PARSER_NAME.equals(objectName);
    }

    /** Reports a skipped entity to the document as an "error" message. */
    @Override
    public void skippedEntity(String skippedEntity) {
        call("error", ruby.newString("Entity '" + skippedEntity + "' not defined\n"));
    }

    /** Remembers the locator so that getLine()/getColumn() can query it. */
    @Override
    public void setDocumentLocator(Locator locator) {
        this.locator = locator;
    }

    @Override
    public void startDocument() throws SAXException {
        call("start_document");
    }

    /** Forwards the XML declaration; each value that is null is passed through stringOrNil. */
    @Override
    public void xmlDecl(String version, String encoding, String standalone) {
        call("xmldecl",
             stringOrNil(ruby, version),
             stringOrNil(ruby, encoding),
             stringOrNil(ruby, standalone));
    }

    @Override
    public void endDocument() throws SAXException {
        call("end_document");
    }

    /**
     * Forwards a processing instruction.
     *
     * @param target the processing instruction target
     * @param data   the processing instruction data; SAX allows this to be
     *               null when none was supplied, in which case Ruby nil is passed
     */
    @Override
    public void processingInstruction(String target, String data) {
        IRubyObject rubyData = (data == null) ? ruby.getNil() : ruby.newString(data);
        call("processing_instruction", ruby.newString(target), rubyData);
    }

    /*
     * This calls "start_element_namespace" on the document.
     *
     * Attributes that define namespaces are passed in a separate
     * array of <code>[:prefix, :uri]</code> arrays and are not
     * passed with the other attributes.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
        // for attributes other than namespace attrs
        RubyArray rubyAttr = RubyArray.newArray(ruby);
        // for namespace defining attributes
        RubyArray rubyNSAttr = RubyArray.newArray(ruby);

        ThreadContext context = ruby.getCurrentContext();

        for (int i = 0; i < attrs.getLength(); i++) {
            String u = attrs.getURI(i);
            String qn = attrs.getQName(i);
            String val = attrs.getValue(i);
            String pre = getPrefix(qn);
            String ln = resolveLocalName(attrs.getLocalName(i), qn);

            if (isNamespace(qn)) {
                // A default namespace declaration ("xmlns") has no prefix.
                RubyArray ns = RubyArray.newArray(ruby, 2);
                if (ln.equals("xmlns")) ln = null;
                ns.add(stringOrNil(ruby, ln));
                ns.add(ruby.newString(val));
                rubyNSAttr.add(ns);
            } else {
                IRubyObject[] args;
                if (needEmptyAttrCheck && isEmptyAttr(ln)) {
                    // Valueless attribute: construct the Attribute without a value.
                    args = new IRubyObject[] {
                        stringOrNil(ruby, ln),
                        stringOrNil(ruby, pre),
                        stringOrNil(ruby, u)
                    };
                } else {
                    args = new IRubyObject[] {
                        stringOrNil(ruby, ln),
                        stringOrNil(ruby, pre),
                        stringOrNil(ruby, u),
                        stringOrNil(ruby, val)
                    };
                }
                IRubyObject attr = RuntimeHelpers.invoke(context, attrClass, "new", args);
                rubyAttr.add(attr);
            }
        }

        localName = resolveLocalName(localName, qName);
        call("start_element_namespace",
             stringOrNil(ruby, localName),
             rubyAttr,
             stringOrNil(ruby, getPrefix(qName)),
             stringOrNil(ruby, uri),
             rubyNSAttr);
    }

    /**
     * Returns the given local name, or the local part of the qualified name
     * when the parser reported no local name (null or empty).
     */
    private static String resolveLocalName(String localName, String qName) {
        if (localName == null || localName.length() == 0) {
            return getLocalPart(qName);
        }
        return localName;
    }

    /** Returns true when the name is one of the attributes listed in EMPTY_ATTRS. */
    private boolean isEmptyAttr(String name) {
        return EMPTY_ATTRS.contains(name);
    }

    /**
     * Returns the line number reported by the current locator.
     * Throws NullPointerException if setDocumentLocator() has not been called.
     */
    public Integer getLine() {
        return locator.getLineNumber();
    }

    /**
     * Returns the locator's column number minus one.
     * Throws NullPointerException if setDocumentLocator() has not been called.
     */
    public Integer getColumn() {
        return locator.getColumnNumber() - 1;
    }

    /**
     * Tells whether the receiver's <code>@document</code> is a
     * <code>Nokogiri::XML::FragmentHandler</code>.
     *
     * Currently not called: the fragment-handler special case in
     * startElement() is disabled.
     */
    private boolean isFromFragmentHandler() {
        if (!(object instanceof RubyObject)) {
            return false;
        }
        IRubyObject document = ((RubyObject) object).getInstanceVariable("@document");
        if (document == null) {
            return false;
        }
        return "Nokogiri::XML::FragmentHandler".equals(document.getMetaClass().getName());
    }

    /**
     * Forwards the end of an element as "end_element_namespace".
     *
     * The local name falls back to the local part of qName when the parser
     * reports none, mirroring what startElement() does, so that the start and
     * end callbacks for one element carry the same name.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        call("end_element_namespace",
             stringOrNil(ruby, resolveLocalName(localName, qName)),
             stringOrNil(ruby, getPrefix(qName)),
             stringOrNil(ruby, uri));
    }

    /**
     * Inside a CDATA section the text is accumulated in the buffer;
     * otherwise it is forwarded immediately as "characters".
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (buffer != null) {
            buffer.append(ch, start, length);
        } else {
            call("characters", ruby.newString(new String(ch, start, length)));
        }
    }

    @Override
    public void comment(char[] ch, int start, int length) throws SAXException {
        call("comment", ruby.newString(new String(ch, start, length)));
    }

    /** Starts buffering character data until endCDATA() is reached. */
    @Override
    public void startCDATA() throws SAXException {
        buffer = new StringBuilder();
    }

    /** Emits the buffered text as one "cdata_block" and stops buffering. */
    @Override
    public void endCDATA() throws SAXException {
        call("cdata_block", ruby.newString(buffer.toString()));
        buffer = null;
    }

    /** Records the error and forwards its message to the document as "error". */
    @Override
    public void error(SAXParseException saxpe) {
        addError(XmlSyntaxError.createError(ruby, saxpe));
        call("error", ruby.newString(saxpe.getMessage()));
    }

    /** Records the fatal error and forwards its message to the document as "error". */
    @Override
    public void fatalError(SAXParseException saxpe) throws SAXException {
        addError(XmlSyntaxError.createFatalError(ruby, saxpe));
        call("error", ruby.newString(saxpe.getMessage()));
    }

    /** Forwards the warning message to the document; warnings are not recorded. */
    @Override
    public void warning(SAXParseException saxpe) {
        call("warning", ruby.newString(saxpe.getMessage()));
    }

    /** Appends an error to the end of the recorded error list. */
    protected synchronized void addError(XmlSyntaxError e) {
        errors.add(e);
    }

    /** Returns the number of errors recorded so far. */
    public synchronized int getErrorCount() {
        return errors.size();
    }

    /**
     * Returns the most recently recorded error.
     * Throws java.util.NoSuchElementException when no error has been recorded.
     */
    public synchronized IRubyObject getLastError() {
        return errors.getLast();
    }

    /** Invokes a no-argument method on the document. */
    private void call(String methodName) {
        ThreadContext context = ruby.getCurrentContext();
        RuntimeHelpers.invoke(context, document(context), methodName);
    }

    /** Invokes a one-argument method on the document. */
    private void call(String methodName, IRubyObject argument) {
        ThreadContext context = ruby.getCurrentContext();
        RuntimeHelpers.invoke(context, document(context), methodName, argument);
    }

    /** Invokes a two-argument method on the document. */
    private void call(String methodName, IRubyObject arg1, IRubyObject arg2) {
        ThreadContext context = ruby.getCurrentContext();
        RuntimeHelpers.invoke(context, document(context), methodName, arg1, arg2);
    }

    /** Invokes a three-argument method on the document. */
    private void call(String methodName, IRubyObject arg1, IRubyObject arg2, IRubyObject arg3) {
        ThreadContext context = ruby.getCurrentContext();
        RuntimeHelpers.invoke(context, document(context), methodName, arg1, arg2, arg3);
    }

    /** Invokes a five-argument method on the document. */
    private void call(String methodName,
                      IRubyObject arg0,
                      IRubyObject arg1,
                      IRubyObject arg2,
                      IRubyObject arg3,
                      IRubyObject arg4) {
        IRubyObject[] args = { arg0, arg1, arg2, arg3, arg4 };
        ThreadContext context = ruby.getCurrentContext();
        RuntimeHelpers.invoke(context, document(context), methodName, args);
    }

    /**
     * Returns the receiver's <code>@document</code> instance variable, or
     * Ruby nil when the receiver is not a RubyObject.
     */
    private IRubyObject document(ThreadContext context) {
        if (object instanceof RubyObject) {
            return ((RubyObject) object).fastGetInstanceVariable("@document");
        }
        return context.getRuntime().getNil();
    }
}