// XMLNode.java example
//
// Explorer
// alloy4smt-master
/* Alloy Analyzer 4 -- Copyright (c) 2006-2009, Felix Chang
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package edu.mit.csail.sdg.alloy4;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.Map.Entry;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;

/** Immutable; this class represents an XML element node. */

public final class XMLNode implements Iterable<XMLNode> {

   /** The type (element name) of this node; never null. It stays "" until the parser assigns the element name; text nodes keep "". */
   private String type = "";

   /** If this node is a text node, this is its text; it stays "" for element nodes. */
   private String text = "";

   /** The set of (key,value) attribute pairs, held in a LinkedHashMap so that insertion order is kept; never null. */
   private final Map<String,String> map = new LinkedHashMap<String,String>();

   /** The list of direct children nodes (elements and text nodes), in the order they were added; never null. */
   private final List<XMLNode> sub = new ArrayList<XMLNode>();

   /** Constructs an empty XMLNode object (empty type, empty text, no attributes, no children). */
   private XMLNode() { }

   /** Reports how many direct subnodes this node has. */
   public int count() {
      final int total = sub.size();
      return total;
   }

   /** Returns the (name,value) attribute entries of this node as a read-only view. */
   public Set<Entry<String,String>> attributes() {
      Map<String,String> readOnly = Collections.unmodifiableMap(map);
      return readOnly.entrySet();
   }

   /** Renders this node and all of its descendants into a String, starting at indentation 0. */
   @Override public String toString() {
      final StringBuilder buffer = new StringBuilder();
      this.toString(buffer, 0);
      final String rendered = buffer.toString();
      return rendered;
   }

   /** Appends the rendering of this node (and, recursively, of its children) to the given StringBuilder.
    * <p> The node is preceded by "indent" spaces and followed by a newline; each child is rendered with 2 more spaces of indentation.
    * @param sb - the StringBuilder that receives the output
    * @param indent - the number of spaces to write before this node
    */
   public void toString(StringBuilder sb, int indent) {
      for(int remaining=indent; remaining>0; remaining--) sb.append(' ');
      // A node with nonempty text is a text node: it has no tag, no attributes and no children to print.
      if (text.length()!=0) {
         Util.encodeXML(sb, text);
         sb.append('\n');
         return;
      }
      Util.encodeXMLs(sb, "<", type);
      for(Map.Entry<String,String> attr: map.entrySet()) {
         Util.encodeXMLs(sb, " ", attr.getKey(), "=\"", attr.getValue(), "\"");
      }
      // A childless element is written in the self-closing form.
      if (sub.isEmpty()) {
         sb.append("/>\n");
         return;
      }
      sb.append(">\n");
      final int childIndent = indent+2;
      for(XMLNode child: sub) child.toString(sb, childIndent);
      for(int remaining=indent; remaining>0; remaining--) sb.append(' ');
      Util.encodeXMLs(sb, "</", type, ">\n");
   }

   /** Simple parser based on XML Specification 1.0 taking into account XML Specification Errata up to 2008/Jan/18. */
   private static final class XMLParser {

      /** True if we want to read text data also; when false, parseContent() skips character data instead of creating text nodes. */
      private final boolean wantText;

      /** The reader for the input XML file; the constructor makes sure it is a BufferedReader. */
      private final Reader reader;

      /** The current column in the input (reported as "column" by malform()); starts at 1. */
      private int x = 1;

      /** The current line in the input (reported as "line" by malform()); starts at 1. */
      private int y = 1;

      /** The current "readahead" character; -2 if the readahead cache is empty; -1 if EOF is detected; otherwise it is one char. */
      private int read = (-2);

      /** Creates a parser over the given reader; private, since only XMLNode should be able to construct an instance of this class.
       * <p> The reader is used as is when it already is a BufferedReader; otherwise it is wrapped in one.
       */
      private XMLParser(Reader reader, boolean wantText) {
         this.reader = (reader instanceof BufferedReader) ? reader : new BufferedReader(reader);
         this.wantText = wantText;
      }

      /** Always throws an IOException whose message is the given msg prefixed with the current line and column.
       * @throws IOException always
       */
      private void malform(String msg) throws IOException {
         StringBuilder full = new StringBuilder("Error at line ");
         full.append(y).append(" column ").append(x).append(": ").append(msg);
         throw new IOException(full.toString());
      }

      /** Consume and return the next character, taking it from the readahead cache when the cache is not empty.
       * <p> The line/column counters are advanced for the character that is returned.
       * @throws IOException if end-of-file is reached.
       * @throws IOException if an I/O error occurred.
       */
      private int read() throws IOException {
         if (read < -1) { read = reader.read(); }
         final int ans = read;
         if (ans < 0) {
            // The cache keeps the EOF marker, so every later call fails the same way.
            malform("Unexpected end of file.");
         }
         if (ans == '\n') {
            x = 1;
            y++;
         } else {
            x++;
         }
         read = -2;
         return ans;
      }

      /** Return the next character without consuming it (it stays in the readahead cache), or return -1 if end-of-file is reached.
       * @throws IOException if an I/O error occurred.
       */
      private int peek() throws IOException {
         final boolean cacheEmpty = (read < -1);
         if (cacheEmpty) {
            read = reader.read();
         }
         return read;
      }

      /** Consume up to and including the consecutive characters "char1" and "char2".
       * @param char1 - the first character of the two-character terminator
       * @param char2 - the second character of the two-character terminator
       * @throws IOException if we reached end-of-file without seeing the pattern.
       * @throws IOException if an I/O error occurred.
       */
      private void skipUntil(int char1, int char2) throws IOException {
         while(true) {
            if (read()==char1 && peek()==char2) {
               // Consume char2 through read() (rather than just emptying the readahead cache)
               // so that the line/column counters used by malform() stay in sync with the input.
               read();
               return;
            }
         }
      }

      /** Consume the next N characters (where N == length of string) and check that they spell out the given string.
       * <p> On a mismatch, the error is reported at the position where the expected string should have started.
       * @throws IOException if the next N characters do not match the given string.
       * @throws IOException if an I/O error occurred.
       */
      private void expect(String string) throws IOException {
         final int startX = x, startY = y;
         int i = 0;
         while (i < string.length()) {
            final int got = read();
            if (got != string.charAt(i)) {
               x = startX;
               y = startY;
               malform("Expects the string \""+string+"\"");
            }
            i++;
         }
      }

      /** Consume zero or more whitespace characters (space, tab, CR, LF), then consume and return the first character that is not whitespace.
       * @throws IOException if after skipping 0 or more white space character we reach end-of-file.
       * @throws IOException if an I/O error occurred.
       */
      private int skipSpace() throws IOException {
         int ch;
         do {
            ch = read();
         } while (ch==' ' || ch=='\t' || ch=='\r' || ch=='\n');
         return ch;
      }

      /*
       * Taking the 79 grammar rules from XML specification, and after making conservative simplifications, we get these rules:
       * ("conservative" in that well-formed XML documents parse correctly, but some malformed documents also parse successfully)
       *
       *    S                  ::=          (#x20 | #x9 | #xD | #xA)+
       *    Name               ::=          ( [A-Za-z0-9_:.-] | [#xC0-#xEFFFF] )+
       *    Nmtoken            ::=          ( [A-Za-z0-9_:.-] | [#xC0-#xEFFFF] )+
       *    Reference          ::=          '&' Name ';'   |    '&#' [0-9]+ ';'   |   '&#x' [0-9a-fA-F]+ ';'
       *    PEReference        ::=          '%' Name ';'
       *    SystemLiteral      ::=                           '...'  |  "..."
       *    PubidLiteral       ::=                           '...'  |  "..."
       *    AttValue           ::=                           '...'  |  "..."
       *    EntityValue        ::=                           '...'  |  "..."
       *    DefaultDecl        ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    ExternalID         ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    PublicID           ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    NotationType       ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    Enumeration        ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    EnumeratedType     ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    AttType            ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    Mixed              ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    children           ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    contentspec        ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    PEDef              ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    NDataDecl          ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    EntityDef          ::=                        (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*
       *    NotationDecl       ::=          '<!NOTATION'  (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*  '>'
       *    AttlistDecl        ::=          '<!ATTLIST'   (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*  '>'
       *    elementdecl        ::=          '<!ELEMENT'   (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*  '>'
       *    EntityDecl         ::=          '<!ENTITY'    (  '...'  |  "..."  |  [%()|#*?+,]  |  Name  |  S  )*  '>'
       *    PI                 ::=          '<?' ... '?>'
       *    Comment            ::=          '<!--'  ([^-] | ('-' [^-]))*   '-->'
       *    Misc               ::=          Comment | PI | S
       *    doctypedecl        ::=          '<!DOCTYPE' S  Name (S  ExternalID)? S? ('[' intSubset ']' S?)? '>'
       *    intSubset          ::=          (elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment | PEReference | S)*
       *
       * SkipNondata(false) will skip zero or more instances of the below, and thus it will consume (Misc | doctypedecl)*
       *   SPACE TAB CR LF
       *   <?...?>
       *   <!--...-->
       *   '<!' followed by SkipNondata(true) followed by '>'
       *
       * SkipNondata(true) will skip zero or more instances of the below, and thus it will consume intSubset*
       *   SPACE TAB CR LF
       *   <?...?>
       *   <!--...-->
       *   '<!' followed by SkipNondata(true) followed by '>'
       *   '['  followed by SkipNondata(true) followed by ']'
       *   '...'
       *   "..."
       *   any char that is not '<' nor '>' nor '[' nor ']' nor ''' nor '"'
       */

      /** Skip as much nondata as possible, then return the first character after that (or -1 if we end up at end-of-file).
       * <p> Specifically, skipNondata(false) consumes (Misc | doctypedecl)* from XML specification
       * <p> Likewise,     skipNondata(true)  consumes (intSubset)*          from XML specification
       * <p> The returned character has been consumed, with one exception: when '&lt;' is returned, the '&lt;' has been
       * consumed but the character following it is still unread.
       * <p> Every character is consumed through read(), so the line/column counters used by malform() stay accurate.
       * @param inner - true if we are inside a '&lt;!...&gt;' or '[...]' construct (so brackets and quoted strings are skipped too)
       * @throws IOException if the XML input is malformed.
       * @throws IOException if an I/O error occurred.
       */
      private int skipNondata(boolean inner) throws IOException {
         while(true) {
            if (peek()<0) return -1;
            int ch = read();
            if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') continue;
            if (ch == '<') {
               // Only peek at the character after '<', so that it is not counted twice when we hand it back to the caller.
               int next = peek();
               if (next == '?') { read(); skipUntil('?', '>'); continue; }
               if (next != '!') {
                  if (next < 0) read(); // end-of-file right after '<': read() raises the "Unexpected end of file." error
                  return '<';
               }
               read(); // consume the '!'
               if (peek() == '-') {
                  read();
                  if (read()!='-') malform("Expects start of <!--...-->");
                  skipUntil('-', '-');
                  if (read()!='>') malform("Expects end of <!--...-->");
                  continue;
               }
               // Some other '<!...>' declaration: skip its interior, which must be terminated by '>'
               if (skipNondata(true)!='>') malform("Expects end of <!...>");
            }
            else if (!inner || ch == ']' || ch=='>') { return ch; }
            else if (ch == '[') { if (skipNondata(true)!=']') malform("Expects end of [...]"); }
            else if (ch == '\'' || ch == '\"') { while(read()!=ch) { } }
            // any other character inside a declaration is simply skipped
         }
      }

      /** Parse an element name or attribute name.
       * <p> The name ends at the first whitespace, '=', '/', '&lt;', '&gt;', '[' or ']' character; that terminating
       * character is left unread. The returned name may be empty.
       * @throws IOException if end-of-file is reached before a terminating character.
       * @throws IOException if an I/O error occurred.
       */
      private String parseName() throws IOException {
         StringBuilder sb = new StringBuilder();
         while(true) {
            int ch = peek();
            if (ch<0) read(); // end-of-file: read() raises the "Unexpected end of file." error
            if (ch==' ' || ch=='\t' || ch=='\r' || ch=='\n' || ch=='=' || ch=='/' || ch=='<' || ch=='>' || ch=='[' || ch==']') {
               // The terminator was only peeked at, so it stays in the readahead cache and the line/column
               // counters advance for it exactly once (when the caller eventually reads it).
               return sb.toString();
            }
            read(); // consume the character we just peeked at
            sb.append((char)ch);
         }
      }

      /** Parse a value up to delim, assuming the initial delimiter (if any) has already been consumed; the closing delim is consumed too.
       * <p> For attribute values delim is either ' or "; parseContent() also calls this with '&lt;' to read character data.
       * <p> The five predefined entities (amp, quot, apos, lt, gt) and numeric character references (decimal and hexadecimal)
       * are decoded. Numeric references may denote any Unicode code point up to U+10FFFF; code points above U+FFFF are
       * stored as a surrogate pair.
       * @param delim - the character that terminates the value
       * @throws IOException if an entity is unknown or a numeric character reference is not a valid code point.
       * @throws IOException if end-of-file is reached or an I/O error occurred.
       */
      private String parseValue(int delim) throws IOException {
         StringBuilder sb = new StringBuilder(), sb2 = null;
         while(true) {
            int ch=read();
            if (ch==delim) return sb.toString();
            if (ch!='&') { sb.append((char)ch); continue; }
            // Collect the text between '&' and ';' into sb2 (reused across references)
            if (sb2==null) sb2=new StringBuilder(); else sb2.setLength(0);
            while((ch=read()) != ';') sb2.append((char)ch);
            if (sb2.length()>2 && sb2.charAt(0)=='#' && sb2.charAt(1)=='x') {
               try { ch=Integer.parseInt(sb2.substring(2), 16); } catch(NumberFormatException ex) { ch=(-1); }
            } else if (sb2.length()>1 && sb2.charAt(0)=='#'){
               try { ch=Integer.parseInt(sb2.substring(1)); } catch(NumberFormatException ex) { ch=(-1); }
            } else {
               String name = sb2.toString();
               if (name.equals("amp")) ch='&';
               else if (name.equals("quot")) ch='"';
               else if (name.equals("apos")) ch='\'';
               else if (name.equals("lt")) ch='<';
               else if (name.equals("gt")) ch='>';
               else ch=(-1);
            }
            if (ch<0 || ch>Character.MAX_CODE_POINT) malform("The entity \"&"+sb2.toString()+";\" is unknown.");
            // appendCodePoint (rather than a cast to char) keeps code points above U+FFFF intact
            sb.appendCodePoint(ch);
         }
      }

      /*
       * Below are the grammar rules for "element":
       * ==========================================
       *
       * element ::=  '<' Name (S  Name S? '=' S? AttValue)* S? '/>'
       *           |  '<' Name (S  Name S? '=' S? AttValue)* S? '>'  content   '</' Name  S? '>'
       *
       * content  ::=  CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
       * CDSect   ::=  '<![CDATA['     (Char* - (Char* ']]>' Char*))      ']]>'
       * CharData ::=  [^<&]* - ([^<&]* ']]>' [^<&]*)
       */

      /** Parse an element (and all its subelements), assuming the initial "less than" sign has already been consumed.
       * <p> On return, the element name and attributes have been stored into target, its children have been added
       * by parseContent(), and the whole element (up to and including its closing '&gt;') has been consumed.
       * @param target - the node that receives the parsed name, attributes and children
       * @throws IOException if the XML input is malformed.
       * @throws IOException if an I/O error occurred.
       */
      private void parseElement(XMLNode target) throws IOException {
         target.type = parseName();
         while(true) {
            boolean space = false;
            int ch = read();
            if (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') { space=true; ch=skipSpace(); }
            if (ch == '=') malform("Unexpected '='");
            if (ch == '/') {
               // empty-element tag
               if (read()!='>') malform("Expects '/>'");
               break;
            }
            if (ch == '>') {
               // parseContent() returns once the "</" of the matching end tag has been consumed
               parseContent(target);
               if (!target.type.equals(parseName())) malform("Start tag and end tag must have matching types.");
               if (skipSpace()!='>') malform("Expects '</"+target.type+">'");
               break;
            }
            if (!space) malform("Whitespace needed before a (key,value) pair.");
            // Push ch back as the first character of the attribute name. At this point ch is never a whitespace
            // character, so read() advanced the column by exactly one for it; undo that so that ch is not counted
            // twice when parseName() reads it again.
            read = ch;
            x--;
            String key = parseName();
            if (key.length()==0) malform("Attribute name cannot be empty.");
            if (skipSpace()!='=') malform("Expects = after the attribute name.");
            ch = skipSpace();
            if (ch != '\'' && ch != '\"') malform("Expects \' or \" as the start of the attribute value.");
            String value = parseValue(ch);
            target.map.put(key, value);
         }
      }

      /** Parses the content until the rightful closing "LESS THAN SIGN followed by FORWARD SLASH" are both consumed.
       * <p> Child elements are appended to parent. If this parser wants text, then character data (with CR and LF
       * replaced by spaces) and CDATA sections are appended to parent as text nodes; otherwise they are skipped.
       * Processing instructions and comments are always skipped.
       * @param parent - the node that receives the children
       * @throws IOException if the XML input is malformed.
       * @throws IOException if an I/O error occurred.
       */
      private void parseContent(XMLNode parent) throws IOException {
         StringBuilder sb = wantText ? new StringBuilder() : null;
         again:
            while(true) {
               // Consume everything up to and including the next '<'
               if (sb==null) {
                  while(read()!='<') {}
               } else {
                  sb.append(parseValue('<').replace('\r',' ').replace('\n', ' '));
                  parent.addText(sb);
               }
               // Only peek at the character after '<': if it starts a child element then it must stay unread for
               // parseElement(), and peeking (instead of reading and pushing back) counts its position only once.
               int ch=peek();
               if (ch>=0 && ch!='/' && ch!='?' && ch!='!') {
                  XMLNode newElem = new XMLNode();
                  parseElement(newElem);
                  parent.sub.add(newElem);
                  continue;
               }
               ch=read(); // at end-of-file this raises the "Unexpected end of file." error
               if (ch=='/') return;
               if (ch=='?') { skipUntil('?', '>'); continue; }
               // Here ch is '!', so this is either a comment or a CDATA section
               ch=read();
               if (ch=='-') {
                  if (read()!='-')  malform("Expects start of <!--...-->");
                  skipUntil('-', '-');
                  if (read()!='>')  malform("Expects end of <!--...-->");
                  continue;
               }
               if (ch!='[') malform("Expects <![CDATA[...]]>");
               expect("CDATA[");
               // ah and bh hold the previous two characters; a character is copied into sb only when it leaves
               // this two-character window, so the "]]" of the terminating "]]>" is never copied.
               for(int ah=0,bh=0; ;) {
                  ch=read();
                  if (ah==']' && bh==']' && ch=='>') {
                     parent.addText(sb);
                     continue again;
                  } else {
                     if (ah>0 && sb!=null) sb.append((char)ah);
                     ah=bh; bh=ch;
                  }
               }
            }
      }
   }

   /** Appends a new text node holding the current content of the given StringBuilder, then empties that StringBuilder.
    * <p> Nothing happens if the StringBuilder is null or empty.
    */
   private void addText(StringBuilder stringBuilder) {
      final boolean hasContent = (stringBuilder!=null) && (stringBuilder.length()>0);
      if (!hasContent) return;
      XMLNode textNode = new XMLNode();
      textNode.text = stringBuilder.toString();
      sub.add(textNode);
      stringBuilder.setLength(0);
   }

   /** Constructs the root XMLNode by parsing an entire XML document, then close the reader afterwards. */
   public XMLNode(Reader reader, boolean parseText) throws IOException {
      try {
         // document ::= Misc* doctypedecl? Misc* element Misc*
         XMLParser parser = new XMLParser(reader, parseText);
         if (parser.skipNondata(false)!='<') parser.malform("Expects start of root element.");
         parser.parseElement(this);
         if (parser.skipNondata(false)!=(-1)) parser.malform("Expects end of file.");
      } finally {
         Util.close(reader);
      }
   }

   /** Constructs the root XMLNode by parsing an entire XML document from the given reader (ignoring text content), then close the reader afterwards.
    * @param reader - the source of the XML document
    * @throws IOException if the XML input is malformed or an I/O error occurred.
    */
   public XMLNode(Reader reader) throws IOException {
      // Same steps as the two-argument constructor, with text parsing turned off.
      this(reader, false);
   }

   /** Constructs the root XMLNode by parsing an entire XML document from the given file, read as UTF-8 (ignoring text content).
    * <p> The file is closed afterwards, whether or not parsing succeeded.
    * @param file - the XML file to read
    * @throws IOException if the file cannot be opened, the XML input is malformed, or an I/O error occurred.
    */
   public XMLNode(File file) throws IOException {
      FileInputStream stream = null;
      InputStreamReader utf8 = null;
      try {
         stream = new FileInputStream(file);
         utf8 = new InputStreamReader(stream, "UTF-8");
         final XMLParser xml = new XMLParser(utf8, false);
         // document ::= Misc* doctypedecl? Misc* element Misc*
         final int first = xml.skipNondata(false);
         if (first != '<') {
            xml.malform("Expects start of root element.");
         }
         xml.parseElement(this);
         final int trailing = xml.skipNondata(false);
         if (trailing != (-1)) {
            xml.malform("Expects end of file.");
         }
      } finally {
         Util.close(utf8);
         Util.close(stream);
      }
   }

   /** Returns the type (that is, the element name) of this node. */
   public String getType() {
      return this.type;
   }

   /** Returns the text of this node if it is a text node; returns "" otherwise. */
   public String getText() {
      return this.text;
   }

   /** Tests whether the type of this element equals the given type. */
   public boolean is(String type) {
      final String mine = this.type;
      return mine.equals(type);
   }

   /** Returns an iterator over the immediate subnodes; the iterator is read-only since it comes from an unmodifiable view of the list. */
   public Iterator<XMLNode> iterator() {
      List<XMLNode> readOnly = Collections.unmodifiableList(sub);
      return readOnly.iterator();
   }

   /** Returns a read-only iteration of the immediate subelements whose type is equal to the given type.
    * <p> Each call to iterator() on the result starts a fresh pass over the children; remove() is not supported.
    * @param type - the element type to look for
    */
   public Iterable<XMLNode> getChildren(final String type) {
      return new Iterable<XMLNode>() {
         public Iterator<XMLNode> iterator() {
            return new Iterator<XMLNode>() {
               /** Walks over all the children, matching or not. */
               private final Iterator<XMLNode> source = sub.iterator();
               /** The most recently fetched child that has not been handed out yet; null if there is none. */
               private XMLNode pending = null;
               public boolean hasNext() {
                  // Keep fetching until the pending child is one of the requested type.
                  while (pending==null || !pending.type.equals(type)) {
                     if (!source.hasNext()) return false;
                     pending = source.next();
                  }
                  return true;
               }
               public XMLNode next() {
                  if (!hasNext()) throw new NoSuchElementException();
                  final XMLNode result = pending;
                  pending = null;
                  return result;
               }
               public void remove() {
                  throw new UnsupportedOperationException();
               }
            };
         }
      };
   }

   /** Looks up the value of the attribute with the given name; yields "" when this node has no such attribute. */
   public String getAttribute(String name) {
      final String found = map.get(name);
      if (found != null) return found;
      return "";
   }

   /** Looks up the value of the attribute with the given name; yields defaultValue when this node has no such attribute. */
   public String getAttribute(String name, String defaultValue) {
      final String found = map.get(name);
      if (found != null) return found;
      return defaultValue;
   }
}