/* Alloy Analyzer 4 -- Copyright (c) 2006-2009, Felix Chang
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files
* (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package edu.mit.csail.sdg.alloy4;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.Map.Entry;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
/**
 * Immutable; this class represents an XML element node.
 *
 * <p> A node has a type (the element name), an ordered set of (key,value) attributes,
 * and an ordered list of direct children. When a document is parsed with text parsing
 * enabled, character data is stored as child nodes whose {@link #getText()} is nonempty.
 */
public final class XMLNode implements Iterable<XMLNode> {

    /** The type of the element; never null. */
    private String type = "";

    /** If this is a text node, this is the text; otherwise it is "". */
    private String text = "";

    /** The set of (key,value) pairs, in the order they were encountered; never null. */
    private final Map<String,String> map = new LinkedHashMap<String,String>();

    /** The list of direct children nodes. */
    private final List<XMLNode> sub = new ArrayList<XMLNode>();

    /** Constructs an empty XMLNode object. */
    private XMLNode() { }

    /** Returns the number of direct subnodes. */
    public int count() { return sub.size(); }

    /** Returns an unmodifiable view of the attributes. */
    public Set<Entry<String,String>> attributes() { return Collections.unmodifiableMap(map).entrySet(); }

    /** Dump the content to a String. */
    @Override public String toString() {
        StringBuilder sb = new StringBuilder();
        toString(sb, 0);
        return sb.toString();
    }

    /**
     * Dump the content to a StringBuilder.
     * @param sb - the StringBuilder to append to
     * @param indent - the number of spaces to write in front of this node; each nesting level adds 2 more
     */
    public void toString(StringBuilder sb, int indent) {
        for(int i=0; i<indent; i++) sb.append(' ');
        // A node with nonempty text is a text node: it has no tag, no attributes, and no children.
        if (text.length()>0) { Util.encodeXML(sb, text); sb.append('\n'); return; }
        Util.encodeXMLs(sb, "<", type);
        for(Map.Entry<String,String> e: map.entrySet()) {
            Util.encodeXMLs(sb, " ", e.getKey(), "=\"", e.getValue(), "\"");
        }
        if (sub.size()==0) { sb.append("/>\n"); return; }
        sb.append(">\n");
        for(XMLNode x:sub) x.toString(sb, indent+2);
        for(int i=0; i<indent; i++) sb.append(' ');
        Util.encodeXMLs(sb, "</", type, ">\n");
    }

    /** Simple parser based on XML Specification 1.0 taking into account XML Specification Errata up to 2008/Jan/18. */
    private static final class XMLParser {

        /** True if we want to read text data also. */
        private final boolean wantText;

        /** The reader for the input XML file. */
        private final Reader reader;

        /** The current column in the file (reported as "column" in error messages); advanced only by read(). */
        private int x = 1;

        /** The current line in the file (reported as "line" in error messages); advanced only by read(). */
        private int y = 1;

        /**
         * The current "readahead" character; -2 if the readahead cache is empty; -1 if EOF is detected; otherwise it is one char.
         * <p> A cached character has NOT yet been counted in (x,y); it is counted when read() consumes it.
         */
        private int read = (-2);

        /** Constructor is private, since we want only XMLNode to be able to construct an instance of this class. */
        private XMLParser(Reader reader, boolean wantText) {
            this.wantText = wantText;
            if (reader instanceof BufferedReader) this.reader = reader; else this.reader = new BufferedReader(reader);
        }

        /** Throws an IOException with the given msg, and associate with it the current line and column location. */
        private void malform(String msg) throws IOException { throw new IOException("Error at line "+y+" column "+x+": "+msg); }

        /** Returns true if the given character is one of the four XML whitespace characters (space, tab, CR, LF). */
        private static boolean isSpace(int ch) { return ch==' ' || ch=='\t' || ch=='\r' || ch=='\n'; }

        /** Read the next character, and update the current line and column accordingly.
         * @throws IOException if end-of-file is reached.
         * @throws IOException if an I/O error occurred.
         */
        private int read() throws IOException {
            if (read<(-1)) read=reader.read();
            if (read<0) { malform("Unexpected end of file."); } else if (read=='\n') { x=1; y++; } else { x++; }
            int ans = read;
            read = -2;
            return ans;
        }

        /** Peek without consuming the next character, or return -1 if end-of-file is reached.
         * @throws IOException if an I/O error occurred.
         */
        private int peek() throws IOException {
            if (read<(-1)) read=reader.read();
            return read;
        }

        /** Consume up to and including the consecutive characters "char1" and "char2".
         * @throws IOException if we reached end-of-file without seeing the pattern.
         * @throws IOException if an I/O error occurred.
         */
        private void skipUntil(int char1, int char2) throws IOException {
            while(true) {
                int ch = read();
                // Consume char2 via read() (rather than just clearing the cache) so that the position stays accurate.
                if (ch==char1 && peek()==char2) { read(); return; }
            }
        }

        /** If the next N characters match the given string (where N == length of string), then consume them, else throw IOException.
         * @throws IOException if the next N characters do not match the given string.
         * @throws IOException if an I/O error occurred.
         */
        private void expect(String string) throws IOException {
            int saveX=x, saveY=y;
            for(int i=0; i<string.length(); i++) {
                // On mismatch, report the position where the expected string should have started.
                if (read()!=string.charAt(i)) { x=saveX; y=saveY; malform("Expects the string \""+string+"\""); }
            }
        }

        /** Skip whitespace if any, then consume and return the first non-whitespace character after that.
         * @throws IOException if after skipping 0 or more white space character we reach end-of-file.
         * @throws IOException if an I/O error occurred.
         */
        private int skipSpace() throws IOException {
            while(true) {
                int ch=read();
                if (!isSpace(ch)) return ch;
            }
        }

        /** Consume the remainder of a comment, assuming the initial "LESS THAN, EXCLAMATION MARK, HYPHEN" has already been consumed.
         * @throws IOException if the XML input is malformed.
         * @throws IOException if an I/O error occurred.
         */
        private void skipComment() throws IOException {
            if (read()!='-') malform("Expects start of <!--...-->");
            skipUntil('-', '-');
            if (read()!='>') malform("Expects end of <!--...-->");
        }

        /*
         * Taking the 79 grammar rules from XML specification, and after making conservative simplifications, we get these rules:
         * ("conservative" in that well-formed XML documents parse correctly, but some malformed documents also parse successfully)
         *
         * S ::= (#x20 | #x9 | #xD | #xA)+
         * Name ::= ( [A-Za-z0-9_:.-] | [#xC0-#xEFFFF] )+
         * Nmtoken ::= ( [A-Za-z0-9_:.-] | [#xC0-#xEFFFF] )+
         * Reference ::= '&' Name ';' | '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
         * PEReference ::= '%' Name ';'
         * SystemLiteral ::= '...' | "..."
         * PubidLiteral ::= '...' | "..."
         * AttValue ::= '...' | "..."
         * EntityValue ::= '...' | "..."
         * DefaultDecl ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * ExternalID ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * PublicID ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * NotationType ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * Enumeration ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * EnumeratedType ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * AttType ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * Mixed ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * children ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * contentspec ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * PEDef ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * NDataDecl ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * EntityDef ::= ( '...' | "..." | [%()|#*?+,] | Name | S )*
         * NotationDecl ::= '<!NOTATION' ( '...' | "..." | [%()|#*?+,] | Name | S )* '>'
         * AttlistDecl ::= '<!ATTLIST' ( '...' | "..." | [%()|#*?+,] | Name | S )* '>'
         * elementdecl ::= '<!ELEMENT' ( '...' | "..." | [%()|#*?+,] | Name | S )* '>'
         * EntityDecl ::= '<!ENTITY' ( '...' | "..." | [%()|#*?+,] | Name | S )* '>'
         * PI ::= '<?' ... '?>'
         * Comment ::= '<!--' ([^-] | ('-' [^-]))* '-->'
         * Misc ::= Comment | PI | S
         * doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
         * intSubset ::= (elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment | PEReference | S)*
         *
         * SkipNondata(false) will skip zero or more instances of the below, and thus it will consume (Misc | doctypedecl)*
         * SPACE TAB CR LF
         * <?...?>
         * <!--...-->
         * '<!' followed by SkipNondata(true) followed by '>'
         *
         * SkipNondata(true) will skip zero or more instances of the below, and thus it will consume intSubset*
         * SPACE TAB CR LF
         * <?...?>
         * <!--...-->
         * '<!' followed by SkipNondata(true) followed by '>'
         * '[' followed by SkipNondata(true) followed by ']'
         * '...'
         * "..."
         * any char that is not '<' nor '>' nor '[' nor ']' nor ''' nor '"'
         */

        /** Skip as much nondata as possible, then return the first character after that (or -1 if we end up at end-of-file).
         * <p> Specifically, skipNondata(false) consumes (Misc | doctypedecl)* from XML specification
         * <p> Likewise, skipNondata(true) consumes (intSubset)* from XML specification
         * <p> The returned character has been consumed; if it is the "less than" sign that starts an element,
         * then the character after it has NOT been consumed.
         * @throws IOException if the XML input is malformed.
         * @throws IOException if an I/O error occurred.
         */
        private int skipNondata(boolean inner) throws IOException {
            while(true) {
                if (peek()<0) return -1;
                // Always consume via read() so that skipped whitespace and newlines are counted in the position.
                int ch = read();
                if (isSpace(ch)) continue;
                if (ch == '<') {
                    int next = peek();
                    if (next < 0) read(); // end-of-file right after '<'; read() throws the appropriate error
                    if (next == '?') { read(); skipUntil('?', '>'); continue; }
                    if (next != '!') return '<'; // start of an element; leave its first character unread
                    read();
                    if (peek() == '-') { read(); skipComment(); continue; }
                    if (skipNondata(true)!='>') malform("Expects end of <!...>");
                }
                else if (!inner || ch == ']' || ch=='>') { return ch; }
                else if (ch == '[') { if (skipNondata(true)!=']') malform("Expects end of [...]"); }
                else if (ch == '\'' || ch == '\"') { while(read()!=ch) { } }
            }
        }

        /** Parse an element name or attribute name; the delimiter that ends the name is NOT consumed.
         * @return the (possibly empty) name
         * @throws IOException if end-of-file is reached before a delimiter is seen.
         * @throws IOException if an I/O error occurred.
         */
        private String parseName() throws IOException {
            StringBuilder sb = new StringBuilder();
            while(true) {
                // Peek first, so that the delimiter is left unread and is counted exactly once when it is consumed later.
                int ch = peek();
                if (isSpace(ch) || ch=='=' || ch=='/' || ch=='<' || ch=='>' || ch=='[' || ch==']') {
                    return sb.toString();
                }
                sb.append((char)read()); // at end-of-file, read() throws
            }
        }

        /** Parse a value up to delim, assuming the initial delimiter (if any) has already been consumed; the closing delim is consumed.
         * <p> delim is ' or " for an attribute value, and is the "less than" sign when reading character data.
         * <p> The five predefined entities and decimal/hexadecimal character references are decoded.
         * @throws IOException if the XML input is malformed (unknown entity, or invalid character reference).
         * @throws IOException if an I/O error occurred.
         */
        private String parseValue(int delim) throws IOException {
            StringBuilder sb = new StringBuilder(), sb2 = null;
            while(true) {
                int ch=read();
                if (ch==delim) return sb.toString();
                if (ch=='&') {
                    if (sb2==null) sb2=new StringBuilder(); else sb2.setLength(0);
                    while((ch=read()) != ';') sb2.append((char)ch);
                    if (sb2.length()>2 && sb2.charAt(0)=='#' && sb2.charAt(1)=='x') {
                        try { ch=Integer.parseInt(sb2.substring(2), 16); } catch(NumberFormatException ex) { ch=(-1); }
                    } else if (sb2.length()>1 && sb2.charAt(0)=='#'){
                        try { ch=Integer.parseInt(sb2.substring(1)); } catch(NumberFormatException ex) { ch=(-1); }
                    } else {
                        String name = sb2.toString();
                        if (name.equals("amp")) ch='&';
                        else if (name.equals("quot")) ch='"';
                        else if (name.equals("apos")) ch='\'';
                        else if (name.equals("lt")) ch='<';
                        else if (name.equals("gt")) ch='>';
                        else ch=(-1);
                    }
                    if (ch<0) malform("The entity \"&"+sb2.toString()+";\" is unknown.");
                    if (!Character.isValidCodePoint(ch)) malform("The character reference \"&"+sb2.toString()+";\" is not a valid Unicode code point.");
                    // A reference may denote a supplementary character (above U+FFFF), which needs two Java chars.
                    sb.appendCodePoint(ch);
                    continue;
                }
                sb.append((char)ch);
            }
        }

        /*
         * Below are the grammar rules for "element":
         * ==========================================
         *
         * element ::= '<' Name (S Name S? '=' S? AttValue)* S? '/>'
         * | '<' Name (S Name S? '=' S? AttValue)* S? '>' content '</' Name S? '>'
         *
         * content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
         * CDSect ::= '<![CDATA[' (Char* - (Char* ']]>' Char*)) ']]>'
         * CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
         */

        /** Parse an element (and all its subelements), assuming the initial "less than" sign has already been consumed.
         * @param target - the node whose type, attributes, and children will be filled in
         * @throws IOException if the XML input is malformed.
         * @throws IOException if an I/O error occurred.
         */
        private void parseElement(XMLNode target) throws IOException {
            target.type = parseName();
            while(true) {
                boolean space = false;
                while(isSpace(peek())) { read(); space=true; }
                int ch = peek();
                if (ch < 0) read(); // end-of-file inside a tag; read() throws the appropriate error
                if (ch == '=') malform("Unexpected '='");
                if (ch == '/') {
                    read();
                    if (read()!='>') malform("Expects '/>'");
                    break;
                }
                if (ch == '>') {
                    read();
                    // parseContent() returns right after consuming the "</" of the matching end tag
                    parseContent(target);
                    if (!target.type.equals(parseName())) malform("Start tag and end tag must have matching types.");
                    if (skipSpace()!='>') malform("Expects '</"+target.type+">'");
                    break;
                }
                if (!space) malform("Whitespace needed before a (key,value) pair.");
                String key = parseName();
                if (key.length()==0) malform("Attribute name cannot be empty.");
                if (skipSpace()!='=') malform("Expects = after the attribute name.");
                int quote = skipSpace();
                if (quote != '\'' && quote != '\"') malform("Expects \' or \" as the start of the attribute value.");
                String value = parseValue(quote);
                target.map.put(key, value);
            }
        }

        /** Parses the content until the rightful closing "LESS THAN SIGN followed by FORWARD SLASH" are both consumed.
         * <p> Subelements are appended to the parent; if text is wanted, each run of character data and
         * each CDATA section is appended to the parent as a text node (CR and LF in character data become spaces).
         * @throws IOException if the XML input is malformed.
         * @throws IOException if an I/O error occurred.
         */
        private void parseContent(XMLNode parent) throws IOException {
            StringBuilder sb = wantText ? new StringBuilder() : null;
            again:
            while(true) {
                // Consume character data up to and including the next "less than" sign.
                if (sb==null) {
                    while(read()!='<') {}
                } else {
                    sb.append(parseValue('<').replace('\r',' ').replace('\n', ' '));
                    parent.addText(sb);
                }
                int ch = peek();
                if (ch < 0) read(); // end-of-file right after '<'; read() throws the appropriate error
                if (ch=='/') { read(); return; }
                if (ch=='?') { read(); skipUntil('?', '>'); continue; }
                if (ch=='!') {
                    read();
                    ch=read();
                    if (ch=='-') { skipComment(); continue; }
                    if (ch!='[') malform("Expects <![CDATA[...]]>");
                    expect("CDATA[");
                    // (ah,bh) hold the previous two characters; a character is emitted only after it has been
                    // shifted out of this window, so that the terminating "]]" is never emitted.
                    for(int ah=0,bh=0; ;) {
                        ch=read();
                        if (ah==']' && bh==']' && ch=='>') {
                            parent.addText(sb);
                            continue again;
                        } else {
                            if (ah>0 && sb!=null) sb.append((char)ah);
                            ah=bh; bh=ch;
                        }
                    }
                }
                // Otherwise it is the start of a subelement, whose name begins at the (still unread) next character.
                XMLNode newElem = new XMLNode();
                parseElement(newElem);
                parent.sub.add(newElem);
            }
        }
    }

    /** Add a text node by removing all contents from the given StringBuilder and clearing that StringBuilder.
     * <p> If the StringBuilder is null or empty, this method does nothing.
     */
    private void addText(StringBuilder stringBuilder) {
        if (stringBuilder==null || stringBuilder.length()==0) return;
        XMLNode x = new XMLNode();
        x.text = stringBuilder.toString();
        stringBuilder.setLength(0);
        sub.add(x);
    }

    /** Parse an entire XML document from the given reader into this node; the reader is NOT closed by this method.
     * @param reader - the source of the XML document
     * @param parseText - true if character data should be kept as text nodes
     * @throws IOException if the XML input is malformed.
     * @throws IOException if an I/O error occurred.
     */
    private void parse(Reader reader, boolean parseText) throws IOException {
        // document ::= Misc* doctypedecl? Misc* element Misc*
        XMLParser parser = new XMLParser(reader, parseText);
        if (parser.skipNondata(false)!='<') parser.malform("Expects start of root element.");
        parser.parseElement(this);
        if (parser.skipNondata(false)!=(-1)) parser.malform("Expects end of file.");
    }

    /** Constructs the root XMLNode by parsing an entire XML document, then close the reader afterwards.
     * @param reader - the source of the XML document
     * @param parseText - true if character data should be kept as text nodes
     * @throws IOException if the XML input is malformed, or if an I/O error occurred.
     */
    public XMLNode(Reader reader, boolean parseText) throws IOException {
        try {
            parse(reader, parseText);
        } finally {
            Util.close(reader);
        }
    }

    /** Constructs the root XMLNode by parsing an entire XML document (ignoring text data), then close the reader afterwards.
     * @throws IOException if the XML input is malformed, or if an I/O error occurred.
     */
    public XMLNode(Reader reader) throws IOException {
        this(reader, false);
    }

    /** Constructs the root XMLNode by parsing an entire XML document (ignoring text data) from a file read as UTF-8.
     * @throws IOException if the XML input is malformed, or if an I/O error occurred.
     */
    public XMLNode(File file) throws IOException {
        FileInputStream fis = null;
        InputStreamReader reader = null;
        try {
            fis = new FileInputStream(file);
            reader = new InputStreamReader(fis, "UTF-8");
            parse(reader, false);
        } finally {
            Util.close(reader);
            Util.close(fis);
        }
    }

    /** Returns the type of the element. */
    public String getType() { return type; }

    /** Returns the text if this is a text node, returns "" otherwise. */
    public String getText() { return text; }

    /** Returns true if the type of this element is equal to the given type. */
    public boolean is(String type) { return this.type.equals(type); }

    /** Returns a read-only iterator over the immediate subelements. */
    public Iterator<XMLNode> iterator() { return Collections.unmodifiableList(sub).iterator(); }

    /** Returns a read-only iteration of the immediate subelements whose type is equal to the given type. */
    public Iterable<XMLNode> getChildren(final String type) {
        return new Iterable<XMLNode>() {
            public Iterator<XMLNode> iterator() {
                return new Iterator<XMLNode>() {
                    /** Iterates over every direct child, whether or not it matches. */
                    private final Iterator<XMLNode> it = sub.iterator();
                    /** The most recently fetched child that has not yet been returned by next(); null if none. */
                    private XMLNode peek = null;
                    public boolean hasNext() {
                        // Advance until "peek" holds a matching child, or until the children are exhausted.
                        while(true) {
                            if (peek!=null && peek.type.equals(type)) return true;
                            if (!it.hasNext()) return false; else peek=it.next();
                        }
                    }
                    public XMLNode next() { if (!hasNext()) throw new NoSuchElementException(); XMLNode ans=peek; peek=null; return ans; }
                    public void remove() { throw new UnsupportedOperationException(); }
                };
            }
        };
    }

    /** Returns the value associated with the given attribute name; if the attribute doesn't exist, return "". */
    public String getAttribute(String name) {
        String ans = map.get(name);
        return (ans==null) ? "" : ans;
    }

    /** Returns the value associated with the given attribute name; if the attribute doesn't exist, return the defaultValue. */
    public String getAttribute(String name, String defaultValue) {
        String ans = map.get(name);
        return (ans==null) ? defaultValue : ans;
    }
}