/**
 * Copyright (c) 2002-2006 IBM Corporation and others.
 * All rights reserved.   This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *   IBM - Initial API and implementation
 */
package org.eclipse.emf.codegen.jet;


import java.io.CharArrayWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.eclipse.emf.codegen.CodeGenPlugin;


/**
 * JETReader is an input buffer for the JSP parser. It should allow
 * unlimited lookahead and push-back. It also has a bunch of parsing
 * utility methods for understanding html-style things.
 * <p>
 * The reader keeps a single {@link JETMark} ({@link #current}) describing the read position;
 * included files are pushed onto and popped from that mark.
 */
public class JETReader
{
  /** First character of the start tag; {@link #nextContent()} stops in front of it. */
  protected char startTagInitialChar = '<';

  /** First character of the end tag (the '%' of "%&gt;" by default). */
  protected char endTagInitialChar = '%';

  /** Last character of the end tag (the '&gt;' of "%&gt;" by default). */
  protected char endTagFinalChar = '>';

  /** The current read position. */
  protected JETMark current = null;

  protected String master = null;

  /** Location URIs of all registered source files, indexed by file id. */
  protected List<String> sourceFiles = new ArrayList<String>();

  /** Base URIs of all registered source files, indexed by file id. */
  protected List<String> baseURIs = new ArrayList<String>();

  /** Incremented by {@link #registerSourceFile(String)} and decremented by {@link #popFile()}. */
  protected int size = 0;

  /** Whether {@link #hasMoreInput()} suppresses the extra newline after an included file. */
  protected boolean trimExtraNewLine = true;

  /**
   * Creates a reader and stacks the given stream as its first source.
   * @param baseURI the base URI recorded for the stream; may be <code>null</code>.
   * @param locationURI the location recorded for the stream.
   * @param inputStream the stream to read; it is read completely and closed.
   * @param encoding the encoding used to decode the stream, or <code>null</code> for UTF-8.
   * @throws JETException if the encoding is unsupported or the stream cannot be read.
   */
  public JETReader(String baseURI, String locationURI, InputStream inputStream, String encoding) throws JETException
  {
    stackStream(baseURI, locationURI, inputStream, encoding);
  }

  /**
   * Creates a reader with a <code>null</code> base URI.
   * @see #JETReader(String, String, InputStream, String)
   */
  public JETReader(String locationURI, InputStream inputStream, String encoding) throws JETException
  {
    this(null, locationURI, inputStream, encoding);
  }

  /**
   * Returns the location that was registered for the given file id.
   */
  public String getFile(int fileid)
  {
    return sourceFiles.get(fileid);
  }

  /**
   * Returns the base URI that was registered for the given file id.
   */
  public String getBaseURI(int fileid)
  {
    return baseURIs.get(fileid);
  }

  /**
   * Stacks a stream for parsing, using a <code>null</code> base URI.
   * @see #stackStream(String, String, InputStream, String)
   */
  public void stackStream(String locationURI, InputStream iStream, String encoding) throws JETException
  {
    stackStream(null, locationURI, iStream, encoding);
  }

  /**
   * Stack a stream for parsing.
   * The stream is read completely into memory and closed before this method returns.
   * A zero width non-breaking space (U+FEFF) at the very start of the decoded content
   * is treated as a byte order mark and dropped.
   * @param baseURI the base URI to register for the stream; may be <code>null</code>.
   * @param locationURI the location to register for the stream.
   * @param iStream Stream ready to parse
   * @param encoding Optional encoding to read the file; <code>null</code> means UTF-8.
   * @throws JETException if the encoding is unsupported, or reading or closing the stream fails.
   */
  public void stackStream(String baseURI, String locationURI, InputStream iStream, String encoding) throws JETException
  {
    InputStreamReader reader = null;
    try
    {
      // Until the encoding can be specified within the template
      // we need to assume an encoding capable of working with any character set.
      //
      if (encoding == null)
      {
        encoding = "UTF8";
      }

      // Register the file, and read its content:
      //
      int fileid = registerSourceFile(locationURI);
      registerBaseURI(baseURI);
      reader = new InputStreamReader(iStream, encoding);
      CharArrayWriter writer = new CharArrayWriter();
      char[] buf = new char [1024];
      boolean atStart = true;
      for (int count; (count = reader.read(buf)) != -1; )
      {
        int offset = 0;
        if (atStart && count > 0)
        {
          // Remove zero width non-breaking space, which may be used as a byte order marker,
          // and may be ignored according to the Unicode FAQ: http://www.unicode.org/unicode/faq/utf_bom.html#38
          // Only the very first character of the stream can be a byte order marker;
          // chunk boundaries are arbitrary, so later chunks must be copied unchanged.
          //
          if (buf[0] == '\uFEFF')
          {
            offset = 1;
          }
          atStart = false;
        }
        writer.write(buf, offset, count - offset);
      }
      writer.close();

      if (current == null)
      {
        current = new JETMark(this, writer.toCharArray(), fileid, locationURI, encoding);
      }
      else
      {
        current.pushStream(writer.toCharArray(), fileid, locationURI, encoding);
      }
    }
    catch (UnsupportedEncodingException exception)
    {
      throw new JETException(exception);
    }
    catch (IOException exception)
    {
      throw new JETException(exception);
    }
    finally
    {
      if (reader != null)
      {
        try
        {
          reader.close();
        }
        catch (Exception exception)
        {
          throw new JETException(exception);
        }
      }
    }
  }

  /**
   * Pops the current stream and restores the state of the one below it.
   * @return <code>false</code> if no stream was ever stacked, otherwise the result of {@link JETMark#popStream()}.
   */
  public boolean popFile()
  {
    // Is stack created ? (will happen if the JET file we're looking at is missing.
    //
    if (current == null)
    {
      return false;
    }

    // Restore parser state:
    // NOTE(review): size is decremented on every call, whether or not popStream() succeeds.
    //
    size--;
    return current.popStream();
  }

  /**
   * Register a new source file.
   * This method is used to implement file inclusion. Each included file
   * gets a unique identifier (which is the index in the array of source files).
   * @param file the location of the file being registered.
   * @return The index of the now registered file.
   */
  protected int registerSourceFile(String file)
  {
    sourceFiles.add(file);
    ++this.size;
    return sourceFiles.size() - 1;
  }

  /**
   * Register a new baseURI.
   * This method is used to implement file inclusion. Each included file
   * gets a unique identifier (which is the index in the array of base URIs).
   * @param baseURI the base URI to append; may be <code>null</code>.
   */
  protected void registerBaseURI(String baseURI)
  {
    baseURIs.add(baseURI);
  }

  /**
   * Returns whether more input is available. If the end of the buffer for an included file is reached, it will return
   * to the context of the previous file, and return whether more input is available from there. In this case, if
   * trimExtraNewLine is true, then an unwanted extra newline character will be suppressed. We consider the first
   * newline in the buffer we are returning to be unwanted if the ending buffer already has at least one trailing
   * newline.
   */
  public boolean hasMoreInput()
  {
    if (current.cursor < current.stream.length)
    {
      return true;
    }

    // Must be evaluated before popping: it inspects the stream that is ending.
    //
    boolean endsWithNewLine = hasTrailingNewLine();
    while (popFile())
    {
      if (current.cursor < current.stream.length)
      {
        if (trimExtraNewLine && endsWithNewLine)
        {
          skipNewLine();
        }
        return true;
      }
    }
    return false;
  }

  /**
   * Tests whether the current stream has at least one trailing newline, optionally followed by spaces.
   */
  protected boolean hasTrailingNewLine()
  {
    char[] stream = current.stream;
    for (int i = stream.length - 1; i >= 0; i--)
    {
      char ch = stream[i];
      if (ch == '\n' || ch == '\r')
      {
        return true;
      }
      if (ch != ' ')
      {
        return false;
      }
    }
    return false;
  }

  /**
   * If the next character would be a line break, moves the cursor past it.
   * A two character line break ("\r\n" or "\n\r") is skipped as a unit.
   */
  protected void skipNewLine()
  {
    char[] stream = current.stream;
    int c = current.cursor;
    if (stream.length > c + 1 &&
          (stream[c] == '\n' && stream[c + 1] == '\r' || stream[c] == '\r' && stream[c + 1] == '\n'))
    {
      current.cursor += 2;
      current.line++;

      // Mirror what two nextChar() calls would have produced: for "\n\r" the trailing '\r'
      // counts as one column on the new line, for "\r\n" the '\n' resets the column.
      // The decision depends on the line break at the cursor, not on the start of the buffer.
      //
      current.col = stream[c] == '\n' ? 1 : 0;
    }
    else if (stream.length > c && (stream[c] == '\n' || stream[c] == '\r'))
    {
      current.cursor++;
      current.line++;
      current.col = 0;
    }
  }

  /**
   * Updates the line and column of the current mark for one consumed character.
   */
  private void trackPosition(int ch)
  {
    if (ch == '\n')
    {
      ++current.line;
      current.col = 0;
    }
    else
    {
      ++current.col;
    }
  }

  /**
   * Consumes and returns the next character, returning to the including file when the
   * current one is exhausted (see {@link #hasMoreInput()}).
   * @return the character, or -1 if there is no more input.
   */
  public int nextChar()
  {
    if (!hasMoreInput())
    {
      return -1;
    }

    int ch = current.stream[current.cursor];
    ++current.cursor;
    trackPosition(ch);
    return ch;
  }

  /**
   * Gets Content until the next potential JSP element.  Because all elements
   * begin with a '&lt;' we can just move until we see the next one.
   * The character at the cursor is always consumed, even if it is itself the start tag
   * character; scanning stops in front of the next start tag character or at the end
   * of the current stream.
   * @return the consumed text, or the empty string if the cursor is at the end of the current stream.
   */
  public String nextContent()
  {
    int start = current.cursor;
    char[] stream = current.stream;
    int length = stream.length;
    if (start == length)
    {
      return "";
    }

    // The first character is consumed unconditionally, so it has to be counted as well
    // to keep line/column tracking consistent with nextChar().
    //
    trackPosition(stream[start]);
    while (++current.cursor < length)
    {
      char ch = stream[current.cursor];
      if (ch == startTagInitialChar)
      {
        break;
      }
      trackPosition(ch);
    }

    return new String(stream, start, current.cursor - start);
  }

  /**
   * Returns the characters between two marks. The read position is unchanged on return.
   * @param start the mark to start reading from.
   * @param stop the mark to stop in front of.
   * @return the characters read from <code>start</code> until <code>stop</code> is reached;
   * if <code>stop</code> is never reached, everything up to the end of input.
   */
  public char[] getChars(JETMark start, JETMark stop)
  {
    JETMark oldstart = mark();
    reset(start);
    CharArrayWriter writer = new CharArrayWriter();
    while (!stop.equals(mark()))
    {
      int ch = nextChar();
      if (ch == -1)
      {
        // End of input without meeting the stop mark: give up rather than loop forever.
        //
        break;
      }
      writer.write(ch);
    }
    writer.close();
    reset(oldstart);
    return writer.toCharArray();
  }

  /**
   * Returns the character at the cursor without consuming it.
   * Unlike {@link #nextChar()}, this never pops back to an including file.
   * @return the character, or -1 if the cursor is at the end of the current stream.
   */
  public int peekChar()
  {
    if (current.cursor >= current.stream.length)
    {
      return -1;
    }
    return current.stream[current.cursor];
  }

  /**
   * Returns a copy of the current read position.
   */
  public JETMark mark()
  {
    return new JETMark(current);
  }

  /**
   * Moves the read position to a copy of the given mark.
   */
  public void reset(JETMark mark)
  {
    current = new JETMark(mark);
  }

  /**
   * Tests whether the upcoming input, converted to lower case, equals the given string.
   * Only the input is lower-cased, so the argument is expected to be lower case already.
   * The read position is unchanged on return.
   */
  public boolean matchesIgnoreCase(String string)
  {
    return matchesAhead(string, true);
  }

  /**
   * Tests whether the upcoming input equals the given string.
   * The read position is unchanged on return.
   */
  public boolean matches(String string)
  {
    return matchesAhead(string, false);
  }

  /**
   * Compares the upcoming input with the given string and restores the read position.
   * An empty string always matches; running out of input is a mismatch.
   */
  private boolean matchesAhead(String string, boolean lowerCaseInput)
  {
    JETMark mark = mark();
    try
    {
      for (int i = 0, length = string.length(); i < length; i++)
      {
        int ch = nextChar();
        if (ch == -1)
        {
          return false;
        }
        char actual = lowerCaseInput ? Character.toLowerCase((char)ch) : (char)ch;
        if (actual != string.charAt(i))
        {
          return false;
        }
      }
      return true;
    }
    finally
    {
      reset(mark);
    }
  }

  /**
   * Consumes <code>n</code> characters.
   */
  public void advance(int n)
  {
    while (--n >= 0)
    {
      nextChar();
    }
  }

  /**
   * Consumes characters for as long as {@link #isSpace()} holds.
   * @return the number of characters skipped.
   */
  public int skipSpaces()
  {
    int count = 0;
    while (isSpace())
    {
      ++count;
      nextChar();
    }
    return count;
  }

  /**
   * Skip until the given string is matched in the stream.
   * When returned, the context is positioned past the end of the match.
   * The first character of the limit is compared exactly; the remaining characters are
   * compared against the lower-cased input.
   * @param limit The String to match.
   * @return A non-null <code>JETMark</code> instance positioned at the start of the match if found,
   * <strong>null</strong> otherwise.
   */
  public JETMark skipUntil(String limit)
  {
    int limitLength = limit.length();
    if (limitLength == 0)
    {
      // The empty string matches right here.
      //
      return mark();
    }

    char first = limit.charAt(0);
    JETMark start = mark();
    for (int ch = nextChar(); ch != -1; start = mark(), ch = nextChar())
    {
      if (ch != first)
      {
        continue;
      }

      JETMark afterFirst = mark();
      boolean matched = true;
      for (int i = 1; i < limitLength; i++)
      {
        if (Character.toLowerCase((char)nextChar()) != limit.charAt(i))
        {
          matched = false;
          break;
        }
      }
      if (matched)
      {
        return start;
      }

      // Backtrack to just after the candidate's first character so that a match that
      // overlaps the failed attempt (for example "%>" inside "%%>") is not missed.
      //
      reset(afterFirst);
    }
    return null;
  }

  /**
   * Tests whether the character at the cursor is a space, that is any character up to
   * and including ' '. The end of the current stream is not a space.
   */
  protected boolean isSpace()
  {
    int ch = peekChar();
    return ch != -1 && ch <= ' ';
  }

  /**
   * Parse a space delimited token.
   * If quoted the token will consume all characters up to a matching quote,
   * otherwise, it consumes up to the first delimiter character.
   * @param quoted If <strong>true</strong> accept quoted strings.
   * @return the token, without surrounding quotes; possibly empty.
   * @throws JETException if <code>quoted</code> is true and the token does not start with a quote
   * or its closing quote is missing.
   */
  public String parseToken(boolean quoted) throws JETException
  {
    StringBuilder token = new StringBuilder();
    skipSpaces();

    int ch = peekChar();
    if (quoted)
    {
      if (ch == '"' || ch == '\'')
      {
        char endQuote = ch == '"' ? '"' : '\'';

        // Consume the open quote:
        //
        ch = nextChar();
        for (ch = nextChar(); ch != -1 && ch != endQuote; ch = nextChar())
        {
          // A backslash makes the following character literal.
          //
          if (ch == '\\')
          {
            ch = nextChar();
          }
          token.append((char)ch);
        }

        // Check end of quote, skip closing quote:
        //
        if (ch == -1)
        {
          throw new JETException(CodeGenPlugin.getPlugin().getString("jet.error.quotes.unterminated", new Object [] { mark().toString() }));
        }
      }
      else
      {
        throw new JETException(CodeGenPlugin.getPlugin().getString("jet.error.attr.quoted", new Object [] { mark().toString() }));
      }
    }
    else
    {
      if (!isDelimiter())
      {
        // Read value until delimiter is found:
        //
        do
        {
          ch = nextChar();

          // Take care of the quoting here: a backslash in front of a quote or an end tag
          // character is dropped and the escaped character is taken literally.
          //
          if (ch == '\\')
          {
            int next = peekChar();
            if (next == '"' || next == '\'' || next == endTagFinalChar || next == endTagInitialChar)
            {
              ch = nextChar();
            }
          }
          token.append((char)ch);
        }
        while (!isDelimiter());
      }
    }
    return token.toString();
  }

  /**
   * Parse an attribute/value pair, and store it in provided hash table.
   * The attribute/value pair is defined by:
   * <pre>
   * av := spaces token spaces '=' spaces token spaces
   * </pre>
   * Where <em>token</em> is defined by <code>parseToken</code> and
   * <em>spaces</em> is defined by <code>skipSpaces</code>.
   * The name is stored exactly as it appears in the input.
   * @param into The HashMap instance to save the result to.
   * @throws JETException if the name is not followed by '=' or the value is not a properly quoted token.
   */
  protected void parseAttributeValue(HashMap<String, String> into) throws JETException
  {
    // Get the attribute name:
    //
    skipSpaces();
    String name = parseToken(false);

    // Check for an equal sign:
    //
    skipSpaces();
    if (peekChar() != '=')
    {
      throw new JETException(CodeGenPlugin.getPlugin().getString("jet.error.attr.novalue", new Object [] { name, mark().toString() }));
    }
    nextChar();

    // Get the attribute value:
    //
    skipSpaces();
    String value = parseToken(true);
    skipSpaces();

    // Add the binding to the provided hash table:
    //
    into.put(name, value);
  }

  /**
   * Consumes the character at the cursor and tests whether the characters after it are
   * exactly the expected ones. The read position is restored before returning.
   */
  private boolean restMatches(char... expected)
  {
    JETMark mark = mark();
    try
    {
      nextChar();
      for (int i = 0; i < expected.length; i++)
      {
        if (nextChar() != expected[i])
        {
          return false;
        }
      }
      return true;
    }
    finally
    {
      reset(mark);
    }
  }

  /**
   * Parse some tag attributes for Beans.
   * The stream is assumed to be positioned right after the tag name. The
   * syntax recognized is:
   * <pre>
   * tag-attrs := empty | attr-list ("&gt;" | "/&gt;")
   * attr-list := empty | av spaces attr-list
   * empty     := spaces
   * </pre>
   * Where <em>av</em> is defined by <code>parseAttributeValue</code>.
   * On return the stream is positioned in front of the closing characters.
   * @return A HashMap mapping String instances (variable names) into
   * String instances (variable values).
   * @throws JETException if an attribute is malformed or the input ends before the tag is closed.
   */
  public HashMap<String, String> parseTagAttributesBean() throws JETException
  {
    HashMap<String, String> values = new HashMap<String, String>(11);
    while (true)
    {
      skipSpaces();
      int ch = peekChar();
      if (ch == endTagFinalChar)
      {
        // End of the useBean tag.
        //
        return values;
      }
      else if (ch == '/')
      {
        // XMLesque Close tags
        //
        if (restMatches(endTagFinalChar))
        {
          return values;
        }
      }

      if (ch == -1)
      {
        break;
      }

      // Parse as an attribute=value:
      //
      parseAttributeValue(values);
    }

    // Reached EOF:
    //
    throw new JETException(CodeGenPlugin.getPlugin().getString("jet.error.tag.attr.unterminated", new Object [] { mark().toString() }));
  }

  /**
   * Parse some tag attributes.
   * The stream is assumed to be positioned right after the tag name. The
   * syntax recognized is:
   * <pre>
   * tag-attributes  := empty | attribute-list ("&gt;" | "--&gt;" | "%&gt;" | "/&gt;")
   * attribute-list  := empty | attribute-value spaces attribute-list
   * empty           := spaces
   * </pre>
   * Where <em>attribute-value</em> is defined by <code>parseAttributeValue</code>.
   * On return the stream is positioned in front of the closing characters.
   * @return A HashMap mapping String instances (variable names) into
   * String instances (variable values).
   * @throws JETException if an attribute is malformed or the input ends before the tag is closed.
   */
  public HashMap<String, String> parseTagAttributes() throws JETException
  {
    HashMap<String, String> values = new HashMap<String, String>(11);
    while (true)
    {
      skipSpaces();
      int ch = peekChar();
      if (ch == endTagFinalChar)
      {
        return values;
      }

      if (ch == '-')
      {
        // Close NCSA like attributes "-->"
        //
        if (restMatches('-', endTagFinalChar))
        {
          return values;
        }
      }
      else if (ch == endTagInitialChar)
      {
        // Close variable like attributes "%>"
        //
        if (restMatches(endTagFinalChar))
        {
          return values;
        }
      }
      else if (ch == '/')
      {
        // XMLesque Close tags
        //
        if (restMatches(endTagFinalChar))
        {
          return values;
        }
      }

      if (ch == -1)
      {
        break;
      }

      // Parse as an attribute=value:
      //
      parseAttributeValue(values);
    }

    // Reached EOF:
    //
    throw new JETException(CodeGenPlugin.getPlugin().getString("jet.error.tag.attr.unterminated", new Object [] { mark().toString() }));
  }

  /**
   * Parse utilities - Is current character a token delimiter ?
   * Delimiters are currently defined to be =, the final end tag character, ", ', / and
   * any space character as defined by <code>isSpace</code>; a '-' that starts an
   * end-of-tag sequence and the end of the current stream are delimiters too.
   * The read position is unchanged on return.
   * @return A boolean.
   */
  protected boolean isDelimiter()
  {
    if (isSpace())
    {
      return true;
    }

    int ch = peekChar();
    if (ch == -1)
    {
      // Nothing left in the current stream: whatever token is being read ends here.
      //
      return true;
    }

    // Look for a single-char work delimiter:
    //
    if (ch == '=' || ch == endTagFinalChar || ch == '"' || ch == '\'' || ch == '/')
    {
      return true;
    }

    // Look for an end-of-comment or end-of-tag:
    //
    if (ch == '-')
    {
      JETMark mark = mark();
      try
      {
        // NOTE(review): this nextChar() returns the '-' that was just peeked, so in effect
        // only '-' directly followed by the final end tag character is recognised here.
        //
        ch = nextChar();
        return ch == endTagFinalChar || (ch == '-' && nextChar() == endTagFinalChar);
      }
      finally
      {
        reset(mark);
      }
    }
    return false;
  }

  /**
   * Sets the start tag; only its first character is recorded.
   * @param startTag the start tag; must not be empty.
   */
  public void setStartTag(String startTag)
  {
    startTagInitialChar = startTag.charAt(0);
  }

  /**
   * Sets the end tag; its first and last characters are recorded.
   * @param endTag the end tag; must not be empty.
   */
  public void setEndTag(String endTag)
  {
    endTagFinalChar = endTag.charAt(endTag.length() - 1);
    endTagInitialChar = endTag.charAt(0);
  }
}