/***** BEGIN LICENSE BLOCK ***** * Version: CPL 1.0/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Common Public * License Version 1.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.eclipse.org/legal/cpl-v10.html * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de> * * Alternatively, the contents of this file may be used under the terms of * either of the GNU General Public License Version 2 or later (the "GPL"), * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the CPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the CPL, the GPL or the LGPL. ***** END LICENSE BLOCK *****/ package org.jruby.lexer.yacc; import org.jruby.ast.RegexpNode; import org.jruby.ast.StrNode; import org.jruby.parser.ReOptions; import org.jruby.parser.Tokens; import org.jruby.util.ByteList; public class StringTerm extends StrTerm { /* bit flags to indicate the string type */ private int func; private final char term; private final char paren; /* nested string level */ private int nest; public StringTerm(int func, char term, char paren) { this.func = func; this.term = term; this.paren = paren; this.nest = 0; } public int parseString(final RubyYaccLexer lexer, LexerSource src) throws java.io.IOException { char c; int space = 0; if (func == -1) { lexer.setValue(new Token("\"", lexer.getPosition())); return Tokens.tSTRING_END; } c = src.read(); if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0 && Character.isWhitespace(c)) { do { c = src.read(); } while (Character.isWhitespace(c)); space = 1; } if (c == term && nest == 0) { if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) { func = -1; lexer.getPosition(); return ' '; } if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) { lexer.setValue(new RegexpNode(src.getPosition(), ByteList.create(""), parseRegexpFlags(src))); return Tokens.tREGEXP_END; } lexer.setValue(new Token("\"", lexer.getPosition())); return Tokens.tSTRING_END; } if (space != 0) { src.unread(c); lexer.getPosition(); return ' '; } ByteList buffer = new ByteList(); if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#') { c = src.read(); switch (c) { case '$': case '@': src.unread(c); lexer.setValue(new Token("#" + c, lexer.getPosition())); return Tokens.tSTRING_DVAR; case '{': lexer.setValue(new Token("#" + c, lexer.getPosition())); return Tokens.tSTRING_DBEG; } buffer.append('#'); } src.unread(c); if (parseStringIntoBuffer(src, buffer) == 0) { throw new SyntaxException(src.getPosition(), "unterminated string meets end of file"); } lexer.setValue(new StrNode(lexer.getPosition(), buffer)); return Tokens.tSTRING_CONTENT; } private int parseRegexpFlags(final LexerSource src) throws java.io.IOException { char kcode = 0; int options = 0; char c; StringBuffer unknownFlags = new StringBuffer(10); for (c = src.read(); c != RubyYaccLexer.EOF && Character.isLetter(c); c = src.read()) { switch (c) { case 'i': options |= ReOptions.RE_OPTION_IGNORECASE; break; case 'x': options |= ReOptions.RE_OPTION_EXTENDED; break; case 'm': options |= ReOptions.RE_OPTION_MULTILINE; break; case 'o': options |= ReOptions.RE_OPTION_ONCE; break; case 'n': kcode = 16; break; case 'e': kcode = 32; break; case 's': kcode = 48; break; case 'u': kcode = 64; break; default: unknownFlags.append(c); break; } } src.unread(c); if (unknownFlags.length() != 0) { throw new SyntaxException(src.getPosition(), "unknown regexp option" + (unknownFlags.length() > 1 ? "s" : "") + " - " + unknownFlags.toString()); } return options | kcode; } public char parseStringIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException { char c; while ((c = src.read()) != RubyYaccLexer.EOF) { if (paren != '\0' && c == paren) { nest++; } else if (c == term) { if (nest == 0) { src.unread(c); break; } nest--; } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#' && !src.peek('\n')) { char c2 = src.read(); if (c2 == '$' || c2 == '@' || c2 == '{') { src.unread(c2); src.unread(c); break; } src.unread(c2); } else if (c == '\\') { c = src.read(); switch (c) { case '\n': if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0) { break; } if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) { continue; } buffer.append('\\'); break; case '\\': if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) { buffer.append(c); } break; default: if ((func & RubyYaccLexer.STR_FUNC_REGEXP) != 0) { src.unread(c); parseEscapeIntoBuffer(src, buffer); continue; } else if ((func & RubyYaccLexer.STR_FUNC_EXPAND) != 0) { src.unread(c); if ((func & RubyYaccLexer.STR_FUNC_ESCAPE) != 0) { buffer.append('\\'); } c = src.readEscape(); } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0 && Character.isWhitespace(c)) { /* ignore backslashed spaces in %w */ } else if (c != term && !(paren != '\0' && c == paren)) { buffer.append('\\'); } } } else if ((func & RubyYaccLexer.STR_FUNC_QWORDS) != 0 && Character.isWhitespace(c)) { src.unread(c); break; } if (c == '\0' && (func & RubyYaccLexer.STR_FUNC_SYMBOL) != 0) { throw new SyntaxException(src.getPosition(), "symbol cannot contain '\\0'"); } buffer.append(c); } return c; } // Was a goto in original ruby lexer private void escaped(LexerSource src, ByteList buffer) throws java.io.IOException { char c; switch (c = src.read()) { case '\\': parseEscapeIntoBuffer(src, buffer); break; case RubyYaccLexer.EOF: throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); default: buffer.append(c); } } private void parseEscapeIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException { char c; switch (c = src.read()) { case '\n': break; /* just ignore */ case '0': case '1': case '2': case '3': /* octal constant */ case '4': case '5': case '6': case '7': buffer.append('\\'); buffer.append(c); for (int i = 0; i < 2; i++) { c = src.read(); if (c == RubyYaccLexer.EOF) { throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); } if (!RubyYaccLexer.isOctChar(c)) { src.unread(c); break; } buffer.append(c); } break; case 'x': /* hex constant */ buffer.append('\\'); buffer.append(c); c = src.read(); if (!RubyYaccLexer.isHexChar(c)) { throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); } buffer.append(c); c = src.read(); if (RubyYaccLexer.isHexChar(c)) { buffer.append(c); } else { src.unread(c); } break; case 'M': if ((c = src.read()) != '-') { throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); } buffer.append(new byte[] { '\\', 'M', '-' }); escaped(src, buffer); break; case 'C': if ((c = src.read()) != '-') { throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); } buffer.append(new byte[] { '\\', 'C', '-' }); escaped(src, buffer); break; case 'c': buffer.append(new byte[] { '\\', 'c' }); escaped(src, buffer); break; case 0: throw new SyntaxException(src.getPosition(), "Invalid escape character syntax"); default: if (c != '\\' || c != term) { buffer.append('\\'); } buffer.append(c); } } }