// Copyright (c) Corporation for National Research Initiatives package org.python.core; import java.io.FilterReader; import java.io.IOException; import java.io.Reader; import java.io.UnsupportedEncodingException; import org.python.parser.IParserHost; import org.python.parser.Node; import org.python.parser.ParseException; import org.python.parser.PythonGrammar; import org.python.parser.ReaderCharStream; import org.python.parser.Token; import org.python.parser.TokenMgrError; import org.python.parser.ast.modType; import com.aptana.shared_core.string.FastStringBuffer; /** * Facade for the classes in the org.python.parser package. */ public class parser { private static IParserHost literalMkrForParser = new LiteralMakerForParser(); private parser() { ; } static String getLine(ReaderCharStream reader, int line) { if (reader == null) return ""; reader.restorePos(0); try { reader.readChar(); } catch (IOException e1) { return ""; } try { while (reader.getEndLine() < line) { reader.readChar(); } reader.backup(1); FastStringBuffer buf = new FastStringBuffer(128); buf.append(reader.readChar()); while (reader.getEndLine() == line) { buf.append(reader.readChar()); } return buf.toString(); } catch (IOException e) { return ""; } } // if reader != null, reset it public static PyException fixParseError(ReaderCharStream reader, Throwable t, String filename) { if (t instanceof ParseException) { ParseException e = (ParseException) t; Token tok = e.currentToken; int col = 0; int line = 0; if (tok != null && tok.next != null) { col = tok.next.beginColumn; line = tok.next.beginLine; } String text = getLine(reader, line); return new PySyntaxError(e.getMessage(), line, col, text, filename); } if (t instanceof TokenMgrError) { TokenMgrError e = (TokenMgrError) t; boolean eofSeen = e.EOFSeen; int col = e.errorColumn; int line = e.errorLine; //System.err.println("eof seen: "+eofSeen+", "+e.curChar+", "+col+ // ", "+line); String text = getLine(reader, line); if (eofSeen) col -= 1; return new PySyntaxError(e.getMessage(), line, col, text, filename); } else return Py.JavaError(t); } public static Node parse(String string, String kind) { return parse(PyString.to_bytes(string), kind, "<string>", null); } public static modType parse(byte[] istream, String kind, String filename, CompilerFlags cflags) { char[] bufreader = prepBufreader(istream, cflags); ReaderCharStream charStream = new ReaderCharStream(bufreader); PythonGrammar g = new PythonGrammar(charStream, literalMkrForParser); modType node = null; try { node = doparse(kind, cflags, g); } catch (Throwable t) { throw fixParseError(charStream, t, filename); } return node; } public static modType partialParse(String string, String kind, String filename, CompilerFlags cflags, boolean stdprompt) { modType node = null; //System.err.println(new PyString(string).__repr__().toString()); char[] bufreader = prepBufreader(PyString.to_bytes(string), cflags); ReaderCharStream charStream = new ReaderCharStream(bufreader); PythonGrammar g = new PythonGrammar(charStream, literalMkrForParser); g.token_source.partial = true; g.token_source.stdprompt = stdprompt; try { node = doparse(kind, cflags, g); } catch (Throwable t) { /* CPython codeop exploits that with CPython parser adding newlines to a partial valid sentence move the reported error position, this is not true for our parser, so we need a different approach: we check whether all sentence tokens have been consumed or the remaining ones fullfill lookahead expectations. See: PythonGrammar.partial_valid_sentence (def in python.jjt) */ if (g.partial_valid_sentence(t)) { return null; } throw fixParseError(charStream, t, filename); } return node; // try { // node = parse(new StringBufferInputStream(string), // kind, filename, cflags, true); // } // catch (PySyntaxError e) { // //System.out.println("e: "+e.lineno+", "+e.column+", "+ // // e.forceNewline); // try { // node = parse(new StringBufferInputStream(string+"\n"), // kind, filename, cflags, true); // } // catch (PySyntaxError e1) { // //System.out.println("e1: "+e1.lineno+", "+e1.column+ // // ", "+e1.forceNewline); // if (e.forceNewline || !e1.forceNewline) throw e; // } // return null; // } // return node; } private static modType doparse(String kind, CompilerFlags cflags, PythonGrammar g) throws ParseException { modType node = null; if (cflags != null) g.token_source.generator_allowed = cflags.generator_allowed; if (kind.equals("eval")) { node = g.eval_input(); } else if (kind.equals("exec")) { node = g.file_input(); } else if (kind.equals("single")) { node = g.single_input(); } else { throw Py.ValueError("parse kind must be eval, exec, " + "or single"); } return node; } private static char[] prepBufreader(byte[] istream, CompilerFlags cflags) { String str; if (cflags != null && cflags.encoding != null) { try { str = new String(istream, cflags.encoding); } catch (UnsupportedEncodingException exc) { throw Py.SystemError("python.console.encoding, " + cflags.encoding + ", isn't supported by this JVM so we can't parse this data."); } } else { try { // Use ISO-8859-1 to get bytes off the input stream since it leaves their values alone. str = new String(istream, "ISO-8859-1"); } catch (UnsupportedEncodingException e) { // This JVM is whacked, it doesn't even have iso-8859-1 throw Py.SystemError("Java couldn't find the ISO-8859-1 encoding"); } } return str.toCharArray(); } } /** * A workaround for a bug in MRJ2.2's FileReader, where the value returned * from read(b, o, l) sometimes are wrong. */ class FixMacReaderBug extends FilterReader { public FixMacReaderBug(Reader in) { super(in); } public int read(char b[], int off, int len) throws IOException { int l = super.read(b, off, len); if (l < -1) l += off; return l; } } class LiteralMakerForParser implements IParserHost { public Object newLong(String s) { return Py.newLong(s); } public Object newLong(java.math.BigInteger i) { return Py.newLong(i); } public Object newFloat(double v) { return Py.newFloat(v); } public Object newImaginary(double v) { return Py.newImaginary(v); } public Object newInteger(int i) { return Py.newInteger(i); } public String decode_UnicodeEscape(String str, int start, int end, String errors, boolean unicode) { return PyString.decode_UnicodeEscape(str, start, end, errors, unicode); } }