/* * xtc - The eXTensible Compiler * Copyright (C) 2004 Robert Grimm * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ package xtc.util; import java.io.IOException; import java.io.Reader; import java.io.Writer; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.regex.Pattern; import xtc.tree.Location; /** * Implmentation of utilities for language processors, focusing on * handling of I/O. * * @author Robert Grimm * @version $Revision: 1.1 $ */ public final class Utilities { /** Hide constructor. */ private Utilities() { } /** The buffer size for pumping data. */ private static final int BUFFER_SIZE = 4096; /** The buffer for pumping data. */ private static char[] charBuffer = null; /** * The regular expression pattern used to split source code along * newlines. */ public static final Pattern SPLIT = Pattern.compile("\r\n|\r|\n"); /** * Flag for using doubled escape sequences. Doubled escape * sequences will still read as an escape sequence, even if they are * included in a program source character or string. */ public static final int ESCAPE_DOUBLE = 0x01; /** * Flag for escaping the '<code>[</code>', '<code>-</code>', and * '<code>]</code>' characters. */ public static final int ESCAPE_REGEX = 0x02; /** * Flag for escaping non-printable ASCII characters using Unicode * escapes. */ public static final int ESCAPE_UNICODE = 0x04; /** The escape flags for C/C++ escapes. */ public static final int C_ESCAPES = 0x00; /** The escape flags for Java escapes. */ public static final int JAVA_ESCAPES = ESCAPE_UNICODE; /** The escape flags for Java and regex escapes. */ public static final int FULL_ESCAPES = ESCAPE_UNICODE | ESCAPE_REGEX; /** * Pump all data from the specified reader to the specified writer. * * @param in The reader. * @param out The writer. * @throws IOException Signals an exceptinal condition during I/O. */ public static void pump(Reader in, Writer out) throws IOException { if (null == charBuffer) charBuffer = new char[BUFFER_SIZE]; int number = in.read(charBuffer); while (-1 != number) { out.write(charBuffer, 0, number); number = in.read(charBuffer); } } /** * Determine whether the double escapes flag is set. * * @see #ESCAPE_DOUBLE * * @param flags The escape flags. * @return <code>true</code> if the double escapes flag is set. */ public static boolean useDoubleEscapes(int flags) { return (0 != (ESCAPE_DOUBLE & flags)); } /** * Determine whether the regex escapes flag is set. * * @see #ESCAPE_REGEX * * @param flags The escape flags. * @return <code>true</code> if the regex escape flag is set. */ public static boolean useRegexEscapes(int flags) { return (0 != (ESCAPE_REGEX & flags)); } /** * Determine whether the Unicode escapes flag is set. * * @see #ESCAPE_UNICODE * * @param flags The escape flags. * @return <code>true</code> if the Unicode escapes flag is set. */ public static boolean useUnicodeEscapes(int flags) { return (0 != (ESCAPE_UNICODE & flags)); } /** * Escape the specified character into the specified string buffer. * * @param c The character. * @param buf The string buffer. * @param flags The escape flags. */ public static void escape(char c, StringBuffer buf, int flags) { boolean useDouble = useDoubleEscapes(flags); boolean useRegex = useRegexEscapes(flags); boolean useUnicode = useUnicodeEscapes(flags); switch (c) { case '\b': if (useDouble) { buf.append("\\\\b"); } else { buf.append("\\b"); } break; case '\t': if (useDouble) { buf.append("\\\\t"); } else { buf.append("\\t"); } break; case '\n': if (useDouble) { buf.append("\\\\n"); } else { buf.append("\\n"); } break; case '\f': if (useDouble) { buf.append("\\\\f"); } else { buf.append("\\f"); } break; case '\r': if (useDouble) { buf.append("\\\\r"); } else { buf.append("\\r"); } break; case '\"': if (useDouble) { buf.append("\\\\\\\""); } else { buf.append("\\\""); } break; case '\'': if (useDouble) { buf.append("\\\\\\\'"); } else { buf.append("\\\'"); } break; case '-': if (useRegex) { if (useDouble) { buf.append("\\\\-"); } else { buf.append("\\-"); } } else { buf.append('-'); } break; case '[': if (useRegex) { if (useDouble) { buf.append("\\\\["); } else { buf.append("\\["); } } else { buf.append('['); } break; case '\\': if (useDouble) { buf.append("\\\\\\\\"); } else { buf.append("\\\\"); } break; case ']': if (useRegex) { if (useDouble) { buf.append("\\\\]"); } else { buf.append("\\]"); } } else { buf.append(']'); } break; default: if (useUnicode && ((' ' > c) || ('~' < c))) { String hex = Integer.toHexString(c); String padding; switch (hex.length()) { case 1: padding = "000"; break; case 2: padding = "00"; break; case 3: padding = "0"; break; default: padding = ""; } if (useDouble) { buf.append("\\\\"); } else { buf.append("\\"); } buf.append('u'); buf.append(padding); buf.append(hex); } else { buf.append(c); } } } /** * Escape the specified string into the specified string buffer. * * @param s The string. * @param buf The string buffer. * @param flags The escape flags. */ public static void escape(String s, StringBuffer buf, int flags) { final int length = s.length(); for (int i=0; i<length; i++) { escape(s.charAt(i), buf, flags); } } /** * Escape the specified character. * * @param c The character. * @param flags The escape flags. * @return The escape character as a string. */ public static String escape(char c, int flags) { StringBuffer buf = new StringBuffer(1); escape(c, buf, flags); return buf.toString(); } /** * Escape the specified string. * * @param s The string. * @param flags The escape flags. * @return The escaped string. */ public static String escape(String s, int flags) { StringBuffer buf = new StringBuffer(s.length()); escape(s, buf, flags); return buf.toString(); } /** * Unescape the specified string. This method unescapes standard * C-style escapes ('<code>\b</code>', '<code>\t</code>', * '<code>\n</code>', '<code>\f</code>', <code>'\r</code>', * '<code>\"</code>', '<code>\'</code>', and '<code>\\</code>') as * well as Java Unicode escapes. To support regex-like character * classes, it also unescapes '<code>\-</code>', '<code>\[</code>', * and '<code>\]</code>'. * * @param s The string to unescape. * @return The unescaped string. */ public static String unescape(String s) { if (-1 == s.indexOf('\\')) { return s; } final int length = s.length(); StringBuffer buf = new StringBuffer(length); for (int i=0; i<length; i++) { char c = s.charAt(i); if ('\\' != c) { buf.append(c); } else { i++; c = s.charAt(i); switch (c) { case 'b': buf.append('\b'); break; case 't': buf.append('\t'); break; case 'n': buf.append('\n'); break; case 'f': buf.append('\f'); break; case 'r': buf.append('\r'); break; case '"': buf.append('"'); break; case '\'': buf.append('\''); break; case '-': buf.append('-'); break; case '[': buf.append('['); break; case '\\': buf.append('\\'); break; case ']': buf.append(']'); break; case 'u': i += 4; int n; try { n = Integer.parseInt(s.substring(i-3, i+1), 16); } catch (NumberFormatException x) { throw new IllegalArgumentException("Illegal Unicode escape (\'\\u" + s.substring(i-3, i+1) + "\')"); } buf.append((char)n); break; default: throw new IllegalArgumentException("Illegal escaped character (\'\\" + c + "\')"); } } } return buf.toString(); } /** * Convert the specified list to a human-readable representation. * This method uses <code>toString()</code> for each element in the * specified list to generate a human-readable representation. * * @param l The list. * @return The human-readable representation. */ public static String format(List l) { final int length = l.size(); if (0 == length) { return ""; } StringBuffer buf = new StringBuffer(); if (2 == length) { buf.append(l.get(0)); buf.append(" and "); buf.append(l.get(1)); } else { boolean first = true; Iterator iter = l.iterator(); while (iter.hasNext()) { Object el = iter.next(); if (first) { first = false; } else if (iter.hasNext()) { buf.append(", "); } else { buf.append(", and "); } buf.append(el); } } return buf.toString(); } /** * Convert the specified identifier into a human-readable * description. This method breaks identifiers using an upper case * character for each word component into a string of space * separated lower case words. * * @param id The identifier. * @return The corresponding description. */ public static String toDescription(String id) { // Drop any suffixes. int idx = id.indexOf('$'); if (-1 != idx) { id = id.substring(0, idx); } // Count the number of upper case characters. final int length = id.length(); int upperCount = 0; for (int i=0; i<length; i++) { if (Character.isUpperCase(id.charAt(i))) { upperCount++; } } // No conversion is necessary if all characters are either lower // or upper case. if ((0 == upperCount) || (length == upperCount)) { return id; } // Do the actual conversion. StringBuffer buf = new StringBuffer(length + upperCount); for (int i=0; i<length; i++) { char c = id.charAt(i); if (Character.isUpperCase(c)) { if (0 != i) { buf.append(' '); } buf.append(Character.toLowerCase(c)); } else { buf.append(c); } } return buf.toString(); } /** * Print the specified (error or warning) message to the error * console. The message is prefixed with the specified location * information and followed by the corresponding source line, with a * caret ('<code>^</code>') indicating the column. * * @see #msg(String,Location,String,String[]) * * @param msg The message. * @param loc The source location. * @param source The source file, one line per array entry. */ public static void msg(String msg, Location loc, String[] source) { msg(msg, loc, null, source); } /** * Print the specified (error or warning) message to the error * console. If the specified location is non-null, the message is * prefixed with the location information. Otherwise, if the * specified context is non-null, the message is prefixed with the * context. If both the specified location and source are non-null, * the message is followed by the corresponding source line and a * caret ('<code>^</code>') to indicate the column. * * @param msg The message. * @param loc The source location. * @param context The alternative context. * @param source The source file, one line per array entry. */ public static void msg(String msg, Location loc, String context, String[] source) { if (null != loc) { System.err.print(loc.toString()); System.err.print(": "); } else if (null != context) { System.err.print(context); System.err.print(": "); } System.err.println(msg); if ((null != loc) && (null != source)) { String line; try { line = source[loc.line - 1]; } catch (ArrayIndexOutOfBoundsException x) { line = ""; } System.err.println(line); for (int i=0; i<loc.column; i++) { System.err.print(' '); } System.err.println('^'); } } }