package eug.parser; import eug.shared.GenericList; import eug.shared.GenericObject; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.StringReader; import javax.swing.JOptionPane; /** * This class handles the parsing of .eug files. For most uses, you will want to * use {@link EUGFileIO} instead. * @author Michael Myers */ public class CWordFile { /** * The tokenizer used to scan the input file. */ protected EUGScanner tokenizer; /** * The current token. */ protected String m_word; private String lastComment; /** * The type of the {@link #m_word current token}. */ protected TokenType tokenType; /** * The number of errors encountered. */ protected int m_errors = 0; /** * The level of debugging, on a scale of 0 to 10. */ private static final int debugLevel = 2; // Loading options protected ParserSettings settings; // /** // * Whether to display info on how long the loading took. // */ // public static boolean timingInfo = false; // // /** // * Whether to ignore comment tokens. // */ // protected boolean commentsIgnored = true; // // /** // * Whether lists are allowed to be parsed. // * @since EUGFile 1.02.00 // */ // protected boolean allowLists = true; // // /** // * Whether single words (that are not part of a variable) can be parsed. // * Any such token will be given a default value of 1 (e.g., 1001 1002 will // * be parsed as 1001 = 1 1002 = 1). // * <p> // * This is only necessary for reading Victoria AI files, as far as I know. // * @since EUGFile 1.02.00 // */ // protected boolean allowSingleTokens = false; // // /** // * Whether the strictest checking should be enabled. // * @since EUGFile 1.02.00 // */ // protected boolean warningsAreErrors = false; // // /** // * Whether we will try to recover after, e.g., an extra '}'. // * @since EUGfile 1.06.00pre1 // */ // protected boolean tryToRecover = true; // End of loading options // used during loading private int newlinesSinceComment = 0; /** * Creates a new instance of CWordFile. */ public CWordFile() { settings = ParserSettings.getDefaults(); } public CWordFile(ParserSettings settings) { this.settings = settings; } //file handling /** * Open {@link #tokenizer the tokenizer} with the given filename. * @param filename the name of the input file. * @return <CODE>true</CODE> if the stream was successfully opened. */ private boolean openInStream(String filename) { //try to open input file try { final File inFile = new File(filename); if (inFile.length() == 0) { tokenizer = new EUGScanner(new FileReader(inFile)); return true; } tokenizer = new EUGScanner(new BufferedReader(new FileReader(inFile), Math.min(65536, (int)inFile.length()))); // safeguard for very large files tokenizer.setCommentsIgnored(settings.isIgnoreComments()); //tokenizer.setFileName(filename); return true; } catch (FileNotFoundException ex) { return false; } } /** * Open {@link #tokenizer the tokenizer} with a StringReader reading from the * given string. * @param string a string in the .eug format. */ private void openStringStream(String string) { tokenizer = new EUGScanner(new StringReader(string)); //tokenizer.setFileName("(string)"); } /** * Close {@link #tokenizer the tokenizer}. */ private void closeInStream() { if (tokenizer != null) tokenizer.close(); } /** * Gets the next token from {@link #tokenizer the tokenizer}. * @see EUGScanner#nextToken() */ protected final void getNextToken() { tokenType = tokenizer.nextToken(); m_word = tokenizer.lastStr(); } /** * Loads a {@link GenericObject} tree from the given string. * @param string a string containing an EUG tree. * @return the <code>GenericObject</code> tree loaded from the string, or * <code>null</code> if there was an error during loading. */ public GenericObject loadFromString(String string) { final long startTime = System.nanoTime(); //notify about loading assert debug("Loading from string.", 2); openStringStream(string); GenericObject root = null; try { root = new GenericObject(); GenericObject curr = readObject(root); while (tokenType != TokenType.EOF) { curr = readObject(curr); } } catch (ParserException ex) { if (!settings.isTryToRecover()) root = null; } finally { closeInStream(); } if (m_errors > 0) System.out.println("There were " + m_errors + " errors during loading."); if (settings.isPrintTimingInfo()) System.out.println("Loading took " + (System.nanoTime()-startTime) + " ns.\n"); assert debug("The node read was:\n" + root, 6); return root; } /** * Loads a {@link GenericObject} tree from the given filename. * @param filename the name of the file containing an EUG tree (e.g., a savegame * or an event file). * @return the <code>GenericObject</code> tree loaded from the file, or * <code>null</code> if there was an error during loading. */ public GenericObject load(final String filename) { final long startTime = System.nanoTime(); if (!openInStream(filename)) return null; //notify about loading if (settings.isPrintTimingInfo()) System.out.println("Loading " + filename + "."); GenericObject root = null; try { root = new GenericObject(); GenericObject curr = readObject(root); //reading loop (per line mainly) while (tokenType != TokenType.EOF) { curr = readObject(curr); } } catch (ParserException ex) { System.err.println(ex.getMessage()); if (!settings.isTryToRecover()) root = null; } finally { closeInStream(); } //Tell some things about the current state: if (m_errors > 0) System.out.println("There were " + m_errors + " errors during loading."); // System.out.println("Read " + tokenizer.getCharsRead() + " bytes."); if (settings.isPrintTimingInfo()) System.out.println("Loading took " + (System.nanoTime()-startTime) + " ns.\n"); return root; } /** * Reads an object from the current stream. * @param current_node the node to read into, which cannot be <code>null</code>. * @return the next node to read into, which can be either the parameter, * the parameter's parent, or <code>null</code>. */ private final GenericObject readObject(GenericObject current_node) { if (current_node == null) { throw new ParserException("node is null"); //error("Cannot read into a null node!"); //return null; } // Start by reading a word. getNextToken(); switch (tokenType) { case IDENT: String name = m_word; getNextToken(); // A little weirdness here, brought on by Java's lack of a // 'goto' keyword. Note that if we break out of the switch // normally, the while loop will also be broken. Its only // function is to restart the switch if we read a comment. findType: while (true) { switch (tokenType) { case COMMENT: // fall through to NEWLINE case NEWLINE: do { getNextToken(); } while (tokenType == TokenType.COMMENT || tokenType == TokenType.NEWLINE); continue findType; // case LIST: // if (!settings.isAllowLists()) { // warn("Read illegal token: "+tokenizer.getList()); // break; // } // // if (settings.isIgnoreComments()) { // current_node.addList(name, tokenizer.getList()); // } else { // current_node.addList(name, tokenizer.getList(), // lastComment, readInlineComment()); // } // lastComment = null; // break; case ULSTRING: case DLSTRING: String val = m_word; boolean quotes = (tokenType == TokenType.DLSTRING); current_node.addString(name, val, quotes, lastComment, readInlineComment()); lastComment = null; break; case LBRACE: // Lookahead tokenizer.setCommentsIgnored(true); TokenType type = tokenizer.nextToken(); tokenizer.pushBack(); tokenizer.setCommentsIgnored(settings.isIgnoreComments()); if (type == TokenType.DLSTRING || type == TokenType.ULSTRING) { // it must be a list if (!settings.isAllowLists()) { warn("Read list when lists are not allowed"); break; } current_node = tryToReadList(current_node, name); lastComment = null; } else { GenericObject tmpObj = current_node.createChild(name); if (!settings.isIgnoreComments()) tmpObj.setHeadComment(lastComment); lastComment = null; current_node = tmpObj; } break; case EOF: warn("Reached end of file after " + name + " (unclosed bracket somewhere?)"); break; default: warn("Unexpected token type: " + tokenType); break; } // If we get here normally, we shouldn't loop again break; } break; case RBRACE: // Reached end of a node. if (current_node.getParent() == null) { throw new ParserException("Unmatched right brace on line " + tokenizer.getLine() + ", column " + tokenizer.getColumn()); } if (!settings.isIgnoreComments()) { current_node.setInlineComment(readInlineComment()); } current_node = current_node.getParent(); lastComment = null; break; case LBRACE: // Create nameless node current_node = current_node.createChild(""); break; case EOF: if (current_node.isRoot() || settings.isTryToRecover()) current_node = null; else throw new ParserException("Node " + current_node.name + " does not terminate"); break; case COMMENT: // Here I'm trying to avoid the problem of file header comments // being merged into event header comments. I keep a record of // how many newline characters have been read since the last // comment ended. If >= 2, the old comment is added to the root // node. if (lastComment == null || lastComment.length() == 0) { lastComment = m_word; newlinesSinceComment = 0; } else if (newlinesSinceComment >= 2) { current_node.getRoot().addGeneralComment(lastComment); lastComment = m_word; // Reset the newline count. newlinesSinceComment = 0; } else { // Append to previous comment string. lastComment += "\n" + m_word; newlinesSinceComment = 0; } break; case ULSTRING: // Changed in 1.01.00 to allow for Victoria AI files with // things like: // technology = { // preference = { 1001 1002 3301 = 20 } // } // So, we add the unmatched string with a default value of 1. // Changed in 1.02.00 to check allowSingleTokens if (settings.isAllowSingleTokens()) { current_node.addString(m_word, "1", false, lastComment, readInlineComment()); } else { warn("Warning: Illegal string: " + m_word); } break; case DLSTRING: // Shouldn't happen. warn("Warning: Illegal string: \"" + m_word + "\""); break; case NEWLINE: // Only used for matching header comments to objects. newlinesSinceComment++; break; default: // Shouldn't happen. System.err.println("Token type: " + tokenType + "\nm_word: " + m_word); break; } return current_node; } /** * Tries to read a list as a child of the given object with the given name. * If an invalid token is read, the list is converted into an object and * all list entries are turned into variables with a value of 1. * @return the node that should be read into after this method has finished. */ private GenericObject tryToReadList(final GenericObject parent, final String name) { final GenericList list = parent.createList(name); if (!settings.isIgnoreComments() && lastComment != null) list.setHeaderComment(lastComment); boolean ok = true; readlist: while (true) { getNextToken(); switch (tokenType) { case ULSTRING: list.add(m_word, false); break; case DLSTRING: list.add(m_word, true); break; case RBRACE: break readlist; case COMMENT: case NEWLINE: break; default: // warn("Unexpected list item at line " + tokenizer.getLine() + // " column " + tokenizer.getColumn()); ok = false; break readlist; } } // There are two ways that the loop could be broken: // 1. An invalid token (IDENT) was read. // -> Turn the list into an object and return it. // 2. A right brace was read. // -> Check for an inline comment and return the parent. if (!ok) { GenericObject child = parent.createChild(name); for (String var : list) { if (var.contains(" ")) { child.addString("\"" + var + "\"", "1"); } else { child.addString(var, "1"); } } if (!"".equals(list.getHeaderComment())) child.setHeadComment(list.getHeaderComment()); parent.removeList(list); tokenizer.pushBack(); // because we didn't do anything with the invalid token return child; } else { if (!settings.isIgnoreComments()) { list.setInlineComment(readInlineComment()); lastComment = null; } return parent; } } /** * Read a comment and then a newline, if possible. If there is no comment * (or if {@link #commentsIgnored}), returns <code>null</code>. */ private String readInlineComment() { if (settings.isIgnoreComments()) return null; String comment = null; getNextToken(); if (tokenType == TokenType.COMMENT) { comment = m_word; // Got the comment; now look for a newline. // System.out.println(comment); getNextToken(); if (tokenType != TokenType.NEWLINE) { // Anything other than a newline shouldn't be used here. // System.out.println(tokenType); tokenizer.pushBack(); } } else if (tokenType == TokenType.NEWLINE) { return null; } else tokenizer.pushBack(); return comment; } // /** // * Tries to read a list of strings. If an identifier is encountered, returns // * <code>null</code>. // */ // private List<String> readList() { // List<String> ret = new ArrayList<String>(); // outer: while (true) { // getNextToken(); // switch (tokenType) { // case COMMENT: // case NEWLINE: // continue outer; // case IDENT: // // } // } // } /** * Displays an error dialog with the given text, and also prints it to * System.err. * @param text the error message to display. */ private void error(final String text) { System.out.println(text); //+" on line "+tokenizer.getLine()+", column "+tokenizer.getColumn()); JOptionPane.showMessageDialog(null, text, "Error", JOptionPane.ERROR_MESSAGE); m_errors++; } /** * Prints a warning message with the given text. * If {@link #warningsAreErrors}, this method will also throw a * {@link ParserException}, so any method that calls this should check for one. * @param text the warning text to display. * @since EUGFile 1.02.00 */ private void warn(final String text) { if (settings.isPrintWarnings()) System.err.println(text); if (settings.isWarningsAreErrors()) throw new ParserException(text); } /** * Prints the given text to standard out if <code>level</code> <= * {@link #debugLevel}. * @param text the text to print out. * @param level the level of importance. * @return <code>true</code>. This is only so that this method can be used * in assertions. */ private static boolean debug(final String text, int level) { if (level <= debugLevel) { System.out.println("DEBUG: " + text); } return true; } public void setParserSettings(ParserSettings settings) { this.settings = settings; } public ParserSettings getParserSettings() { return settings.clone(); } public boolean commentsIgnored() { return settings.isIgnoreComments(); } public void setCommentsIgnored(boolean ignored) { settings.setIgnoreComments(ignored); } public boolean allowsLists() { return settings.isAllowLists(); } public void setAllowLists(boolean allowLists) { settings.setAllowLists(allowLists); } public boolean allowsSingleTokens() { return settings.isAllowSingleTokens(); } public void setAllowSingleTokens(boolean allowSingleTokens) { settings.setAllowSingleTokens(allowSingleTokens); } public boolean warningsAreErrors() { return settings.isWarningsAreErrors(); } public void setWarningsAreErrors(boolean warningsAreErrors) { settings.setWarningsAreErrors(warningsAreErrors); } public boolean isTryToRecover() { return settings.isTryToRecover(); } public void setTryToRecover(boolean tryToRecover) { settings.setTryToRecover(tryToRecover); } }