/* * $Id$ * * Copyright (c) 2004-2005 by the TeXlapse Team. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html */ package net.sourceforge.texlipse.texparser; import java.io.PushbackReader; import java.util.HashSet; import net.sourceforge.texlipse.texparser.lexer.Lexer; import net.sourceforge.texlipse.texparser.lexer.LexerException; import net.sourceforge.texlipse.texparser.node.EOF; import net.sourceforge.texlipse.texparser.node.TArgument; import net.sourceforge.texlipse.texparser.node.TBverbatim; import net.sourceforge.texlipse.texparser.node.TCnew; import net.sourceforge.texlipse.texparser.node.TCword; import net.sourceforge.texlipse.texparser.node.TEverbatim; import net.sourceforge.texlipse.texparser.node.TLBrace; import net.sourceforge.texlipse.texparser.node.TOptargument; import net.sourceforge.texlipse.texparser.node.TRBrace; import net.sourceforge.texlipse.texparser.node.TRBracket; import net.sourceforge.texlipse.texparser.node.TVtext; import net.sourceforge.texlipse.texparser.node.TWhitespace; import net.sourceforge.texlipse.texparser.node.Token; /** * Lexer for LaTeX -files. Implements tokenizing curly brace-enclosed * areas and verb and verbatim environments. * * @author Oskar Ojala */ public class LatexLexer extends Lexer { /** * Counter for braces */ private int count; private Token argStart; //private Token verbStart; private StringBuffer text; /** * Terminator char for \verb */ private char startChar; private int vline, vpos; private HashSet<String> defCommands; private boolean commandDef; /** * Creates a new lexer. * * @param in The reader to read the character stream from */ public LatexLexer(PushbackReader in) { super(in); defCommands = new HashSet<String>(); commandDef = false; } /** * We define a filter that recognizes braced strings and verbatims */ protected void filter() throws LexerException { if (state.equals(State.COMCAPT)) { // if (token instanceof TCword) { // System.out.println(token.getText().substring(1)); // System.out.println(defCommands.contains(token.getText().substring(1))); // } if (token instanceof TCnew) { commandDef = true; } else if (token instanceof TCword && !commandDef && !defCommands.contains(token.getText().substring(1))) { state = State.NORMAL; return; } // if we're to capture a brace-block } else if (state.equals(State.BLOCKCAPT)) { // if we are just entering this state if (argStart == null) { argStart = token; text = new StringBuffer(""); count = 1; token = null; // continue to scan the input. } else { if (token instanceof TLBrace) count++; else if (token instanceof TRBrace) count--; else if (token instanceof EOF) { throw new LexerException("[" + argStart.getLine() + "," + (argStart.getPos() - 1) + "] There's a } missing: unexpected end of file"); } if (count != 0) { // accumulate the string and continue to scan the input. if (token instanceof TWhitespace) text.append(" "); else text.append(token.getText()); token = null; } else { TArgument targ = new TArgument(text.toString(), argStart.getLine(), argStart.getPos()); // emit the string token = targ; state = State.COMCAPT; argStart = null; commandDef = false; } } // Capture optional argument } else if (state.equals(State.OPTCAPT)) { if (argStart == null) { argStart = token; text = new StringBuffer(""); count = 0; token = null; // continue to scan the input. } else { if (token instanceof TLBrace) count++; else if (token instanceof TRBrace) count--; else if (token instanceof EOF) { throw new LexerException("[" + argStart.getLine() + "," + argStart.getPos() + "] There's a } or a ] missing: unexpected end of file"); } if (count != 0 || !(token instanceof TRBracket)) { // accumulate the string and continue to scan the input. if (token instanceof TWhitespace) text.append(" "); else text.append(token.getText()); token = null; } else { TOptargument tsl = new TOptargument(text.toString(), argStart.getLine(), argStart.getPos()); // emit the string token = tsl; state = State.COMCAPT; argStart = null; commandDef = false; } } } else if (state.equals(State.VERBATIM)) { // we store some contents to be able to code fold if (token instanceof TBverbatim) { argStart = token; text = new StringBuffer(token.getText()); vline = token.getLine(); vpos = token.getPos(); } else if (token instanceof TVtext || token instanceof TWhitespace) { text.append(token.getText()); token = null; } else if (token instanceof EOF) { throw new LexerException("[" + vline + "," + vpos + "] The verbatim environment isn't closed: unexpected end of file"); } } else if (state.equals(State.VERB)) { if (token instanceof TVtext) { if (argStart == null) { argStart = token; startChar = token.getText().charAt(0); } else { if (startChar == token.getText().charAt(0)) { state = State.NORMAL; startChar = '\0'; argStart = null; } } token = null; } else if (token instanceof EOF) { throw new LexerException("[" + argStart.getLine() + "," + argStart.getPos() + "] The verb-command isn't closed: unexpected end of file"); } } else if (state.equals(State.NORMAL)) { if (token instanceof TEverbatim) { String startCommand = argStart.getText().substring(argStart.getText().indexOf("{")); String endCommand = token.getText().substring(token.getText().indexOf("{")); if (!startCommand.equals(endCommand)) { throw new LexerException("[" + vline + "," + vpos + "] The verbatim environment isn't closed with the correct command"); } text.append(token.getText()); token = new TVtext(text.toString(), vline, vpos); argStart = null; } } } public void registerCommand(String command) { // System.out.println("---------------------"); // System.out.println(command); // System.out.println("---------------------"); defCommands.add(command); } }