/* * $Id * * Copyright (c) 2006 by the TeXlipse team. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html */ package net.sourceforge.texlipse.texparser; import org.eclipse.jface.text.BadLocationException; import org.eclipse.jface.text.IRegion; import org.eclipse.jface.text.Region; import org.eclipse.jface.text.source.ICharacterPairMatcher; /** * This class has some static functions which are often needed when parsing * latex files * * @author Boris von Loesch */ public class LatexParserUtils { // Indicate the anchor value "right" public final static int RIGHT = ICharacterPairMatcher.RIGHT; // Indicate the anchor value "left" public final static int LEFT = ICharacterPairMatcher.LEFT; /** * Checks whether the character at position <code>index</code> is escaped * by a backslash or not. * @param input * @param index * @return */ public static boolean isEscaped (String input, int index) { while (index > 0) { index--; if (input.charAt(index) != '\\') return false; else if (index == 0 || input.charAt(index - 1) != '\\') return true; index--; } return false; } /** * Returns the index of the first character of the line * where <code>index</code> is located. Legal line delimeters are * \n, \r, \r\n. * @param input * @param index * @return */ public static int getStartofLine(String input, int index) { int pos = index; char c; c = input.charAt(pos); while (pos > 0 && c != '\r' && c != '\n') { c = input.charAt(--pos); } if (pos == 0) return pos; else return pos + 1; } /** * Checks whether position at <code>index</code> is inside a LaTeX comment. * @param input the text * @param index * @return * @throws BadLocationException if index is out of bounds */ public static boolean isInsideComment(String input, int index){ int lastLine = getStartofLine(input, index); int p = lastLine; while (p < index) { char c = input.charAt(p); if (c == '%') { return true; } else if (c == '\\') { //Ignore next character p += 2; } else { p++; } } return false; } /** * Tests if the command at the given index is a correct command. * * @param input * @param commandLength The length of the command * @param index The index where the command occurs * @return * @throws BadLocationException if index is out of bounds */ private static boolean testForCommand(String input, int commandLength, int index){ if (isEscaped(input, index)) return false; // Check the character after the command if (index + commandLength == input.length() || !Character.isLetter(input.charAt(index + commandLength))) { if (!isInsideComment(input, index)) return true; } return false; } /** * Returns the position of the first occurence of the command starting at * the specified index * * @param input * @param command The Latex command starting with a backslash (\) * @param fromIndex The index from where to start the search * @return The position of the command, or -1 if the command is not * contained in the String */ public static int findCommand(String input, String command, int fromIndex) { int pos = input.indexOf(command, fromIndex); while (pos != -1) { if (testForCommand(input, command.length(), pos)) return pos; pos = input.indexOf(command, pos + command.length()); } return -1; } /** * Returns the position of the last occurence of the command backward * starting at the specified index * * @param input * @param command The Latex command starting with a backslash (\) * @param fromIndex The index from which to backward start the search * @return The position of the command, or -1 if the command is not * contained in the String */ private static int findLastCommand(String input, String command, int fromIndex) { int pos = input.lastIndexOf(command, fromIndex); while (pos != -1) { if (testForCommand(input, command.length(), pos)) return pos; pos = input.lastIndexOf(command, pos-1); } return -1; } /** * Finds the peercharacter for opening character (can be either "left" or * "right" character. The direction of the search is determined by the achor * (i.e. anchor==LEFT -> forward search and opening character is "left", or * anchor==RIGHT -> backward search and opening character is "right") * * @param input * @param offset * @param anchor Must be either <code>LEFT</code> or <code>RIGHT</code> * @param opening * @param closing matching character for opening * @return index of the matching closing character, or -1 if the search * failed */ public static int findPeerChar(String input, int offset, int anchor, char opening, char closing) { int stack = 1, index; index = offset; while (stack > 0) { if (anchor == LEFT) { index++; } else { index--; } if ((index < 0) || (index >= input.length())) { index = -1; break; } char c = input.charAt(index); if (c == closing && (!isEscaped(input, index)) && (!isInsideComment(input, index))) stack--; else if (c == opening && (!isEscaped(input, index)) && (!isInsideComment(input, index))) stack++; } return index; } /** * Returns the first mandatory argument of the command * * @param input * @param index The index at or after the beginning of the command and before the * argument * @return The argument without braces, null if there is no valid argument * @throws BadLocationException if index is out of bounds */ public static IRegion getCommandArgument(String input, int index){ int pos = index; final int length = input.length(); if (input.charAt(index) == '\\') pos++; while (pos < length && Character.isLetter(input.charAt(pos))) pos++; while (pos < length && Character.isWhitespace(input.charAt(pos))) pos++; if (pos == length) return null; if (input.charAt(pos) == '{') { int end = findPeerChar(input, pos + 1, LEFT, '{', '}'); if (end == -1) return null; return new Region (pos + 1, end - pos - 1); } return null; } /** * Gets the command at the specified index. It returns the command if the index position is either * inside the command string or inside the first mandatory argument * @param input * @param index * @return null if it could not find any command * @throws BadLocationException */ public static IRegion getCommand (String input, int index){ if ("".equals(input)) return null; int pos = index; if (pos >= input.length()) { pos = input.length() - 1; } if (pos < 0) return null; if (isInsideComment(input, pos)) return null; boolean whiteSpace = false; if (pos > 0 && input.charAt(pos) == '}') pos--; char c = input.charAt(pos); while (!((pos <= 0 || c == '\\' || c == '{' || c == '}' || c == '%') && (!isEscaped(input, pos)))) { if (Character.isWhitespace(c)) whiteSpace = true; pos--; c = input.charAt(pos); } if (c == '\\' && whiteSpace == false) { int l = 1; while (pos + l < input.length() && Character.isLetter(input.charAt(pos + l))) l++; //A command consist of a \ and at least one letter if (l == 1) return null; return new Region(pos, l); } if (c == '{') { if (pos == 0) return null; int l = -1; int ws = 0; c = input.charAt(pos + l); while (pos + l > 0 && Character.isWhitespace(c)) { ws--; l--; c = input.charAt(pos + l); } while (pos + l > 0 && Character.isLetter(c)) { l--; c = input.charAt(pos + l); } if (pos + l >= 0 && c == '\\' && (!isEscaped(input, pos+l))) { return new Region(pos + l, -l+ws); } } return null; } private static IRegion findEnvironment(String input, String envName, String command, int fromIndex) { int pos = input.indexOf("{" + envName + "}", fromIndex + command.length()); while (pos != -1) { int end = pos + envName.length() + 2; // Search for the command int beginStart = findLastCommand(input, command, pos); if (beginStart != -1 && beginStart >= fromIndex) { // Check for whitespaces between \begin and {...} while (pos != beginStart + command.length() && Character.isWhitespace(input.charAt(--pos))) ; if (pos == beginStart + command.length()) { return new Region(beginStart, end - beginStart); } } pos = input.indexOf("{" + envName + "}", pos + envName.length() + 2); } return null; } private static IRegion findLastEnvironment(String input, String envName, String command, int fromIndex) { int pos = input.lastIndexOf("{" + envName + "}", fromIndex); while (pos != -1) { int end = pos + envName.length() + 2; // Search for the command int beginStart = findLastCommand(input, command, pos); if (beginStart != -1 && beginStart <= fromIndex) { // Check for whitespaces between \command and {...} while (pos != beginStart + command.length() && Character.isWhitespace(input.charAt(--pos))) ; if (pos == beginStart + command.length()) { return new Region(beginStart, end - beginStart); } } pos = input.lastIndexOf("{" + envName + "}", pos-1); } return null; } /** * Returns the region (offset & length) of \begin{envName} * * @param input * @param envName name of the environment * @param fromIndex The index from which to start the search * @return */ public static IRegion findBeginEnvironment(String input, String envName, int fromIndex) { return findEnvironment(input, envName, "\\begin", fromIndex); } /** * Returns the region (offset & length) of \end{envName} * * @param input * @param envName name of the environment * @param fromIndex The index from which to start the search * @return */ public static IRegion findEndEnvironment(String input, String envName, int fromIndex) { return findEnvironment(input, envName, "\\end", fromIndex); } /** * Finds for a \begin{env} the matching \end{env}. * @param input * @param envName Name of the environment, e.g. "itemize" * @param beginIndex Must be at the start or inside of \begin{env} * @return The region of the \end{env} command or null if the end was not found */ public static IRegion findMatchingEndEnvironment(String input, String envName, int beginIndex) { int pos = beginIndex + 1; IRegion nextEnd, nextBegin; int level = 0; do { nextEnd = findEndEnvironment(input, envName, pos); nextBegin = findBeginEnvironment(input, envName, pos); if (nextEnd == null) return null; if (nextBegin == null) { level--; pos = nextEnd.getOffset() + envName.length() + 6; } else { if (nextBegin.getOffset() > nextEnd.getOffset()) level--; else level++; pos = nextBegin.getOffset() + envName.length() + 8; } } while (level >= 0); return nextEnd; } /** * Finds for an \end{env} the matching \begin{env}. * @param input * @param envName Name of the environment, e.g. "itemize" * @param beginIndex Must be at the start of \end{env} * @return The region of the \begin{env} command or null if the end was not found */ public static IRegion findMatchingBeginEnvironment(String input, String envName, int beginIndex) { int pos = beginIndex; IRegion nextEnd, nextBegin; int level = 0; do { nextEnd = findLastEnvironment(input, envName, "\\end", pos); nextBegin = findLastEnvironment(input, envName, "\\begin", pos); if (nextBegin == null) return null; if (nextEnd == null) { level--; pos = nextBegin.getOffset(); } else { if (nextEnd.getOffset() > nextBegin.getOffset()) level++; else level--; pos = nextEnd.getOffset(); } } while (level >= 0); return nextBegin; } }