/* * * Copyright (c) 2004-2011 by the TeXlapse Team. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html */ package net.sourceforge.texlipse.texparser; import java.io.IOException; import java.io.PushbackReader; import java.io.StringReader; import java.util.ArrayList; import java.util.List; import net.sourceforge.texlipse.model.DocumentReference; import net.sourceforge.texlipse.model.OutlineNode; import net.sourceforge.texlipse.model.ParseErrorMessage; import net.sourceforge.texlipse.model.ReferenceEntry; import net.sourceforge.texlipse.model.TexCommandEntry; import net.sourceforge.texlipse.texparser.lexer.LexerException; import org.eclipse.core.resources.IMarker; import org.eclipse.jface.text.IDocument; import org.eclipse.jface.text.IRegion; /** * Parser front-end for parsing LaTeX files and extracting some relevant * information from them. The front-end provides an upper-level interface * to the parser and does some pre-processing and error-handling. * * @author Oskar Ojala */ public class TexParser { private IDocument inputDoc; private LatexParser lparser; // private LatexLexer llexer; private List<ParseErrorMessage> errors; private boolean fatalErrors; private String preamble; /** * @param input The string representing the document to parse */ public TexParser(IDocument input) { this.inputDoc = input; this.lparser = new LatexParser(); this.fatalErrors = false; } /** * Removes trailing whitespace from the document. This is needed since * the lexer and Eclipse have a different view of how the positions in * the trailing whitespace work out, so it's best to just remove it. * * @param input The document to process * @return The document with trailing whitespace removed * * @see Character.isWhitespace */ private String rmTrailingWhitespace(String input) { int lastChar = input.length() - 1; while (lastChar >= 0 && Character.isWhitespace(input.charAt(lastChar))) lastChar--; lastChar++; if (lastChar < input.length()) return input.substring(0, lastChar); return input; } static String extractLaTeXPreamble(String input) { /*if (LatexParserUtils.findCommand(input, "\\documentclass", 0) == -1 && LatexParserUtils.findCommand(input, "\\documentstyle", 0) == -1) { return null; }*/ IRegion region = LatexParserUtils.findBeginEnvironment(input, "document", 0); if (region != null) { return input.substring(0, region.getOffset() + region.getLength()); } else { return null; } } /** * Extracts the preamble (if there is any) and stores a copy of it * in the field <code>preamble</code>. The preamble is assumed to * exist if the string contain the \documentclass command and it ends * where the document-environment begins or at the end of file. * * The preamble stored will include the \begin{document} -command. * * @param input The document */ private void extractPreamble(String input) { /* // These regexps lead to stack overflows in the regexp parser in some occasions. // (?:\r|\n|^)(?:(?:\\%|[^%\r\n])*?(?:\\%|[^\\%]))?\\document(?:class|style)(?:\W|$) Pattern docclass = Pattern.compile("(?:\\r|\\n|^)(?:(?:\\\\%|[^%\\r\\n])*?(?:\\\\%|[^\\\\%]))?\\\\document(?:class|style)(?:\\W|$)"); Matcher m = docclass.matcher(input); if (m.find()) { // (?:\r|\n|^)(?:(?:\\%|[^%\r\n])*?(?:\\%|[^\\%]))?\\begin\s*\{document\} Pattern begindoc = Pattern.compile("(?:\\r|\\n|^)(?:(?:\\\\%|[^%\\r\\n])*?(?:\\\\%|[^\\\\%]))?\\\\begin\\s*\\{document\\}"); Matcher m2 = begindoc.matcher(input); if (m2.find(m.end() - 1)) { this.preamble = input.substring(0, m2.end()); return; } } this.preamble = null; return;*/ this.preamble = extractLaTeXPreamble(input); } /** * Parses the input * * @throws IOException */ public void parseDocument(boolean checkForMissingSections) throws IOException { parseDocument(inputDoc.get(), checkForMissingSections); } /** * Parses the document * * @throws IOException */ public void parseDocument(String input, boolean checkForMissingSections) throws IOException { // remove trailing ws (this is because a discrepancy in the lexer's // and IDocument's line counting for trailing whitespace) input = this.rmTrailingWhitespace(input); this.extractPreamble(input); try { // start the parse LatexLexer lexer = new LatexLexer(new PushbackReader(new StringReader(input), 4096)); //LatexLexer lexer = this.getLexer(input); if (this.preamble != null) { OutlineNode on = new OutlineNode("Preamble", OutlineNode.TYPE_PREAMBLE, 1, null); lparser.parse(lexer, on, checkForMissingSections); } else { lparser.parse(lexer, checkForMissingSections); } this.errors = lparser.getErrors(); this.fatalErrors = lparser.isFatalErrors(); } catch (LexerException e) { // we must parse the lexer exception into a suitable format String msg = e.getMessage(); int first = msg.indexOf('['); int last = msg.indexOf(']'); String numseq = msg.substring(first + 1, last); String[] numbers = numseq.split(","); this.errors = new ArrayList<ParseErrorMessage>(1); this.errors.add(new ParseErrorMessage(Integer.parseInt(numbers[0]), Integer.parseInt(numbers[1]), 2, msg.substring(last+2), IMarker.SEVERITY_ERROR)); this.fatalErrors = true; } } /** * @return The outline tree */ public ArrayList<OutlineNode> getOutlineTree() { return lparser.getOutlineTree(); } /** * @return The labels <code>ArrayList<ReferenceEntry></code> */ public List<ReferenceEntry> getLabels() { List<ReferenceEntry> labels = lparser.getLabels(); for (ReferenceEntry label : labels) { label.setLabelInfo(inputDoc.get()); } return labels; } /** * @return The cite-references */ public List<DocumentReference> getCites() { return lparser.getCites(); } /** * @return Returns the errors. */ public List<ParseErrorMessage> getErrors() { return errors; } /** * @return The bibliography files to include */ public String[] getBibs() { return lparser.getBibs(); } /** * @return The style of the bibiliography entries */ public String getBibstyle() { return lparser.getBibstyle(); } /** * @return Whether Biblatex mode is activated */ public boolean isBiblatexMode() { return lparser.isBiblatexMode(); } /** * @return The selected biblatex backend */ public String getBiblatexBackend() { return lparser.getBiblatexBackend(); } /** * @return Whether the parsed file contains a bibliography print command. * This is only relevant if biblatex mode is enabled. */ public boolean isLocalBib() { return lparser.isLocalBib(); } /** * @return Returns the preamble. */ public String getPreamble() { return preamble; } /** * @return True if the document has an index, false otherwise */ public boolean isIndex() { return lparser.isIndex(); } /** * @return True if there were fatal errors due to which parsing couldn't be successfully completed */ public boolean isFatalErrors() { return fatalErrors; } /** * @return Get all \ref -references */ public List<DocumentReference> getRefs() { return lparser.getRefs(); } /** * @return Get user-defined commands */ public ArrayList<TexCommandEntry> getCommands() { return lparser.getCommands(); } /** * @return The tasks to mark */ public List<ParseErrorMessage> getTasks() { return lparser.getTasks(); } /** * @return The input commands in this document */ public List<OutlineNode> getInputs() { return lparser.getInputs(); } }