/*
*
* Copyright (c) 2004-2011 by the TeXlapse Team.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*/
package net.sourceforge.texlipse.texparser;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import net.sourceforge.texlipse.model.ReferenceEntry;
import net.sourceforge.texlipse.model.TexCommandEntry;
import net.sourceforge.texlipse.texparser.lexer.LexerException;
import net.sourceforge.texlipse.texparser.node.EOF;
import net.sourceforge.texlipse.texparser.node.TArgument;
import net.sourceforge.texlipse.texparser.node.TCbib;
import net.sourceforge.texlipse.texparser.node.TCbibstyle;
import net.sourceforge.texlipse.texparser.node.TClabel;
import net.sourceforge.texlipse.texparser.node.TCnew;
import net.sourceforge.texlipse.texparser.node.TCommentline;
import net.sourceforge.texlipse.texparser.node.TCpackage;
import net.sourceforge.texlipse.texparser.node.TCpindex;
import net.sourceforge.texlipse.texparser.node.TOptargument;
import net.sourceforge.texlipse.texparser.node.TStar;
import net.sourceforge.texlipse.texparser.node.TWhitespace;
import net.sourceforge.texlipse.texparser.node.Token;
/**
* A LaTeX parser for extracting labels, BibTeX information and
* whether an index is to be generated or not.
*
* @author Oskar Ojala
*/
public class LatexRefExtractingParser {

    /** Name of the package whose loading switches on biblatex mode. */
    private static final String BIBLATEX_PACKAGE = "biblatex";

    /** Key of the biblatex package option that selects the backend. */
    private static final String BACKEND_OPTION = "backend";

    /** Labels collected from <code>\label</code> commands. */
    private ArrayList<ReferenceEntry> labels;

    /** Commands collected from <code>\newcommand</code>-style definitions. */
    private ArrayList<TexCommandEntry> commands;

    /** Names of the bibliography files, one entry per comma-separated item. */
    private List<String> bibs;

    /** Argument of the last bibliography style command seen, or null. */
    private String bibstyle;

    /** True once the biblatex package has been seen in a package command. */
    private boolean biblatexMode;

    /** Value of the biblatex <code>backend</code> option, or null if none was found. */
    private String biblatexBackend;

    /** True once an index-generating command has been seen. */
    private boolean index;

    /** True once the lexer has aborted a parse with a {@link LexerException}. */
    private boolean fatalErrors = false;

    /** Preamble of the most recently parsed document. */
    private String preamble;

    /**
     * Extracts the preamble (if there is any) and stores a copy of it
     * in the field <code>preamble</code>. The actual extraction is
     * delegated to {@link TexParser#extractLaTeXPreamble(String)}.
     *
     * The preamble stored is expected to include the \begin{document} command.
     *
     * @param input The document
     */
    private void extractPreamble(String input) {
        this.preamble = TexParser.extractLaTeXPreamble(input);
    }

    /**
     * Creates the result containers and resets the biblatex and index flags.
     * Called only from the constructor.
     */
    private void initializeDatastructs() {
        // labels are expected to be numerous, so reserve some space up front
        this.labels = new ArrayList<ReferenceEntry>(100);
        this.commands = new ArrayList<TexCommandEntry>();
        this.bibs = new ArrayList<String>();
        this.biblatexMode = false;
        this.biblatexBackend = null;
        this.index = false;
    }

    /**
     * Evaluates package loading options for biblatex and locates the backend
     * option.
     * <p>
     * The options are split at commas and each <code>key=value</code> pair is
     * inspected; whitespace around the key and the value is ignored. Only an
     * option whose key is exactly <code>backend</code> is accepted, so the
     * option is found at any position in the list (including the very first
     * one) and keys that merely end in "backend" are not mistaken for it.
     *
     * @param options string with options in format <code>key=value</code>,
     *      or simply <code>key</code>, each separated by commas
     * @return selected biblatex backend, if it was selected; otherwise null
     *      (also when the option is present but has an empty value)
     */
    private static String findBiblatexBackend(String options) {
        if (options == null) {
            return null;
        }
        for (String option : options.split(",")) {
            int eqIdx = option.indexOf('=');
            if (eqIdx < 0) {
                // plain key without a value, cannot be the backend selection
                continue;
            }
            String key = option.substring(0, eqIdx).trim();
            if (BACKEND_OPTION.equals(key)) {
                String value = option.substring(eqIdx + 1).trim();
                return value.length() > 0 ? value : null;
            }
        }
        return null;
    }

    /**
     * Checks whether a package command argument names the given package.
     * The argument may be a single package name or a comma-separated list
     * of names; whitespace around each name is ignored.
     *
     * @param packageList argument text of the package command
     * @param packageName the package to look for
     * @return true if <code>packageName</code> is one of the listed packages
     */
    private static boolean loadsPackage(String packageList, String packageName) {
        for (String candidate : packageList.split(",")) {
            if (packageName.equals(candidate.trim())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Creates a new parser for extracting labels and BibTeX info.
     */
    public LatexRefExtractingParser() {
        initializeDatastructs();
    }

    /**
     * Parses the given string and extracts the labels and BibTeX info.
     * <p>
     * The parse is silent: unexpected tokens merely cancel the command that
     * is currently being read. If the lexer fails, parsing stops, the results
     * gathered so far are kept and {@link #isFatalErrors()} returns true.
     * <p>
     * The result containers are created in the constructor only, so calling
     * this method repeatedly on the same instance accumulates the results.
     *
     * @param input A string containing the LaTeX document
     * @throws IOException If the input is not readable
     */
    public void parse(String input) throws IOException {
        this.extractPreamble(input);
        LatexLexer lexer = new LatexLexer(new PushbackReader(new StringReader(input), 4096));

        // true while the (first) mandatory argument of prevToken is awaited
        boolean expectArg = false;
        // true while the definition body of a new command is awaited
        boolean expectArg2 = false;
        // the command token whose argument is awaited
        Token prevToken = null;
        // optional argument of the package command currently being read
        String packageOptions = null;
        TexCommandEntry currentCommand = null;
        // number of optional arguments seen for the current new command
        int argCount = 0;

        try {
            for (Token t = lexer.next(); !(t instanceof EOF); t = lexer.next()) {
                if (expectArg) {
                    if (t instanceof TArgument) {
                        if (prevToken instanceof TClabel) {
                            ReferenceEntry l = new ReferenceEntry(t.getText());
                            l.setPosition(t.getPos(), t.getText().length());
                            l.startLine = t.getLine();
                            l.setLabelInfo(input);
                            this.labels.add(l);
                        } else if (prevToken instanceof TCbib) {
                            String[] sBibs = t.getText().split(",");
                            for (String bib : sBibs) {
                                bibs.add(bib.trim());
                            }
                        } else if (prevToken instanceof TCbibstyle) {
                            this.bibstyle = t.getText();
                        } else if (prevToken instanceof TCnew) {
                            String commandName = t.getText();
                            // an empty argument cannot name a command; skip the
                            // definition instead of failing in substring(1)
                            if (commandName.length() > 0) {
                                // strip the leading character (the backslash)
                                currentCommand = new TexCommandEntry(commandName.substring(1), "", 0);
                                currentCommand.startLine = t.getLine();
                                // every definition counts its optional arguments from scratch
                                argCount = 0;
                                expectArg2 = true;
                            }
                        } else if (prevToken instanceof TCpackage) {
                            if (loadsPackage(t.getText(), BIBLATEX_PACKAGE)) {
                                biblatexMode = true;
                                if (packageOptions != null) {
                                    biblatexBackend = findBiblatexBackend(packageOptions);
                                }
                            }
                        }
                        prevToken = null;
                        // options belong to one package command only; never let
                        // them leak into a later command
                        packageOptions = null;
                        expectArg = false;
                    } else if (t instanceof TOptargument) {
                        if (prevToken instanceof TCpackage) {
                            packageOptions = t.getText();
                        }
                    } else if (!(t instanceof TWhitespace) && !(t instanceof TStar)
                            && !(t instanceof TCommentline)) {
                        // this is an error condition, but we want a silent parse
                        prevToken = null;
                        packageOptions = null;
                        expectArg = false;
                    }
                } else if (expectArg2) {
                    // we are capturing the second argument of a command with two arguments
                    // the only one of those that interests us is newcommand
                    if (t instanceof TArgument) {
                        currentCommand.info = t.getText();
                        commands.add(currentCommand);
                        argCount = 0;
                        expectArg2 = false;
                    } else if (t instanceof TOptargument) {
                        if (argCount == 0) {
                            // the first optional argument is the number of arguments
                            try {
                                currentCommand.arguments = Integer.parseInt(t.getText().trim());
                            } catch (NumberFormatException ignored) {
                                // not a number: silently give up on this definition
                                expectArg2 = false;
                            }
                        }
                        // keep counting only while the definition is still being read
                        argCount = expectArg2 ? argCount + 1 : 0;
                    } else if (!(t instanceof TWhitespace) && !(t instanceof TCommentline)) {
                        argCount = 0;
                        expectArg2 = false;
                    }
                } else {
                    if (t instanceof TClabel || t instanceof TCbib || t instanceof TCbibstyle
                            || t instanceof TCnew || t instanceof TCpackage) {
                        prevToken = t;
                        expectArg = true;
                    } else if (t instanceof TCpindex) {
                        this.index = true;
                    }
                }
            }
        } catch (LexerException e) {
            // silent parse: remember the failure and keep what was extracted so far
            fatalErrors = true;
        }
    }

    /**
     * @return Returns the bibs.
     */
    public String[] getBibs() {
        return bibs.toArray(new String[0]);
    }

    /**
     * @return Returns the bibstyle.
     */
    public String getBibstyle() {
        return bibstyle;
    }

    /**
     * @return Whether biblatex mode is activated
     */
    public boolean isBiblatexMode() {
        return biblatexMode;
    }

    /**
     * @return The selected biblatex backend, or null if none was found
     */
    public String getBiblatexBackend() {
        return biblatexBackend;
    }

    /**
     * @return Returns the index.
     */
    public boolean isIndex() {
        return index;
    }

    /**
     * @return Returns the labels (the parser's own list, not a copy).
     */
    public ArrayList<ReferenceEntry> getLabels() {
        return labels;
    }

    /**
     * @return Returns the commands (the parser's own list, not a copy).
     */
    public ArrayList<TexCommandEntry> getCommands() {
        return commands;
    }

    /**
     * @return Returns the preamble (contains \begin{document} at the end).
     */
    public String getPreamble() {
        return preamble;
    }

    /**
     * @return Returns the fatalErrors.
     */
    public boolean isFatalErrors() {
        return fatalErrors;
    }
}