/* TeXParser.java * ========================================================================= * This file is part of the JLaTeXMath Library - http://forge.scilab.org/p/jlatexmath * * Copyright (C) 2009 DENIZET Calixte * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or (at * your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * A copy of the GNU General Public License can be found in the file * LICENSE.txt provided with the source distribution of this program (see * the META-INF directory in the source jar). This license can also be * found on the GNU website at http://www.gnu.org/licenses/gpl.html. * * If you did not receive a copy of the GNU General Public License along * with this program, contact the lead developer, or write to the Free * Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * Linking this library statically or dynamically with other modules * is making a combined work based on this library. Thus, the terms * and conditions of the GNU General Public License cover the whole * combination. * * As a special exception, the copyright holders of this library give you * permission to link this library with independent modules to produce * an executable, regardless of the license terms of these independent * modules, and to copy and distribute the resulting executable under terms * of your choice, provided that you also meet, for each linked independent * module, the terms and conditions of the license of that module. * An independent module is a module which is not derived from or based * on this library. If you modify this library, you may extend this exception * to your version of the library, but you are not obliged to do so. * If you do not wish to do so, delete this exception statement from your * version. * */ package com.himamis.retex.renderer.share; import java.util.HashSet; import java.util.Set; import com.himamis.retex.renderer.share.character.Character; import com.himamis.retex.renderer.share.exception.FormulaNotFoundException; import com.himamis.retex.renderer.share.exception.ParseException; import com.himamis.retex.renderer.share.exception.SymbolNotFoundException; import com.himamis.retex.renderer.share.platform.graphics.Color; /** * This class implements a parser for LaTeX' formulas. */ public class TeXParser { TeXFormula formula; private StringBuilder parseString; private int pos; private int spos; private int line; private int col; private int len; private int group; private boolean insertion; private int atIsLetter; private boolean arrayMode; private boolean ignoreWhiteSpace = true; private boolean isPartial; // the escape character private static final char ESCAPE = '\\'; // grouping characters (for parsing) private static final char L_GROUP = '{'; private static final char R_GROUP = '}'; private static final char L_BRACK = '['; private static final char R_BRACK = ']'; private static final char DOLLAR = '$'; private static final char DQUOTE = '\"'; // Percent char for comments private static final char PERCENT = '%'; // script characters (for parsing) private static final char SUB_SCRIPT = '_'; private static final char SUPER_SCRIPT = '^'; private static final char PRIME = '\''; private static final char BACKPRIME = '\u2035'; private static final char DEGREE = '\u00B0'; private static final char SUPZERO = '\u2070'; private static final char SUPONE = '\u00B9'; private static final char SUPTWO = '\u00B2'; private static final char SUPTHREE = '\u00B3'; private static final char SUPFOUR = '\u2074'; private static final char SUPFIVE = '\u2075'; private static final char SUPSIX = '\u2076'; private static final char SUPSEVEN = '\u2077'; private static final char SUPEIGHT = '\u2078'; private static final char SUPNINE = '\u2079'; private static final char SUPPLUS = '\u207A'; private static final char SUPMINUS = '\u207B'; private static final char SUPEQUAL = '\u207C'; private static final char SUPLPAR = '\u207D'; private static final char SUPRPAR = '\u207E'; private static final char SUPN = '\u207F'; private static final char SUBZERO = '\u2080'; private static final char SUBONE = '\u2081'; private static final char SUBTWO = '\u2082'; private static final char SUBTHREE = '\u2083'; private static final char SUBFOUR = '\u2084'; private static final char SUBFIVE = '\u2085'; private static final char SUBSIX = '\u2086'; private static final char SUBSEVEN = '\u2087'; private static final char SUBEIGHT = '\u2088'; private static final char SUBNINE = '\u2089'; private static final char SUBPLUS = '\u208A'; private static final char SUBMINUS = '\u208B'; private static final char SUBEQUAL = '\u208C'; private static final char SUBLPAR = '\u208D'; private static final char SUBRPAR = '\u208E'; protected static boolean isLoading = false; private static final Set<String> unparsedContents = new HashSet<String>(6); // static { // // unparsedContents.add("jlmDynamic"); // unparsedContents.add("jlmText"); // unparsedContents.add("jlmTextit"); // unparsedContents.add("jlmTextbf"); // unparsedContents.add("jlmTextitbf"); // unparsedContents.add("jlmExternalFont"); // } /** * Create a new TeXParser * * @param parseString * the string to be parsed * @param formula * the formula where to put the atoms * @throws ParseException * if the string could not be parsed correctly */ public TeXParser(String parseString, TeXFormula formula) { this(parseString, formula, true); } /** * Create a new TeXParser * * @param isPartial * if true certains exceptions are not thrown * @param parseString * the string to be parsed * @param formula * the formula where to put the atoms * @throws ParseException * if the string could not be parsed correctly */ public TeXParser(boolean isPartial, String parseString, TeXFormula formula) { this(parseString, formula, false); this.isPartial = isPartial; firstpass(); } /** * Create a new TeXParser with or without a first pass * * @param isPartial * if true certains exceptions are not thrown * @param parseString * the string to be parsed * @param formula * the output formula * @param firstpass * a boolean to indicate if the parser must replace the * user-defined macros by their content * @throws ParseException * if the string could not be parsed correctly */ public TeXParser(boolean isPartial, String parseString, TeXFormula formula, boolean firstpass) { this.formula = formula; this.isPartial = isPartial; if (parseString != null) { this.parseString = new StringBuilder(parseString); this.len = parseString.length(); this.pos = 0; if (firstpass) { firstpass(); } } else { this.parseString = null; this.pos = 0; this.len = 0; } } /** * Create a new TeXParser with or without a first pass * * @param parseString * the string to be parsed * @param firstpass * a boolean to indicate if the parser must replace the * user-defined macros by their content * @throws ParseException * if the string could not be parsed correctly */ public TeXParser(String parseString, TeXFormula formula, boolean firstpass) { this(false, parseString, formula, firstpass); } /** * Create a new TeXParser in the context of an array. When the parser meets * a & a new atom is added in the current line and when a \\ is met, a new * line is created. * * @param isPartial * if true certains exceptions are not thrown * @param parseString * the string to be parsed * @param aoa * an ArrayOfAtoms where to put the elements * @param firstpass * a boolean to indicate if the parser must replace the * user-defined macros by their content * @throws ParseException * if the string could not be parsed correctly */ public TeXParser(boolean isPartial, String parseString, ArrayOfAtoms aoa, boolean firstpass) { this(isPartial, parseString, (TeXFormula) aoa, firstpass); arrayMode = true; } /** * Create a new TeXParser in the context of an array. When the parser meets * a & a new atom is added in the current line and when a \\ is met, a new * line is created. * * @param isPartial * if true certains exceptions are not thrown * @param parseString * the string to be parsed * @param aoa * an ArrayOfAtoms where to put the elements * @param firstpass * a boolean to indicate if the parser must replace the * user-defined macros by their content * @throws ParseException * if the string could not be parsed correctly */ public TeXParser(boolean isPartial, String parseString, ArrayOfAtoms aoa, boolean firstpass, boolean space) { this(isPartial, parseString, (TeXFormula) aoa, firstpass, space); arrayMode = true; } /** * Create a new TeXParser in the context of an array. When the parser meets * a & a new atom is added in the current line and when a \\ is met, a new * line is created. * * @param parseString * the string to be parsed * @param aoa * an ArrayOfAtoms where to put the elements * @param firstpass * a boolean to indicate if the parser must replace the * user-defined macros by their content * @throws ParseException * if the string could not be parsed correctly */ public TeXParser(String parseString, ArrayOfAtoms aoa, boolean firstpass) { this(false, parseString, (TeXFormula) aoa, firstpass); } /** * Create a new TeXParser which ignores or not the white spaces, it's useful * for mbox command * * @param isPartial * if true certains exceptions are not thrown * @param parseString * the string to be parsed * @param firstpass * a boolean to indicate if the parser must replace the * user-defined macros by their content * @param space * a boolean to indicate if the parser must ignore or not the * white space * @throws ParseException * if the string could not be parsed correctly */ public TeXParser(boolean isPartial, String parseString, TeXFormula formula, boolean firstpass, boolean space) { this(isPartial, parseString, formula, firstpass); this.ignoreWhiteSpace = space; } /** * Create a new TeXParser which ignores or not the white spaces, it's useful * for mbox command * * @param parseString * the string to be parsed * @param firstpass * a boolean to indicate if the parser must replace the * user-defined macros by their content * @param space * a boolean to indicate if the parser must ignore or not the * white space * @throws ParseException * if the string could not be parsed correctly */ public TeXParser(String parseString, TeXFormula formula, boolean firstpass, boolean space) { this(false, parseString, formula, firstpass); this.ignoreWhiteSpace = space; } /** * Reset the parser with a new latex expression */ public void reset(String latex) { parseString = new StringBuilder(latex); len = parseString.length(); formula.root = null; pos = 0; spos = 0; line = 0; col = 0; group = 0; insertion = false; atIsLetter = 0; arrayMode = false; ignoreWhiteSpace = true; firstpass(); } /** * Return true if we get a partial formula */ public boolean getIsPartial() { return isPartial; } /** * Get the number of the current line */ public int getLine() { return line; } /** * Get the number of the current column */ public int getCol() { return pos - col - 1; } /** * Get the last atom of the current formula */ public Atom getLastAtom() { Atom at = formula.root; if (at instanceof RowAtom) { return ((RowAtom) at).getLastAtom(); } formula.root = null; return at; } /** * Get the atom represented by the current formula */ public Atom getFormulaAtom() { Atom at = formula.root; formula.root = null; return at; } /** * Put an atom in the current formula */ public void addAtom(Atom at) { formula.add(at); } /** * Indicate if the character @ can be used in the command's name */ public void makeAtLetter() { atIsLetter++; } /** * Indicate if the character @ can be used in the command's name */ public void makeAtOther() { atIsLetter--; } /** * Return a boolean indicating if the character @ is considered as a letter * or not */ public boolean isAtLetter() { return (atIsLetter != 0); } /** * Return a boolean indicating if the parser is used to parse an array or * not */ public boolean isArrayMode() { return arrayMode; } public void setArrayMode(boolean arrayMode) { this.arrayMode = arrayMode; } /** * Return a boolean indicating if the parser must ignore white spaces */ public boolean isIgnoreWhiteSpace() { return ignoreWhiteSpace; } /** * Return a boolean indicating if the parser is in math mode */ public boolean isMathMode() { return ignoreWhiteSpace; } /** * Return the current position in the parsed string */ public int getPos() { return pos; } /** * Rewind the current parsed string * * @param n * the number of character to be rewinded * @return the new position in the parsed string */ public int rewind(int n) { pos -= n; return pos; } public String getStringFromCurrentPos() { return parseString.substring(pos); } public void finish() { pos = parseString.length(); } /** * Add a new row when the parser is in array mode * * @throws ParseException * if the parser is not in array mode */ public void addRow() throws ParseException { if (!arrayMode) { throw new ParseException("You can add a row only in array mode !"); } ((ArrayOfAtoms) formula).addRow(); } public void cellColor(Color color) throws ParseException { if (!arrayMode) { throw new ParseException( "You can use cellcolor only in array mode !"); } ((ArrayOfAtoms) formula).cellColor(color); } private void firstpass() throws ParseException { if (len != 0) { char ch; String com; int spos; String[] args; MacroInfo mac; while (pos < len) { ch = parseString.charAt(pos); switch (ch) { case ESCAPE: spos = pos; com = getCommand(); if ("newcommand".equals(com) || "renewcommand".equals(com)) { args = getOptsArgs(2, 2); mac = MacroInfo.Commands.get(com); try { mac.invoke(this, args); } catch (ParseException e) { if (!isPartial) { throw e; } } parseString.delete(spos, pos); len = parseString.length(); pos = spos; } else if (NewCommandMacro.isMacro(com)) { mac = MacroInfo.Commands.get(com); args = getOptsArgs(mac.nbArgs, mac.hasOptions ? 1 : 0); args[0] = com; try { parseString.replace(spos, pos, (String) mac.invoke(this, args)); } catch (ParseException e) { if (!isPartial) { throw e; } spos += com.length() + 1; } len = parseString.length(); pos = spos; } else if ("begin".equals(com)) { args = getOptsArgs(1, 0); mac = MacroInfo.Commands.get(args[1] + "@env"); if (mac == null) { if (!isPartial) { throw new ParseException("Unknown environment: " + args[1] + " at position " + getLine() + ":" + getCol()); } } else { try { String[] optarg = getOptsArgs(mac.nbArgs - 1, 0); String grp = getGroup("\\begin{" + args[1] + "}", "\\end{" + args[1] + "}"); String expr = "{\\makeatletter \\" + args[1] + "@env"; for (int i = 1; i <= mac.nbArgs - 1; i++) { expr += "{" + optarg[i] + "}"; } expr += "{" + grp + "}\\makeatother}"; parseString.replace(spos, pos, expr); len = parseString.length(); pos = spos; } catch (ParseException e) { if (!isPartial) { throw e; } } } } else if ("makeatletter".equals(com)) { atIsLetter++; } else if ("makeatother".equals(com)) { atIsLetter--; } else if (unparsedContents.contains(com)) { getOptsArgs(1, 0); } break; case PERCENT: spos = pos++; char chr; while (pos < len) { chr = parseString.charAt(pos++); if (chr == '\r' || chr == '\n') { break; } } if (pos < len) { pos--; } parseString.replace(spos, pos, ""); len = parseString.length(); pos = spos; break; case DEGREE: // surround in {} so that it works if there's a following character parseString.replace(pos, pos + 1, "^{\\circ}"); len = parseString.length(); pos++; break; case SUPTWO: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{2}"); len = parseString.length(); pos++; break; case SUPTHREE: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{3}"); len = parseString.length(); pos++; break; case SUPONE: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{1}"); len = parseString.length(); pos++; break; case SUPZERO: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{0}"); len = parseString.length(); pos++; break; case SUPFOUR: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{4}"); len = parseString.length(); pos++; break; case SUPFIVE: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{5}"); len = parseString.length(); pos++; break; case SUPSIX: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{6}"); len = parseString.length(); pos++; break; case SUPSEVEN: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{7}"); len = parseString.length(); pos++; break; case SUPEIGHT: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{8}"); len = parseString.length(); pos++; break; case SUPNINE: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{9}"); len = parseString.length(); pos++; break; case SUPPLUS: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{+}"); len = parseString.length(); pos++; break; case SUPMINUS: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{-}"); len = parseString.length(); pos++; break; case SUPEQUAL: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{=}"); len = parseString.length(); pos++; break; case SUPLPAR: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{(}"); len = parseString.length(); pos++; break; case SUPRPAR: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{)}"); len = parseString.length(); pos++; break; case SUPN: parseString.replace(pos, pos + 1, "\\jlatexmathcumsup{n}"); len = parseString.length(); pos++; break; case SUBTWO: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{2}"); len = parseString.length(); pos++; break; case SUBTHREE: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{3}"); len = parseString.length(); pos++; break; case SUBONE: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{1}"); len = parseString.length(); pos++; break; case SUBZERO: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{0}"); len = parseString.length(); pos++; break; case SUBFOUR: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{4}"); len = parseString.length(); pos++; break; case SUBFIVE: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{5}"); len = parseString.length(); pos++; break; case SUBSIX: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{6}"); len = parseString.length(); pos++; break; case SUBSEVEN: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{7}"); len = parseString.length(); pos++; break; case SUBEIGHT: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{8}"); len = parseString.length(); pos++; break; case SUBNINE: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{9}"); len = parseString.length(); pos++; break; case SUBPLUS: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{+}"); len = parseString.length(); pos++; break; case SUBMINUS: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{-}"); len = parseString.length(); pos++; break; case SUBEQUAL: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{=}"); len = parseString.length(); pos++; break; case SUBLPAR: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{(}"); len = parseString.length(); pos++; break; case SUBRPAR: parseString.replace(pos, pos + 1, "\\jlatexmathcumsub{)}"); len = parseString.length(); pos++; break; default: pos++; } } pos = 0; len = parseString.length(); } } /** * Parse the input string * * @throws ParseException * if an error is encountered during parsing */ public void parse() throws ParseException { if (len != 0) { char ch; while (pos < len) { ch = parseString.charAt(pos); switch (ch) { case '\n': line++; col = pos; //$FALL-THROUGH$ case '\t': case '\r': pos++; break; case ' ': pos++; if (!ignoreWhiteSpace) {// We are in a mbox formula.add(new SpaceAtom()); formula.add(new BreakMarkAtom()); while (pos < len) { ch = parseString.charAt(pos); if (ch != ' ' || ch != '\t' || ch != '\r') { break; } pos++; } } break; case DOLLAR: pos++; if (!ignoreWhiteSpace) {// We are in a mbox int style = TeXConstants.STYLE_TEXT; boolean doubleDollar = false; if (parseString.charAt(pos) == DOLLAR) { style = TeXConstants.STYLE_DISPLAY; doubleDollar = true; pos++; } TeXFormula dollarFormula = new TeXFormula(this, getDollarGroup(DOLLAR), false); if (dollarFormula.root == null) { dollarFormula = new TeXFormula(this, "\\textcolor{red}{?}", false); } formula.add(new MathAtom(dollarFormula.root, style)); if (doubleDollar) { if (parseString.charAt(pos) == DOLLAR) { pos++; } } } break; case ESCAPE: Atom at = processEscape(); formula.add(at); if (arrayMode && at instanceof HlineAtom) { ((ArrayOfAtoms) formula).addRow(); } if (insertion) { insertion = false; } break; case L_GROUP: Atom atom = getArgument(); if (atom != null) { atom.type = TeXConstants.TYPE_ORDINARY; } formula.add(atom); break; case R_GROUP: group--; pos++; if (group == -1) { throw new ParseException( "Found a closing '" + R_GROUP + "' without an opening '" + L_GROUP + "'!"); } return; case SUPER_SCRIPT: formula.add(getScripts(ch)); break; case SUB_SCRIPT: if (ignoreWhiteSpace) { formula.add(getScripts(ch)); } else { formula.add(new UnderscoreAtom()); pos++; } break; case '&': if (!arrayMode) { throw new ParseException("Character '&' is only available in array mode !"); } ((ArrayOfAtoms) formula).addCol(); pos++; break; case PRIME: if (ignoreWhiteSpace) { formula.add(new CumulativeScriptsAtom(getLastAtom(), null, SymbolAtom.get("prime"))); } else { formula.add(convertCharacter(PRIME, true)); } pos++; break; case BACKPRIME: if (ignoreWhiteSpace) { formula.add(new CumulativeScriptsAtom(getLastAtom(), null, SymbolAtom.get("backprime"))); } else { formula.add(convertCharacter(BACKPRIME, true)); } pos++; break; case DQUOTE: if (ignoreWhiteSpace) { formula.add(new CumulativeScriptsAtom(getLastAtom(), null, SymbolAtom.get("prime"))); formula.add(new CumulativeScriptsAtom(getLastAtom(), null, SymbolAtom.get("prime"))); } else { formula.add(convertCharacter(PRIME, true)); formula.add(convertCharacter(PRIME, true)); } pos++; break; default: formula.add(convertCharacter(ch, false)); pos++; } } } if (formula.root == null && !arrayMode) { formula.add(new EmptyAtom()); } } private Atom getScripts(char f) throws ParseException { pos++; Atom first = getArgument(); Atom second = null; char s = '\0'; if (pos < len) { s = parseString.charAt(pos); } if (f == SUPER_SCRIPT && s == SUPER_SCRIPT) { second = first; first = null; } else if (f == SUB_SCRIPT && s == SUPER_SCRIPT) { pos++; second = getArgument(); } else if (f == SUPER_SCRIPT && s == SUB_SCRIPT) { pos++; second = first; first = getArgument(); } else if (f == SUPER_SCRIPT && s != SUB_SCRIPT) { second = first; first = null; } Atom at; if (formula.root instanceof RowAtom) { at = ((RowAtom) formula.root).getLastAtom(); } else if (formula.root == null) { at = new PhantomAtom(new CharAtom('M', "mathnormal"), false, true, true); } else { at = formula.root; formula.root = null; } if (at.getRightType() == TeXConstants.TYPE_BIG_OPERATOR) { return new BigOperatorAtom(at, first, second); } else if (at instanceof OverUnderDelimiter) { if (((OverUnderDelimiter) at).isOver()) { if (second != null) { ((OverUnderDelimiter) at).addScript(second); return new ScriptsAtom(at, first, null); } } else if (first != null) { ((OverUnderDelimiter) at).addScript(first); return new ScriptsAtom(at, null, second); } } return new ScriptsAtom(at, first, second); } /** * Get the contents between two delimiters * * @param openclose * the opening and closing character (such $) * @return the enclosed contents * @throws ParseException * if the contents are badly enclosed */ public String getDollarGroup(char openclose) throws ParseException { int spos = pos; char ch; do { ch = parseString.charAt(pos++); if (ch == ESCAPE) { pos++; } } while (pos < len && ch != openclose); if (ch == openclose) { return parseString.substring(spos, pos - 1); } return parseString.substring(spos, pos); } /** * Get the contents between two delimiters * * @param open * the opening character * @param close * the closing character * @return the enclosed contents * @throws ParseException * if the contents are badly enclosed */ public String getGroup(char open, char close) throws ParseException { if (pos == len) { return null; } int group, spos; char ch = parseString.charAt(pos); if (pos < len && ch == open) { group = 1; spos = pos; while (pos < len - 1 && group != 0) { pos++; ch = parseString.charAt(pos); if (ch == open) { group++; } else if (ch == close) { group--; } else if (ch == ESCAPE && pos != len - 1) { pos++; } } pos++; if (group != 0) { return parseString.substring(spos + 1, pos); } return parseString.substring(spos + 1, pos - 1); } throw new ParseException("missing '" + open + "'!"); } /** * Get the contents between two strings as in \begin{foo}...\end{foo} * * @param open * the opening string * @param close * the closing string * @return the enclosed contents * @throws ParseException * if the contents are badly enclosed */ public String getGroup(String open, String close) throws ParseException { int group = 1; int ol = open.length(), cl = close.length(); boolean lastO = isValidCharacterInCommand(open.charAt(ol - 1)); boolean lastC = isValidCharacterInCommand(close.charAt(cl - 1)); int oc = 0, cc = 0; int startC = 0; char prev = '\0'; StringBuilder buf = new StringBuilder(); while (pos < len && group != 0) { char c = parseString.charAt(pos); char c1; if (prev != ESCAPE && c == ' ') {// Trick to handle case where close // == "\end {foo}" while (pos < len && parseString.charAt(pos++) == ' ') { buf.append(' '); } c = parseString.charAt(--pos); if (isValidCharacterInCommand(prev) && isValidCharacterInCommand(c)) { oc = cc = 0; } } if (c == open.charAt(oc)) { oc++; } else { oc = 0; } if (c == close.charAt(cc)) { if (cc == 0) { startC = pos; } cc++; } else { cc = 0; } if (pos + 1 < len) { c1 = parseString.charAt(pos + 1); if (oc == ol) { if (!lastO || !isValidCharacterInCommand(c1)) { group++; } oc = 0; } if (cc == cl) { if (!lastC || !isValidCharacterInCommand(c1)) { group--; } cc = 0; } } else { if (oc == ol) { group++; oc = 0; } if (cc == cl) { group--; cc = 0; } } prev = c; buf.append(c); pos++; } if (group != 0) { if (isPartial) { return buf.toString(); } throw new ParseException("The token " + open + " must be closed by " + close); } return buf.substring(0, buf.length() - pos + startC); } /** * Get the argument of a command in his atomic format * * @return the corresponding atom * @throws ParseException * if the argument is incorrect */ public Atom getArgument() throws ParseException { skipWhiteSpace(); char ch; if (pos < len) { ch = parseString.charAt(pos); } else { return new EmptyAtom(); } if (ch == L_GROUP) { TeXFormula tf = new TeXFormula(); TeXFormula sformula = this.formula; this.formula = tf; pos++; group++; parse(); this.formula = sformula; if (this.formula.root == null) { RowAtom at = new RowAtom(); at.add(tf.root); return at; } return tf.root; } if (ch == ESCAPE) { Atom at = processEscape(); if (insertion) { insertion = false; return getArgument(); } return at; } Atom at = convertCharacter(ch, true); pos++; return at; } public String getOverArgument() throws ParseException { if (pos == len) { return null; } int ogroup = 1, spos; char ch = '\0'; spos = pos; while (pos < len && ogroup != 0) { ch = parseString.charAt(pos); switch (ch) { case L_GROUP: ogroup++; break; case '&': /* * if a & is encountered at the same level as \over we must * break the argument */ if (ogroup == 1) { ogroup--; } break; case R_GROUP: ogroup--; break; case ESCAPE: pos++; /* * if a \\ or a \cr is encountered at the same level as \over we * must break the argument */ if (pos < len && parseString.charAt(pos) == '\\' && ogroup == 1) { ogroup--; pos--; } else if (pos < len - 1 && parseString.charAt(pos) == 'c' && parseString.charAt(pos + 1) == 'r' && ogroup == 1) { ogroup--; pos--; } break; } pos++; } if (ogroup >= 2) { // end of string reached, but not processed properly throw new ParseException("Illegal end, missing '}' !"); } String str; if (ogroup == 0) { str = parseString.substring(spos, pos - 1); } else { str = parseString.substring(spos, pos); ch = '\0'; } if (ch == '&' || ch == '\\' || ch == R_GROUP) { pos--; } return str; } public double[] getLength() throws ParseException { if (pos == len) { return null; } int spos; char ch = '\0'; skipWhiteSpace(); spos = pos; while (pos < len && ch != ' ') { ch = parseString.charAt(pos++); } skipWhiteSpace(); return SpaceAtom.getLength(parseString.substring(spos, pos - 1)); } /** * Convert a character in the corresponding atom in using the file * TeXFormulaSettings.xml for non-alphanumeric characters * * @param c0 * the character to be converted * @return the corresponding atom * @throws ParseException * if the character is unknown */ public Atom convertCharacter(char c0, boolean oneChar) throws ParseException { if (ignoreWhiteSpace) {// The Unicode Greek letters in math mode are not // drawn with the // Greek font if (c0 >= 945 && c0 <= 969) { return SymbolAtom.get(TeXFormula.symbolMappings[c0]); } else if (c0 >= 913 && c0 <= 937) { return new TeXFormula(TeXFormula.symbolFormulaMappings[c0]).root; } } char c = convertToRomanNumber(c0); if (((c < '0' || c > '9') && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z'))) { Character.UnicodeBlock block = Character.UnicodeBlock.of(c); if (!isLoading && !DefaultTeXFont.loadedAlphabets.contains(block)) { DefaultTeXFont.addAlphabet(DefaultTeXFont.registeredAlphabets.get(block)); } String symbolName = TeXFormula.symbolMappings[c]; if (symbolName == null && (TeXFormula.symbolFormulaMappings == null || TeXFormula.symbolFormulaMappings[c] == null)) { TeXFormula.FontInfos fontInfos = null; boolean isLatin = Character.UnicodeBlock.BASIC_LATIN.equals(block); if ((isLatin && TeXFormula.isRegisteredBlock(Character.UnicodeBlock.BASIC_LATIN)) || !isLatin) { fontInfos = TeXFormula.getExternalFont(block); } if (fontInfos != null) { if (oneChar) { return new JavaFontRenderingAtom(Character.toString(c), fontInfos); } int start = pos++; int end = len - 1; while (pos < len) { c = parseString.charAt(pos); if (!Character.UnicodeBlock.of(c).equals(block)) { end = --pos; break; } pos++; } return new JavaFontRenderingAtom(parseString.substring(start, end + 1), fontInfos); } if (!isPartial) { throw new ParseException( "Unknown character : '" + Character.toString(c) + "' (or " + ((int) c) + ")"); } return new ColorAtom(new RomanAtom(new TeXFormula( "\\text{(Unknown char " + ((int) c) + ")}").root), null, ColorUtil.RED); } if (!ignoreWhiteSpace) {// we are in text mode if (TeXFormula.symbolTextMappings[c] != null) { return SymbolAtom.get(TeXFormula.symbolTextMappings[c]) .setUnicode(c); } } if (TeXFormula.symbolFormulaMappings != null && TeXFormula.symbolFormulaMappings[c] != null) { return new TeXFormula(TeXFormula.symbolFormulaMappings[c]).root; } try { return SymbolAtom.get(symbolName); } catch (SymbolNotFoundException e) { throw new ParseException("The character '" + Character.toString(c) + "' was mapped to an unknown symbol with the name '" + symbolName + "'!", e); } } // alphanumeric character TeXFormula.FontInfos fontInfos = TeXFormula.externalFontMap .get(Character.UnicodeBlock.BASIC_LATIN); if (fontInfos != null) { if (oneChar) { return new JavaFontRenderingAtom(Character.toString(c), fontInfos); } int start = pos++; int end = len - 1; while (pos < len) { c = parseString.charAt(pos); if (((c < '0' || c > '9') && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z'))) { end = --pos; break; } pos++; } return new JavaFontRenderingAtom(parseString.substring(start, end + 1), fontInfos); } return new CharAtom(c, ignoreWhiteSpace, formula.textStyle); } private String getCommand() { int pos1 = ++pos; char ch = '\0'; while (pos < len) { ch = parseString.charAt(pos); if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z') && (atIsLetter == 0 || ch != '@')) { break; } pos++; } if (ch == '\0') { return ""; } if (pos == pos1) { pos++; } String com = parseString.substring(pos1, pos); if ("cr".equals(com) && pos < len && parseString.charAt(pos) == ' ') { pos++; } return com; } private Atom processEscape() throws ParseException { spos = pos; String command = getCommand(); if (command.length() == 0) { return new EmptyAtom(); } if (MacroInfo.Commands.get(command) != null) { return processCommands(command); } try { return TeXFormula.get(command).root; } catch (FormulaNotFoundException e) { try { return SymbolAtom.get(command); } catch (SymbolNotFoundException e1) { } } // not a valid command or symbol or predefined TeXFormula found if (!isPartial) { throw new ParseException("Unknown symbol or command or predefined TeXFormula: '" + command + "'"); } return new ColorAtom( new RomanAtom(new TeXFormula("\\backslash " + command).root), null, ColorUtil.RED); } private void insert(int beg, int end, String formula) { parseString.replace(beg, end, formula); len = parseString.length(); pos = beg; insertion = true; } /** * Get the arguments ant the options of a command * * @param nbArgs * the number of arguments of the command * @param opts * must be 1 if the options are found before the first argument * and must be 2 if they must be found before the second argument * @return an array containing arguments and at the end the options are put */ /* Should be improved */ public String[] getOptsArgs(int nbArgs, int opts) { // A maximum of 10 options can be passed to a command String[] args = new String[nbArgs + 10 + 1]; if (nbArgs != 0) { // We get the options just after the command name if (opts == 1) { int j = nbArgs + 1; try { for (; j < nbArgs + 11; j++) { skipWhiteSpace(); args[j] = getGroup(L_BRACK, R_BRACK); } } catch (ParseException e) { args[j] = null; } } // We get the first argument skipWhiteSpace(); try { args[1] = getGroup(L_GROUP, R_GROUP); } catch (ParseException e) { if (parseString.charAt(pos) != '\\') { args[1] = "" + parseString.charAt(pos); pos++; } else { args[1] = getCommandWithArgs(getCommand()); } } // We get the options after the first argument if (opts == 2) { int j = nbArgs + 1; try { for (; j < nbArgs + 11; j++) { skipWhiteSpace(); args[j] = getGroup(L_BRACK, R_BRACK); } } catch (ParseException e) { args[j] = null; } } // We get the next arguments for (int i = 2; i <= nbArgs; i++) { skipWhiteSpace(); try { args[i] = getGroup(L_GROUP, R_GROUP); } catch (ParseException e) { if (parseString.charAt(pos) != '\\') { args[i] = "" + parseString.charAt(pos); pos++; } else { args[i] = getCommandWithArgs(getCommand()); } } } if (ignoreWhiteSpace) { skipWhiteSpace(); } } return args; } /** * return a string with command and options and args * * @param command * name of command * @return * @author Juan Enrique Escobar Robles */ private String getCommandWithArgs(String command) { if ("left".equals(command)) { return getGroup("\\left", "\\right"); } MacroInfo mac = MacroInfo.Commands.get(command); if (mac != null) { int mac_opts = 0; if (mac.hasOptions) { mac_opts = mac.posOpts; } String[] mac_args = getOptsArgs(mac.nbArgs, mac_opts); StringBuilder mac_arg = new StringBuilder("\\"); mac_arg.append(command); for (int j = 0; j < mac.posOpts; j++) { String arg_t = mac_args[mac.nbArgs + j + 1]; if (arg_t != null) { mac_arg.append("[").append(arg_t).append("]"); } } for (int j = 0; j < mac.nbArgs; j++) { String arg_t = mac_args[j + 1]; if (arg_t != null) { mac_arg.append("{").append(arg_t).append("}"); } } return mac_arg.toString(); } return "\\" + command; } /** * Processes the given TeX command (by parsing following command arguments * in the parse string). */ private Atom processCommands(String command) throws ParseException { MacroInfo mac = MacroInfo.Commands.get(command); int opts = 0; if (mac.hasOptions) { opts = mac.posOpts; } String[] args = getOptsArgs(mac.nbArgs, opts); args[0] = command; if (NewCommandMacro.isMacro(command)) { String ret = (String) mac.invoke(this, args); insert(spos, pos, ret); return null; } return (Atom) mac.invoke(this, args); } /** * Test the validity of the name of a command. It must contains only alpha * characters and eventually a @ if makeAtletter activated * * @param com * the command's name * @return the validity of the name */ public final boolean isValidName(String com) { if (com == null || "".equals(com)) { return false; } char c = '\0'; if (com.charAt(0) == '\\') { int pos = 1; int len = com.length(); while (pos < len) { c = com.charAt(pos); if (!java.lang.Character.isLetter(c) && (atIsLetter == 0 || c != '@')) { break; } pos++; } } else { return false; } return java.lang.Character.isLetter(c); } /** * Test the validity of a character in a command. It must contains only * alpha characters and eventually a @ if makeAtletter activated * * @param com * the command's name * @return the validity of the name */ public final boolean isValidCharacterInCommand(char ch) { return java.lang.Character.isLetter(ch) || (atIsLetter != 0 && ch == '@'); } private final void skipWhiteSpace() { char c; while (pos < len) { c = parseString.charAt(pos); if (c != ' ' && c != '\t' && c != '\n' && c != '\r') { break; } if (c == '\n') { line++; col = pos; } pos++; } } /** * The aim of this method is to convert foreign number into roman ones ! */ private static char convertToRomanNumber(char c) { if (c == 0x66b) {// Arabic dot return '.'; } else if (0x660 <= c && c <= 0x669) {// Arabic return (char) (c - (char) 0x630); } else if (0x6f0 <= c && c <= 0x6f9) {// Arabic return (char) (c - (char) 0x6c0); } else if (0x966 <= c && c <= 0x96f) {// Devanagari return (char) (c - (char) 0x936); } else if (0x9e6 <= c && c <= 0x9ef) {// Bengali return (char) (c - (char) 0x9b6); } else if (0xa66 <= c && c <= 0xa6f) {// Gurmukhi return (char) (c - (char) 0xa36); } else if (0xae6 <= c && c <= 0xaef) {// Gujarati return (char) (c - (char) 0xab6); } else if (0xb66 <= c && c <= 0xb6f) {// Oriya return (char) (c - (char) 0xb36); } else if (0xc66 <= c && c <= 0xc6f) {// Telugu return (char) (c - (char) 0xc36); } else if (0xd66 <= c && c <= 0xd6f) {// Malayalam return (char) (c - (char) 0xd36); } else if (0xe50 <= c && c <= 0xe59) {// Thai return (char) (c - (char) 0xe20); } else if (0xed0 <= c && c <= 0xed9) {// Lao return (char) (c - (char) 0xea0); } else if (0xf20 <= c && c <= 0xf29) {// Tibetan return (char) (c - (char) 0xe90); } else if (0x1040 <= c && c <= 0x1049) {// Myanmar return (char) (c - (char) 0x1010); } else if (0x17e0 <= c && c <= 0x17e9) {// Khmer return (char) (c - (char) 0x17b0); } else if (0x1810 <= c && c <= 0x1819) {// Mongolian return (char) (c - (char) 0x17e0); } else if (0x1b50 <= c && c <= 0x1b59) {// Balinese return (char) (c - (char) 0x1b20); } else if (0x1bb0 <= c && c <= 0x1bb9) {// Sundanese return (char) (c - (char) 0x1b80); } else if (0x1c40 <= c && c <= 0x1c49) {// Lepcha return (char) (c - (char) 0x1c10); } else if (0x1c50 <= c && c <= 0x1c59) {// Ol Chiki return (char) (c - (char) 0x1c20); } else if (0xa8d0 <= c && c <= 0xa8d9) {// Saurashtra return (char) (c - (char) 0xa8a0); } return c; } }