/*
* Created on 2003-11-02
*/
package org.geogebra.common.io;
import java.util.HashMap;
import org.geogebra.common.util.lang.Unicode;
/**
* Apache 2.0 Licence
*
* Original from http://www.tilman.de/programme/mathparser/download_en.html
*
* Parses Presentation MathML
*
* Alterations by GeoGebra
*
* @author (c) Tilman Walther
* @author (c) Martin Wilke
*/
public class MathMLParser {
/**
 * Substitution table used when the parser produces GeoGebra syntax. Keys are
 * MathML start tags or entities, values are the text written to the output.
 * In tag substitutions {@code %BLOCKn%} is replaced by the converted n-th
 * child block and {@code %BLOCKS%} by all child blocks in document order.
 * Where a key is put more than once, the last put wins.
 * <p>
 * NOTE(review): many entities map to "# ", presumably a marker for symbols
 * that have no GeoGebra equivalent - confirm against the consumer of the
 * output.
 */
private static HashMap<String, String> geogebraMap = new HashMap<String, String>();
static {
    // Tags:
    geogebraMap.put("<mfrac>", "((%BLOCK1%) / (%BLOCK2%))");
    geogebraMap.put("<msup>", "((%BLOCK1%)^(%BLOCK2%))");
    // TODO _{1} -> _1 at the end of parsing
    geogebraMap.put("<msub>", "%BLOCK1%_{%BLOCK2%}");
    geogebraMap.put("<msqrt>", "sqrt(%BLOCK1%)");
    geogebraMap.put("<mroot>", "nroot(%BLOCK1%,%BLOCK2%)");
    // FIXME also used for e.g. binomial coefficients
    geogebraMap.put("<mfenced>", "(%BLOCK1%)");
    // absolute value
    geogebraMap.put("<mfenced open=\"|\" close=\"|\">", "abs(%BLOCK1%)");
    // FIXME subscripted variable raised to a power, ignored for now
    geogebraMap.put("<msubsup>", "(%BLOCK1%_{%BLOCK2%})^(%BLOCK3%)");
    // the trailing ',' makes parseBlock append ')' after the block that
    // follows, which becomes the last argument of the opened call
    geogebraMap.put("<munderover>", "%BLOCK1%(%BLOCK2%,%BLOCK3%,");
    // FIXME the under-script is ignored for now
    geogebraMap.put("<munder>", "%BLOCK1%");
    geogebraMap.put("<mtable>", "{%BLOCKS%}");
    geogebraMap.put("<mtr>", "{%BLOCKS%}, ");
    geogebraMap.put("<mtd>", "%BLOCK1%, ");
    geogebraMap.put("∫", "ggbMathmlIntegral");
    geogebraMap.put("∑", "ggbMathmlSum");
    // Entities
    geogebraMap.put("˙", "* ");
    geogebraMap.put("⋅", "* ");
    geogebraMap.put("·", "* ");
    geogebraMap.put("×", "* ");
    geogebraMap.put("=", " = ");
    geogebraMap.put("∀", "# ");
    geogebraMap.put("∃", "# ");
    geogebraMap.put("∍", "# ");
    geogebraMap.put("∗", "* ");
    geogebraMap.put("−", "- ");
    geogebraMap.put("⁄", "/ ");
    geogebraMap.put("∶", ": ");
    geogebraMap.put("<", "< ");
    geogebraMap.put(">", "> ");
    geogebraMap.put("≅", "# ");
    // key is an invisible character (looks like U+2062 INVISIBLE TIMES)
    geogebraMap.put("⁢", " ");
    // arrows
    geogebraMap.put("↔", "# ");
    geogebraMap.put("←", "# ");
    geogebraMap.put("→", "# ");
    geogebraMap.put("⇔", "# ");
    geogebraMap.put("⇐", "# ");
    geogebraMap.put("⇒", "# ");
    // large operators
    geogebraMap.put("∑", "# ");
    geogebraMap.put("∏", "# ");
    geogebraMap.put("∫", "# ");
    geogebraMap.put("ⅆ", "d ");
    // Greek alphabet, mapped to the constants of the Unicode class
    geogebraMap.put("α", Unicode.alpha + "");
    geogebraMap.put("β", Unicode.beta + "");
    geogebraMap.put("γ", Unicode.gamma + "");
    geogebraMap.put("δ", Unicode.delta + "");
    geogebraMap.put("ε", Unicode.epsilon + "");
    geogebraMap.put("η", Unicode.eta + "");
    geogebraMap.put("ι", Unicode.iota + "");
    geogebraMap.put("κ", Unicode.kappa + "");
    geogebraMap.put("λ", Unicode.lambda + "");
    geogebraMap.put("μ", Unicode.mu + "");
    geogebraMap.put("&mgr;", Unicode.mu + "");
    geogebraMap.put("ν", Unicode.nu + "");
    geogebraMap.put("ο", Unicode.omicron + "");
    geogebraMap.put("π", Unicode.pi + "");
    geogebraMap.put("θ", Unicode.theta + "");
    geogebraMap.put("ρ", Unicode.rho + "");
    geogebraMap.put("&rgr;", Unicode.rho + "");
    geogebraMap.put("σ", Unicode.sigma + "");
    geogebraMap.put("τ", Unicode.tau + "");
    geogebraMap.put("υ", Unicode.upsilon + "");
    geogebraMap.put("ϕ", Unicode.phi + "");
    geogebraMap.put("φ", Unicode.phi + "");// \\varphi
    geogebraMap.put("χ", Unicode.chi + "");
    geogebraMap.put("ϖ", Unicode.pi + "");// \\varpi
    geogebraMap.put("&pgr;", Unicode.pi + "");
    geogebraMap.put("&ohgr;", Unicode.omega + "");
    geogebraMap.put("ω", Unicode.omega + "");
    geogebraMap.put("ξ", Unicode.xi + "");
    geogebraMap.put("ψ", Unicode.psi + "");
    geogebraMap.put("ζ", Unicode.zeta + "");
    geogebraMap.put("Δ", Unicode.Delta + "");
    geogebraMap.put("Φ", Unicode.Phi + "");
    geogebraMap.put("Γ", Unicode.Gamma + "");
    geogebraMap.put("Λ", Unicode.Lambda + "");
    geogebraMap.put("Π", Unicode.Pi + "");
    geogebraMap.put("&tgr;", Unicode.tau + "");
    geogebraMap.put("Θ", Unicode.Theta + "");
    geogebraMap.put("Σ", Unicode.Sigma + "");
    geogebraMap.put("Υ", Unicode.Upsilon + "");
    geogebraMap.put("ς", Unicode.sigma + "");// \\varsigma
    geogebraMap.put("Ω", Unicode.Omega + "");
    geogebraMap.put("Ξ", Unicode.Xi + "");
    geogebraMap.put("Ψ", Unicode.Psi + "");
    geogebraMap.put("ϵ", Unicode.epsilon + "");
    geogebraMap.put("&phgr;", Unicode.phi + "");
    geogebraMap.put("&ggr;", Unicode.gamma + "");
    geogebraMap.put("&eegr;", Unicode.eta + "");
    geogebraMap.put("&igr;", Unicode.iota + "");
    geogebraMap.put("&phgr;", Unicode.phi + "");
    geogebraMap.put("&kgr;", Unicode.kappa + "");
    geogebraMap.put("&lgr;", Unicode.lambda + "");
    geogebraMap.put("&ngr;", Unicode.nu + "");
    // &ogr; is omicron (omega is &ohgr;), matching &Ogr; below
    geogebraMap.put("&ogr;", Unicode.omicron + "");
    geogebraMap.put("&thgr;", Unicode.theta + "");
    geogebraMap.put("&sgr;", Unicode.sigma + "");
    geogebraMap.put("&ugr;", Unicode.upsilon + "");
    geogebraMap.put("&zgr;", Unicode.zeta + "");
    geogebraMap.put("&Agr;", Unicode.Alpha + "");
    geogebraMap.put("&Bgr;", Unicode.Beta + "");
    geogebraMap.put("&KHgr;", Unicode.Chi + "");
    geogebraMap.put("&Egr;", Unicode.Epsilon + "");
    geogebraMap.put("&PHgr;", Unicode.Phi + "");
    geogebraMap.put("&Ggr;", Unicode.Gamma + "");
    geogebraMap.put("&EEgr;", Unicode.Eta + "");
    geogebraMap.put("&Igr;", Unicode.Iota + "");
    geogebraMap.put("&THgr;", Unicode.Theta + "");
    geogebraMap.put("&Kgr;", Unicode.Kappa + "");
    geogebraMap.put("&Lgr;", Unicode.Lambda + "");
    geogebraMap.put("&Mgr;", Unicode.Mu + "");
    geogebraMap.put("&Ngr;", Unicode.Nu + "");
    geogebraMap.put("&Ogr;", Unicode.Omicron + "");
    geogebraMap.put("&Pgr;", Unicode.Pi + "");
    geogebraMap.put("&Rgr;", Unicode.Rho + "");
    geogebraMap.put("&Sgr;", Unicode.Sigma + "");
    geogebraMap.put("&Tgr;", Unicode.Tau + "");
    geogebraMap.put("&Ugr;", Unicode.Upsilon + "");
    geogebraMap.put("&OHgr;", Unicode.Omega + "");
    geogebraMap.put("&Zgr;", Unicode.Zeta + "");
    // arrows and other operators
    geogebraMap.put("−", "-");
    geogebraMap.put("⊥", "# ");
    geogebraMap.put("∼", "~ ");
    geogebraMap.put("′", "# ");
    geogebraMap.put("≤", Unicode.LESS_EQUAL + "");
    geogebraMap.put("≥", Unicode.GREATER_EQUAL + "");
    geogebraMap.put("∞", Unicode.INFINITY + "");
    geogebraMap.put("♣", "# ");
    geogebraMap.put("♦", "# ");
    geogebraMap.put("♥", "# ");
    geogebraMap.put("♠", "# ");
    geogebraMap.put("±", Unicode.PLUSMINUS + "");
    geogebraMap.put("″", "# ");
    geogebraMap.put("∝", "# ");
    geogebraMap.put("∂", "# ");
    geogebraMap.put("•", "# ");
    geogebraMap.put("≠", Unicode.NOTEQUAL + "");
    geogebraMap.put("≡", "# ");
    geogebraMap.put("≈", "# ");
    geogebraMap.put("…", "... ");
    geogebraMap.put("∣", "# ");
    geogebraMap.put("↵", "# ");
    geogebraMap.put("ℵ", "# ");
    geogebraMap.put("ℑ", "# ");// ???
    geogebraMap.put("ℜ", "# ");// ???
    geogebraMap.put("℘", "# ");
    geogebraMap.put("⊗", "# ");
    geogebraMap.put("⊕", "# ");
    geogebraMap.put("∅", "# ");
    geogebraMap.put("∩", "# ");
    geogebraMap.put("∪", "# ");
    geogebraMap.put("⊃", "# ");
    geogebraMap.put("⊇", "# ");
    geogebraMap.put("⊄", "# ");
    geogebraMap.put("⊂", "# ");
    geogebraMap.put("⊆", "# ");
    geogebraMap.put("∈", "# ");
    geogebraMap.put("∉", "# ");
    geogebraMap.put("∠", "# ");
    geogebraMap.put("∇", "# ");
    geogebraMap.put("√", "# ");
    geogebraMap.put("∧", "# ");
    geogebraMap.put("∨", "# ");
    geogebraMap.put("∧", "# ");
    geogebraMap.put("∠", "# ");
    geogebraMap.put("∠", "# ");
    geogebraMap.put("≈", "# ");
    geogebraMap.put("≈", "# ");
    geogebraMap.put("⨁", "# ");
    geogebraMap.put("⨂", "# ");
    geogebraMap.put("⊥", "# ");
    geogebraMap.put("⊥", "# ");
    geogebraMap.put("∩", "# ");
    geogebraMap.put("⊕", "# ");
    geogebraMap.put("⊗", "# ");
    geogebraMap.put("≅", "# ");
    geogebraMap.put("≡", "# ");
    geogebraMap.put("∪", "# ");
    geogebraMap.put("↓", "# ");
    geogebraMap.put("⇓", "# ");
    geogebraMap.put("∇", "# ");
    geogebraMap.put("∇", "# ");
    geogebraMap.put("ⅆ", "\u2146 ");
    geogebraMap.put("⇐", "# ");
    geogebraMap.put("⇔", "# ");
    geogebraMap.put("⇒", "# ");
    geogebraMap.put("⇑", "# ");
    geogebraMap.put("↓", "# ");
    geogebraMap.put("⇓", "# ");
    geogebraMap.put("↓", "# ");
    geogebraMap.put("∈", "# ");
    geogebraMap.put("∅", "# ");
    geogebraMap.put("≡", "# ");
    geogebraMap.put("∃", "# ");
    geogebraMap.put("&Exist;", "# ");
    geogebraMap.put("ⅇ", "\u2147 ");
    geogebraMap.put("∀", "# ");
    geogebraMap.put("∀", "# ");
    geogebraMap.put("≥", Unicode.GREATER_EQUAL + "");
    geogebraMap.put("≥", Unicode.GREATER_EQUAL + "");
    geogebraMap.put("↔", "# ");
    geogebraMap.put("⇔", "# ");
    geogebraMap.put("⇔", "# ");
    geogebraMap.put("⇒", "# ");
    geogebraMap.put("∈", "# ");
    geogebraMap.put("∫", "# ");
    geogebraMap.put("∫", "# ");
    geogebraMap.put("∈", "# ");
    geogebraMap.put("∈", "# ");
    geogebraMap.put("⋄", "# ");
    geogebraMap.put("⋄", "# ");
    geogebraMap.put("〈", "# ");
    geogebraMap.put("⟨", "# ");
    geogebraMap.put("←", "# ");
    geogebraMap.put("⇐", "# ");
    geogebraMap.put("⟨", "# ");
    geogebraMap.put("⇐", "# ");
    geogebraMap.put("←", "# ");
    geogebraMap.put("↔", "# ");
    geogebraMap.put("⇔", "# ");
    geogebraMap.put("↔", "# ");
    geogebraMap.put("≤", Unicode.LESS_EQUAL + "");
    geogebraMap.put("≤", Unicode.LESS_EQUAL + "");
    geogebraMap.put("⟺", "# ");
    geogebraMap.put("−", "- ");
    geogebraMap.put("∇", "# ");
    geogebraMap.put("∉", "# ");
    // ??? is this good in LaTeX?
    geogebraMap.put("≠", Unicode.NOTEQUAL + "");
    geogebraMap.put("∉", "# ");
    geogebraMap.put("⊕", "# ");
    geogebraMap.put("∨", "# ");
    geogebraMap.put("⊗", "# ");
    geogebraMap.put("∂", "# ");
    geogebraMap.put("&partialD;", "# ");
    geogebraMap.put("⊥", "# ");
    geogebraMap.put("∏", "# ");
    geogebraMap.put("∏", "# ");
    geogebraMap.put("〉", "# ");
    geogebraMap.put("⟩", "# ");
    geogebraMap.put("→", "# ");
    geogebraMap.put("⇒", "# ");
    geogebraMap.put("⟩", "# ");
    geogebraMap.put("→", "# ");
    geogebraMap.put("⇒", "# ");
    geogebraMap.put("→", "# ");
    geogebraMap.put("⋅", "* ");
    geogebraMap.put("∼", "# ");
    geogebraMap.put("∝", "# ");
    geogebraMap.put("∝", "# ");
    geogebraMap.put("∝", "# ");
    geogebraMap.put("⊂", "# ");
    geogebraMap.put("⊆", "# ");
    geogebraMap.put("⫅", "# ");
    geogebraMap.put("⊂", "# ");
    geogebraMap.put("⊆", "# ");
    geogebraMap.put("⫅", "# ");
    geogebraMap.put("⊆", "# ");
    geogebraMap.put("∑", "# ");
    geogebraMap.put("∑", "# ");
    geogebraMap.put("⊃", "# ");
    geogebraMap.put("⊇", "# ");
    geogebraMap.put("⫆", "# ");
    geogebraMap.put("⊃", "# ");
    geogebraMap.put("⊇", "# ");
    geogebraMap.put("⊃", "# ");
    geogebraMap.put("⊇", "# ");
    geogebraMap.put("⫆", "# ");
    geogebraMap.put("∼", "# ");
    geogebraMap.put("≅", "# ");
    geogebraMap.put("≈", "# ");
    geogebraMap.put("‴", "\u2034 ");
    geogebraMap.put("↑", "# ");
    geogebraMap.put("⇑", "# ");
    geogebraMap.put("↑", "# ");
    geogebraMap.put("⇑", "# ");
    geogebraMap.put("↑", "# ");
    geogebraMap.put("⊥", "# ");
    geogebraMap.put("∅", "# ");
    geogebraMap.put("∝", "# ");
    geogebraMap.put("∨", "# ");
    geogebraMap.put("∝", "# ");
    geogebraMap.put("∧", "# ");
    geogebraMap.put("⨁", "# ");
    geogebraMap.put("⨂", "# ");
    geogebraMap.put("&Space;", " ");
    geogebraMap.put(":", ":");
    // key is an invisible character (looks like U+2061 FUNCTION APPLICATION)
    geogebraMap.put("⁡", " ");
    geogebraMap.put("□", " ");
    geogebraMap.put("−", "- ");
    geogebraMap.put("→", "# ");
    geogebraMap.put("∫", "# ");
    // NOTE(review): the key renders as empty; it may hold a zero-width
    // character - confirm
    geogebraMap.put("", "");
}
private static HashMap<String, String> latexMap = new HashMap<String, String>();
static {
// Tags:
latexMap.put("<mfrac>", "\\frac{%BLOCK1%}{%BLOCK2%}");
latexMap.put("<msup>", "%BLOCK1%^{%BLOCK2%}");
latexMap.put("<msub>", "%BLOCK1%_{%BLOCK2%}");
latexMap.put("<msqrt>", "\\sqrt{%BLOCK1%}");
latexMap.put("<mroot>", "\\sqrt[%BLOCK2%]{%BLOCK1%}");
latexMap.put("<mfenced>", "\\left(%BLOCK1%\\right)");
latexMap.put("<msubsup>", "%BLOCK1%_{%BLOCK2%}^{%BLOCK3%}");
latexMap.put("<munderover>", "%BLOCK1%_{%BLOCK2%}^{%BLOCK3%}");
latexMap.put("<munder>", "%BLOCK1%_{%BLOCK2%}");
latexMap.put("<mtable>", "\\matrix{%BLOCKS%}");
latexMap.put("<mtr>", "%BLOCKS%\\cr");
latexMap.put("<mtd>", "%BLOCK1%&");
// Entities
latexMap.put("˙", "\\cdot ");
latexMap.put("⋅", "\\cdot ");
latexMap.put("·", "\\cdot ");
latexMap.put("×", "\\times ");
latexMap.put("=", "\\Relbar ");
latexMap.put("∀", "\\forall ");
latexMap.put("∃", "\\exists ");
latexMap.put("∍", "\\ni ");
latexMap.put("∗", "* ");
latexMap.put("−", "- ");
latexMap.put("⁄", "/ ");
latexMap.put("∶", ": ");
latexMap.put("<", "< ");
latexMap.put(">", "> ");
latexMap.put("≅", "\\cong ");
latexMap.put("⁢", " ");
// Pfeile
latexMap.put("↔", "\\leftrightarrow ");
latexMap.put("←", "\\leftarrow ");
latexMap.put("→", "\\rightarrow ");
latexMap.put("⇔", "\\Leftrightarrow ");
latexMap.put("⇐", "\\Leftarrow ");
latexMap.put("⇒", "\\Rightarrow ");
// dynamische Zeichen
latexMap.put("∑", "\\sum ");
latexMap.put("∏", "\\prod ");
latexMap.put("∫", "\\int ");
latexMap.put("ⅆ", "d ");
// griechisches Alphabet
latexMap.put("α", "\\alpha");
latexMap.put("β", "\\beta");
latexMap.put("γ", "\\gamma ");
latexMap.put("δ", "\\delta ");
latexMap.put("ε", "\\epsilon ");
latexMap.put("η", "\\eta ");
latexMap.put("ι", "\\iota ");
latexMap.put("κ", "\\kappa ");
latexMap.put("λ", "\\lambda ");
latexMap.put("μ", "\\mu ");
latexMap.put("&mgr;", "\\mu ");
latexMap.put("ν", "\\nu ");
latexMap.put("ο", "o ");
latexMap.put("π", "\\pi ");
latexMap.put("θ", "\\theta ");
latexMap.put("ρ", "\\rho ");
latexMap.put("&rgr;", "\\rho ");
latexMap.put("σ", "\\sigma ");
latexMap.put("τ", "\\tau ");
latexMap.put("υ", "\\upsilon ");
latexMap.put("ϕ", "\\phi");
latexMap.put("φ", "\\varphi");
latexMap.put("χ", "\\chi ");
latexMap.put("ϖ", "\\varpi ");
latexMap.put("&pgr;", "\\pi ");
latexMap.put("&ohgr;", "\\omega ");
latexMap.put("ω", "\\omega ");
latexMap.put("ξ", "\\xi ");
latexMap.put("ψ", "\\psi ");
latexMap.put("ζ", "\\zeta ");
latexMap.put("Δ", "\\Delta ");
latexMap.put("Φ", "\\Phi ");
latexMap.put("Γ", "\\Gamma ");
latexMap.put("Λ", "\\Lambda ");
latexMap.put("Π", "\\Pi ");
latexMap.put("&tgr;", "\\tau ");
latexMap.put("Θ", "\\Theta ");
latexMap.put("Σ", "\\Sigma ");
latexMap.put("Υ", "\\Upsilon ");
latexMap.put("ς", "\\varsigma ");
latexMap.put("Ω", "\\Omega ");
latexMap.put("Ξ", "\\Xi ");
latexMap.put("Ψ", "\\Psi ");
latexMap.put("ϵ", "\\epsilon ");
latexMap.put("&phgr;", "\\phi ");
latexMap.put("&ggr;", "\\gamma ");
latexMap.put("&eegr;", "\\eta ");
latexMap.put("&igr;", "\\iota ");
latexMap.put("&phgr;", "\\phi ");
latexMap.put("&kgr;", "\\kappa ");
latexMap.put("&lgr;", "\\lambda ");
latexMap.put("&ngr;", "\\nu ");
latexMap.put("&ogr;", "o ");
latexMap.put("&thgr;", "\\theta ");
latexMap.put("&sgr;", "\\sigma ");
latexMap.put("&ugr;", "\\upsilon ");
latexMap.put("&zgr;", "\\zeta ");
latexMap.put("&Agr;", "A ");
latexMap.put("&Bgr;", "B ");
latexMap.put("&KHgr;", "X ");
latexMap.put("&Egr;", "E ");
latexMap.put("&PHgr;", "\\Phi ");
latexMap.put("&Ggr;", "\\Gamma ");
latexMap.put("&EEgr;", "H ");
latexMap.put("&Igr;", "I ");
latexMap.put("&THgr;", "\\Theta ");
latexMap.put("&Kgr;", "K ");
latexMap.put("&Lgr;", "\\Lambda ");
latexMap.put("&Mgr;", "M ");
latexMap.put("&Ngr;", "N ");
latexMap.put("&Ogr;", "O ");
latexMap.put("&Pgr;", "\\Pi ");
latexMap.put("&Rgr;", "P ");
latexMap.put("&Sgr;", "\\Sigma ");
latexMap.put("&Tgr;", "T ");
latexMap.put("&Ugr;", "\\Upsilon ");
latexMap.put("&OHgr;", "\\Omega ");
latexMap.put("&Zgr;", "Z ");
// Pfeile und andere Operatoren
latexMap.put("−", "-");
latexMap.put("⊥", "\\bot ");
latexMap.put("∼", "~ ");
latexMap.put("′", "\\prime ");
latexMap.put("≤", "\\le ");
latexMap.put("≥", "\\ge ");
latexMap.put("∞", "\\infty ");
latexMap.put("♣", "\\clubsuit ");
latexMap.put("♦", "\\diamondsuit ");
latexMap.put("♥", "\\heartsuit ");
latexMap.put("♠", "\\spadesuit ");
latexMap.put("±", "\\pm ");
latexMap.put("″", "\\prime\\prime ");
latexMap.put("∝", "\\propto ");
latexMap.put("∂", "\\partial ");
latexMap.put("•", "\\bullet ");
latexMap.put("≠", "\\neq ");
latexMap.put("≡", "\\equiv ");
latexMap.put("≈", "\\approx ");
latexMap.put("…", "... ");
latexMap.put("∣", "\\mid ");
latexMap.put("↵", "\\P ");
latexMap.put("ℵ", "\\aleph ");
latexMap.put("ℑ", "\\Im ");
latexMap.put("ℜ", "\\Re ");
latexMap.put("℘", "\\wp ");
latexMap.put("⊗", "\\otimes ");
latexMap.put("⊕", "\\oplus ");
latexMap.put("∅", "\\emtyset ");
latexMap.put("∩", "\\cap ");
latexMap.put("∪", "\\cup ");
latexMap.put("⊃", "\\supset ");
latexMap.put("⊇", "\\seupseteq ");
latexMap.put("⊄", "\\not\\subset ");
latexMap.put("⊂", "\\subset ");
latexMap.put("⊆", "\\subseteq ");
latexMap.put("∈", "\\in ");
latexMap.put("∉", "\\notin ");
latexMap.put("∠", "\\angle ");
latexMap.put("∇", "\\nabla ");
latexMap.put("√", "\\surd ");
latexMap.put("∧", "\\wedge ");
latexMap.put("∨", "\\vee ");
latexMap.put("∧", "\\wedge ");
latexMap.put("∠", "\\angle ");
latexMap.put("∠", "\\angle ");
latexMap.put("≈", "\\approx ");
latexMap.put("≈", "\\approx ");
latexMap.put("⨁", "\\oplus ");
latexMap.put("⨂", "\\otimes ");
latexMap.put("⊥", "\\bot ");
latexMap.put("⊥", "\\bot ");
latexMap.put("∩", "\\cap ");
latexMap.put("⊕", "\\oplus ");
latexMap.put("⊗", "\\otimes ");
latexMap.put("≅", "\\cong ");
latexMap.put("≡", "\\equiv ");
latexMap.put("∪", "\\cup ");
latexMap.put("↓", "\\downarrow ");
latexMap.put("⇓", "\\Downarrow ");
latexMap.put("∇", "\\nabla ");
latexMap.put("∇", "\\nabla ");
latexMap.put("ⅆ", "\u2146 ");
latexMap.put("⇐", "\\Leftarrow ");
latexMap.put("⇔", "\\Leftrightarrow ");
latexMap.put("⇒", "\\Rightarrow ");
latexMap.put("⇑", "\\Uparrow ");
latexMap.put("↓", "\\downarrow ");
latexMap.put("⇓", "\\Downarrow ");
latexMap.put("↓", "\\Downarrow ");
latexMap.put("∈", "\\in ");
latexMap.put("∅", "\\oslash ");
latexMap.put("≡", "\\equiv ");
latexMap.put("∃", "\\exists ");
latexMap.put("&Exist;", "\\exists ");
latexMap.put("ⅇ", "\u2147 ");
latexMap.put("∀", "\\forall ");
latexMap.put("∀", "\\forall ");
latexMap.put("≥", "\\geq ");
latexMap.put("≥", "\\geq ");
latexMap.put("≥", "\\geq ");
latexMap.put("↔", "\\leftrightarrow ");
latexMap.put("⇔", "\\Leftrightarrow ");
latexMap.put("⇔", "\\Leftrightarrow ");
latexMap.put("⇒", "\\Rightarrow ");
latexMap.put("∈", "\\in ");
latexMap.put("∞", "\\infty ");
latexMap.put("∫", "\\int ");
latexMap.put("∫", "\\int ");
latexMap.put("∈", "\\in ");
latexMap.put("∈", "\\in ");
latexMap.put("⋄", "\\diamond ");
latexMap.put("⋄", "\\diamond ");
latexMap.put("〈", "\\left\\langle ");
latexMap.put("⟨", "\\left\\langle ");
latexMap.put("←", "\\leftarrow ");
latexMap.put("⇐", "\\Leftarrow ");
latexMap.put("≤", "\\leq ");
latexMap.put("⟨", "\\left\\langle ");
latexMap.put("⇐", "\\Leftarrow ");
latexMap.put("←", "\\leftarrow ");
latexMap.put("↔", "\\leftrightarrow ");
latexMap.put("⇔", "\\Leftrightarrow ");
latexMap.put("↔", "\\leftrightarrow ");
latexMap.put("≤", "\\leq ");
latexMap.put("≤", "\\leq ");
latexMap.put("⟺", "\\Longleftrightarrow ");
latexMap.put("−", "- ");
latexMap.put("∇", "\\nabla ");
latexMap.put("∉", "\\notin ");
latexMap.put("≠", "\\notin ");
latexMap.put("∉", "\\notin ");
latexMap.put("⊕", "\\oplus ");
latexMap.put("∨", "\\vee ");
latexMap.put("⊗", "\\otimes ");
latexMap.put("∂", "\\partial ");
latexMap.put("&partialD;", "\\partial ");
latexMap.put("⊥", "\\bot ");
latexMap.put("∏", "\\Pi ");
latexMap.put("∏", "\\Pi ");
latexMap.put("〉", "\\right\\rangle ");
latexMap.put("⟩", "\\right\\rangle ");
latexMap.put("→", "\\rightarrow ");
latexMap.put("⇒", "\\Rightarrow ");
latexMap.put("⟩", "\\right\\rangle ");
latexMap.put("→", "\\rightarrow ");
latexMap.put("⇒", "\\Rightarrow ");
latexMap.put("→", "\\rightarrow ");
latexMap.put("⋅", "\\cdot ");
latexMap.put("∼", "\\sim ");
latexMap.put("∝", "\\propto ");
latexMap.put("∝", "\\propto ");
latexMap.put("∝", "\\propto ");
latexMap.put("⊂", "\\subset ");
latexMap.put("⊆", "\\subseteq ");
latexMap.put("⫅", "\\subseteq ");
latexMap.put("⊂", "\\subset ");
latexMap.put("⊆", "\\subseteq ");
latexMap.put("⫅", "\\subseteq ");
latexMap.put("⊆", "\\subseteq ");
latexMap.put("∑", "\\Sigma ");
latexMap.put("∑", "\\Sigma ");
latexMap.put("⊃", "\\supset ");
latexMap.put("⊇", "\\supseteq ");
latexMap.put("⫆", "\\supseteq ");
latexMap.put("⊃", "\\supset");
latexMap.put("⊇", "\\supseteq ");
latexMap.put("⊃", "\\supset ");
latexMap.put("⊇", "\\supseteq ");
latexMap.put("⫆", "\\supseteq ");
latexMap.put("∼", "\\sim ");
latexMap.put("≅", "\\cong ");
latexMap.put("≈", "\\approx ");
latexMap.put("‴", "\u2034 ");
latexMap.put("↑", "\\uparrow ");
latexMap.put("⇑", "\\Uparrow ");
latexMap.put("↑", "\\uparrow ");
latexMap.put("⇑", "\\Uparrow ");
latexMap.put("↑", "\\uparrow ");
latexMap.put("⊥", "\\bot ");
latexMap.put("∅", "\\oslash ");
latexMap.put("∝", "\\propto ");
latexMap.put("∨", "\\vee ");
latexMap.put("∝", "\\propto ");
latexMap.put("∧", "\\wedge ");
latexMap.put("⨁", "\\oplus ");
latexMap.put("⨂", "\\otimes ");
latexMap.put("&Space;", " ");
latexMap.put(":", ":");
latexMap.put("⁡", " ");
latexMap.put("□", " ");
latexMap.put("−", "- ");
latexMap.put("→", "\\to ");
latexMap.put("∫", "\\int ");
latexMap.put("", "");
}
/**
 * Prefix of a block place holder in a substitution. A place holder is
 * replaced by the converted output of the child block it refers to.<br>
 * Syntax: PH_BLOCK_START + blockNumber + PH_BLOCK_END, e.g. '%BLOCK1%'. The
 * keyword '%BLOCKS%' stands for all child blocks in order of appearance.
 */
private final static String PH_BLOCK_START = "%BLOCK";
/** Terminator of a block place holder. */
private final static char PH_BLOCK_END = '%';
/** Characters that parseBlockContent() prefixes with a backslash. */
private final static char[] specialCharacters = { '%', '_', '$' };
/** Opening braces that get a "\left" prefix in LaTeX output. */
private final static char[] leftBraces = { '(', '{', '[' };
/**
 * Closing braces that get a "\right" prefix in LaTeX output; each entry is
 * the counterpart of the entry at the same index in leftBraces.
 */
private final static char[] rightBraces = { ')', '}', ']' };
/** The substitution table in use (geogebraMap or latexMap). */
private HashMap<String, String> substitutions;
/** The (preprocessed) MathML input of the current parse() run. */
private String strBuf;
/** Read position in strBuf, shared by all scanning methods. */
private int pos;
/** Value of the wrappedEntities argument of the current parse() run. */
private boolean wrappedEntities;
/** Value of the skipUnknownEntities argument of the current parse() run. */
private boolean skipUnknownEntities;
/** Whether GeoGebra syntax (rather than LaTeX) is produced. */
private boolean geogebraSyntax;
// temporary variables (declared global for better performance)
private String nextTag;
// scratch buffer used by getNextTag(), skipBlocks() & getBlockEnd()
private StringBuilder tagBuf = new StringBuilder(200);
// used by replaceEntities()
private StringBuilder entity = new StringBuilder(32);
// used by replaceEntities()
private String entitySubst = "";
/**
 * Set by parseBlock() after a substitution that ends in ','; tells the next
 * parseBlock() call to append ')' after its block.
 */
private boolean closeBracketNext = false;
/**
 * Creates a parser and selects the substitution table it will use.
 *
 * @param geogebraSyntax1
 *            {@code true} to produce GeoGebra syntax (uses geogebraMap),
 *            {@code false} to produce LaTeX (uses latexMap)
 */
public MathMLParser(boolean geogebraSyntax1) {
    this.geogebraSyntax = geogebraSyntax1;
    this.substitutions = geogebraSyntax1 ? geogebraMap : latexMap;
}
/*
* Removed by GeoGebra Generates the substitution table from the given file
* path.
*
* @param substitutionsTable the substitution table.
*
* public MathMLParser(HashMap<String, String> substitutionsTable) {
* substitutions = substitutionsTable; }
*/
/**
 * Parses MathML code into LaTeX or GeoGebra syntax, depending on the
 * substitution table selected by the constructor. <br>
 * Only presentation markup can be parsed properly, there is no use in
 * parsing content markup.
 * <p>
 * For example the presentation markup code
 *
 * <pre>{@code
 * <mrow>
 *   <msup>
 *     <mfenced>
 *       <mrow>
 *         <mi>a</mi>
 *         <mo>+</mo>
 *         <mi>b</mi>
 *       </mrow>
 *     </mfenced>
 *     <mn>2</mn>
 *   </msup>
 * </mrow>
 * }</pre>
 *
 * can be parsed by this method, while the equivalent content markup
 *
 * <pre>{@code
 * <mrow>
 *   <apply>
 *     <power/>
 *     <apply>
 *       <plus/>
 *       <ci>a</ci>
 *       <ci>b</ci>
 *     </apply>
 *     <cn>2</cn>
 *   </apply>
 * </mrow>
 * }</pre>
 *
 * can not be parsed.
 * </p>
 * Both notations of entities can be parsed: the plain MathML notation,
 * starting with an ampersand sign (e.g. {@code &equals;}), or the "HTML
 * wrapped" notation starting with an entity for the ampersand sign (e.g.
 * {@code &amp;equals;}).
 * <p>
 * Before parsing, line breaks are replaced by spaces, XML comments are
 * removed, a space is inserted between directly adjacent tags, and the
 * content of {@code msqrt} and {@code mtd} elements is wrapped in an
 * {@code mrow} (their attributes are dropped in the process).
 * </p>
 * TODO rework (complete MathML blocks only?)
 *
 * @param strBuf0
 *            a String containing the MathML code to parse
 * @param wrappedEntities1
 *            indicates whether the entities in the MathML code are HTML
 *            wrapped (e.g. {@code &amp;PlusMinus;}), or not (e.g.
 *            {@code &PlusMinus;})
 * @param skipUnknownEntities1
 *            stored for the entity replacement step of this run
 * @return the converted input, or {@code null} if any exception occurred
 *         while parsing (its stack trace is printed)
 */
public String parse(String strBuf0, boolean wrappedEntities1,
        boolean skipUnknownEntities1) {
    // Remove newlines first;
    String strBuf1 = strBuf0.replace('\n', ' ').replace('\r', ' ');
    // now remove comments
    strBuf1 = strBuf1.replaceAll("<!--.*?-->", "");
    // Avoiding bugs due to wrong parsing (quick workarounds)
    strBuf1 = strBuf1.replace("><", "> <");
    // Adding "inferred mrow" to those elements that need it
    // according to W3C and also there in latexMap;
    // but also take care of the possible attributes!
    // As the algorithm itself neglects them,
    // this "quick" solution can do that too.
    strBuf1 = strBuf1.replaceAll("<msqrt.*?>", "<msqrt> <mrow>");
    strBuf1 = strBuf1.replace("</msqrt>", "</mrow> </msqrt>");
    strBuf1 = strBuf1.replaceAll("<mtd.*?>", "<mtd> <mrow>");
    strBuf1 = strBuf1.replace("</mtd>", "</mrow> </mtd>");
    this.strBuf = strBuf1;
    this.wrappedEntities = wrappedEntities1;
    this.skipUnknownEntities = skipUnknownEntities1;
    // a ')' left pending by an earlier run on this instance (aborted run,
    // or input ending in e.g. <munderover>) must not leak into this one
    this.closeBracketNext = false;
    // usually the MathML input should have more characters than the
    // output
    StringBuilder result = new StringBuilder(strBuf.length());
    pos = 0;
    try {
        // convert one top-level block per iteration
        while (strBuf.indexOf("<", pos) != -1) {
            parseBlock(getNextTag(), result, true);
            skipFollowingTag();
        }
        return result.toString();
    } catch (Exception e) {
        e.printStackTrace();
    }
    // TODO report the failure with an exception instead of null
    return null;
}
/**
 * Converts the MathML block opened by {@code startTag} recursively and
 * appends the output to {@code result}. On entry {@code pos} is expected to
 * be just behind the start tag; on exit {@code pos} is set to the end of
 * the block as computed by getBlockEnd(), so the caller still has to skip
 * the closing tag.
 * <p>
 * Pseudocode:
 *
 * <pre>{@code
 * while (pos <= blockEnd) {
 *   if (noFurtherTagInsideBlock) {
 *     if (blockHadNoSubblocks) {
 *       result.append(parseBlockContent(blockText));
 *     } else {
 *       skipToBlockEnd();
 *     }
 *   } else if (substitutionAvailable(startTag)) {
 *     while (substitutionContainsPlaceHolder) {
 *       appendSubstitutionUpToPlaceHolder();
 *       parseBlock(referencedSubblock);
 *     }
 *     appendRestOfSubstitution();
 *   } else {
 *     parseAllSubblocksInOrder();
 *   }
 * }
 * }</pre>
 *
 * </p>
 * If the substitution of a block ends in ',' the following block is
 * followed by a ')' (used to close the call opened by the GeoGebra
 * substitution of {@code <munderover>}).
 *
 * @param startTag
 *            the start tag of the block, as returned by getNextTag()
 * @param result
 *            builder to which we append the string
 * @param appendSpace
 *            whether space should be appended after the block content
 * @throws Exception
 *             if an error occurs while parsing
 */
void parseBlock(String startTag, StringBuilder result, boolean appendSpace)
        throws Exception {
    // take over (and clear) the ')' request of the previous block, so that
    // nested blocks do not see it
    boolean closeBracketNow = this.closeBracketNext;
    this.closeBracketNext = false;
    String endTag = generateEndTag(startTag);
    int blockEnd = getBlockEnd(startTag, endTag);
    String substBuf;
    String blockContent;
    boolean inside = true;
    int blockNumber = 0;
    int prevBlockNumber;
    while (pos <= blockEnd) {
        // scan for subblocks
        int i = pos;
        while ((i <= blockEnd) && (strBuf.charAt(i) != '<')) {
            i++;
        }
        // self-closing tags are their own end tag; compare by value
        if (!startTag.equals(endTag) && (i > blockEnd)) {
            // if sure to be at the end of the block hierarchy (inside),
            // append block content to result
            if (inside) {
                blockContent = strBuf.substring(pos, blockEnd + 1);
                result.append(parseBlockContent(blockContent));
                if (appendSpace) {
                    result.append(' ');
                }
                pos = pos + blockContent.length();
            } else {
                // if all subblocks have been processed skip to the end
                pos = blockEnd + 1;
            }
        } else {
            // this block has subblocks
            inside = false;
            // if there is a substitution for this block, write it to
            // 'result'
            if ((substBuf = substitutions.get(startTag)) != null) {
                int phIndex;
                int substIndex = 0;
                // parse subblocks recursively
                while (((phIndex = substBuf.indexOf(PH_BLOCK_START,
                        substIndex)) > -1) && (pos - 2 < blockEnd)) {
                    // write substitution up to the block marker
                    while (substIndex < phIndex) {
                        result.append(substBuf.charAt(substIndex));
                        substIndex++;
                    }
                    substIndex += PH_BLOCK_START.length();
                    // get number of the block to parse
                    int blockNumberIndex = substIndex;
                    while (substBuf.charAt(substIndex) != PH_BLOCK_END) {
                        substIndex++;
                    }
                    prevBlockNumber = blockNumber;
                    String blockNumberStr = substBuf
                            .substring(blockNumberIndex, substIndex);
                    if ("S".equals(blockNumberStr)) {
                        // keyword is BLOCKS -> parse all inner blocks in
                        // order of appearance
                        // skip PH_BLOCK_END
                        substIndex++;
                        // jump to the block to parse
                        skipBlocks((1 - prevBlockNumber) - 1);
                        // parse subblocks
                        while ((strBuf.substring(pos, blockEnd + 1))
                                .indexOf('<') != -1) {
                            nextTag = getNextTag();
                            parseBlock(nextTag, result, true);
                            skipFollowingTag();
                        }
                        // drop the space appended by the last subblock,
                        // but never a non-space character (e.g. the 'r'
                        // of "\cr") and never from an empty builder
                        if (!appendSpace && result.length() > 0
                                && result.charAt(
                                        result.length() - 1) == ' ') {
                            result.setLength(result.length() - 1);
                        }
                    } else {
                        // keyword is BLOCK + block number, parse inner
                        // blocks in given order
                        try {
                            blockNumber = Integer.parseInt(blockNumberStr);
                        } catch (NumberFormatException nfe) {
                            throw new Exception(
                                    "Parsing error at character " + pos
                                            + ": Unparseable block number in substitution.",
                                    nfe);
                        }
                        // skip PH_BLOCK_END
                        substIndex++;
                        // jump to the block to parse (may go backwards if
                        // the substitution reorders the blocks)
                        skipBlocks((blockNumber - prevBlockNumber) - 1);
                        // parse subblock
                        nextTag = getNextTag();
                        parseBlock(nextTag, result, false);
                        skipFollowingTag();
                    }
                }
                // write (end of) substitution
                while (substIndex < substBuf.length()) {
                    result.append(substBuf.charAt(substIndex));
                    substIndex++;
                }
                pos = blockEnd + endTag.length();
                // an open argument list: the next block is the last
                // argument and has to be followed by ')'
                if (substBuf.endsWith(",")) {
                    this.closeBracketNext = true;
                }
            } else {
                // no substitution: parse all subblocks in order
                while ((strBuf.substring(pos, blockEnd + 1))
                        .indexOf('<') != -1) {
                    nextTag = getNextTag();
                    parseBlock(nextTag, result, true);
                    skipFollowingTag();
                }
                // make sure we don't output "x _{1}" but don't remove any
                // non-space character
                if (!appendSpace && result.length() > 0
                        && result.charAt(result.length() - 1) == ' ') {
                    result.setLength(result.length() - 1);
                }
            }
        }
        // close the bracket exactly once: a container block without
        // substitution passes through this loop a second time just to
        // skip the text in front of its closing tag
        if (closeBracketNow) {
            result.append(")");
            closeBracketNow = false;
        }
    }
    // TODO why does 'amayaOut.htm' need this statement? -> 853, 853
    // (<mprescripts/>)
    pos = blockEnd;
}
/**
 * Advances {@code pos} to the next '<', copies the tag that starts there
 * (up to and including its '>') into {@code tagBuf} and leaves {@code pos}
 * on the first character behind the tag.
 *
 * @return the tag that was read, including the angle brackets
 */
private String getNextTag() {
    // move to the opening bracket
    for (; strBuf.charAt(pos) != '<'; pos++) {
        // nothing to do, just advancing
    }
    tagBuf.setLength(0);
    // collect everything in front of the closing bracket
    for (char c = strBuf.charAt(pos); c != '>'; c = strBuf.charAt(pos)) {
        tagBuf.append(c);
        pos++;
    }
    // step over the closing bracket and add it to the tag
    pos++;
    return tagBuf.append('>').toString();
}
/**
 * Generates an end tag corresponding to the given 'startTag'. Attributes of
 * the start tag are dropped. For a self-closing tag (e.g.
 * "<mprescripts/>") the start tag itself is returned.
 *
 * @param startTag
 *            the start tag to generate an end tag from, including the angle
 *            brackets
 * @return the end tag for the given start tag
 */
String generateEndTag(String startTag) {
    // look at the tag that was passed in, not at the shared tag buffer,
    // which only holds the same text right after getNextTag()
    if (startTag.charAt(startTag.length() - 2) == '/') {
        // self-closing: the endTag is the startTag
        return startTag;
    }
    int attributesStart = startTag.indexOf(' ');
    if (attributesStart > -1) {
        // delete parameters of startTag
        return "</" + startTag.substring(1, attributesStart) + ">";
    }
    return "</" + startTag.substring(1);
}
/**
 * Moves {@code pos} to the first character behind the next '>', i.e. skips
 * everything up to and including the end of the next tag.
 */
void skipFollowingTag() {
    for (; strBuf.charAt(pos) != '>'; pos++) {
        // nothing to do, just advancing to the closing bracket
    }
    // step over the closing bracket itself
    pos++;
}
/**
 * Moves {@code pos} over a number of complete blocks, starting at the
 * current position. A positive count skips forward, a negative count skips
 * backward, zero leaves the position unchanged.
 *
 * @param blocksToSkip
 *            the number of blocks to skip
 */
void skipBlocks(int blocksToSkip) {
    // forward: read each start tag and jump behind its block
    for (int remaining = blocksToSkip; remaining > 0; remaining--) {
        String openTag = getNextTag();
        String closeTag = generateEndTag(openTag);
        pos = getBlockEnd(openTag, closeTag) + closeTag.length();
    }
    // backward: read each end tag and walk back to its start tag
    for (int remaining = blocksToSkip; remaining < 0; remaining++) {
        readPrecedingTag();
        String blockEndTag = tagBuf.toString();
        // removing the '/' turns the end tag into the bare start tag
        String blockStartTag = tagBuf.deleteCharAt(1).toString();
        // number of blocks of this name that are still open
        int depth = 1;
        String tag;
        do {
            readPrecedingTag();
            // drop attributes, keeping the closing bracket
            int attributesStart = tagBuf.indexOf(" ");
            if (attributesStart > -1) {
                tagBuf.delete(attributesStart, tagBuf.length() - 1);
            }
            tag = tagBuf.toString();
            if (tag.equals(blockStartTag)) {
                depth--;
            } else if (tag.equals(blockEndTag)) {
                depth++;
            }
        } while ((depth > 0) || !tag.equals(blockStartTag));
    }
}

/**
 * Walks {@code pos} backwards to the nearest '>', reads the tag that ends
 * there into {@code tagBuf} and leaves {@code pos} on the tag's '<'.
 */
private void readPrecedingTag() {
    while (strBuf.charAt(pos) != '>') {
        pos--;
    }
    tagBuf.setLength(0);
    // characters are collected back to front and reversed afterwards
    while (strBuf.charAt(pos) != '<') {
        tagBuf.append(strBuf.charAt(pos));
        pos--;
    }
    tagBuf.append('<');
    tagBuf.reverse();
}
/**
 * Returns the end index of the block defined by the 'startTag0' parameter
 * skipping all subblocks. The scan starts at {@code pos}, which itself is
 * not changed; {@code tagBuf} is used as scratch space.
 *
 * @param startTag0
 *            the tag that opened the block
 * @param endTag
 *            the end tag to seek
 * @return the index of the character before the closing tag of the block;
 *         for a self-closing tag (startTag0 equals endTag) the current
 *         position minus the length of the tag
 */
int getBlockEnd(String startTag0, String endTag) {
    if (startTag0.equals(endTag)) {
        // self-closing tag, there is nothing to scan
        return pos - startTag0.length();
    }
    String startTag = startTag0;
    int pos2 = pos;
    // number of blocks of this name that are still open
    int subBlocks = 1;
    // delete parameters of startTag
    int attributesStart = startTag.indexOf(' ');
    if (attributesStart > -1) {
        startTag = startTag.substring(0, attributesStart) + '>';
    }
    do {
        // read the next tag into tagBuf
        while (strBuf.charAt(pos2) != '<') {
            pos2++;
        }
        tagBuf.setLength(0);
        while (strBuf.charAt(pos2) != '>') {
            tagBuf.append(strBuf.charAt(pos2));
            pos2++;
        }
        tagBuf.append('>');
        if (tagBuf.toString().equals(endTag)) {
            subBlocks--;
        } else if (tagBuf.charAt(tagBuf.length() - 2) != '/') {
            // self-closing tags open no block, so they are not counted
            // even if they carry attributes (e.g. <mrow id="x"/>)
            if (tagBuf.indexOf(" ") > -1) {
                // drop attributes, keeping the closing bracket
                tagBuf.delete(tagBuf.indexOf(" "), tagBuf.length() - 1);
            }
            if (tagBuf.toString().equals(startTag)) {
                subBlocks++;
            }
        }
    } while ((subBlocks > 0) || (!(tagBuf.toString().equals(endTag))));
    // pos2 is on the '>' of the closing tag
    return (pos2 - endTag.length());
}
/**
 * Converts the textual content of a block into the output syntax and
 * returns it. The following rewrites are applied in this order:
 * <ol>
 * <li>every backslash is followed by the word "backslash";</li>
 * <li>unless geogebraSyntax is set, "\left" / "\right" is put in front of
 * every occurrence of an entry of leftBraces / rightBraces;</li>
 * <li>a backslash is put in front of every occurrence of an entry of
 * specialCharacters;</li>
 * <li>every entity (text from '&amp;' up to the next ';') is replaced: by
 * its entry in the substitutions table plus a trailing space if there is
 * one; otherwise by a single space if skipUnknownEntities is set; otherwise
 * by the character itself if it is a four-digit hexadecimal character
 * reference; otherwise it is wrapped as NOTFOUND:'...';</li>
 * <li>every remaining '&amp;' and '#' gets a backslash in front.</li>
 * </ol>
 * The result is trimmed.
 * <p>
 * NOTE: an '&amp;' that is not followed by any ';' makes the entity scan
 * run past the end of the text and fail with an unchecked
 * StringIndexOutOfBoundsException.
 *
 * @param s
 *            the string to parse
 * @return the converted representation of the given string
 * @throws Exception
 *             if HTML wrapped entities were expected but not found
 */
String parseBlockContent(String s) throws Exception {
	// TODO here!
	int sbIndex = 0;
	StringBuilder sb = new StringBuilder(s);
	// replace backslashes: "\" becomes "\backslash"
	while ((sbIndex = sb.indexOf("\\", sbIndex)) > -1) {
		sb.insert(sbIndex + 1, "backslash");
		sbIndex = sbIndex + 10;
	}
	// replace braces
	if (!geogebraSyntax) {
		for (int i = 0; i < leftBraces.length; i++) {
			sbIndex = 0;
			while ((sbIndex = sb.indexOf(String.valueOf(leftBraces[i]),
					sbIndex)) > -1) {
				sb.insert(sbIndex, "\\left");
				sbIndex = sbIndex + 6;
			}
		}
		for (int i = 0; i < rightBraces.length; i++) {
			sbIndex = 0;
			while ((sbIndex = sb.indexOf(String.valueOf(rightBraces[i]),
					sbIndex)) > -1) {
				sb.insert(sbIndex, "\\right");
				sbIndex = sbIndex + 7;
			}
		}
	}
	// replace special characters
	for (int i = 0; i < specialCharacters.length; i++) {
		sbIndex = 0;
		while ((sbIndex = sb.indexOf(String.valueOf(specialCharacters[i]),
				sbIndex)) > -1) {
			sb.insert(sbIndex, '\\');
			sbIndex = sbIndex + 2;
		}
	}
	// replace Entities
	sbIndex = 0;
	while ((sbIndex = sb.indexOf("&", sbIndex)) > -1) {
		// collect the entity including its terminating ';'
		entity.setLength(0);
		while (sb.charAt(sbIndex) != ';') {
			entity.append(sb.charAt(sbIndex));
			sbIndex++;
		}
		entity.append(';');
		sbIndex++;
		// HTML wrapped input spells the leading ampersand of an entity as
		// the five-character sequence '&', 'a', 'm', 'p', ';'. Cut the
		// four characters behind the '&' and read the real entity again.
		if (wrappedEntities && entity.toString().equals("&amp;")) {
			sb.delete(sbIndex - 4, sbIndex);
			sbIndex = sbIndex - 5;
			entity.setLength(0);
			try {
				while (sb.charAt(sbIndex) != ';') {
					entity.append(sb.charAt(sbIndex));
					sbIndex++;
				}
			} catch (StringIndexOutOfBoundsException sioobe) {
				throw new Exception("Parsing error at character " + pos
						+ ": MathML code is not HTML wrapped.", sioobe);
			}
			entity.append(';');
			sbIndex++;
		}
		if ((entitySubst = substitutions.get(entity.toString())) != null) {
			// known entity: swap in its substitution plus a separator
			sb.delete(sbIndex - entity.length(), sbIndex);
			sbIndex = sbIndex - entity.length();
			sb.insert(sbIndex, entitySubst);
			sbIndex = sbIndex + entitySubst.length();
			sb.insert(sbIndex, " ");
			sbIndex++;
		} else {
			if (skipUnknownEntities) {
				sb.delete(sbIndex - entity.length(), sbIndex);
				sbIndex = sbIndex - entity.length();
				sb.insert(sbIndex, " ");
				sbIndex++;
			} else {
				// reduce a hexadecimal character reference to its digits:
				// drop the prefix and the trailing ';'
				String entityWorkout = entity.toString();
				if (entityWorkout.startsWith("&#x")) {
					// three-character prefix: '&', '#', 'x'
					entityWorkout = entityWorkout.substring(3,
							entityWorkout.length() - 1);
				} else if (entityWorkout.startsWith("\\&\\#x")) {
					// not sure whether this is needed any more...
					entityWorkout = entityWorkout.substring(5,
							entityWorkout.length() - 1);
				}
				if (isValidUnicode(entityWorkout)) {
					// assuming our LaTeX parser will know these things
					int hex = Integer.parseInt(entityWorkout, 16);
					String decoded = String.valueOf((char) hex);
					sb.replace(sbIndex - entity.length(), sbIndex, decoded);
					// continue right behind the single inserted character
					sbIndex -= entity.length() - 1;
				} else {
					// old school
					sb.insert(sbIndex - entity.length(), "NOTFOUND:'");
					sbIndex += 10;
					sb.insert(sbIndex, "' ");
					sbIndex += 2;
				}
			}
		}
	}
	// replace '&'
	sbIndex = 0;
	while ((sbIndex = sb.indexOf("&", sbIndex)) > -1) {
		sb.insert(sbIndex, '\\');
		sbIndex = sbIndex + 2;
	}
	// replace '#'
	sbIndex = 0;
	while ((sbIndex = sb.indexOf("#", sbIndex)) > -1) {
		sb.insert(sbIndex, '\\');
		sbIndex = sbIndex + 2;
	}
	return sb.toString().trim();
}
/**
 * Checks whether the given text consists of exactly four hexadecimal
 * digits (letters in either case), i.e. whether it can be read as a
 * four-digit hexadecimal character code.
 *
 * @param vu
 *            the candidate digit string, without any prefix or suffix
 * @return true if vu has length four and every character is a digit or one
 *         of the letters a-f / A-F, false otherwise
 */
private static boolean isValidUnicode(String vu) {
	final int expectedDigits = 4;
	if (vu.length() != expectedDigits) {
		return false;
	}
	String lowered = vu.toLowerCase();
	int idx = 0;
	while (idx < expectedDigits) {
		char c = lowered.charAt(idx);
		boolean hexLetter = c >= 'a' && c <= 'f';
		if (!(Character.isDigit(c) || hexLetter)) {
			return false;
		}
		idx++;
	}
	return true;
}
}