/* * Created on 2003-11-02 */ package org.geogebra.common.io; import java.util.HashMap; import org.geogebra.common.util.lang.Unicode; /** * Apache 2.0 Licence * * Original from http://www.tilman.de/programme/mathparser/download_en.html * * Parses Presentation MathML * * Alterations by GeoGebra * * @author (c) Tilman Walther * @author (c) Martin Wilke */ public class MathMLParser { private static HashMap<String, String> geogebraMap = new HashMap<String, String>(); static { // Tags: geogebraMap.put("<mfrac>", "((%BLOCK1%) / (%BLOCK2%))"); geogebraMap.put("<msup>", "((%BLOCK1%)^(%BLOCK2%))"); geogebraMap.put("<msub>", "%BLOCK1%_{%BLOCK2%}");// TODO _{1} -> _1 at // the end of // parsing geogebraMap.put("<msqrt>", "sqrt(%BLOCK1%)"); geogebraMap.put("<mroot>", "nroot(%BLOCK1%,%BLOCK2%)"); geogebraMap.put("<mfenced>", "(%BLOCK1%)");// e.g. binomial coefficient, // FIXME geogebraMap.put("<mfenced open=\"|\" close=\"|\">", "abs(%BLOCK1%)");// abs. // value geogebraMap.put("<msubsup>", "(%BLOCK1%_{%BLOCK2%})^(%BLOCK3%)");// ignored // for // now, // FIXME // (subscripted // variable // powered) geogebraMap.put("<munderover>", "%BLOCK1%(%BLOCK2%,%BLOCK3%,");// ignored // for // now, // FIXME // (subscripted // variable // powered) geogebraMap.put("<munder>", "%BLOCK1%");// ignored for now, FIXME geogebraMap.put("<mtable>", "{%BLOCKS%}"); geogebraMap.put("<mtr>", "{%BLOCKS%}, "); geogebraMap.put("<mtd>", "%BLOCK1%, "); geogebraMap.put("∫", "ggbMathmlIntegral"); geogebraMap.put("∑", "ggbMathmlSum"); // Entities geogebraMap.put("˙", "* "); geogebraMap.put("⋅", "* "); geogebraMap.put("·", "* "); geogebraMap.put("×", "* "); geogebraMap.put("=", " = "); geogebraMap.put("∀", "# "); geogebraMap.put("∃", "# "); geogebraMap.put("∍", "# "); geogebraMap.put("∗", "* "); geogebraMap.put("−", "- "); geogebraMap.put("⁄", "/ "); geogebraMap.put("∶", ": "); geogebraMap.put("<", "< "); geogebraMap.put(">", "> "); geogebraMap.put("≅", "# "); geogebraMap.put("⁢", " "); // Pfeile 
geogebraMap.put("↔", "# "); geogebraMap.put("←", "# "); geogebraMap.put("→", "# "); geogebraMap.put("⇔", "# "); geogebraMap.put("⇐", "# "); geogebraMap.put("⇒", "# "); // dynamische Zeichen geogebraMap.put("∑", "# "); geogebraMap.put("∏", "# "); geogebraMap.put("∫", "# "); geogebraMap.put("ⅆ", "d "); // griechisches Alphabet ... may be implemented by Java unicode codes. geogebraMap.put("α", Unicode.alpha + ""); geogebraMap.put("β", Unicode.beta + ""); geogebraMap.put("γ", Unicode.gamma + ""); geogebraMap.put("δ", Unicode.delta + ""); geogebraMap.put("ε", Unicode.epsilon + ""); geogebraMap.put("η", Unicode.eta + ""); geogebraMap.put("ι", Unicode.iota + ""); geogebraMap.put("κ", Unicode.kappa + ""); geogebraMap.put("λ", Unicode.lambda + ""); geogebraMap.put("μ", Unicode.mu + ""); geogebraMap.put("&mgr;", Unicode.mu + ""); geogebraMap.put("ν", Unicode.nu + ""); geogebraMap.put("ο", Unicode.omicron + ""); geogebraMap.put("π", Unicode.pi + ""); geogebraMap.put("θ", Unicode.theta + ""); geogebraMap.put("ρ", Unicode.rho + ""); geogebraMap.put("&rgr;", Unicode.rho + ""); geogebraMap.put("σ", Unicode.sigma + ""); geogebraMap.put("τ", Unicode.tau + ""); geogebraMap.put("υ", Unicode.upsilon + ""); geogebraMap.put("ϕ", Unicode.phi + ""); geogebraMap.put("φ", Unicode.phi + "");// \\varphi geogebraMap.put("χ", Unicode.chi + ""); geogebraMap.put("ϖ", Unicode.pi + "");// \\varpi geogebraMap.put("&pgr;", Unicode.pi + ""); geogebraMap.put("&ohgr;", Unicode.omega + ""); geogebraMap.put("ω", Unicode.omega + ""); geogebraMap.put("ξ", Unicode.xi + ""); geogebraMap.put("ψ", Unicode.psi + ""); geogebraMap.put("ζ", Unicode.zeta + ""); geogebraMap.put("Δ", Unicode.Delta + ""); geogebraMap.put("Φ", Unicode.Phi + ""); geogebraMap.put("Γ", Unicode.Gamma + ""); geogebraMap.put("Λ", Unicode.Lambda + ""); geogebraMap.put("Π", Unicode.Pi + ""); geogebraMap.put("&tgr;", Unicode.tau + ""); geogebraMap.put("Θ", Unicode.Theta + ""); geogebraMap.put("Σ", Unicode.Sigma + ""); geogebraMap.put("Υ", 
Unicode.Upsilon + ""); geogebraMap.put("ς", Unicode.sigma + "");// \\varsigma geogebraMap.put("Ω", Unicode.Omega + ""); geogebraMap.put("Ξ", Unicode.Xi + ""); geogebraMap.put("Ψ", Unicode.Psi + ""); geogebraMap.put("ϵ", Unicode.epsilon + ""); geogebraMap.put("&phgr;", Unicode.phi + ""); geogebraMap.put("&ggr;", Unicode.gamma + ""); geogebraMap.put("&eegr;", Unicode.eta + ""); geogebraMap.put("&igr;", Unicode.iota + ""); geogebraMap.put("&phgr;", Unicode.phi + ""); geogebraMap.put("&kgr;", Unicode.kappa + ""); geogebraMap.put("&lgr;", Unicode.lambda + ""); geogebraMap.put("&ngr;", Unicode.nu + ""); geogebraMap.put("&ogr;", Unicode.omega + ""); geogebraMap.put("&thgr;", Unicode.theta + ""); geogebraMap.put("&sgr;", Unicode.sigma + ""); geogebraMap.put("&ugr;", Unicode.upsilon + ""); geogebraMap.put("&zgr;", Unicode.zeta + ""); geogebraMap.put("&Agr;", Unicode.Alpha + ""); geogebraMap.put("&Bgr;", Unicode.Beta + ""); geogebraMap.put("&KHgr;", Unicode.Chi + ""); geogebraMap.put("&Egr;", Unicode.Epsilon + ""); geogebraMap.put("&PHgr;", Unicode.Phi + ""); geogebraMap.put("&Ggr;", Unicode.Gamma + ""); geogebraMap.put("&EEgr;", Unicode.Eta + ""); geogebraMap.put("&Igr;", Unicode.Iota + ""); geogebraMap.put("&THgr;", Unicode.Theta + ""); geogebraMap.put("&Kgr;", Unicode.Kappa + ""); geogebraMap.put("&Lgr;", Unicode.Lambda + ""); geogebraMap.put("&Mgr;", Unicode.Mu + ""); geogebraMap.put("&Ngr;", Unicode.Nu + ""); geogebraMap.put("&Ogr;", Unicode.Omicron + ""); geogebraMap.put("&Pgr;", Unicode.Pi + ""); geogebraMap.put("&Rgr;", Unicode.Rho + ""); geogebraMap.put("&Sgr;", Unicode.Sigma + ""); geogebraMap.put("&Tgr;", Unicode.Tau + ""); geogebraMap.put("&Ugr;", Unicode.Upsilon + ""); geogebraMap.put("&OHgr;", Unicode.Omega + ""); geogebraMap.put("&Zgr;", Unicode.Zeta + ""); // Pfeile und andere Operatoren geogebraMap.put("−", "-"); geogebraMap.put("⊥", "# "); geogebraMap.put("∼", "~ "); geogebraMap.put("′", "# "); geogebraMap.put("≤", Unicode.LESS_EQUAL + ""); 
geogebraMap.put("≥", Unicode.GREATER_EQUAL + ""); geogebraMap.put("∞", Unicode.INFINITY + ""); geogebraMap.put("♣", "# "); geogebraMap.put("♦", "# "); geogebraMap.put("♥", "# "); geogebraMap.put("♠", "# "); geogebraMap.put("±", Unicode.PLUSMINUS + ""); geogebraMap.put("″", "# "); geogebraMap.put("∝", "# "); geogebraMap.put("∂", "# "); geogebraMap.put("•", "# "); geogebraMap.put("≠", Unicode.NOTEQUAL + ""); geogebraMap.put("≡", "# "); geogebraMap.put("≈", "# "); geogebraMap.put("…", "... "); geogebraMap.put("∣", "# "); geogebraMap.put("↵", "# "); geogebraMap.put("ℵ", "# "); geogebraMap.put("ℑ", "# ");// ??? geogebraMap.put("ℜ", "# ");// ??? geogebraMap.put("℘", "# "); geogebraMap.put("⊗", "# "); geogebraMap.put("⊕", "# "); geogebraMap.put("∅", "# "); geogebraMap.put("∩", "# "); geogebraMap.put("∪", "# "); geogebraMap.put("⊃", "# "); geogebraMap.put("⊇", "# "); geogebraMap.put("⊄", "# "); geogebraMap.put("⊂", "# "); geogebraMap.put("⊆", "# "); geogebraMap.put("∈", "# "); geogebraMap.put("∉", "# "); geogebraMap.put("∠", "# "); geogebraMap.put("∇", "# "); geogebraMap.put("√", "# "); geogebraMap.put("∧", "# "); geogebraMap.put("∨", "# "); geogebraMap.put("∧", "# "); geogebraMap.put("∠", "# "); geogebraMap.put("∠", "# "); geogebraMap.put("≈", "# "); geogebraMap.put("≈", "# "); geogebraMap.put("⨁", "# "); geogebraMap.put("⨂", "# "); geogebraMap.put("⊥", "# "); geogebraMap.put("⊥", "# "); geogebraMap.put("∩", "# "); geogebraMap.put("⊕", "# "); geogebraMap.put("⊗", "# "); geogebraMap.put("≅", "# "); geogebraMap.put("≡", "# "); geogebraMap.put("∪", "# "); geogebraMap.put("↓", "# "); geogebraMap.put("⇓", "# "); geogebraMap.put("∇", "# "); geogebraMap.put("∇", "# "); geogebraMap.put("ⅆ", "\u2146 "); geogebraMap.put("⇐", "# "); geogebraMap.put("⇔", "# "); geogebraMap.put("⇒", "# "); geogebraMap.put("⇑", "# "); geogebraMap.put("↓", "# "); geogebraMap.put("⇓", "# "); geogebraMap.put("↓", "# "); geogebraMap.put("∈", "# "); geogebraMap.put("∅", "# "); geogebraMap.put("≡", "# "); 
geogebraMap.put("∃", "# "); geogebraMap.put("&Exist;", "# "); geogebraMap.put("ⅇ", "\u2147 "); geogebraMap.put("∀", "# "); geogebraMap.put("∀", "# "); geogebraMap.put("≥", Unicode.GREATER_EQUAL + ""); geogebraMap.put("≥", Unicode.GREATER_EQUAL + ""); geogebraMap.put("↔", "# "); geogebraMap.put("⇔", "# "); geogebraMap.put("⇔", "# "); geogebraMap.put("⇒", "# "); geogebraMap.put("∈", "# "); geogebraMap.put("∫", "# "); geogebraMap.put("∫", "# "); geogebraMap.put("∈", "# "); geogebraMap.put("∈", "# "); geogebraMap.put("⋄", "# "); geogebraMap.put("⋄", "# "); geogebraMap.put("⟨", "# "); geogebraMap.put("⟨", "# "); geogebraMap.put("←", "# "); geogebraMap.put("⇐", "# "); geogebraMap.put("⟨", "# "); geogebraMap.put("⇐", "# "); geogebraMap.put("←", "# "); geogebraMap.put("↔", "# "); geogebraMap.put("⇔", "# "); geogebraMap.put("↔", "# "); geogebraMap.put("≤", Unicode.LESS_EQUAL + ""); geogebraMap.put("≤", Unicode.LESS_EQUAL + ""); geogebraMap.put("⟺", "# "); geogebraMap.put("−", "- "); geogebraMap.put("∇", "# "); geogebraMap.put("∉", "# "); geogebraMap.put("≠", Unicode.NOTEQUAL + "");// ??? is this good // in LaTeX? 
geogebraMap.put("∉", "# ");
        geogebraMap.put("⊕", "# ");
        geogebraMap.put("∨", "# ");
        geogebraMap.put("⊗", "# ");
        geogebraMap.put("∂", "# ");
        geogebraMap.put("&partialD;", "# ");
        geogebraMap.put("⊥", "# ");
        geogebraMap.put("∏", "# ");
        geogebraMap.put("∏", "# ");
        geogebraMap.put("⟩", "# ");
        geogebraMap.put("⟩", "# ");
        geogebraMap.put("→", "# ");
        geogebraMap.put("⇒", "# ");
        geogebraMap.put("⟩", "# ");
        geogebraMap.put("→", "# ");
        geogebraMap.put("⇒", "# ");
        geogebraMap.put("→", "# ");
        geogebraMap.put("⋅", "* ");
        geogebraMap.put("∼", "# ");
        geogebraMap.put("∝", "# ");
        geogebraMap.put("∝", "# ");
        geogebraMap.put("∝", "# ");
        geogebraMap.put("⊂", "# ");
        geogebraMap.put("⊆", "# ");
        geogebraMap.put("⫅", "# ");
        geogebraMap.put("⊂", "# ");
        geogebraMap.put("⊆", "# ");
        geogebraMap.put("⫅", "# ");
        geogebraMap.put("⊆", "# ");
        geogebraMap.put("∑", "# ");
        geogebraMap.put("∑", "# ");
        geogebraMap.put("⊃", "# ");
        geogebraMap.put("⊇", "# ");
        geogebraMap.put("⫆", "# ");
        geogebraMap.put("⊃", "# ");
        geogebraMap.put("⊇", "# ");
        geogebraMap.put("⊃", "# ");
        geogebraMap.put("⊇", "# ");
        geogebraMap.put("⫆", "# ");
        geogebraMap.put("∼", "# ");
        geogebraMap.put("≅", "# ");
        geogebraMap.put("≈", "# ");
        geogebraMap.put("‴", "\u2034 ");
        geogebraMap.put("↑", "# ");
        geogebraMap.put("⇑", "# ");
        geogebraMap.put("↑", "# ");
        geogebraMap.put("⇑", "# ");
        geogebraMap.put("↑", "# ");
        geogebraMap.put("⊥", "# ");
        geogebraMap.put("∅", "# ");
        geogebraMap.put("∝", "# ");
        geogebraMap.put("∨", "# ");
        geogebraMap.put("∝", "# ");
        geogebraMap.put("∧", "# ");
        geogebraMap.put("⨁", "# ");
        geogebraMap.put("⨂", "# ");
        geogebraMap.put("&Space;", " ");
        geogebraMap.put(":", ":");
        // key is an invisible character (presumably U+2061 FUNCTION
        // APPLICATION -- verify); the second put further down overwrites it
        geogebraMap.put("⁡", " ");
        geogebraMap.put("□", " ");
        geogebraMap.put("−", "- ");
        geogebraMap.put("→", "# ");
        geogebraMap.put("∫", "# ");
        geogebraMap.put("⁡", "");
    }

    /**
     * Substitution table used when LaTeX output is requested: maps MathML
     * start tags and entities to LaTeX. Tag templates use the place holders
     * %BLOCK1%, %BLOCK2%, ... and %BLOCKS%.
     *
     * NOTE(review): many keys in this table are bare characters and several
     * of them repeat (a later put overwrites an earlier one). The entity
     * lookup in parseBlockContent only queries strings of the form "&...;",
     * so these keys look like entity names that were decoded somewhere on
     * the way -- verify against the upstream file.
     */
    private static HashMap<String, String> latexMap = new HashMap<String, String>();
    static {
        // Tags:
        latexMap.put("<mfrac>", "\\frac{%BLOCK1%}{%BLOCK2%}");
        latexMap.put("<msup>", "%BLOCK1%^{%BLOCK2%}");
        latexMap.put("<msub>", "%BLOCK1%_{%BLOCK2%}");
        latexMap.put("<msqrt>", "\\sqrt{%BLOCK1%}");
        latexMap.put("<mroot>", "\\sqrt[%BLOCK2%]{%BLOCK1%}");
        latexMap.put("<mfenced>", "\\left(%BLOCK1%\\right)");
        latexMap.put("<msubsup>", "%BLOCK1%_{%BLOCK2%}^{%BLOCK3%}");
        latexMap.put("<munderover>", "%BLOCK1%_{%BLOCK2%}^{%BLOCK3%}");
        latexMap.put("<munder>", "%BLOCK1%_{%BLOCK2%}");
        latexMap.put("<mtable>", "\\matrix{%BLOCKS%}");
        latexMap.put("<mtr>", "%BLOCKS%\\cr");
        latexMap.put("<mtd>", "%BLOCK1%&");

        // Entities
        latexMap.put("˙", "\\cdot ");
        latexMap.put("⋅", "\\cdot ");
        latexMap.put("·", "\\cdot ");
        latexMap.put("×", "\\times ");
        latexMap.put("=", "\\Relbar ");
        latexMap.put("∀", "\\forall ");
        latexMap.put("∃", "\\exists ");
        latexMap.put("∍", "\\ni ");
        latexMap.put("∗", "* ");
        latexMap.put("−", "- ");
        latexMap.put("⁄", "/ ");
        latexMap.put("∶", ": ");
        latexMap.put("<", "< ");
        latexMap.put(">", "> ");
        latexMap.put("≅", "\\cong ");
        // key is an invisible character (presumably U+2062 INVISIBLE TIMES --
        // verify)
        latexMap.put("⁢", " ");

        // Arrows
        latexMap.put("↔", "\\leftrightarrow ");
        latexMap.put("←", "\\leftarrow ");
        latexMap.put("→", "\\rightarrow ");
        latexMap.put("⇔", "\\Leftrightarrow ");
        latexMap.put("⇐", "\\Leftarrow ");
        latexMap.put("⇒", "\\Rightarrow ");

        // Dynamic symbols
        latexMap.put("∑", "\\sum ");
        latexMap.put("∏", "\\prod ");
        latexMap.put("∫", "\\int ");
        latexMap.put("ⅆ", "d ");

        // Greek alphabet
        latexMap.put("α", "\\alpha");
        latexMap.put("β", "\\beta");
        latexMap.put("γ", "\\gamma ");
        latexMap.put("δ", "\\delta ");
        latexMap.put("ε", "\\epsilon ");
        latexMap.put("η", "\\eta ");
        latexMap.put("ι", "\\iota ");
        latexMap.put("κ", "\\kappa ");
        latexMap.put("λ", "\\lambda ");
        latexMap.put("μ", "\\mu ");
        latexMap.put("&mgr;", "\\mu ");
        latexMap.put("ν", "\\nu ");
        latexMap.put("ο", "o ");
        latexMap.put("π", "\\pi ");
        latexMap.put("θ", "\\theta ");
        latexMap.put("ρ", "\\rho ");
        latexMap.put("&rgr;", "\\rho ");
        latexMap.put("σ", "\\sigma ");
        latexMap.put("τ", "\\tau ");
        latexMap.put("υ", "\\upsilon ");
        latexMap.put("ϕ", "\\phi");
        latexMap.put("φ", "\\varphi");
        latexMap.put("χ", "\\chi ");
        latexMap.put("ϖ", "\\varpi ");
        latexMap.put("&pgr;", "\\pi ");
        latexMap.put("&ohgr;", "\\omega ");
        latexMap.put("ω", "\\omega ");
        latexMap.put("ξ", "\\xi ");
        latexMap.put("ψ", "\\psi ");
        latexMap.put("ζ", "\\zeta ");
        latexMap.put("Δ", "\\Delta ");
        latexMap.put("Φ", "\\Phi ");
        latexMap.put("Γ", "\\Gamma ");
        latexMap.put("Λ", "\\Lambda ");
        latexMap.put("Π", "\\Pi ");
        latexMap.put("&tgr;", "\\tau ");
        latexMap.put("Θ", "\\Theta ");
        latexMap.put("Σ", "\\Sigma ");
        latexMap.put("Υ", "\\Upsilon ");
        latexMap.put("ς", "\\varsigma ");
        latexMap.put("Ω", "\\Omega ");
        latexMap.put("Ξ", "\\Xi ");
        latexMap.put("Ψ", "\\Psi ");
        latexMap.put("ϵ", "\\epsilon ");
        latexMap.put("&phgr;", "\\phi ");
        latexMap.put("&ggr;", "\\gamma ");
        latexMap.put("&eegr;", "\\eta ");
        latexMap.put("&igr;", "\\iota ");
        latexMap.put("&phgr;", "\\phi ");
        latexMap.put("&kgr;", "\\kappa ");
        latexMap.put("&lgr;", "\\lambda ");
        latexMap.put("&ngr;", "\\nu ");
        latexMap.put("&ogr;", "o ");
        latexMap.put("&thgr;", "\\theta ");
        latexMap.put("&sgr;", "\\sigma ");
        latexMap.put("&ugr;", "\\upsilon ");
        latexMap.put("&zgr;", "\\zeta ");
        latexMap.put("&Agr;", "A ");
        latexMap.put("&Bgr;", "B ");
        latexMap.put("&KHgr;", "X ");
        latexMap.put("&Egr;", "E ");
        latexMap.put("&PHgr;", "\\Phi ");
        latexMap.put("&Ggr;", "\\Gamma ");
        latexMap.put("&EEgr;", "H ");
        latexMap.put("&Igr;", "I ");
        latexMap.put("&THgr;", "\\Theta ");
        latexMap.put("&Kgr;", "K ");
        latexMap.put("&Lgr;", "\\Lambda ");
        latexMap.put("&Mgr;", "M ");
        latexMap.put("&Ngr;", "N ");
        latexMap.put("&Ogr;", "O ");
        latexMap.put("&Pgr;", "\\Pi ");
        latexMap.put("&Rgr;", "P ");
        latexMap.put("&Sgr;", "\\Sigma ");
        latexMap.put("&Tgr;", "T ");
        latexMap.put("&Ugr;", "\\Upsilon ");
        latexMap.put("&OHgr;", "\\Omega ");
        latexMap.put("&Zgr;", "Z ");

        // Arrows and other operators
        latexMap.put("−", "-");
        latexMap.put("⊥", "\\bot ");
        latexMap.put("∼", "~ ");
        latexMap.put("′", "\\prime ");
        latexMap.put("≤", "\\le ");
        latexMap.put("≥", "\\ge ");
        latexMap.put("∞", "\\infty ");
        latexMap.put("♣", "\\clubsuit ");
        latexMap.put("♦", "\\diamondsuit ");
        latexMap.put("♥", "\\heartsuit ");
        latexMap.put("♠", "\\spadesuit ");
        latexMap.put("±", "\\pm ");
        latexMap.put("″", "\\prime\\prime ");
        latexMap.put("∝", "\\propto ");
        latexMap.put("∂", "\\partial ");
        latexMap.put("•", "\\bullet ");
        latexMap.put("≠", "\\neq ");
        latexMap.put("≡", "\\equiv ");
        latexMap.put("≈", "\\approx ");
        latexMap.put("…", "... ");
        latexMap.put("∣", "\\mid ");
        latexMap.put("↵", "\\P ");
        latexMap.put("ℵ", "\\aleph ");
        latexMap.put("ℑ", "\\Im ");
        latexMap.put("ℜ", "\\Re ");
        latexMap.put("℘", "\\wp ");
        latexMap.put("⊗", "\\otimes ");
        latexMap.put("⊕", "\\oplus ");
        // was "\\emtyset ", which is not a LaTeX command
        latexMap.put("∅", "\\emptyset ");
        latexMap.put("∩", "\\cap ");
        latexMap.put("∪", "\\cup ");
        latexMap.put("⊃", "\\supset ");
        // was "\\seupseteq ", which is not a LaTeX command
        latexMap.put("⊇", "\\supseteq ");
        latexMap.put("⊄", "\\not\\subset ");
        latexMap.put("⊂", "\\subset ");
        latexMap.put("⊆", "\\subseteq ");
        latexMap.put("∈", "\\in ");
        latexMap.put("∉", "\\notin ");
        latexMap.put("∠", "\\angle ");
        latexMap.put("∇", "\\nabla ");
        latexMap.put("√", "\\surd ");
        latexMap.put("∧", "\\wedge ");
        latexMap.put("∨", "\\vee ");
        latexMap.put("∧", "\\wedge ");
        latexMap.put("∠", "\\angle ");
        latexMap.put("∠", "\\angle ");
        latexMap.put("≈", "\\approx ");
        latexMap.put("≈", "\\approx ");
        latexMap.put("⨁", "\\oplus ");
        latexMap.put("⨂", "\\otimes ");
        latexMap.put("⊥", "\\bot ");
        latexMap.put("⊥", "\\bot ");
        latexMap.put("∩", "\\cap ");
        latexMap.put("⊕", "\\oplus ");
        latexMap.put("⊗", "\\otimes ");
        latexMap.put("≅", "\\cong ");
        latexMap.put("≡", "\\equiv ");
        latexMap.put("∪", "\\cup ");
        latexMap.put("↓", "\\downarrow ");
        latexMap.put("⇓", "\\Downarrow ");
        latexMap.put("∇", "\\nabla ");
        latexMap.put("∇", "\\nabla ");
        latexMap.put("ⅆ", "\u2146 ");
        latexMap.put("⇐", "\\Leftarrow ");
        latexMap.put("⇔", "\\Leftrightarrow ");
        latexMap.put("⇒", "\\Rightarrow ");
        latexMap.put("⇑", "\\Uparrow ");
        latexMap.put("↓", "\\downarrow ");
        latexMap.put("⇓", "\\Downarrow ");
        // single down arrow: was "\\Downarrow ", which overwrote the correct
        // "\\downarrow " entries above
        latexMap.put("↓", "\\downarrow ");
        latexMap.put("∈", "\\in ");
        // NOTE(review): \oslash is a circled slash, not the empty-set sign;
        // possibly a deliberate look-alike -- left unchanged
        latexMap.put("∅", "\\oslash ");
        latexMap.put("≡", "\\equiv ");
        latexMap.put("∃", "\\exists ");
        latexMap.put("&Exist;", "\\exists ");
        latexMap.put("ⅇ", "\u2147 ");
        latexMap.put("∀", "\\forall ");
        latexMap.put("∀", "\\forall ");
        latexMap.put("≥", "\\geq ");
        latexMap.put("≥", "\\geq ");
        latexMap.put("≥", "\\geq ");
        latexMap.put("↔", "\\leftrightarrow ");
        latexMap.put("⇔", "\\Leftrightarrow ");
        latexMap.put("⇔", "\\Leftrightarrow ");
        latexMap.put("⇒", "\\Rightarrow ");
        latexMap.put("∈", "\\in ");
        latexMap.put("∞", "\\infty ");
        latexMap.put("∫", "\\int ");
        latexMap.put("∫", "\\int ");
        latexMap.put("∈", "\\in ");
        latexMap.put("∈", "\\in ");
        latexMap.put("⋄", "\\diamond ");
        latexMap.put("⋄", "\\diamond ");
        latexMap.put("⟨", "\\left\\langle ");
        latexMap.put("⟨", "\\left\\langle ");
        latexMap.put("←", "\\leftarrow ");
        latexMap.put("⇐", "\\Leftarrow ");
        latexMap.put("≤", "\\leq ");
        latexMap.put("⟨", "\\left\\langle ");
        latexMap.put("⇐", "\\Leftarrow ");
        latexMap.put("←", "\\leftarrow ");
        latexMap.put("↔", "\\leftrightarrow ");
        latexMap.put("⇔", "\\Leftrightarrow ");
        latexMap.put("↔", "\\leftrightarrow ");
        latexMap.put("≤", "\\leq ");
        latexMap.put("≤", "\\leq ");
        latexMap.put("⟺", "\\Longleftrightarrow ");
        latexMap.put("−", "- ");
        latexMap.put("∇", "\\nabla ");
        latexMap.put("∉", "\\notin ");
        // not-equal: was "\\notin " (the "not an element of" relation)
        latexMap.put("≠", "\\neq ");
        latexMap.put("∉", "\\notin ");
        latexMap.put("⊕", "\\oplus ");
        latexMap.put("∨", "\\vee ");
        latexMap.put("⊗", "\\otimes ");
        latexMap.put("∂", "\\partial ");
        latexMap.put("&partialD;", "\\partial ");
        latexMap.put("⊥", "\\bot ");
        // NOTE(review): overwrites the "\\prod " entry above with the Greek
        // capital letter -- confirm which one is intended
        latexMap.put("∏", "\\Pi ");
        latexMap.put("∏", "\\Pi ");
        latexMap.put("⟩", "\\right\\rangle ");
        latexMap.put("⟩", "\\right\\rangle ");
        latexMap.put("→", "\\rightarrow ");
        latexMap.put("⇒", "\\Rightarrow ");
        latexMap.put("⟩", "\\right\\rangle ");
        latexMap.put("→", "\\rightarrow ");
        latexMap.put("⇒", "\\Rightarrow ");
        latexMap.put("→", "\\rightarrow ");
        latexMap.put("⋅", "\\cdot ");
        latexMap.put("∼", "\\sim ");
        latexMap.put("∝", "\\propto ");
        latexMap.put("∝", "\\propto ");
        latexMap.put("∝", "\\propto ");
        latexMap.put("⊂", "\\subset ");
        latexMap.put("⊆", "\\subseteq ");
        latexMap.put("⫅", "\\subseteq ");
        latexMap.put("⊂", "\\subset ");
        latexMap.put("⊆", "\\subseteq ");
        latexMap.put("⫅", "\\subseteq ");
        latexMap.put("⊆", "\\subseteq ");
        // NOTE(review): overwrites the "\\sum " entry above with the Greek
        // capital letter -- confirm which one is intended
        latexMap.put("∑", "\\Sigma ");
        latexMap.put("∑", "\\Sigma ");
        latexMap.put("⊃", "\\supset ");
        latexMap.put("⊇", "\\supseteq ");
        latexMap.put("⫆", "\\supseteq ");
        latexMap.put("⊃", "\\supset");
        latexMap.put("⊇", "\\supseteq ");
        latexMap.put("⊃", "\\supset ");
        latexMap.put("⊇", "\\supseteq ");
        latexMap.put("⫆", "\\supseteq ");
        latexMap.put("∼", "\\sim ");
        latexMap.put("≅", "\\cong ");
        latexMap.put("≈", "\\approx ");
        latexMap.put("‴", "\u2034 ");
        latexMap.put("↑", "\\uparrow ");
        latexMap.put("⇑", "\\Uparrow ");
        latexMap.put("↑", "\\uparrow ");
        latexMap.put("⇑", "\\Uparrow ");
        latexMap.put("↑", "\\uparrow ");
        latexMap.put("⊥", "\\bot ");
        latexMap.put("∅", "\\oslash ");
        latexMap.put("∝", "\\propto ");
        latexMap.put("∨", "\\vee ");
        latexMap.put("∝", "\\propto ");
        latexMap.put("∧", "\\wedge ");
        latexMap.put("⨁", "\\oplus ");
        latexMap.put("⨂", "\\otimes ");
        latexMap.put("&Space;", " ");
        latexMap.put(":", ":");
        // key is an invisible character (presumably U+2061 FUNCTION
        // APPLICATION -- verify); the second put further down overwrites it
        latexMap.put("⁡", " ");
        latexMap.put("□", " ");
        latexMap.put("−", "- ");
        latexMap.put("→", "\\to ");
        latexMap.put("∫", "\\int ");
        latexMap.put("⁡", "");
    }

    /**
     * Start of the place holder for blocks in substitutions. If a
     * substitution contains a block place holder, the place holder is
     * replaced by the converted representation of the corresponding block.
     * <br>
     * Syntax: PH_BLOCK_START + blockNumber + PH_BLOCK_END, e.g. '%BLOCK1%'.
*/ private final static String PH_BLOCK_START = "%BLOCK"; private final static char PH_BLOCK_END = '%'; private final static char[] specialCharacters = { '%', '_', '$' }; private final static char[] leftBraces = { '(', '{', '[' }; private final static char[] rightBraces = { ')', '{', ']' }; private HashMap<String, String> substitutions; // private StringBuilder result; private String strBuf; private int pos; private boolean wrappedEntities; private boolean skipUnknownEntities; private boolean geogebraSyntax; // temporary variables (declared global for better performance) // protected String startTag, endTag; private String nextTag; private StringBuilder tagBuf = new StringBuilder(200); // used by // readNextTag() & // getBlockEnd() private StringBuilder entity = new StringBuilder(32); // used by // replaceEntities() private String entitySubst = ""; // used by replaceEntities() private boolean closeBracketNext = false; /** * Generates the substitution table from the default file path in field * SUBSTITUTIONS_FILE. * * @param geogebraSyntax1 * whether to return GeoGebra * */ public MathMLParser(boolean geogebraSyntax1) { this.geogebraSyntax = geogebraSyntax1; if (geogebraSyntax1) { substitutions = geogebraMap; } else { substitutions = latexMap; } } /* * Removed by GeoGebra Generates the substitution table from the given file * path. * * @param substitutionsTable the substitution table. * * public MathMLParser(HashMap<String, String> substitutionsTable) { * substitutions = substitutionsTable; } */ /** * TODO berarbeiten (complete MathML blocks only?): Parses MathML code into * LaTeX code using the substitution table genereated by the constructor. * <br> * Only presentation markup can be parsed properly, no use for parsing * content markup. 
* <p> * For example the presentation markup code * * <pre> * <mrow> * <msup> * <mfenced> * <mrow> * <mi>a</mi> * <mo>+</mo> * <mi>b</mi> * </mrow> * </mfenced> * <mn>2</mn> * </msup> * </mrow> * </pre> * * can be parsed by this method, while the equivalent content markup * * <pre> * <mrow> * <apply> * <power/> * <apply> * <plus/> * <ci>a</ci> * <ci>b</ci> * </apply> * <cn>2</cn> * </apply> * </mrow> * </pre> * * can not be parsed. * </p> * Both notations of entities can be parsed: The plain MathML notation, * starting with an ampersand sign (e.g. '&equals;'), or the * "HTML wrapped" notation startig with an entity for the ampersand sign * (e.g. '&amp;equals;'). * * @param strBuf0 * a String containig the MathML code to parse * @param wrappedEntities1 * indicates whether the entities in the MathML code are HTML * wrapped (e.g. '&amp;PlusMinus;'), or not (e.g. * '&PlusMinus;') * @param skipUnknownEntities1 * skipUnknownEntities * @return a StringBuilder containig the LaTeX representation of the input */ public String parse(String strBuf0, boolean wrappedEntities1, boolean skipUnknownEntities1) { // Remove newlines first; String strBuf1 = strBuf0.replace('\n', ' ').replace('\r', ' '); // now remove coments strBuf1 = strBuf1.replaceAll("<!--.*?-->", ""); // Avoiding bugs due to wrong parsing (quick workarounds) strBuf1 = strBuf1.replace("><", "> <"); // strBuf1 = strBuf1.replace(";&#x", "; &#x"); // Adding "inferred mrow" to those elements that need it // according to W3C and also there in latexMap; // but also take care of the possible attributes! // As the algorithm itself neglects them, // this "quick" solution can do that too. 
strBuf1 = strBuf1.replaceAll("<msqrt.*?>", "<msqrt> <mrow>"); strBuf1 = strBuf1.replace("</msqrt>", "</mrow> </msqrt>"); strBuf1 = strBuf1.replaceAll("<mtd.*?>", "<mtd> <mrow>"); strBuf1 = strBuf1.replace("</mtd>", "</mrow> </mtd>"); this.strBuf = strBuf1; this.wrappedEntities = wrappedEntities1; this.skipUnknownEntities = skipUnknownEntities1; // usually the MathML input should have more characters as the // output StringBuilder result = new StringBuilder(strBuf.length()); pos = 0; try { while (strBuf.indexOf("<", pos) != -1) { parseBlock(getNextTag(), result, true); skipFollowingTag(); } // TODO besser result stutzen? -> return new // StringBuilder(result) o. result.toString() return result.toString(); } catch (Exception e) { e.printStackTrace(); } return null; // TODO statt exception, speter lo(umlaut)schen } /** * TODO Pseudocode berarbeiten, Algorithmus noch einmal nachvollziehen * Parses a MathML block in strBuf recursively into LaTeX code. * <p> * Pseudocode: * * <pre> * while (pos <= blockEnd) { * if (insideOfInnerstBlock) { * result.append(convertToLatexSyntax(area)); * } else { * tmpTag = getNextTag(); // pos = pos + tmpTag.length(); * if (substitutionAvailable(tmpTag)) { * while (substitutionContainsBlock) { * addSubstitutionUpToPlaceHolderOfBlockToOutput(); * parseBlock(getAreaOfNextBlock()); * } * addRestOfSubstitutionOutPut(); * } else { * parseBlock(pos, getBlockEndIndex(tmpTag)); * } * skipClosingTag(); * } * } * </pre> * * </p> * * @param startTag * startTag * @param result * builder to which we append the string * @param appendSpace * whether space shoud be appended after the block content * @throws Exception * if an error occurs while parsing */ void parseBlock(String startTag, StringBuilder result, boolean appendSpace) throws Exception { boolean closeBracketNow = this.closeBracketNext; this.closeBracketNext = false; String endTag = generateEndTag(startTag); // System.out.println(startTag+ " "+endTag); int blockEnd = getBlockEnd(startTag, 
endTag); String substBuf; String blockContent; boolean inside = true; int blockNumber = 0; int prevBlockNumber; while (pos <= blockEnd) { // scan for subblocks int i = pos; while ((i <= blockEnd) && (strBuf.charAt(i) != '<')) { i++; } if ((startTag != endTag) && (i > blockEnd)) { // if sure to be at the end of the block hierarchy (inside), // append block content to result if (inside) { blockContent = strBuf.substring(pos, blockEnd + 1); result.append(parseBlockContent(blockContent)); if (appendSpace) { result.append(' '); } pos = pos + blockContent.length(); blockContent = null; } else { // if all subblocks have been processed skip to the end pos = blockEnd + 1; } } else { // this block has subblocks inside = false; // if there is a substitution for the next block, write it to // 'result' if ((substBuf = substitutions.get(startTag)) != null) { int phIndex; int substIndex = 0; // parse subblocks recursively while (((phIndex = substBuf.indexOf(PH_BLOCK_START, substIndex)) > -1) && (pos - 2 < blockEnd)) { // write substitution up to the block marker while (substIndex < phIndex) { result.append(substBuf.charAt(substIndex)); substIndex++; } substIndex += PH_BLOCK_START.length(); // get number of the block to parse int blockNumberIndex = substIndex; while (substBuf.charAt(substIndex) != PH_BLOCK_END) { substIndex++; } prevBlockNumber = blockNumber; String blockNumberStr = substBuf .substring(blockNumberIndex, substIndex); if ("S".equals(blockNumberStr)) { // keyword is BLOCKS -> parse all inner blocks in // order of appearance // skip PH_BLOCK_END substIndex++; // jump to the block to parse skipBlocks((1 - prevBlockNumber) - 1); // parse subblocks while ((strBuf.substring(pos, blockEnd + 1)) .indexOf('<') != -1) { nextTag = getNextTag(); parseBlock(nextTag, result, true); skipFollowingTag(); } if (!appendSpace) { result.setLength(result.length() - 1); } } else { // keyword is BLOCK + block number, parse inner // blocks in given order try { blockNumber = 
Integer.parseInt(blockNumberStr); } catch (NumberFormatException nfe) { throw new Exception( "Parsing error at character " + pos + ": Unparseable block number in substitution."); } // skip PH_BLOCK_END substIndex++; // jump to the block to parse skipBlocks((blockNumber - prevBlockNumber) - 1); // parse subblock nextTag = getNextTag(); parseBlock(nextTag, result, false); skipFollowingTag(); } } // write (end of) substitution while (substIndex < substBuf.length()) { result.append(substBuf.charAt(substIndex)); substIndex++; } pos = blockEnd + endTag.length(); if (substitutions.get(startTag).endsWith(",")) { this.closeBracketNext = true; } } else { // parse subblocks of nextTag while ((strBuf.substring(pos, blockEnd + 1)) .indexOf('<') != -1) { nextTag = getNextTag(); parseBlock(nextTag, result, true); skipFollowingTag(); } // make sure we don't output "x _{1}" but don't remove any // non-space character if (!appendSpace && result.charAt(result.length() - 1) == ' ') { result.setLength(result.length() - 1); } } } if (closeBracketNow) { result.append(")"); } } // System.out.print(pos); // TODO Warum braucht 'amayaOut.htm' diese Anweisung? -> 853, 853 // (<mprescripts/>) pos = blockEnd; // System.out.println(", "+pos+" ("+startTag+")"); } /** * Jumps to the next tag, reads it into 'startTag' an generates the * corresponding 'endTag'. */ private String getNextTag() { while (strBuf.charAt(pos) != '<') { pos++; } tagBuf.setLength(0); while (strBuf.charAt(pos) != '>') { tagBuf.append(strBuf.charAt(pos)); pos++; } pos++; tagBuf.append('>'); return tagBuf.toString(); } /** * Generates an end tag corresponding to the given 'startTag'. 
* * @param startTag * the start tag to generate an end tag from * @return the end tag for the given start tag */ String generateEndTag(String startTag) { if (startTag.charAt(tagBuf.length() - 2) != '/') { if (startTag.indexOf(' ') > -1) { // delete parameters of startTag return "</" + startTag.substring(1, startTag.indexOf(' ')) + ">"; } return "</" + startTag.substring(1, startTag.length()); } // if the tag is self-closing (e.g. "<mprescripts/>"), the endTag is the // startTag return startTag; } /** * Skips all characters up to the end of the next tag. */ void skipFollowingTag() { while (strBuf.charAt(pos) != '>') { pos++; } pos++; } /** * Skips (back and forth) a given number of blocks from the actual position. * * @param blocksToSkip * the number of blocks to skip */ void skipBlocks(int blocksToSkip) { if (blocksToSkip > 0) { for (int i = 0; i < blocksToSkip; i++) { String startTag = getNextTag(); String endTag = generateEndTag(startTag); pos = getBlockEnd(startTag, endTag); pos = pos + endTag.length(); } } else if (blocksToSkip < 0) { for (int i = 0; i > blocksToSkip; i--) { int subBlocks = 1; while (strBuf.charAt(pos) != '>') { pos--; } tagBuf.setLength(0); while (strBuf.charAt(pos) != '<') { tagBuf.append(strBuf.charAt(pos)); pos--; } tagBuf.append('<'); tagBuf.reverse(); String blockEndTag = new String(tagBuf); String blockStartTag = new String(tagBuf.deleteCharAt(1)); do { while (strBuf.charAt(pos) != '>') { pos--; } tagBuf.setLength(0); while (strBuf.charAt(pos) != '<') { tagBuf.append(strBuf.charAt(pos)); pos--; } tagBuf.append('<'); tagBuf.reverse(); if (tagBuf.indexOf(" ") > -1) { tagBuf.delete(tagBuf.indexOf(" "), tagBuf.length() - 1); } if (tagBuf.toString().equals(blockStartTag)) { subBlocks--; } else { if (tagBuf.toString().equals(blockEndTag)) { subBlocks++; } } } while ((subBlocks > 0) || (!(tagBuf.toString().equals(blockStartTag)))); } } } /** * Returns the end index of the block defined by the 'startTag' parameter * skipping all subblocks. 
* The end index is the position of the character before the closing tag
     * of the block.
     *
     * @param startTag0
     *            the tag that opened the block
     * @param endTag
     *            the end tag to seek
     * @return the index of the character in front of the closing tag; if
     *         {@code startTag0} equals {@code endTag} (self-closing tag)
     *         the current position minus the length of the tag
     */
    int getBlockEnd(String startTag0, String endTag) {
        if (startTag0.equals(endTag)) {
            // self-closing tag: no content and no separate end tag
            return pos - startTag0.length();
        }

        // delete parameters of startTag, tags are compared by name only
        String startTag = startTag0;
        int firstSpace = startTag.indexOf(' ');
        if (firstSpace > -1) {
            startTag = startTag.substring(0, firstSpace) + '>';
        }

        int pos2 = pos;
        // number of blocks with this tag name that still have to be closed
        int subBlocks = 1;
        do {
            // read the next tag into tagBuf
            while (strBuf.charAt(pos2) != '<') {
                pos2++;
            }
            tagBuf.setLength(0);
            while (strBuf.charAt(pos2) != '>') {
                tagBuf.append(strBuf.charAt(pos2));
                pos2++;
            }
            tagBuf.append('>');

            if (tagBuf.toString().equals(endTag)) {
                subBlocks--;
            } else {
                // A self-closing tag never opens a subblock. Remember this
                // before the attributes (and the '/') are cut away,
                // otherwise e.g. <mrow class="x"/> would count as <mrow>.
                boolean selfClosing = tagBuf.charAt(tagBuf.length() - 2) == '/';
                int space = tagBuf.indexOf(" ");
                if (space > -1) {
                    // strip the attributes but keep the closing '>'
                    tagBuf.delete(space, tagBuf.length() - 1);
                }
                if (!selfClosing && tagBuf.toString().equals(startTag)) {
                    subBlocks++;
                }
            }
            // subBlocks can only reach 0 on a matching end tag
        } while (subBlocks > 0);

        // pos2 is on the '>' of the end tag here
        return pos2 - endTag.length();
    }

    /**
     * Parses a String into Latex syntax and returns it.
     *
     * The steps are, in this order: every backslash becomes "\backslash";
     * unless GeoGebra syntax is produced, braces get a "\left" / "\right"
     * prefix; special characters are escaped with a backslash; entities
     * ("&amp;...;") are replaced; finally every remaining '&amp;' and '#' is
     * escaped with a backslash and the result is trimmed.
     *
     * @param s
     *            the string to parse
     * @return the Latex representation of the given string
     * @throws Exception
     *             if HTML wrapped entities were expected but not found
     */
    String parseBlockContent(String s) throws Exception {
        StringBuilder sb = new StringBuilder(s);
        int sbIndex = 0;

        // replace backslashes
        while ((sbIndex = sb.indexOf("\\", sbIndex)) > -1) {
            sb.insert(sbIndex + 1, "backslash");
            sbIndex = sbIndex + 10;
        }

        // replace braces
        if (!geogebraSyntax) {
            for (int i = 0; i < leftBraces.length; i++) {
                insertBeforeEach(sb, String.valueOf(leftBraces[i]), "\\left");
            }
            for (int i = 0; i < rightBraces.length; i++) {
                insertBeforeEach(sb, String.valueOf(rightBraces[i]),
                        "\\right");
            }
        }

        // replace special characters
        for (int i = 0; i < specialCharacters.length; i++) {
            insertBeforeEach(sb, String.valueOf(specialCharacters[i]), "\\");
        }

        // replace Entities
        sbIndex = 0;
        while ((sbIndex = sb.indexOf("&", sbIndex)) > -1) {
            int semicolon = sb.indexOf(";", sbIndex);
            if (semicolon < 0) {
                // a bare '&' that is never terminated is not an entity;
                // leave it to the '&' escaping below instead of running
                // past the end of the buffer
                break;
            }
            entity.setLength(0);
            entity.append(sb.substring(sbIndex, semicolon + 1));
            sbIndex = semicolon + 1;

            // The literal is the five character escaped-ampersand entity.
            // It is written as a concatenation so that entity-decoding
            // tools cannot collapse it to a bare ampersand, which would
            // make this branch unreachable.
            if (wrappedEntities && entity.toString().equals("&" + "amp;")) {
                // remove "amp;" so that the '&' now starts the wrapped
                // entity, then read that entity instead
                sb.delete(sbIndex - 4, sbIndex);
                sbIndex = sbIndex - 5;
                semicolon = sb.indexOf(";", sbIndex);
                if (semicolon < 0) {
                    throw new Exception("Parsing error at character " + pos
                            + ": MathML code is not HTML wrapped.");
                }
                entity.setLength(0);
                entity.append(sb.substring(sbIndex, semicolon + 1));
                sbIndex = semicolon + 1;
            }

            // sbIndex is right behind the entity here
            int entityStart = sbIndex - entity.length();

            if ((entitySubst = substitutions.get(entity.toString())) != null) {
                // known entity: substitution followed by a blank
                sb.delete(entityStart, sbIndex);
                sbIndex = entityStart;
                sb.insert(sbIndex, entitySubst);
                sbIndex = sbIndex + entitySubst.length();
                sb.insert(sbIndex, " ");
                sbIndex++;
            } else if (skipUnknownEntities) {
                // unknown entity: replaced by a single blank
                sb.delete(entityStart, sbIndex);
                sbIndex = entityStart;
                sb.insert(sbIndex, " ");
                sbIndex++;
            } else {
                // strip the hexadecimal reference prefix and the ';'
                String entityWorkout = entity.toString();
                if (entityWorkout.startsWith("&#x")) {
                    entityWorkout = entityWorkout.substring(3,
                            entityWorkout.length() - 1);
                } else if (entityWorkout.startsWith("\\&\\#x")) {
                    // not sure whether this is needed any more...
                    // (the entity is always read starting at an '&')
                    entityWorkout = entityWorkout.substring(5,
                            entityWorkout.length() - 1);
                }

                if (isValidUnicode(entityWorkout)) {
                    // assuming our LaTeX parser will know these things
                    int hex = Integer.parseInt(entityWorkout, 16);
                    sb.replace(entityStart, sbIndex,
                            String.valueOf((char) hex));
                    // continue right behind the inserted character
                    sbIndex = entityStart + 1;
                } else {
                    // old school: keep the entity and mark it
                    sb.insert(entityStart, "NOTFOUND:'");
                    sbIndex += 10;
                    sb.insert(sbIndex, "' ");
                    sbIndex += 2;
                }
            }
        }

        // replace '&'
        insertBeforeEach(sb, "&", "\\");

        // replace '#'
        insertBeforeEach(sb, "#", "\\");

        return sb.toString().trim();
    }

    /**
     * Inserts {@code prefix} in front of every occurrence of {@code target}
     * in {@code sb}, scanning from left to right.
     *
     * @param sb
     *            the buffer to modify
     * @param target
     *            the text to look for
     * @param prefix
     *            the text to insert in front of each occurrence
     */
    private static void insertBeforeEach(StringBuilder sb, String target,
            String prefix) {
        int index = 0;
        while ((index = sb.indexOf(target, index)) > -1) {
            sb.insert(index, prefix);
            // continue one character behind the position the occurrence
            // was moved to
            index = index + prefix.length() + 1;
        }
    }

    /**
     * Determines whether this is valid Unicode, i.e. whether the string
     * consists of exactly four hexadecimal digits (0-9, a-f, A-F).
     *
     * @param vu
     *            the text to check, without any prefix or suffix
     * @return true if {@code vu} is made up of exactly four ASCII
     *         hexadecimal digits
     */
    private static boolean isValidUnicode(String vu) {
        if (vu.length() != 4) {
            return false;
        }
        for (int i = 0; i < 4; i++) {
            char c = vu.charAt(i);
            // explicit ASCII ranges: independent of the default locale and
            // not fooled by digits of other scripts
            boolean hexDigit = (c >= '0' && c <= '9')
                    || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
            if (!hexDigit) {
                return false;
            }
        }
        return true;
    }

}