package com.formulasearchengine.mathosphere.mlp.text;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multiset;
import com.formulasearchengine.mathosphere.mlp.PatternMatchingRelationFinder;
import org.apache.commons.io.IOUtils;
import org.junit.Ignore;
import org.junit.Test;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Set;
import static org.junit.Assert.*;
public class MathMLUtilsTest {
@Test
public void extractFromTex_simple() {
String tex = "x^2 + y^2 = z^2";
Set<String> identifiers = MathMLUtils.extractIdentifiersFromTex(tex, false, "").elementSet();
Set<String> expected = ImmutableSet.of("x", "y", "z");
assertEquals(expected, identifiers);
}
@Test
@Ignore
public void extractFromTex_moreComplex() {
String tex = "\\sqrt{x + y} = \\cfrac{\\varphi + \\rho}{\\Theta \\cdot \\Phi}";
MathMLUtils.setEngine("");
Set<String> identifiers = MathMLUtils.extractIdentifiersFromTex(tex, false, "").elementSet();
Set<String> expected = ImmutableSet.of("x", "y", "φ", "ρ", "Θ", "Φ");
assertEquals(expected, identifiers);
MathMLUtils.setEngine("snuggle");
}
@Test
public void extractFromTex_subscripts() {
String tex = "\\sigma_1 + \\sigma_2 = r_1";
Set<String> identifiers = MathMLUtils.extractIdentifiersFromTex(tex, false, "").elementSet();
Set<String> expected = ImmutableSet.of("σ_1", "σ_2", "r_1");
assertTrue(identifiers.containsAll(expected));
}
@Test
public void extractFromTex_superscriptIndentifier() {
String tex = "\\sigma^x";
Set<String> identifiers = MathMLUtils.extractIdentifiersFromTex(tex, false, "").elementSet();
Set<String> expected = ImmutableSet.of("σ", "x");
assertTrue(identifiers.containsAll(expected));
}
@Test
public void extractFromTex_capturesMultipleOccurrences() {
String tex = "\\sigma^2 + \\sigma + b";
Multiset<String> identifiers = MathMLUtils.extractIdentifiersFromTex(tex, false, "");
Multiset<String> expected = HashMultiset.create(Arrays.asList("σ", "σ", "b"));
assertEquals(expected, identifiers);
}
@Test
public void extractFromTex_oneIdTwoOccurrences_sizeIs2() {
String tex = "\\sigma + \\sigma";
Multiset<String> identifiers = MathMLUtils.extractIdentifiersFromTex(tex, false, "");
assertEquals(2, identifiers.size());
}
@Test
public void extractFromTex_boldText() {
String tex = "\\mathbf r";
Set<String> identifiers = MathMLUtils.extractIdentifiersFromTex(tex, false, "").elementSet();
Set<String> expected = ImmutableSet.of("r");
assertTrue(identifiers.containsAll(expected));
}
@Test
public void extractFromMathML_complextMsub_noSubCaptured() throws Exception {
String mathML = readResource("complex_msub.xml");
Set<String> identifiers = MathMLUtils.extractIdentifiersFromMathML(mathML, false, false).elementSet();
identifiers.forEach(id -> assertFalse(id.contains("_")));
}
private static String readResource(String file) throws IOException {
InputStream inputStream = PatternMatchingRelationFinder.class.getResourceAsStream(file);
return IOUtils.toString(inputStream);
}
@Test
public void extractFromMathMl_identifiersFromMSub_notCaptured() throws Exception {
String mathML = readResource("math-R_specific.xml");
Set<String> identifiers = MathMLUtils.extractIdentifiersFromMathML(mathML, false, false).elementSet();
Set<String> expected = ImmutableSet.of("R_specific", "R", "M");
assertFalse(identifiers.contains("specific"));
assertEquals(expected, identifiers);
}
@Test
public void extractFromMathML_notParsable() throws Exception {
String mathML = readResource("math-xmlparsingerror.xml");
Set<String> identifiers = MathMLUtils.extractIdentifiersFromMathML(mathML, false, false).elementSet();
// "a" is a stop word, it's removed
Set<String> expected = ImmutableSet.of("x", "b", "c");
assertEquals(expected, identifiers);
}
@Test
public void extractFromMathML_isNumeric() throws Exception {
assertFalse(MathMLUtils.isNumeric("a"));
assertTrue(MathMLUtils.isNumeric("1"));
assertTrue(MathMLUtils.isNumeric("10"));
assertTrue(MathMLUtils.isNumeric("10.0"));
assertTrue(MathMLUtils.isNumeric("10.00001"));
assertFalse(MathMLUtils.isNumeric("10.00001a"));
assertFalse(MathMLUtils.isNumeric("x1"));
}
}