package com.formulasearchengine.mathosphere.mlp.text;
import com.formulasearchengine.mathosphere.mlp.PatternMatchingRelationFinder;
import com.formulasearchengine.mathosphere.mlp.contracts.TextExtractorMapper;
import com.formulasearchengine.mathosphere.mlp.pojos.MathTag;
import com.formulasearchengine.mathosphere.mlp.text.WikiTextUtils.MathMarkUpType;
import org.apache.commons.io.IOUtils;
import org.junit.Ignore;
import org.junit.Test;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import static junit.framework.TestCase.assertEquals;
/**
 * Unit tests for {@link WikiTextUtils}: locating {@code <math>} tags in wiki text, replacing
 * formulas by placeholders, converting {@code <sub>}/{@code <sup>} markup and guessing the
 * markup type of a formula.
 */
public class WikiTextUtilsTest {

  /**
   * Reads a classpath resource completely and returns its content decoded as UTF-8.
   *
   * <p>The resource is looked up through the class loader of
   * {@link PatternMatchingRelationFinder}.
   *
   * @param testFile resource name as expected by {@link ClassLoader#getResourceAsStream(String)}
   * @return the full content of the resource
   * @throws IOException if the resource does not exist or cannot be read
   */
  public static String getTestResource(String testFile) throws IOException {
    // try-with-resources guarantees the stream is closed even if reading fails.
    try (InputStream stream =
        PatternMatchingRelationFinder.class.getClassLoader().getResourceAsStream(testFile)) {
      if (stream == null) {
        // getResourceAsStream signals a missing resource with null; fail with a clear message
        // instead of an anonymous NullPointerException further down.
        throw new IOException("Test resource not found on classpath: " + testFile);
      }
      return IOUtils.toString(stream, "utf-8");
    }
  }

  /**
   * Three formulas spread over the text are all found; the expected positions are the indices
   * at which the opening {@code <math>} tags start in the input.
   */
  @Test
  public void findMathTags() {
    String input = "Text text <math>V = V_0</math> text text <math>V = V_1</math> text. "
        + "Text <math>V = V_2</math>.";

    List<MathTag> actual = WikiTextUtils.findMathTags(input);

    List<MathTag> expected = Arrays.asList(
        new MathTag(10, "V = V_0", MathMarkUpType.LATEX),
        new MathTag(41, "V = V_1", MathMarkUpType.LATEX),
        new MathTag(73, "V = V_2", MathMarkUpType.LATEX));
    assertEquals(expected, actual);
  }

  /** A formula at the very beginning of the text is reported at position 0. */
  @Test
  public void findMathTags_first() {
    String input = "<math>V = V_0</math> text text.";

    List<MathTag> actual = WikiTextUtils.findMathTags(input);

    List<MathTag> expected =
        Collections.singletonList(new MathTag(0, "V = V_0", MathMarkUpType.LATEX));
    // No raw-type casts needed: both arguments bind to assertEquals(Object, Object).
    assertEquals(expected, actual);
  }

  /**
   * Every formula in the text is substituted by the placeholder of its tag while the
   * surrounding text stays untouched.
   */
  @Test
  public void replaceAllFormulas() {
    String text = "Text text <math>V = V_0</math> text text <math>V = V_1</math> text. "
        + "Text <math>V = V_2</math>.";
    MathTag tag1 = new MathTag(10, "<math>V = V_0</math>", MathMarkUpType.LATEX);
    MathTag tag2 = new MathTag(41, "<math>V = V_1</math>", MathMarkUpType.LATEX);
    MathTag tag3 = new MathTag(73, "<math>V = V_2</math>", MathMarkUpType.LATEX);
    List<MathTag> tags = Arrays.asList(tag1, tag2, tag3);

    String actual = WikiTextUtils.replaceAllFormulas(text, tags);

    String expected = "Text text " + tag1.placeholder() + " text text " + tag2.placeholder() + " text. "
        + "Text " + tag3.placeholder() + ".";
    assertEquals(expected, actual);
  }

  /**
   * {@code <sub>} becomes {@code _} and {@code <sup>} becomes {@code ^}; a plain {@code <}
   * that is not part of such a tag is kept.
   */
  @Test
  public void extractPlainText_subsup() {
    String input = "V = V<sub>0</sub>. E < V<sup>24</sup>";

    String actual = WikiTextUtils.subsup(input);

    String expected = "V = V_0. E < V^24";
    assertEquals(expected, actual);
  }

  /** A plain {@code <math>} tag with TeX content is classified as LaTeX. */
  @Test
  public void guessMarkupType_normalMathTag_isLatex() {
    String text = "<math>E_{rot} = \\frac{l(l+1) \\hbar^2}{2 \\mu r_{0}^2}</math>";
    MathTag tag = WikiTextUtils.findMathTags(text).get(0);
    assertEquals(MathMarkUpType.LATEX, tag.getMarkUpType());
  }

  /** An opening tag that carries an attribute ({@code <math style>}) is still found as LaTeX. */
  @Test
  public void guessMarkupType_weirdMathTag_isLatex() {
    String text = "<math style>E_{rot} = \\frac{l(l+1) \\hbar^2}{2 \\mu r_{0}^2} l=0,1,2,... </math>";
    MathTag tag = WikiTextUtils.findMathTags(text).get(0);
    assertEquals(MathMarkUpType.LATEX, tag.getMarkUpType());
  }

  /**
   * Smoke test: tag detection runs over the unescaped sample dataset without throwing.
   * The result itself is not inspected.
   */
  @Test
  public void findFormulaFromWikiText() throws Exception {
    String text = getTestResource("com/formulasearchengine/mathosphere/mlp/gold/eval_dataset_sample.xml");
    text = TextExtractorMapper.unescape(text);
    WikiTextUtils.findMathTags(text);
  }

  /** Content consisting of MathML elements is classified as MathML. */
  @Test
  public void guessMarkupType_isMathML() {
    String text = "<math><mi>x</mi></math>";
    MathTag tag = WikiTextUtils.findMathTags(text).get(0);
    assertEquals(MathMarkUpType.MATHML, tag.getMarkUpType());
  }

  /**
   * Placeholder without assertions; it currently passes vacuously.
   * TODO: add a real scenario for replaceAllFormulas or remove this method.
   */
  @Test
  public void testReplaceAllFormulas1() throws Exception {
  }

  /**
   * Compares the rendering of a single inline formula against the expected MathML markup.
   * Currently disabled via {@code @Ignore}.
   */
  @Test
  @Ignore
  public void testRenderAllFormulae() throws Exception {
    // A larger sample ("mean_wiki.txt" read through PosTaggerTest.readText) was used here
    // before; the short inline string keeps the expectation manageable.
    String testString = "The energy <math>E</math>,";
    String out = WikiTextUtils.renderAllFormulae(testString);
    assertEquals("The energy <math xmlns=\"http://www.w3.org/1998/Math/MathML\" id=\"p1.1.m1.1\" class=\"ltx_Math\" alttext=\"E\" display=\"inline\">\n" +
        " <semantics id=\"p1.1.m1.1a\">\n" +
        " <mi id=\"p1.1.m1.1.1\" xref=\"p1.1.m1.1.1.cmml\">E</mi>\n" +
        " <annotation-xml encoding=\"MathML-Content\" id=\"p1.1.m1.1b\">\n" +
        " <ci id=\"p1.1.m1.1.1.cmml\" xref=\"p1.1.m1.1.1\">E</ci>\n" +
        " </annotation-xml>\n" +
        " <annotation encoding=\"application/x-tex\" id=\"p1.1.m1.1c\">E</annotation>\n" +
        " </semantics>\n" +
        "</math>,", out);
  }

  /**
   * Helper "test" that renders all formulae of the {@code n20_wiki.txt} resource, writes the
   * result next to it as {@code n20_exc.txt} and prints it. Currently disabled via
   * {@code @Ignore}.
   */
  @Ignore
  @Test
  public void renderResource() throws Exception {
    final String name = "n20";
    String testString = PosTaggerTest.readText(name + "_wiki.txt");
    String targetPath = PatternMatchingRelationFinder.class.getResource(name + "_wiki.txt").getPath().replace("_wiki.txt", "_exc.txt");
    // Render before opening the writer so that a rendering failure neither truncates the
    // target file nor leaks an open file handle.
    String out = WikiTextUtils.renderAllFormulae(testString);
    try (FileWriter writer = new FileWriter(targetPath)) {
      writer.write(out);
    }
    System.out.println(out);
  }
}