package com.formulasearchengine.mathosphere.mlp.cli;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.formulasearchengine.mathosphere.mlp.ml.WekaLearner;
import com.formulasearchengine.mathosphere.mlp.pojos.IdentifierDefinition;
import com.formulasearchengine.mathosphere.mlp.pojos.StrippedWikiDocumentOutput;
import com.google.common.base.Throwables;
import com.google.common.io.Files;
import com.formulasearchengine.mathosphere.mlp.Main;
import org.apache.commons.io.output.TeeOutputStream;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;
import static org.junit.Assert.assertTrue;
/**
 * Integration-style tests that invoke {@link Main#main(String[])} with different
 * command line arguments.
 *
 * <p>Several tests replace {@link System#out} while the command runs. All redirection goes
 * through {@link #runTest(String[])} or {@link #runSilently(String[])}, which restore the
 * original stream in a {@code finally} block so that a failing command cannot leave
 * {@code System.out} redirected for the tests that run afterwards.
 */
public class CliMainTest {

  // ---------------------------------------------------------------------------------------------
  // Helpers
  // ---------------------------------------------------------------------------------------------

  /**
   * Resolves a classpath resource to a decoded file system path.
   *
   * @param resourceName resource name as understood by {@link ClassLoader#getResource(String)}
   * @return the URL-decoded file part of the resource URL
   */
  private String resourcePath(String resourceName) {
    ClassLoader classLoader = getClass().getClassLoader();
    URL resource = classLoader.getResource(resourceName);
    // Fail with a readable message instead of a bare NullPointerException.
    Assert.assertNotNull("Test resource not found on classpath: " + resourceName, resource);
    return decodePath(resource.getFile());
  }

  /**
   * Resolves a resource relative to this class (see {@link Class#getResource(String)}) to a
   * decoded file system path.
   *
   * @param relativeName resource name relative to the package of this class
   * @return the URL-decoded file part of the resource URL
   */
  private String siblingResourcePath(String relativeName) {
    URL resource = getClass().getResource(relativeName);
    Assert.assertNotNull("Test resource not found relative to test class: " + relativeName, resource);
    return decodePath(resource.getFile());
  }

  /**
   * Decodes a URL-encoded path (e.g. {@code %20} for a blank) using UTF-8.
   *
   * @param urlEncodedPath the encoded path, typically taken from {@link URL#getFile()}
   * @return the decoded path
   */
  private static String decodePath(String urlEncodedPath) {
    try {
      return URLDecoder.decode(urlEncodedPath, "UTF-8");
    } catch (UnsupportedEncodingException e) {
      // Replaces the deprecated Guava Throwables.propagate; still unchecked, cause preserved.
      throw new IllegalStateException("UTF-8 encoding is not available", e);
    }
  }

  /**
   * Creates the temporary file that is handed to the CLI as {@code -out} target.
   *
   * @return a freshly created temporary file
   */
  private static File createTempOutputFile() throws Exception {
    return File.createTempFile("temp", Long.toString(System.nanoTime()));
  }

  /**
   * Runs {@link Main#main(String[])} while copying everything written to {@link System#out}
   * into a buffer. The output is still forwarded to the original standard output. After a
   * successful run the elapsed time in whole seconds is printed.
   *
   * @param args command line arguments passed to {@code Main.main}
   * @return everything the command wrote to standard output
   * @throws Exception whatever {@code Main.main} throws; standard output is restored first
   */
  public String runTest(String[] args) throws Exception {
    final PrintStream stdout = System.out;
    final ByteArrayOutputStream captured = new ByteArrayOutputStream();
    // Deliberately never closed: closing the tee would also close the real standard output.
    final PrintStream tee = new PrintStream(new TeeOutputStream(stdout, captured));
    System.setOut(tee);
    final long t0 = System.nanoTime();
    try {
      Main.main(args);
      tee.flush();
    } finally {
      System.setOut(stdout);
    }
    System.out.println((System.nanoTime() - t0) / 1000000000 + "s");
    return captured.toString();
  }

  /**
   * Runs {@link Main#main(String[])} with {@link System#out} redirected into a buffer only, so
   * nothing the command prints reaches the original standard output.
   *
   * @param args command line arguments passed to {@code Main.main}
   * @return everything the command wrote to standard output
   * @throws Exception whatever {@code Main.main} throws; standard output is restored first
   */
  private static String runSilently(String[] args) throws Exception {
    final PrintStream stdout = System.out;
    final ByteArrayOutputStream captured = new ByteArrayOutputStream();
    final PrintStream replacement = new PrintStream(captured);
    System.setOut(replacement);
    try {
      Main.main(args);
      replacement.flush();
      return captured.toString();
    } finally {
      System.setOut(stdout);
    }
  }

  // ---------------------------------------------------------------------------------------------
  // mlp / help
  // ---------------------------------------------------------------------------------------------

  /** Runs the {@code mlp} command on the Russian wiki sample; passes if no exception is thrown. */
  @Test
  public void testMlpRus() throws Exception {
    final File temp = createTempOutputFile();
    String[] args = {
        "mlp",
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/wikirusample.xml"),
        "-out", temp.getAbsolutePath(),
        "--language", "ru",
        "-pos", "",
    };
    System.out.println(temp.getAbsolutePath());
    runTest(args);
  }

  /**
   * Runs the {@code mlp} command on the plain Russian sample with {@code --tex} and prints the
   * captured output; passes if no exception is thrown.
   */
  @Test
  public void testMlpRusPlain() throws Exception {
    final File temp = createTempOutputFile();
    String[] args = {
        "mlp",
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/ru-sample.xml"),
        "-out", temp.getAbsolutePath(),
        "--language", "ru",
        "-pos", "",
        "--tex", "",
    };
    System.out.println(temp.getAbsolutePath());
    String res = runTest(args);
    System.out.println(res);
  }

  /** Runs the {@code help} command; passes if no exception is thrown. */
  @Test
  public void testHelp() throws Exception {
    runTest(new String[] {"help"});
  }

  /** Runs the {@code mlp} command on the English sample; passes if no exception is thrown. */
  @Test
  public void testMlpEngPlain() throws Exception {
    final File temp = createTempOutputFile();
    String[] args = {
        "mlp",
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/sample.xml"),
        "-out", temp.getAbsolutePath(),
    };
    System.out.println(temp.getAbsolutePath());
    runTest(args);
  }

  /**
   * Runs the {@code mlp} command on the English sample with {@code --tex} and a list passed
   * via {@code -w}, then prints the captured output; passes if no exception is thrown.
   */
  @Test
  public void testMlpEngPlainWithWikidata() throws Exception {
    final String file = siblingResourcePath("../sample.xml");
    final String wikiDataList = siblingResourcePath("../text/test-map-no-dup.csv");
    final File temp = createTempOutputFile();
    String[] args = {"mlp", "-in", file, "-out", temp.getAbsolutePath(), "--tex", "-w", wikiDataList};
    System.out.println(temp.getAbsolutePath());
    String res = runTest(args);
    System.out.println(res);
  }

  // ---------------------------------------------------------------------------------------------
  // extract / count
  // ---------------------------------------------------------------------------------------------

  /** Checks that the {@code extract} output mentions "magnetic dipole moment". */
  @Test
  @Ignore
  public void testExtract() throws Exception {
    String[] args = {
        "extract",
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/hamiltonian_esc.txt"),
    };
    final String standardOutput = runSilently(args);
    assertTrue(standardOutput.contains("magnetic dipole moment"));
  }

  /** Checks that {@code count --ids} reports the expected count for identifier {@code i}. */
  @Test
  public void testCount() throws Exception {
    String[] args = {
        "count",
        "-in", resourcePath("identifier.json"),
        "--ids",
    };
    final String standardOutput = runSilently(args);
    assertTrue(standardOutput.contains("{\"element\":\"i\",\"count\":42}"));
  }

  /** Checks that {@code count} without {@code --ids} reports the expected token count. */
  @Test
  public void testCountTok() throws Exception {
    String[] args = {
        "count",
        "-in", resourcePath("tokens.json"),
    };
    final String standardOutput = runSilently(args);
    assertTrue(standardOutput
        .contains("{\"element\":{\"f0\":\"TEX_ONLY\",\"f1\":\"i\",\"arity\":2},\"count\":88}"));
  }

  // ---------------------------------------------------------------------------------------------
  // eval
  // ---------------------------------------------------------------------------------------------

  /** Runs the {@code eval} command on the sample data set; passes if no exception is thrown. */
  @Test
  public void testEval() throws Exception {
    final File temp = Files.createTempDir();
    System.out.println(temp.getAbsolutePath());
    String[] args = {"eval",
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/eval_dataset_sample.xml"),
        "-out", temp.getAbsolutePath(),
        "--queries", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/gold.json"),
        "--nd", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/nd.json"),
        "--tex",
        "-t", "0.8",
        "--level", "2",
        "--ref", resourcePath("com/formulasearchengine/mathosphere/mlp/nd"),
    };
    runSilently(args);
  }

  /**
   * Runs the evaluation command with {@code --usePatternMatcher}; passes if no exception is
   * thrown.
   */
  @Test
  public void testPatternMatcher() throws Exception {
    final File temp = Files.createTempDir();
    String[] args = {CliParams.EVAL,
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/eval_dataset_sample.xml"),
        "-out", temp.getAbsolutePath(),
        "--queries", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/gold.json"),
        "--nd", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/nd.json"),
        "--tex",
        "--usePatternMatcher",
    };
    runSilently(args);
  }

  // ---------------------------------------------------------------------------------------------
  // machine learning
  // ---------------------------------------------------------------------------------------------

  /**
   * Actually not a test but documentation of how the svm training is done.
   * Takes long.
   * Does not work with the eval_dataset_sample.xml because it has too few samples.
   */
  @Test
  @Ignore
  public void testMachineLearning() throws Exception {
    final File temp = Files.createTempDir();
    String[] args = {CliParams.ML,
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/eval_dataset.xml"),
        "-out", temp.getAbsolutePath(),
        "--goldFile", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/gold.json"),
        "--tex",
        "--threads", "10",
        "--writeInstances",
        "--writeSvmModel"
    };
    Main.main(args);
  }

  /**
   * Actually not a test but documentation of how the svm training is done with several
   * {@code --samplePercent} values.
   * Takes long.
   * Does not work with the eval_dataset_sample.xml because it has too few samples.
   */
  @Test
  @Ignore
  public void testMachineLearningPercent() throws Exception {
    final File temp = Files.createTempDir();
    String[] args = {CliParams.ML,
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/eval_dataset.xml"),
        "-out", temp.getAbsolutePath(),
        "--goldFile", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/gold_with_alias.json"),
        "--tex",
        "--texvcinfo", "http://localhost:10044/texvcinfo",
        "--threads", "10",
        "--samplePercent", "10",
        "--samplePercent", "20",
        "--samplePercent", "30",
        "--samplePercent", "40",
        "--samplePercent", "50",
        "--samplePercent", "60",
        "--samplePercent", "70",
        "--samplePercent", "80",
        "--samplePercent", "90",
        "--samplePercent", "100",
    };
    Main.main(args);
  }

  /**
   * Tests if the classification throws no error. Also tests if a correct definiens is
   * extracted. Must have a good model!
   *
   * @throws Exception if the command fails or the extraction file cannot be read
   */
  @Test
  public void testMachineLearningClassification() throws Exception {
    final File temp = Files.createTempDir();
    String[] args = {CliParams.CLASSIFY,
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/eval_dataset_sample.xml"),
        "-out", temp.getAbsolutePath(),
        "--tex",
        "--threads", "1",
        "--svmModel", resourcePath("com/formulasearchengine/mathosphere/mlp/ml/svm_model__c_1.0_gamma_0.022097087.model"),
        "--stringFilter", resourcePath("com/formulasearchengine/mathosphere/mlp/ml/string_filter__c_1.0_gamma_0.022097087.model"),
    };
    runSilently(args);

    final File extraction = new File(temp, "extractedDefiniens");
    // File.length() is 0 for a missing file, so check existence explicitly before the size check.
    Assert.assertTrue("Expected output file " + extraction.getAbsolutePath(), extraction.exists());
    // Must be a rather small file, let's assume smaller than 2kb. This is also a sanity check
    // not to deserialize a large file in case of error.
    Assert.assertTrue(extraction.length() < 2 * 1024);

    ObjectMapper mapper = new ObjectMapper();
    StrippedWikiDocumentOutput strippedWikiDocumentOutput =
        mapper.readValue(extraction, StrippedWikiDocumentOutput.class);
    // JUnit convention: expected value first, actual value second.
    Assert.assertEquals("Martingale (betting system)", strippedWikiDocumentOutput.getTitle());
    Assert.assertTrue(strippedWikiDocumentOutput.getRelations()
        .contains(new IdentifierDefinition("q", "probability")));
  }

  /**
   * Actually not a test but documentation of how the svm optimisation is done.
   *
   * <p>NOTE: the {@code --nd} argument is an absolute Windows path, so this only runs on a
   * machine that provides that file.
   */
  @Test
  @Ignore
  public void testMachineLearningClassificationWithNamespaces() throws Exception {
    final File temp = Files.createTempDir();
    String[] args = {CliParams.CLASSIFY,
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/eval_dataset.xml"),
        "-out", temp.getAbsolutePath(),
        "--tex",
        "--namespace",
        "--nd", "C:\\tmp\\mlp-tmp\\gold-weak-snowball-True-True-svd-150-kmeans-k=10000.json",
        "--texvcinfo", "http://localhost:10044/texvcinfo",
        "--threads", "5",
        "--evaluate",
        "--svmModel", resourcePath("com/formulasearchengine/mathosphere/mlp/ml/svm_model__c_1.0_gamma_0.022097087.model"),
        "--stringFilter", resourcePath("com/formulasearchengine/mathosphere/mlp/ml/string_filter__c_1.0_gamma_0.022097087.model"),
    };
    Main.main(args);
  }

  /**
   * Actually not a test but documentation of how the svm optimisation was done, starting from
   * an instances file passed via {@code --instances}.
   */
  @Test
  @Ignore
  public void testMachineLearningFromPreprocessedInstances() throws Exception {
    final File temp = Files.createTempDir();
    String[] args = {CliParams.ML,
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/eval_dataset.xml"),
        "-out", temp.getAbsolutePath(),
        "--goldFile", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/gold.json"),
        "--threads", "10",
        "--instances", resourcePath("com/formulasearchengine/mathosphere/mlp/ml/instances.arff")
    };
    Main.main(args);
  }

  /**
   * Actually not a test but documentation of how the svm optimisation was done.
   *
   * <p>Passes one {@code --svmCost} option per value in {@link WekaLearner#C_coarse} and one
   * {@code --svmGamma} option per value in {@link WekaLearner#Y_coarse}, appended after the
   * fixed arguments.
   */
  @Test
  @Ignore
  public void testMachineLearningCorase() throws Exception {
    final File temp = Files.createTempDir();
    List<String> allArgs = new ArrayList<>(Arrays.asList(
        CliParams.ML,
        "-in", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/eval_dataset.xml"),
        "-out", temp.getAbsolutePath(),
        "--goldFile", resourcePath("com/formulasearchengine/mathosphere/mlp/gold/gold.json"),
        "--tex",
        "--texvcinfo", "http://localhost:10044/texvcinfo",
        "--threads", "1",
        "--writeInstances"));
    for (double c : WekaLearner.C_coarse) {
      allArgs.add("--svmCost");
      allArgs.add(String.valueOf(c));
    }
    for (double g : WekaLearner.Y_coarse) {
      allArgs.add("--svmGamma");
      allArgs.add(String.valueOf(g));
    }
    Main.main(allArgs.toArray(new String[0]));
  }
}