package com.formulasearchengine.mathosphere.mlp.text; import com.formulasearchengine.mathosphere.mlp.pojos.Sentence; import com.formulasearchengine.mathosphere.mlp.pojos.Word; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import java.util.ArrayList; import static com.formulasearchengine.mathosphere.mlp.ml.WekaUtils.LONGEST_SENTENCE_IN_ENGISH; /** * Created by Leo on 20.01.2017. */ public class MachineLearningPatternMatcherTest { public static final double EPSILON = 1e-15; public static final double ONE_OCCURRENCE = 1d / LONGEST_SENTENCE_IN_ENGISH; public static final double TWO_OCCURRENCES = 2d / LONGEST_SENTENCE_IN_ENGISH; public static final Word DENOTES = new Word(MachineLearningPatternMatcher.DENOTES, ""); public static final Word THE = new Word("definite or indefinite article", "DT"); public static final Word IS = new Word(MachineLearningPatternMatcher.IS, ""); public static final Word DENOTED = new Word(MachineLearningPatternMatcher.DENOTED, ""); public static final Word BY = new Word(MachineLearningPatternMatcher.BY, ""); public static final Word BE = new Word(MachineLearningPatternMatcher.BE, ""); public static final Word LET = new Word(MachineLearningPatternMatcher.LET, ""); public static final Word COLON = new Word(":", ":"); public static final Word COMMA = new Word(",", ","); public static final Word OTHERMATH = new Word("E^2", "MATH"); public static final Word OPENING = new Word("(", "-LRB-"); public static final Word CLOSING = new Word(")", "-RRB-"); private Word identifier = new Word(MachineLearningPatternMatcher.IDENTIFIER, "ID"); private Word definiens = new Word(MachineLearningPatternMatcher.DEFINITION, "NN"); private Word random = new Word("random", "NN"); private ArrayList<Word> words; @Before public void setup() { words = new ArrayList<>(); } @Test public void testPattern1() { words.add(definiens); words.add(identifier); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testPattern1FalsePositive() { words.add(random); words.add(identifier); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testPattern2() { words.add(identifier); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testPattern3() { words.add(identifier); words.add(DENOTES); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testPattern4() { words.add(identifier); words.add(DENOTES); words.add(THE); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testPattern5() { words.add(identifier); words.add(IS); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testPattern6() { words.add(identifier); words.add(IS); words.add(THE); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testPattern7() { words.add(identifier); words.add(IS); words.add(DENOTED); words.add(BY); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testPattern8() { words.add(identifier); words.add(IS); words.add(DENOTED); words.add(BY); words.add(THE); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testPattern9() { words.add(LET); words.add(identifier); words.add(BE); words.add(DENOTED); words.add(BY); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testPattern10() { words.add(LET); words.add(identifier); words.add(BE); words.add(DENOTED); words.add(BY); words.add(THE); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); //destroy pattern test s.getWords().add(3, random); result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testColon() { words.add(identifier); words.add(COLON); words.add(random); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, result, EPSILON); //remove colon s.getWords().remove(COLON); //colon at end s.getWords().add(COLON); result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testComma() { words.add(identifier); words.add(COMMA); words.add(random); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, result, EPSILON); //remove colon s.getWords().remove(COMMA); //colon at end s.getWords().add(COMMA); result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testOthermath() { words.add(identifier); words.add(OTHERMATH); words.add(random); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0}, result, EPSILON); //remove OTHERMATH s.getWords().remove(OTHERMATH); //OTHERMATH at beginning s.getWords().add(0, OTHERMATH); result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testOpenParentheses() { words.add(identifier); words.add(OPENING); words.add(random); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0}, result, EPSILON); //remove Parentheses s.getWords().remove(OPENING); //Parentheses at end s.getWords().add(OPENING); result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testClosingParentheses() { words.add(identifier); words.add(CLOSING); words.add(random); words.add(definiens); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, result, EPSILON); } @Test public void testManyParentheses() { words.add(definiens); words.add(CLOSING); words.add(random); words.add(OPENING); words.add(identifier); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } @Test public void testManyParentheses2() { words.add(definiens); words.add(OPENING); words.add(random); words.add(CLOSING); words.add(OPENING); words.add(identifier); words.add(CLOSING); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, result, EPSILON); } @Test public void testManyParenthesesDefiniensInParentheses() { words.add(OPENING); words.add(definiens); words.add(OPENING); words.add(random); words.add(CLOSING); words.add(CLOSING); words.add(identifier); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, words.indexOf(identifier), words.indexOf(definiens)); Assert.assertArrayEquals(new double[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0}, result, EPSILON); } @Test public void testIdentifierAndDefiniensPositions() { words.add(identifier); words.add(definiens); words.add(identifier); Sentence s = new Sentence(words, null, null); double[] result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, 2, 1); Assert.assertArrayEquals(new double[]{1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); result = new MachineLearningPatternMatcher().match(s, MachineLearningPatternMatcher.IDENTIFIER, MachineLearningPatternMatcher.DEFINITION, 0, 1); Assert.assertArrayEquals(new double[]{1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, result, EPSILON); } }