package com.formulasearchengine.mathosphere.mlp.text;

import com.formulasearchengine.mathosphere.mlp.pojos.ParsedWikiDocument;
import com.formulasearchengine.mathosphere.mlp.pojos.Word;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.*;

import static com.formulasearchengine.mathosphere.mlp.text.PatternMatcher.*;

/**
 * Tests for {@link PatternMatcher#match} on small hand-built word sequences.
 *
 * <p>Every test feeds a list of {@link Word}s to a matcher generated for the identifiers
 * {@code E}, {@code m} and {@code c^2}, sorts the resulting matches by identifier
 * (case-insensitively) so the order is deterministic, and then checks each
 * identifier/definition pair.
 *
 * <p>Created by Leo on 26.01.2017.
 */
public class PatternMatcherTest {
  public static final String SPEED_OF_LIGHT = "speed of light";
  public static final String TIMES = "times";
  public static final String C_2 = "c^2";
  public static final String M = "m";
  public static final String MASS = "mass";
  public static final String EQUALS = "=";
  public static final String ENERGY = "energy";
  public static final String E = "E";
  public static final String ID = "ID";
  public static final String SYM = "SYM";
  public static final String NN = "NN";
  public static final String VB = "VB";
  public static final String NNP = "NNP";
  public static final String IS = "is";
  public static final String ARE = "are";

  /**
   * Orders matches by their identifier, ignoring case. Lower-casing uses {@link Locale#ROOT}
   * so the ordering does not depend on the default locale of the JVM running the tests.
   */
  private static final Comparator<IdentifierMatch> BY_IDENTIFIER_IGNORING_CASE =
      new Comparator<IdentifierMatch>() {
        @Override
        public int compare(IdentifierMatch left, IdentifierMatch right) {
          String leftKey = left.getIdentifier().toLowerCase(Locale.ROOT);
          String rightKey = right.getIdentifier().toLowerCase(Locale.ROOT);
          return leftKey.compareTo(rightKey);
        }
      };

  private PatternMatcher patternMatcher;
  private final Set<String> identifiers = new HashSet<>();
  private final ParsedWikiDocument parsedWikiDocument = new ParsedWikiDocument();

  /** Builds the matcher under test for the identifiers {@code E}, {@code m} and {@code c^2}. */
  @Before
  public void setup() {
    identifiers.add(E);
    identifiers.add(M);
    identifiers.add(C_2);
    patternMatcher = generatePatterns(identifiers);
  }

  /**
   * Input in which each definition noun directly precedes its identifier:
   * "energy E = mass m times speed of light c^2". Expects one match per identifier.
   */
  @Test
  public void apposition() throws Exception {
    List<Word> words = new ArrayList<>();
    words.add(new Word(ENERGY, NN));
    words.add(new Word(E, ID));
    words.add(new Word(EQUALS, SYM));
    words.add(new Word(MASS, NN));
    words.add(new Word(M, ID));
    words.add(new Word(TIMES, VB));
    words.add(new Word(SPEED_OF_LIGHT, NNP));
    words.add(new Word(C_2, ID));

    List<PatternMatcher.IdentifierMatch> matches = patternMatcher.match(words, parsedWikiDocument);

    Assert.assertEquals(3, matches.size());
    sortByIdentifier(matches);
    // Sorted case-insensitively: "c^2" < "E" < "m".
    assertMatch(matches.get(0), C_2, SPEED_OF_LIGHT);
    assertMatch(matches.get(1), E, ENERGY);
    assertMatch(matches.get(2), M, MASS);
  }

  /**
   * Input of the form "identifier is/are definition", twice, separated by an unrelated word:
   * "E is energy times c^2 are speed of light". Expects one match per phrase.
   */
  @Test
  public void pattern2() throws Exception {
    List<Word> words = new ArrayList<>();
    words.add(new Word(E, ID));
    words.add(new Word(IS, "bar"));
    words.add(new Word(ENERGY, NN));
    // Unrelated word separating the two phrases.
    words.add(new Word(TIMES, VB));
    words.add(new Word(C_2, ID));
    words.add(new Word(ARE, "foo"));
    words.add(new Word(SPEED_OF_LIGHT, NNP));

    List<PatternMatcher.IdentifierMatch> matches = patternMatcher.match(words, parsedWikiDocument);

    Assert.assertEquals(2, matches.size());
    sortByIdentifier(matches);
    // NOTE(review): the expected position is 2 for both matches although the second phrase
    // starts at index 4, so the position is presumably not an absolute index into the word
    // list - confirm against PatternMatcher.
    assertMatch(matches.get(0), C_2, SPEED_OF_LIGHT, 2);
    assertMatch(matches.get(1), E, ENERGY, 2);
  }

  /**
   * The same "E is energy" phrase occurring twice in one input. Expects both occurrences to be
   * reported as separate, identical matches.
   */
  @Test
  public void patternRepeat() throws Exception {
    List<Word> words = new ArrayList<>();
    words.add(new Word(E, ID));
    words.add(new Word(IS, "bar"));
    words.add(new Word(ENERGY, NN));
    // Unrelated word separating the two phrases.
    words.add(new Word(TIMES, VB));
    words.add(new Word(E, ID));
    words.add(new Word(IS, "bar"));
    words.add(new Word(ENERGY, NN));

    List<PatternMatcher.IdentifierMatch> matches = patternMatcher.match(words, parsedWikiDocument);

    Assert.assertEquals(2, matches.size());
    sortByIdentifier(matches);
    // Both occurrences are expected to yield exactly the same identifier, definition and position.
    for (IdentifierMatch match : matches) {
      assertMatch(match, E, ENERGY, 2);
    }
  }

  /** Sorts {@code matches} in place by identifier, ignoring case. */
  private static void sortByIdentifier(List<IdentifierMatch> matches) {
    Collections.sort(matches, BY_IDENTIFIER_IGNORING_CASE);
  }

  /** Asserts that {@code match} carries the expected identifier and definition. */
  private static void assertMatch(IdentifierMatch match, String identifier, String definition) {
    Assert.assertEquals(identifier, match.getIdentifier());
    Assert.assertEquals(definition, match.getDefinition());
  }

  /** Asserts that {@code match} carries the expected identifier, definition and position. */
  private static void assertMatch(
      IdentifierMatch match, String identifier, String definition, int position) {
    assertMatch(match, identifier, definition);
    Assert.assertEquals(position, match.getPosition());
  }
}