package com.formulasearchengine.mathosphere.mlp.text;
import com.alexeygrigorev.rseq.*;
import com.formulasearchengine.mathosphere.mlp.pojos.Sentence;
import com.formulasearchengine.mathosphere.mlp.pojos.Word;
import java.util.*;
public class MyPatternMatcher {
public static final String IDENTIFIER = "identifier";
public static final String DEFINITION = "definition";
public static int[] match(Sentence sentence, String identifierText, String definiens, int identifierPosition, int definiensPosition) {
Matcher<Word> isOrAre = word("is").or(word("are"));
Matcher<Word> let = word("let");
Matcher<Word> be = word("be");
Matcher<Word> by = word("by");
Matcher<Word> denotes = word("denotes").or(word("denote"));
Matcher<Word> denoted = word("denoted");
Matcher<Word> the = pos("DT");
Matcher<Word> identifier = BeanMatchers.eq(Word.class, "word", identifierText).captureAs(IDENTIFIER);
Matcher<Word> definition = posRegExp("(NN[PS]{0,2}|NP\\+?|NN\\+|LNK)").captureAs(DEFINITION);
Matcher<Word> otherMathExpression = posRegExp("(ID|MATH)").captureAs("othermath");
List<Pattern<Word>> patterns = Arrays.asList(
//1
Pattern.create(definition, identifier),
//2
Pattern.create(identifier, definition),
//3
Pattern.create(identifier, denotes, definition),
//4
Pattern.create(identifier, denotes, the, definition),
//5
Pattern.create(identifier, isOrAre, definition),
//6
Pattern.create(identifier, isOrAre, the, definition),
//7
Pattern.create(identifier, isOrAre, denoted, by, definition),
//8
Pattern.create(identifier, isOrAre, denoted, by, the, definition),
//9
Pattern.create(let, identifier, be, denoted, by, definition),
//10
Pattern.create(let, identifier, be, denoted, by, the, definition),
//11
//colon
Pattern.create(pos(":")),
//12
//comma
Pattern.create(pos(",")),
//13
//othermath
Pattern.create(otherMathExpression),
//14
//more opening parentheses than closing -> definiens in parentheses
Pattern.create(word("\\(")),
//15
//more closing parentheses than opening -> identifier in parentheses
Pattern.create(word("\\)"))
);
int[] result = new int[15];
long openingParentheses = 0;
for (int i = 0; i < patterns.size(); i++) {
Pattern<Word> pattern = patterns.get(i);
List<Match<Word>> matches = pattern.find(sentence.getWords());
switch (i) {
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
case 8:
case 9:
for (Match<Word> match : matches) {
Word matchedDefiniens = match.getVariable(DEFINITION);
if (matchedDefiniens != null && matchedDefiniens.getWord().equals(definiens))
result[i] = 1;
}
break;
case 10:
case 11:
case 12:
for (Match<Word> match : matches) {
if (inRange(match.matchedFrom(), identifierPosition, definiensPosition))
result[i] = 1;
}
break;
case 13:
openingParentheses = matches.stream().filter(m -> inRange(m.matchedFrom(), identifierPosition, definiensPosition)).count();
break;
case 14:
//definiens in parentheses
long closingParentheses = matches.stream().filter(m -> inRange(m.matchedFrom(), identifierPosition, definiensPosition)).count();
if (identifierPosition < definiensPosition)
//more opening parentheses than closing -> definiens in parentheses
result[13] = openingParentheses - closingParentheses > 0 ? 1 : 0;
if (identifierPosition > definiensPosition)
//more closing parentheses than opening -> identifier in parentheses
result[14] = closingParentheses - openingParentheses > 0 ? 1 : 0;
}
}
return result;
}
/**
* Checks if x lies between y and z
*
* @return
*/
private static boolean inRange(int x, int y, int z) {
return ((y < x && x < z) || (z < x && x < y));
}
protected static XMatcher<Word> word(String word) {
return BeanMatchers.eq(Word.class, "word", word);
}
protected static XMatcher<Word> pos(String pos) {
return BeanMatchers.eq(Word.class, "posTag", pos);
}
protected static XMatcher<Word> posRegExp(String regexp) {
return BeanMatchers.regex(Word.class, "posTag", regexp);
}
protected static XMatcher<Word> regExp(String regexp) {
return BeanMatchers.regex(Word.class, "word", regexp);
}
}