package edu.uncc.cs.watsonsim;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Methods related to determining and returning the QType (or QClass) of the question
* @author Ken Overholt
*
*/
public class QClassDetection {

    /** One or more consecutive underscores, i.e. a "blank" in a clue. */
    private static final Pattern BLANK = Pattern.compile("_+");

    /** A single double-quote character. */
    private static final Pattern QUOTE_MARK = Pattern.compile("\"");

    /**
     * A clue that contains a double-quoted span of at least three
     * whitespace-free tokens. Each token must end on a word boundary and may
     * be followed by one punctuation mark and any amount of whitespace.
     * Compiled once here instead of on every call.
     */
    private static final Pattern QUOTATION =
            Pattern.compile(".*\"(([^\"\\r\\n\\s]+)\\b[:;?!,.]?(\\s)*){3,}\".*");

    /**
     * Determines the QType (or QClass) of the question.
     *
     * <p>The checks run in a fixed order and the first one that matches wins:
     * FITB, COMMON_BONDS, ANAGRAM, BEFORE_AND_AFTER, QUOTATION. If none
     * matches, the question is classified as FACTOID.
     *
     * @param q Question object with the raw text and category values set
     * @return a QType enum representing the QType (or QClass)
     */
    public static QType detectType(Question q) {
        if (isFITB(q)) {
            return QType.FITB;
        }
        String category = q.getCategory();
        if (isCommonBonds(q.text, category)) {
            return QType.COMMON_BONDS;
        }
        if (isAnagram(q.text, category)) {
            return QType.ANAGRAM;
        }
        if (isBeforeAndAfter(q.text, category)) {
            return QType.BEFORE_AND_AFTER;
        }
        if (isQuotation(q.text, category)) {
            return QType.QUOTATION;
        }
        return QType.FACTOID;
    }

    /**
     * Returns true if the QType is FITB, i.e., if the question text contains
     * at least one underscore (a blank). This method only inspects the text;
     * it does not modify the given object.
     *
     * @param question the phrase we are attempting to classify
     * @return true if the text contains an underscore, false otherwise
     */
    private static boolean isFITB(Phrase question) {
        //TODO: Another, more common, indication of FITB is quoted phrases adjacent to the focus.
        // Use focus detection (and LAT detection) to identify these.
        return question.text.contains("_");
    }

    /**
     * Ad-hoc manual tester: prints the blanks found in a hard-coded sample
     * text, the text sections around them, and the result of the quotation
     * check on a sample clue.
     *
     * @param str ignored
     */
    public static void main(String... str) {
        //test data
        String testText = " Yet _ more__ text. W___ith further ___ in dd z\"it.";
        // Ruler line printed under the text so match offsets are easy to read.
        String count = "01234567890123456789012345678901234567890";
        System.out.println(testText);
        System.out.println(count);

        //blank finder: remember where the first blank starts and the last one ends
        int blankStart = -1;
        int blankEnd = -1;
        Matcher matcher = BLANK.matcher(testText);
        while (matcher.find()) {
            System.out.println("blank found: " + matcher.start() + " to " + matcher.end() + ": " + testText.substring(matcher.start(), matcher.end()));
            if (blankStart == -1) {
                blankStart = matcher.start();
            }
            blankEnd = matcher.end();
        }

        if (blankStart < 0) {
            // Without a blank there are no sections to report; the substring
            // calls below would fail on the -1 offsets.
            System.out.println("no blank found");
        } else {
            //section finders
            matcher = QUOTE_MARK.matcher(testText);
            // Section 1 runs from the last quote before the first blank
            // (or the start of the text) up to that blank.
            int sectionOneBegin = 0;
            while (matcher.find() && matcher.start() < blankStart) {
                sectionOneBegin = matcher.start();
            }
            System.out.println("section 1: " + testText.substring(sectionOneBegin, blankStart));

            // Section 2 runs from the end of the last blank through the next
            // quote. Matcher.end() throws if the preceding find() failed, so
            // fall back to the end of the text when there is no such quote.
            matcher.region(blankEnd, testText.length());
            int sectionTwoEnd = matcher.find() ? matcher.end() : testText.length();
            System.out.println("section 2: " + testText.substring(blankEnd, sectionTwoEnd));
        }

        System.out.println(QClassDetection.isQuotation("He not only wrote & directed \"Little Johnny Jones\", he also played the title role", "MUSICALS"));
    }

    /**
     * Case-insensitive substring test shared by the category-based checks.
     *
     * <p>Uses {@code Locale.ROOT} so the upper-casing does not depend on the
     * default locale, and a plain substring search so that line breaks in the
     * category cannot hide the keyword.
     *
     * @param category the category text; may be null
     * @param keyword  upper-case keyword to look for
     * @return true if the category is non-null and contains the keyword
     */
    private static boolean categoryContains(String category, String keyword) {
        return category != null
                && category.toUpperCase(java.util.Locale.ROOT).contains(keyword);
    }

    /**
     * Returns true if the QType is COMMON BONDS, i.e., if "COMMON BONDS" is
     * in the category (ignoring case).
     *
     * @param clue     the clue text (not used by this check)
     * @param category the category text; null is treated as no match
     * @return true if the category contains "COMMON BONDS"
     */
    private static boolean isCommonBonds(String clue, String category) {
        return categoryContains(category, "COMMON BONDS");
    }

    /**
     * Returns true if the QType is BEFORE & AFTER, i.e., if "BEFORE & AFTER"
     * is in the category (ignoring case).
     *
     * @param clue     the clue text (not used by this check)
     * @param category the category text; null is treated as no match
     * @return true if the category contains "BEFORE & AFTER"
     */
    private static boolean isBeforeAndAfter(String clue, String category) {
        return categoryContains(category, "BEFORE & AFTER");
    }

    /**
     * Returns true if the QType is Anagram, i.e., if the category includes
     * "ANAGRAM", "SCRAMBLED", or "JUMBLED" (ignoring case).
     *
     * @param clue     the clue text (not used by this check)
     * @param category the category text; null is treated as no match
     * @return true if the category contains any of the three keywords
     */
    private static boolean isAnagram(String clue, String category) {
        return categoryContains(category, "ANAGRAM")
                || categoryContains(category, "SCRAMBLED")
                || categoryContains(category, "JUMBLED");
    }

    /**
     * Returns true if the QType is Quotation. The current heuristic accepts a
     * clue that contains a double-quoted span of three or more tokens; the
     * exact requirements still need to be identified.
     *
     * <p>The whole clue must match the pattern, and {@code .} does not match
     * line terminators, so a line break outside the quoted span prevents a
     * match.
     *
     * @param clue     the clue text to inspect
     * @param category the category text (not used by this check)
     * @return true if the clue matches the quotation pattern
     */
    private static boolean isQuotation(String clue, String category) {
        return QUOTATION.matcher(clue).matches();
    }
}