package edu.uncc.cs.watsonsim.researchers;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import edu.uncc.cs.watsonsim.Answer;
import edu.uncc.cs.watsonsim.Question;
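/* Author: Ricky Sanders
 *
 * Compares each answer against the answers already grouped and merges those that
 * have 3 or more distinct words in common, or that share more than half of the
 * earlier answer's distinct words.
 * Currently keeps the longest answer
 *
 * WORK IN PROGRESS
 */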
/**
 * Researcher that groups candidate answers whose token sets overlap and
 * collapses each group into a single answer via {@code Answer.merge}.
 */
public class MergeAnswers extends Researcher{
    /** Two answers match when they share at least this many distinct tokens. */
    private static final int MIN_SHARED_TERMS = 3;

    /**
     * Two answers also match when the shared tokens reach this fraction of the
     * earlier answer's distinct tokens.
     */
    private static final double MIN_SHARED_FRACTION = 0.5;

    /**
     * Added to the denominator of the overlap ratio so the ratio stays
     * well-defined for an answer with no tokens. As a side effect, sharing
     * exactly half of the tokens falls just below {@link #MIN_SHARED_FRACTION}.
     */
    private static final double RATIO_SMOOTHING = 0.01;

    /**
     * Call merge on any two similar answers.
     *
     * <p>Answers are visited in order. Each answer joins the first existing
     * block containing an answer it restates (see
     * {@link #isRestatement(HashSet, Answer)}); otherwise it starts a new
     * block. Blocks with more than one member are then combined with
     * {@code Answer.merge}; singleton blocks are passed through unchanged.
     *
     * @param q the question being answered (not consulted here)
     * @param answers the candidate answers; the list itself is not modified
     * @return a new list with one entry per block, in block creation order
     */
    @Override
    public List<Answer> question(Question q, List<Answer> answers) {
        List<List<Answer>> answer_blocks = new ArrayList<>();
        // Arrange the answers into blocks
        each_answer:
        for (Answer original : answers) {
            HashSet<String> original_terms = new HashSet<>(original.getTokens());
            for (List<Answer> block : answer_blocks) {
                // Look through the examples in this topic.
                // If one matches, put the answer in this block and move on.
                for (Answer example : block) {
                    if (isRestatement(original_terms, example)) {
                        // Record which earlier answer this one duplicates.
                        original.log(this, "It restates %s", example);
                        block.add(original);
                        continue each_answer;
                    }
                }
            }
            // Make a new topic for this answer
            List<Answer> new_block = new ArrayList<>();
            new_block.add(original);
            answer_blocks.add(new_block);
        }

        // Merge the blocks
        List<Answer> new_answers = new ArrayList<>();
        for (List<Answer> block : answer_blocks) {
            if (block.size() > 1) {
                new_answers.add(Answer.merge(block));
            } else {
                new_answers.add(block.get(0));
            }
        }
        log.info("Merged " + answers.size() + " candidates into " + new_answers.size() + " (by word similarity).");
        return new_answers;
    }

    /**
     * Decides whether a candidate answer restates an already-grouped answer.
     *
     * <p>The test is asymmetric: the overlap ratio is measured against the
     * number of distinct tokens in {@code example}, not in the candidate.
     *
     * @param original_terms distinct tokens of the candidate answer; not modified
     * @param example an answer that already belongs to a block
     * @return {@code true} if the two share at least {@link #MIN_SHARED_TERMS}
     *         distinct tokens, or if the smoothed share of {@code example}'s
     *         distinct tokens found in the candidate is at least
     *         {@link #MIN_SHARED_FRACTION}
     */
    private static boolean isRestatement(HashSet<String> original_terms, Answer example) {
        // Work on a private copy: retainAll mutates the set it is called on.
        HashSet<String> example_terms = new HashSet<>(example.getTokens());
        int sizeExample = example_terms.size();
        example_terms.retainAll(original_terms);
        int count = example_terms.size();
        double percentCorrect = count / (sizeExample + RATIO_SMOOTHING);
        return count >= MIN_SHARED_TERMS || percentCorrect >= MIN_SHARED_FRACTION;
    }
}