package edu.uncc.cs.watsonsim.researchers;

import java.util.ArrayList;
import java.util.List;

import edu.uncc.cs.watsonsim.Answer;
import edu.uncc.cs.watsonsim.Environment;
import edu.uncc.cs.watsonsim.Question;
import edu.uncc.cs.watsonsim.nlp.Relatedness;

/**
 * Researcher that groups candidate answers whose texts are reported as
 * matching by {@code Relatedness.matchViaLevenshtein} and replaces each
 * group of two or more answers with the result of {@code Answer.merge}.
 */
public class MergeByText extends Researcher {
    /** Text matcher used to decide whether two answers belong together. */
    private final Relatedness syn;

    /**
     * Create a new merger using shared environment resources.
     *
     * @param env the environment handed to the {@link Relatedness} matcher
     */
    public MergeByText(Environment env) {
        syn = new Relatedness(env);
    }

    /**
     * Merge answers whose texts match one another.
     *
     * <p>Answers are visited in order. Each answer joins the first existing
     * block containing any answer whose text matches its own; if there is no
     * such block it starts a new one. Every block with more than one member
     * is then collapsed with {@code Answer.merge}; single-member blocks are
     * passed through untouched. The output keeps the order in which the
     * blocks were first created.
     *
     * @param q the question being answered (not consulted by this researcher)
     * @param answers the candidate answers to merge; this list is not modified
     * @return a new list holding one answer per block
     */
    @Override
    public List<Answer> question(Question q, List<Answer> answers) {
        List<List<Answer>> blocks = groupBySimilarity(answers);

        // Collapse each block down to a single answer.
        List<Answer> merged = new ArrayList<>(blocks.size());
        for (List<Answer> block : blocks) {
            if (block.size() > 1) {
                merged.add(Answer.merge(block));
            } else {
                merged.add(block.get(0));
            }
        }

        log.info("Merged " + answers.size() + " candidates into " + merged.size() + " (by surface similarity).");
        return merged;
    }

    /** Partition the answers, in order, into blocks of mutually matched texts. */
    private List<List<Answer>> groupBySimilarity(List<Answer> answers) {
        List<List<Answer>> blocks = new ArrayList<>();
        for (Answer candidate : answers) {
            List<Answer> home = findMatchingBlock(blocks, candidate);
            if (home == null) {
                // Nothing matched: this answer starts a block of its own.
                home = new ArrayList<>();
                blocks.add(home);
            }
            home.add(candidate);
        }
        return blocks;
    }

    /**
     * Find the first block containing an answer whose text matches the
     * candidate's text, or {@code null} when no block qualifies.
     */
    private List<Answer> findMatchingBlock(List<List<Answer>> blocks, Answer candidate) {
        for (List<Answer> block : blocks) {
            // Matching any one member is enough; every member is checked
            // because a match with one does not imply a match with the rest.
            for (Answer example : block) {
                if (syn.matchViaLevenshtein(candidate.text, example.text)) {
                    return block;
                }
            }
        }
        return null;
    }
}