package edu.uncc.cs.watsonsim.scorers;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import edu.uncc.cs.watsonsim.Answer;
import edu.uncc.cs.watsonsim.Passage;
import edu.uncc.cs.watsonsim.Phrase;
import edu.uncc.cs.watsonsim.StringUtils;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.util.Span;

/**
 * Scores a passage by how often question tokens overlap with the person names
 * that the OpenNLP name finder detects in the passage text.
 *
 * <p>For every name span found in the passage, each question token that is a
 * substring of the span's first token adds one to the score.
 *
 * @author Jonathan Shuman
 */
public class NamedEntityRecognizerScorer extends PassageScorer {

	/** Location of the OpenNLP person-name model, relative to the working directory. */
	private static final String PERSON_MODEL_PATH = "data/en-ner-person.bin";

	/**
	 * Lazily loaded model shared by all calls. Only the model is cached; a new
	 * {@link NameFinderME} is still created for every call.
	 */
	private static TokenNameFinderModel personModel;

	/**
	 * Scores a passage against the question.
	 *
	 * @param q the question; its {@code text} is tokenized and compared against detected names
	 * @param a the candidate answer (not used by this scorer)
	 * @param p the passage; its {@code text} is joined with single spaces before tokenizing
	 * @return the match count, or {@link Double#NaN} if the name model could not be loaded
	 */
	public double scorePassage(Phrase q, Answer a, Passage p) {
		// e.g. "Jane Austen"
		String passageText = StringUtils.join(p.text, " ");
		// e.g. "Romantic novelist Jane Austen once wrote -the- book Emma."
		String questionText = q.text;
		return numberOfNamedPersonEntities(questionText, passageText);
	}

	/**
	 * Counts (name span, question token) pairs where the question token is
	 * contained in the first token of the name span.
	 *
	 * @param q_t the question text
	 * @param c_t the passage text searched for person names
	 * @return the number of matching pairs, or {@link Double#NaN} when the
	 *         model file cannot be read
	 */
	private double numberOfNamedPersonEntities(String q_t, String c_t) {
		final TokenNameFinderModel model;
		try {
			model = loadPersonModel();
		} catch (IOException e) {
			e.printStackTrace();
			return Double.NaN;
		}

		NameFinderME nameFinder = new NameFinderME(model);
		String[] passageTokens = SimpleTokenizer.INSTANCE.tokenize(c_t);
		String[] questionTokens = SimpleTokenizer.INSTANCE.tokenize(q_t);

		double matches = 0;
		for (Span nameSpan : nameFinder.find(passageTokens)) {
			// NOTE(review): only the first token of each span is compared, and by
			// substring containment, so a short question token such as "a" matches
			// many names. Kept as-is because changing it would alter the score.
			String firstNameToken = passageTokens[nameSpan.getStart()];
			for (String questionToken : questionTokens) {
				if (firstNameToken.contains(questionToken)) {
					matches++;
				}
			}
		}
		return matches;
	}

	/**
	 * Returns the person-name model, reading it from {@link #PERSON_MODEL_PATH}
	 * on first use. A failed load leaves the cache empty, so the next call
	 * tries again.
	 *
	 * @return the loaded model
	 * @throws IOException if the model file cannot be opened, read, or closed
	 */
	private static synchronized TokenNameFinderModel loadPersonModel() throws IOException {
		if (personModel == null) {
			// try-with-resources closes the stream without a return in finally,
			// which would otherwise mask exceptions thrown while loading.
			try (InputStream modelIn = new FileInputStream(PERSON_MODEL_PATH)) {
				personModel = new TokenNameFinderModel(modelIn);
			}
		}
		return personModel;
	}
}