package edu.stanford.nlp.quoteattribution; import edu.stanford.nlp.ling.CoreAnnotation; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.Annotator; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.ErasureUtils; import java.util.*; /** * Created by mjfang on 12/18/16. Annotates each sentence with what chapter it is in (1-indexed). * Currently uses "CHAPTER" as a delimiter; may have to be extended in the future. */ public class ChapterAnnotator implements Annotator{ public String CHAPTER_BREAK = "CHAPTER"; //key to a list of sentences that begin chapters public static class ChapterAnnotation implements CoreAnnotation<Integer> { public Class<Integer> getType() { return ErasureUtils.uncheckedCast(List.class); } } public void annotate(Annotation doc) { Map<Integer, Integer> sentenceToChapter = new HashMap<>(); List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class); int chapterNum = 0; int sentenceIndex = 0; for(CoreMap sentence : sentences) { if(sentence.get(CoreAnnotations.TextAnnotation.class).contains(CHAPTER_BREAK)) { chapterNum++; } sentence.set(ChapterAnnotation.class, chapterNum); sentenceToChapter.put(sentenceIndex, chapterNum); sentenceIndex++; } } @Override public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() { return null; } @Override public Set<Class<? extends CoreAnnotation>> requires() { return new HashSet<>(Arrays.asList( CoreAnnotations.TextAnnotation.class, CoreAnnotations.SentencesAnnotation.class)); } }