package com.cse10.extractor.stanfordcorenlp.detector;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import java.util.List;
/**
 * Extracts the name of the perpetrator from a tokenized sentence using
 * TokensRegex patterns over the tokens' text and {@code ner} annotations.
 *
 * <p>Two patterns are tried in order: a passive-voice pattern
 * (crime verb ... "by" ... PERSON) and, if that yields nothing, an
 * active-voice pattern (PERSON ... crime verb).
 *
 * Created by TharinduWijewardane on 2015-01-05.
 */
public class CriminalDetector {

    /**
     * Passive voice: a crime verb, up to 4 arbitrary tokens, the token "by",
     * up to 2 arbitrary tokens, then 1-4 tokens tagged PERSON (captured as the last group).
     */
    private static final TokenSequencePattern criminalPattern = TokenSequencePattern.compile("(/killed|murdered|stabbed|raped/) []{0,4} /by/ []{0,2} ([ner: PERSON]{1,4})");

    /**
     * Active voice: 1-4 tokens tagged PERSON (captured as group 1), up to 4
     * arbitrary tokens, then a crime verb.
     */
    private static final TokenSequencePattern criminalActivePattern = TokenSequencePattern.compile("([ner: PERSON]{1,4}) []{0,4} (/killed|murdered|stabbed|raped/)");

    /**
     * Finds the criminal mentioned in the given tokens.
     *
     * <p>The passive pattern is tried first; the active pattern is only consulted
     * when the passive one produced no non-empty result. When a pattern matches
     * several times, the last match wins. Every match is also printed to standard output.
     *
     * @param tokens the tokens of a sentence; the patterns rely on their {@code ner} annotation.
     *               May be {@code null} or empty, in which case nothing is matched.
     * @return the text of the matched PERSON span, or an empty string if no pattern matched
     */
    public static String findCriminal(List<CoreLabel> tokens) {
        // Nothing to match against: report "no criminal" instead of failing inside the matcher.
        if (tokens == null || tokens.isEmpty()) {
            return "";
        }

        String criminal = lastMatch(criminalPattern, tokens, true);
        if (criminal.length() > 0) {
            return criminal;
        }
        return lastMatch(criminalActivePattern, tokens, false);
    }

    /**
     * Runs {@code pattern} over {@code tokens} and returns the PERSON group of the last match.
     *
     * @param pattern      the compiled pattern to apply
     * @param tokens       the tokens to search
     * @param useLastGroup {@code true} to read the pattern's last capture group,
     *                     {@code false} to read capture group 1
     * @return the selected group's text from the last match, or an empty string if there was no match
     */
    private static String lastMatch(TokenSequencePattern pattern, List<CoreLabel> tokens, boolean useLastGroup) {
        String criminal = "";
        TokenSequenceMatcher matcher = pattern.getMatcher(tokens);
        while (matcher.find()) {
            // Later matches deliberately overwrite earlier ones.
            criminal = matcher.group(useLastGroup ? matcher.groupCount() : 1);
            System.out.println(" criminal: " + criminal);
        }
        return criminal;
    }
}