package com.cse10.extractor.stanfordcorenlp.detector;
import com.cse10.extractor.ExtractorConstants;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import java.util.List;
/**
* Created by TharinduWijewardane on 2015-01-05.
*/
public class CrimeTypeDetector {
private static TokenSequencePattern criminalPattern = TokenSequencePattern.compile("(/killed|murdered|stabbed|raped/) []{0,4} /by/ []{0,2} ([ner: PERSON]{1,4})");
private static TokenSequencePattern criminalActivePattern = TokenSequencePattern.compile("([ner: PERSON]{1,4}) []{0,4} (/killed|murdered|stabbed|raped/)");
private static TokenSequencePattern criminalIngPattern = TokenSequencePattern.compile("[{tag:IN}]{1,1} []{0,4} (/killing|murdering|stabbing|raping/)");
private static TokenSequencePattern victimPattern = TokenSequencePattern.compile("([{tag:NNS}]{0,3} [{tag:NN}]{0,3} [{tag:NNP}]{0,3}) [{tag:VBD}]{1,2} (/killed|murdered|stabbed|raped/)");
protected static TokenSequencePattern suicidePattern = TokenSequencePattern.compile("([{tag:NNS}]{0,3} [{tag:NN}]{0,3} [{tag:NNP}]{0,3}) /committed/ /suicide/");
public static String findCrimeType(List<CoreLabel> tokens) {
String crimeType = "";
TokenSequenceMatcher criminalMatcher = criminalPattern.getMatcher(tokens);
while (criminalMatcher.find()) {
crimeType = criminalMatcher.group(1);
System.out.println(" crime type: " + crimeType);
}
if (crimeType.length() > 0) {
return standardize(crimeType);
}
criminalMatcher = criminalActivePattern.getMatcher(tokens);
while (criminalMatcher.find()) {
crimeType = criminalMatcher.group(criminalMatcher.groupCount());
System.out.println(" crime type: " + crimeType);
}
if (crimeType.length() > 0) {
return standardize(crimeType);
}
criminalMatcher = criminalIngPattern.getMatcher(tokens);
while (criminalMatcher.find()) {
crimeType = criminalMatcher.group(1);
System.out.println(" crime type: " + crimeType);
}
if (crimeType.length() > 0) {
return standardize(crimeType);
}
TokenSequenceMatcher victimMatcher = victimPattern.getMatcher(tokens);
while (victimMatcher.find()) {
crimeType = victimMatcher.group(victimMatcher.groupCount());
System.out.println(" crime type: " + crimeType);
}
if (crimeType.length() > 0) {
return standardize(crimeType);
}
TokenSequenceMatcher suicideVictimMatcher = suicidePattern.getMatcher(tokens);
while (suicideVictimMatcher.find()) {
crimeType = ExtractorConstants.CRIME_TYPE_SUICIDE;
System.out.println(" suicide type");
}
return standardize(crimeType);
}
private static String standardize(String keyword) {
if (keyword.startsWith("kill") || keyword.startsWith("murder") || keyword.startsWith("stab")) {
keyword = ExtractorConstants.CRIME_TYPE_MURDER;
} else if (keyword.startsWith("rape")) {
keyword = ExtractorConstants.CRIME_TYPE_RAPE;
}
return keyword;
}
}