package com.cse10.extractor.stanfordcorenlp;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;

import java.util.List;

/**
 * Created by TharinduWijewardane on 2014-12-16.
 *
 * <p>Collection of TokensRegex patterns for crime-related text, plus static
 * methods that run those patterns over a token list and print the captured
 * groups to standard output.
 *
 * <p>All state is static. {@code crimeType} and {@code criminal} are assigned
 * by {@link #detectCriminal(List)} and are never cleared anywhere in this
 * class, so once both are non-empty {@code detectCriminal} returns immediately
 * on every later call.
 * NOTE(review): confirm that this "first hit wins for the whole JVM" behaviour
 * is intended by the callers.
 */
public class CrimeRegexPatterns {

    // TODO: Consumer Affairs Authority, unfit for consumption
    // TODO: core reference
    // TODO: asylum seeker (15)

    /** Crime verb captured by the most recent successful match in {@link #detectCriminal(List)}. */
    private static String crimeType = "";
    /** Person text captured by the most recent successful match in {@link #detectCriminal(List)}. */
    private static String criminal = "";

    // The six fields below are declared but neither read nor written within this class.
    private static String victim = "";
    private static String location = "";
    private static String police = "";
    private static String court = "";
    private static String possession = "";
    private static String prison = "";

    /**
     * Runs every detector in this class over {@code tokens}, in a fixed order:
     * criminal, victim, suicide, location, police, court, possession, prison.
     *
     * @param tokens token sequence handed to each pattern's matcher
     */
    public static void printAll(List<CoreLabel> tokens) {
        detectCriminal(tokens);
        printVictim(tokens);
        printSuicide(tokens);
        printLocation(tokens);
        printPolice(tokens);
        printCourt(tokens);
        printPossession(tokens);
        printPrison(tokens);
    }

    /** Crime verb, up to four tokens, "by", up to two tokens, then one to four PERSON tokens. */
    protected static TokenSequencePattern criminalPattern = TokenSequencePattern.compile(
            "(/killed|murdered|stabbed|raped/) []{0,4} /by/ []{0,2} ([ner: PERSON]{1,4})");

    /** One to four PERSON tokens, up to four tokens, then a crime verb. */
    protected static TokenSequencePattern criminalActivePattern = TokenSequencePattern.compile(
            "([ner: PERSON]{1,4}) []{0,4} (/killed|murdered|stabbed|raped/)");

    /** One IN-tagged token, up to four tokens, then an "-ing" crime verb. */
    protected static TokenSequencePattern criminalIngPattern = TokenSequencePattern.compile(
            "[{tag:IN}]{1,1} []{0,4} (/killing|murdering|stabbing|raping/)");

    /**
     * Tries the three criminal patterns in turn and prints what they capture.
     *
     * <p>The "verb ... by PERSON" pattern and the "PERSON ... verb" pattern
     * both store their captures in {@code crimeType} / {@code criminal}; when
     * a pattern matches more than once, the last match is the one that stays
     * stored. The "-ing" pattern only prints the verb and stores nothing.
     * Each stage is skipped as soon as both static fields are non-empty.
     *
     * @param tokens token sequence handed to each pattern's matcher
     */
    public static void detectCriminal(List<CoreLabel> tokens) {
        if (criminalAlreadyKnown()) {
            return;
        }

        // "<verb> ... by <PERSON>": verb is group 1, person is the last group.
        TokenSequenceMatcher passive = criminalPattern.getMatcher(tokens);
        while (passive.find()) {
            recordCriminal(passive, 1, passive.groupCount());
        }
        if (criminalAlreadyKnown()) {
            return;
        }

        // "<PERSON> ... <verb>": person is group 1, verb is the last group.
        TokenSequenceMatcher active = criminalActivePattern.getMatcher(tokens);
        while (active.find()) {
            recordCriminal(active, active.groupCount(), 1);
        }
        if (criminalAlreadyKnown()) {
            return;
        }

        // "-ing" form: report the verb only; the static fields are left untouched.
        TokenSequenceMatcher gerund = criminalIngPattern.getMatcher(tokens);
        while (gerund.find()) {
            System.out.println(" crime type: " + gerund.group(1));
        }
    }

    /** Up to three each of NNS, NN and NNP tokens, one or two VBD tokens, then a crime verb. */
    protected static TokenSequencePattern victimPattern = TokenSequencePattern.compile(
            "([{tag:NNS}]{0,3} [{tag:NN}]{0,3} [{tag:NNP}]{0,3}) [{tag:VBD}]{1,2} (/killed|murdered|stabbed|raped/)");

    /**
     * Prints, for every match of {@link #victimPattern}, the last group as the
     * crime type followed by group 1 as the victim.
     *
     * @param tokens token sequence handed to the pattern's matcher
     */
    public static void printVictim(List<CoreLabel> tokens) {
        TokenSequenceMatcher hit = victimPattern.getMatcher(tokens);
        while (hit.find()) {
            String verb = hit.group(hit.groupCount());
            System.out.println(" crime type: " + verb);
            String who = hit.group(1);
            System.out.println(" victim: " + who);
        }
    }

    /** Up to three each of NNS, NN and NNP tokens followed by "committed suicide". */
    protected static TokenSequencePattern suicidePattern = TokenSequencePattern.compile(
            "([{tag:NNS}]{0,3} [{tag:NN}]{0,3} [{tag:NNP}]{0,3}) /committed/ /suicide/");

    /**
     * Prints a fixed "suicide type" line and then group 1 as the victim for
     * every match of {@link #suicidePattern}.
     *
     * @param tokens token sequence handed to the pattern's matcher
     */
    public static void printSuicide(List<CoreLabel> tokens) {
        TokenSequenceMatcher hit = suicidePattern.getMatcher(tokens);
        while (hit.find()) {
            System.out.println(" suicide type");
            System.out.println(" victim: " + hit.group(1));
        }
    }

    /** Arrest phrase, up to 15 tokens, one or two IN tokens, up to three tokens, then a LOCATION token. */
    protected static TokenSequencePattern caughtPattern = TokenSequencePattern.compile(
            "(/arrested|apprehended|nabbed|seized|remanded|raided/ | /took/ /into/ /custody/ | /taken/ /into/ /custody/ ) []{0,15} [{tag:IN}]{1,2} []{0,3} ([ner: LOCATION])");

    /**
     * Prints the last group of every {@link #caughtPattern} match as the
     * caught location.
     *
     * @param tokens token sequence handed to the pattern's matcher
     */
    public static void printLocation(List<CoreLabel> tokens) {
        printLastGroups(caughtPattern, tokens, " caught location: ");
    }

    /** Arrest phrase, up to eight tokens, "by", optional "the", one to three NNP tokens, then "police"/"Police". */
    protected static TokenSequencePattern policePattern = TokenSequencePattern.compile(
            "(/arrested|apprehended|nabbed|seized|remanded|raided/ | /taken/ /into/ /custody/ ) []{0,8} /by/ [/the/]{0,1} ([{tag:NNP}]{1,3}) /police|Police/");

    /** One to three NNP tokens, "police"/"Police", up to four tokens, then an arrest phrase. */
    protected static TokenSequencePattern policeActivePattern = TokenSequencePattern.compile(
            "([{tag:NNP}]{1,3}) /police|Police/ []{0,4} (/arrested|apprehended|nabbed|seized|remanded|raided/ | /took/ /into/ /custody/ )");

    /** One to three NNP tokens directly followed by "police"/"Police". */
    protected static TokenSequencePattern policeAllPattern = TokenSequencePattern.compile(
            "([{tag:NNP}]{1,3}) /police|Police/");

    /**
     * Runs the three police patterns one after another and prints their
     * captures under the labels {@code police1}, {@code police2} and
     * {@code police3} respectively.
     *
     * @param tokens token sequence handed to each pattern's matcher
     */
    public static void printPolice(List<CoreLabel> tokens) {
        printLastGroups(policePattern, tokens, " police1: ");
        printFirstGroups(policeActivePattern, tokens, " police2: ");
        printFirstGroups(policeAllPattern, tokens, " police3: ");
    }

    /**
     * One to three NNP tokens, a token matching {@code Magistrate?}, up to two
     * tokens, then "Court".
     * NOTE(review): the '?' sits inside the token regex, so it makes only the
     * final 'e' optional ("Magistrat"/"Magistrate") rather than the whole
     * token - confirm this is intended.
     */
    protected static TokenSequencePattern courtPattern = TokenSequencePattern.compile(
            "([{tag:NNP}]{1,3}) /Magistrate?/ []{0,2} /Court/");

    /**
     * Prints group 1 of every {@link #courtPattern} match as the court.
     *
     * @param tokens token sequence handed to the pattern's matcher
     */
    public static void printCourt(List<CoreLabel> tokens) {
        printFirstGroups(courtPattern, tokens, " court: ");
    }

    /** Arrest phrase, up to six tokens, "with", up to two CD tokens, then up to three NN and up to three NNS tokens. */
    protected static TokenSequencePattern crimePossessionPattern = TokenSequencePattern.compile(
            "(/arrested|apprehended|nabbed|seized|raided/ | /taken/ /into/ /custody/ ) []{0,6} /with/ [{tag:CD}]{0,2} ([{tag:NN}]{0,3} [{tag:NNS}]{0,3})");

    /** Same as {@link #crimePossessionPattern} but keyed on "for|in possession of" instead of "with". */
    protected static TokenSequencePattern crimePossessionPattern2 = TokenSequencePattern.compile(
            "(/arrested|apprehended|nabbed|seized|raided/ | /taken/ /into/ /custody/ ) []{0,6} /for|in/ /possession/ /of/ [{tag:CD}]{0,2} ([{tag:NN}]{0,3} [{tag:NNS}]{0,3})");

    /** Optional CD tokens, optional JJ tokens plus one to three NNS tokens, then "seized|captured ... in|from ... possession". */
    protected static TokenSequencePattern crimePossessionPattern3 = TokenSequencePattern.compile(
            "[{tag:CD}]{0,2} []{0,4} ([{tag:JJ}]{0,2} [{tag:NNS}]{1,3}) []{0,4} /seized|captured/ []{0,2} /in|from/ []{0,3} /possession/");

    /**
     * Runs the three possession patterns in declaration order and prints the
     * last group of every match under the same {@code possession} label.
     *
     * @param tokens token sequence handed to each pattern's matcher
     */
    public static void printPossession(List<CoreLabel> tokens) {
        printLastGroups(crimePossessionPattern, tokens, " possession: ");
        printLastGroups(crimePossessionPattern2, tokens, " possession: ");
        printLastGroups(crimePossessionPattern3, tokens, " possession: ");
    }

    /** "escaped from", a LOCATION token, then "prison". */
    protected static TokenSequencePattern prisonPattern = TokenSequencePattern.compile(
            "/escaped/ /from/ ([ner: LOCATION]) /prison/");

    /**
     * Prints the last group of every {@link #prisonPattern} match as the prison.
     *
     * @param tokens token sequence handed to the pattern's matcher
     */
    public static void printPrison(List<CoreLabel> tokens) {
        printLastGroups(prisonPattern, tokens, " prison: ");
    }

    /** Returns true once both {@code crimeType} and {@code criminal} hold non-empty text. */
    private static boolean criminalAlreadyKnown() {
        return !crimeType.isEmpty() && !criminal.isEmpty();
    }

    /**
     * Stores and prints the crime type and the criminal from the current match.
     * The crime type is stored and printed before the criminal is read.
     */
    private static void recordCriminal(TokenSequenceMatcher match, int typeGroup, int criminalGroup) {
        crimeType = match.group(typeGroup);
        System.out.println(" crime type: " + crimeType);
        criminal = match.group(criminalGroup);
        System.out.println(" criminal: " + criminal);
    }

    /** Prints {@code label} followed by group 1 for every match of {@code pattern} in {@code tokens}. */
    private static void printFirstGroups(TokenSequencePattern pattern, List<CoreLabel> tokens, String label) {
        TokenSequenceMatcher hit = pattern.getMatcher(tokens);
        while (hit.find()) {
            System.out.println(label + hit.group(1));
        }
    }

    /** Prints {@code label} followed by the last capture group for every match of {@code pattern} in {@code tokens}. */
    private static void printLastGroups(TokenSequencePattern pattern, List<CoreLabel> tokens, String label) {
        TokenSequenceMatcher hit = pattern.getMatcher(tokens);
        while (hit.find()) {
            System.out.println(label + hit.group(hit.groupCount()));
        }
    }
}