//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.misc; import java.util.Arrays; import java.util.Collection; import java.util.List; import org.apache.uima.jcas.JCas; import com.google.common.collect.ImmutableSet; import uk.gov.dstl.baleen.annotators.misc.helpers.AbstractRootWordAnnotator; import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction; import uk.gov.dstl.baleen.types.language.Sentence; import uk.gov.dstl.baleen.types.language.WordToken; import uk.gov.dstl.baleen.types.military.MilitaryPlatform; /** * This class attempts to identify generically described military platforms * * Even though this class extends {@link AbstractRootWordAnnotator}, it overrides * the sentence processing logic to allow for phrases with up to two words (e.g. aircraft carrier) * * @baleen.javadoc */ public class GenericMilitaryPlatform extends AbstractRootWordAnnotator<MilitaryPlatform> { //Define list of words, which can be either one or two words //Two word phrases should not contain words from one word phrases protected static final List<String> GROUND = Arrays.asList("tank", "armoured vehicle", "armoured vehicle", "humvee", "military vehicle", "tactical vehicle"); protected static final List<String> NAVAL = Arrays.asList("aircraft carrier", "assault ship", "frigate", "destroyer", "submarine", "minesweeper", "warship"); protected static final List<String> AIR = Arrays.asList("attack aircraft", "attack helicopter", "drone", "fighter jet", "fighter plane", "uav", "warplane"); @Override protected void processSentence(JCas jCas, Collection<WordToken> wordTokens){ WordToken wtPrev = null; for(WordToken wt : wordTokens){ String word = wt.getCoveredText().toLowerCase(); String platformType = isEntity(word); if(platformType != null){ createMilitaryPlatform(jCas, wt.getBegin(), wt.getEnd(), platformType); }else if(wtPrev != null){ String words = wtPrev.getCoveredText().toLowerCase() + " " + wt.getCoveredText().toLowerCase(); platformType = isEntity(words); if(platformType != null){ createMilitaryPlatform(jCas, wtPrev.getBegin(), wt.getEnd(), platformType); } } wtPrev = wt; } } @Override protected String isEntity(String word) { String ret = null; String singular = word; if(word.endsWith("s")){ singular = singular.substring(0, singular.length() - 1); } if(GROUND.contains(singular)){ ret = "GROUND"; }else if(NAVAL.contains(singular)){ ret = "NAVAL"; }else if(AIR.contains(singular)){ ret = "AIR"; } return ret; } @Override protected boolean isDescriptiveWord(String word) { // No descriptive words for military platforms return false; } private void createMilitaryPlatform(JCas jCas, Integer begin, Integer end, String type){ MilitaryPlatform mp = createEntity(jCas); mp.setBegin(begin); mp.setEnd(end); mp.setSubType(type); addToJCasIndex(mp); } @Override protected MilitaryPlatform createEntity(JCas jCas) { return new MilitaryPlatform(jCas); } @Override public AnalysisEngineAction getAction() { return new AnalysisEngineAction(ImmutableSet.of(Sentence.class, WordToken.class), ImmutableSet.of(MilitaryPlatform.class)); } }