/** * */ package outputter.search; import java.util.ArrayList; import java.util.Hashtable; import org.apache.log4j.Logger; import org.jdom.Element; import outputter.Utilities; import outputter.data.CompositeEntity; import outputter.data.EntityProposals; import outputter.data.FormalConcept; import outputter.data.REntity; import outputter.data.SimpleEntity; import outputter.knowledge.Dictionary; /** * @author Hong Cui * the strategy for handling cases such as 'otic canal' which matches 'otic sensoary canal'. * turn 'otic canal' to 'otic .* canal' * */ public class EntitySearcher4 extends EntitySearcher { private static final Logger LOGGER = Logger.getLogger(EntitySearcher4.class); private static Hashtable<String, ArrayList<EntityProposals>> cache = new Hashtable<String, ArrayList<EntityProposals>>(); private static ArrayList<String> nomatchcache = new ArrayList<String>(); /** * */ public EntitySearcher4() { } @Override public ArrayList<EntityProposals> searchEntity(Element root, String structid, String entityphrase, String elocatorphrase, String originalentityphrase, String prep) { LOGGER.debug("EntitySearcher4: search '"+entityphrase+"[orig="+originalentityphrase+"]'"); //search cache if(EntitySearcher4.nomatchcache.contains(entityphrase+"+"+elocatorphrase)) return null; if(EntitySearcher4.cache.get(entityphrase+"+"+elocatorphrase)!=null) return EntitySearcher4.cache.get(entityphrase+"+"+elocatorphrase); //still not find a match, if entityphrase is at least two words long, add wildcard * in spaces //search for locator first String[] entitylocators = null; if(elocatorphrase.length()>0) entitylocators = elocatorphrase.split("\\s*,\\s*"); ArrayList<SimpleEntity> entityls = new ArrayList<SimpleEntity>(); if(entitylocators!=null) { //TODO: is elocator a reg exp? ArrayList<FormalConcept> result = new TermSearcher().searchTerm(elocatorphrase, "entity"); //TODO: should it call EntitySearcherOriginal? decided not to. if(result!=null){ LOGGER.debug("search for locator '"+elocatorphrase+"' found match: "); for(FormalConcept fc: result){ entityls.add((SimpleEntity)fc); LOGGER.debug(".."+fc.toString()); } }else{ //entity locator not matched LOGGER.debug("search for locator '"+elocatorphrase+"' found no match"); } } //search entityphrase using wildcard //String myentityphrase = entityphrase.replaceFirst("^\\(\\?:", "").replaceFirst("\\)$", "").trim(); String aentityphrase = entityphrase; if(entityphrase.contains(" ")) aentityphrase = entityphrase.replaceAll("\\s+", " .*? "); //ArrayList<FormalConcept> sentities = TermSearcher.regexpSearchTerm(entityphrase, "entity"); //candidate matches for the same entity ArrayList<FormalConcept> sentities = new TermSearcher().searchTerm(aentityphrase, "entity"); //candidate matches for the same entity if(sentities!=null){ LOGGER.debug("search for entity '"+aentityphrase+"' found match, forming proposals..."); boolean found = false; EntityProposals ep = new EntityProposals(); ep.setPhrase(originalentityphrase); for(FormalConcept sentityfc: sentities){ SimpleEntity sentity = (SimpleEntity)sentityfc; sentity.setConfidenceScore(1f/sentities.size()); if(sentity!=null){//if entity matches if(elocatorphrase.length()>0){ for(FormalConcept fc: entityls){ SimpleEntity entityl = (SimpleEntity)fc; entityl.setConfidenceScore(1f/entityls.size()); //relation & entity locator CompositeEntity centity = new CompositeEntity(); centity.addEntity(sentity); centity.addParentEntity(new REntity(Dictionary.partof, entityl)); centity.setString(originalentityphrase); ep.add(centity); //add the other LOGGER.debug(".."+centity.toString()); found = true; } }else{ ep.add(sentity); //no locator LOGGER.debug(".."+sentity.toString()); found = true; } } } if(found==true){ ArrayList<EntityProposals> entities = new ArrayList<EntityProposals>(); Utilities.addEntityProposals(entities, ep); LOGGER.debug("EntitySearcher4 returns:"); for(EntityProposals aep: entities){ LOGGER.debug("..EntityProposals: "+aep.toString()); } //caching if(entities==null) EntitySearcher4.nomatchcache.add(entityphrase+"+"+elocatorphrase); else EntitySearcher4.cache.put(entityphrase+"+"+elocatorphrase, entities); return entities; } }else{ LOGGER.debug("...search for entity '"+entityphrase+"' found no match"); EntitySearcher4.nomatchcache.add(entityphrase+"+"+elocatorphrase); } LOGGER.debug("EntitySearcher4 calls EntitySearcher5"); return new EntitySearcher5().searchEntity(root, structid, entityphrase, elocatorphrase, originalentityphrase, prep); } /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub } }