package eu.fbk.knowledgestore.populator.naf; import; import; import; import eu.fbk.knowledgestore.populator.naf.model.*; import eu.fbk.knowledgestore.vocabulary.KS; import eu.fbk.knowledgestore.vocabulary.NIF; import eu.fbk.knowledgestore.vocabulary.NWR; import eu.fbk.rdfpro.util.IO; import org.openrdf.model.URI; import org.openrdf.model.impl.URIImpl; import org.openrdf.model.impl.ValueFactoryImpl; import org.openrdf.model.vocabulary.DCTERMS; import org.openrdf.model.vocabulary.RDF; import org.slf4j.Logger; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import*; import; import java.nio.file.Path; import java.nio.file.Paths; import java.util.*; public class processNAF { public static void main(String[] args) throws JAXBException, IOException { // args[0] is the path, [args[1] is the disabled_Items] String disabled_Items = "", path = ""; if (args.length > 0) { path = args[0]; if (args.length > 1) disabled_Items = args[1]; } else { System.err .println("eu.fbk.knowledgestore.populator.naf.processNAF path disabled_items \ndisabled_items = [Entities|Mentions|Resources] "); System.exit(-1); } processNAFVariables vars = new processNAFVariables(); analyzePathAndRunSystem(path, disabled_Items, vars); } public static KSPresentation init(String fPath, Writer inout, String disabled_Items, boolean store_partical_info) throws JAXBException, IOException { processNAFVariables vars = new processNAFVariables(); vars.storePartialInforInCaseOfError = store_partical_info; vars.out = inout; statistics stat = readFile(fPath, disabled_Items, vars); KSPresentation returned = new KSPresentation(); returned.setNaf_file_path(fPath); returned.setNews(vars.rawText); returned.setMentions(vars.mentionListHash); returned.setNaf(vars.nafFile2); returned.setNewsResource(vars.newsFile2); returned.setStats(stat); return returned; } private static void analyzePathAndRunSystem(String path, String disabled_Items,processNAFVariables vars) throws 
JAXBException, IOException { vars.filePath = new File(path); if (vars.filePath.exists() && vars.filePath.isDirectory()) { // create report file in the same directory of running the system vars.out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File( vars.filePath.getPath(), "report.txt")), "utf-8")); File[] listOfFiles = vars.filePath.listFiles(); for (int i = 0; i < listOfFiles.length; i++) { if (listOfFiles[i].isFile() && listOfFiles[i].getName().endsWith(".naf")) { System.err.println(i + "=" + listOfFiles[i].getName()); vars.out.append("\n" + i + "=" + listOfFiles[i].getName() + "\n"); readFile(listOfFiles[i].getPath(), disabled_Items,vars); } vars.out.flush(); System.gc(); Runtime.getRuntime().gc(); } } else if (vars.filePath.exists() && vars.filePath.isFile()) { // create report file in the same directory of running the system vars.out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File( vars.filePath.getPath() + ".report.txt")), "utf-8")); vars.out.append(vars.filePath.getPath() + "\n"); readFile(vars.filePath.getPath(), disabled_Items,vars); } vars.out.flush(); vars.out.close(); } public static statistics readFile(String filepath, String disabled_Items, processNAFVariables vars) throws JAXBException, IOException { vars.storePartialInforInCaseOfError = true; vars.filePath = new File(filepath); logDebug("Start working with (" + vars.filePath.getName() + ")", vars); String disabledItems = "";// Default empty so generate all layers of data if (disabled_Items != null && (disabled_Items.matches("(?i)Entity") || disabled_Items.contains("(?i)Mention") || disabled_Items.contains("(?i)Resource"))) { disabledItems = disabled_Items; logDebug("Disable layer: " + disabledItems,vars); } readNAFFile(vars.filePath, vars); NafHeader header = vars.doc.getNafHeader(); getNAFHEADERMentions(header,vars); vars.rawText = vars.doc.getRaw().getvalue(); vars.globalText = vars.doc.getText(); vars.globalTerms = vars.doc.getTerms(); 
getEntitiesMentions(vars.doc.getEntities(), disabledItems,vars); getCoreferencesMentions(vars.doc.getCoreferences(),vars); getTimeExpressionsMentions(vars.doc.getTimeExpressions(),vars); // getFactualityMentions(vars.doc.getFactualitylayer(),vars); getFactualityMentionsV3(vars.doc.getFactualities(),vars); getSRLMentions(vars); getCLinksMentions(vars.doc.getCausalRelations(),vars); getTLinksMentions(vars.doc.getTemporalRelations(),vars); // logDebug("ROL1 before dumpStack()") ; Thread.currentThread().dumpStack(); fixMentions(vars); logDebug("End of NAF populating.",vars); statistics st = new statistics(); st.setObjectMention((vars.corefMention2+vars.entityMen2)); st.setPER(vars.PER); st.setORG(vars.ORG); st.setLOC(vars.LOC); st.setFin(vars.fin); st.setMix(vars.mix); st.setPRO(vars.PRO); st.setNo_mapping(vars.no_mapping); st.setTimeMention(vars.timeMention2); st.setEventMention((vars.factualityMentions2 + vars.srlMention2)); st.setParticipationMention(vars.rolewithEntity2); st.setEntity(vars.entityMen); st.setCoref(vars.corefMention); st.setCorefMentionEvent(vars.corefMentionEvent); st.setCorefMentionNotEvent(vars.corefMentionNotEvent); st.setFactuality(vars.factualityMentions); st.setRole(vars.roleMentions); st.setRolewithEntity(vars.rolewithEntity); st.setRolewithoutEntity(vars.rolewithoutEntity); st.setSrl(vars.srlMention); st.setTimex(vars.timeMention); st.setClinkMention(vars.clinkMentions); st.setClinkMentionDiscarded(vars.clinkMentionsDiscarded); st.setTlinkMention(vars.tlinkMentions); st.setTlinkMentionsEnriched(vars.tlinkMentionsEnriched); st.setTlinkMentionDiscarded(vars.tlinkMentionsDiscarded); logDebug(st.getStats(),vars); return st; } private static void getEntitiesMentions(Entities obj, String disabledItems,processNAFVariables vars) { if (!checkHeaderTextTerms(vars)) { logError("Error: populating stopped",vars); } else { logDebug("Start mapping the Entities mentions:",vars); } for (Entity entObj : ((Entities) obj).getEntity()) { String deg = ""; int 
referencesElements = 0; String charS = null; /* process <references> or <externalReferences> */ for (Object generalEntObj : entObj.getReferencesOrExternalReferences()) { if (generalEntObj instanceof References) { /* process <references> */ referencesElements++; if (((References) generalEntObj).getSpan().size() < 1) { logDebug("Every entity must contain a 'span' element inside 'references'", vars); } if (((References) generalEntObj).getSpan().size() > 1) { logDebug("xpath(///NAF/entities/entity/references/span/), spanSize(" + ((References) generalEntObj).getSpan().size() + ") Every entity must contain a unique 'span' element inside 'references'",vars); } for (Span spansObj : ((References) generalEntObj).getSpan()) { boolean addMentionFlag = true; if (spansObj.getTarget().size() < 1) { addMentionFlag = false; logDebug("Every span in an entity must contain at least one target inside", vars); continue; } Record m = Record.create(); deg += "RDF.TYPE:OBJECT_MENTION,ENTITY_MENTION,MENTION"; m.add(RDF.TYPE, NWR.OBJECT_MENTION, NWR.ENTITY_MENTION, KS.MENTION); deg = "MENTION_OF:" + vars.news_file_id.stringValue() + "|" + deg; m.add(KS.MENTION_OF, vars.news_file_id); if (((References) generalEntObj).getSpan().size() > 1) { m.add(NWR.LOCAL_COREF_ID, entObj.getId()); deg += "|LOCAL_COREF_ID:" + entObj.getId(); } generateTheMIdAndSetID(spansObj, m,vars); charS = m.getUnique(NIF.BEGIN_INDEX, Integer.class) + "," + m.getUnique(NIF.END_INDEX, Integer.class); deg = "MentionId:" + m.getID() + "|" + deg; /* don't use predefined types from guidelines but keep the mention with type provided in the NAF */ boolean keepEntityTypeProvidedByNaf = true,dbpedia=true; String type3charLC = ""; if(!dbpedia){ if(entObj.getType()!=null&&entObj.getType()!=""&&!entObj.getType().equalsIgnoreCase("misc")){ type3charLC = entObj.getType().substring(0, 3).toLowerCase(); }else{ type3charLC = "misc"; entObj.setType("misc"); } if (keepEntityTypeProvidedByNaf) { URI dynamicTypeUri = 
ValueFactoryImpl.getInstance().createURI(NWR.NAMESPACE, "entity_type_" + entObj.getType().toLowerCase()); m.add(NWR.ENTITY_TYPE, dynamicTypeUri); deg += "|ENTITY_TYPE:" + dynamicTypeUri; logDebug("ROL1: <entity> added new mention for id " + entObj.getId() + ", charSpan |" + getCharSpanFromSpan(spansObj,vars) + "|, type " + dynamicTypeUri,vars); } else { if (vars.entityTypeMapper.containsKey(type3charLC) && vars.entityTypeMapper.get(type3charLC) != null) { m.add(NWR.ENTITY_TYPE, vars.entityTypeMapper.get(type3charLC)); deg += "|ENTITY_TYPE:" + vars.entityTypeMapper.get(type3charLC); logDebug("ROL1: <entity> STRANGE added new mention for id " + entObj.getId() + ", charSpan |" + getCharSpanFromSpan(spansObj,vars) + "|, type " + vars.entityTypeMapper.get(type3charLC),vars); } else { addMentionFlag = false; logDebug("xpath(//NAF/entities/entity/@type),type(" + entObj.getType() + "), id(" + entObj.getId() + ") NO mapping for it", vars); vars.no_mapping++; } } }//dbpedia finish else{ if(entObj.getType()==null||entObj.getType().equals("")){ type3charLC = "misc"; entObj.setType("misc"); }else{ if(entObj.getType().toLowerCase().contains("per")||entObj.getType().toLowerCase().contains("dbpedia:person")){ entObj.setType("person"); type3charLC = entObj.getType().substring(0, 3).toLowerCase(); }else if(entObj.getType().toLowerCase().contains("org")||entObj.getType().toLowerCase().contains("dbpedia:organisation")){ entObj.setType("organization"); type3charLC = entObj.getType().substring(0, 3).toLowerCase(); } else if(entObj.getType().toLowerCase().contains("loc")||entObj.getType().toLowerCase().contains("DBpedia:Place")){ entObj.setType("location"); type3charLC = entObj.getType().substring(0, 3).toLowerCase(); }else { entObj.setType("misc"); type3charLC = "misc"; } } URI dynamicTypeUri = ValueFactoryImpl.getInstance().createURI(NWR.NAMESPACE, "entity_type_" + entObj.getType().toLowerCase()); m.add(NWR.ENTITY_TYPE, dynamicTypeUri); deg += "|ENTITY_TYPE:" + dynamicTypeUri; 
logDebug("ROL1: <entity> added new mention for id " + entObj.getId() + ", charSpan |" + getCharSpanFromSpan(spansObj,vars) + "|, type " + dynamicTypeUri,vars); } if (addMentionFlag) { if (addOrMergeAMention(m,vars)==1) { String charS2 = m.getUnique(NIF.BEGIN_INDEX, Integer.class) + "," + m.getUnique(NIF.END_INDEX, Integer.class); vars.entityMentions.put(charS2, m); if (type3charLC.equalsIgnoreCase("PER")) vars.PER++; if (type3charLC.equalsIgnoreCase("LOC")) vars.LOC++; if (type3charLC.equalsIgnoreCase("ORG")) vars.ORG++; if (type3charLC.equalsIgnoreCase("PRO")) vars.PRO++; if (type3charLC.equalsIgnoreCase("fin")) vars.fin++; if (type3charLC.equalsIgnoreCase("mix")||type3charLC.equalsIgnoreCase("misc")) vars.mix++; vars.entityMen2++; vars.entityMen++; } } } } else if (generalEntObj instanceof ExternalReferences) { /* process <externalReferences> */ // do it only if management of (KS) "Entity" layer is not disabled // if (! disabledItems.matches("(?i)Entity")) { boolean firstTimeFlag = true; String chosenReferenceValue = null; //modeactive is two filters should be applied to the externalRefs. //1. Language For each entity, group externalRef(s) by language (using reftype) and use the first nonempty set, using this sorting: en, es, it, nl. //2. Confidence Once the language is chosen, just take the externalRef having the higher confidence (and, of course, the language chosen). 
boolean modeactive=true; // if POCUS "reranker value" get it as externalreference otherwise do the language/highconfidence check for (ExternalRef exRObj : ((ExternalReferences) generalEntObj) .getExternalRef()) { if(exRObj.getSource().equalsIgnoreCase("POCUS")){ if (referencesElements < 1) { logDebug("Every entity must contain a 'references' element:not possible to add ExternalRef to null.", vars); continue; } // String resourceValue = exRObj.getResource(); String referenceValue = exRObj.getReference(); chosenReferenceValue = new String(referenceValue); modeactive=false; } } if(modeactive){ LinkedList<ExternalRef> exrEn = new LinkedList<ExternalRef>(); LinkedList<ExternalRef> exrEs = new LinkedList<ExternalRef>(); LinkedList<ExternalRef> exrIt = new LinkedList<ExternalRef>(); LinkedList<ExternalRef> exrNl = new LinkedList<ExternalRef>(); ExternalReferences obs = ((ExternalReferences) generalEntObj) ; for (ExternalRef exRObj : obs.getExternalRef()) { if(exRObj!=null) getAllLayersOfExternalReferences( modeactive, exrEn, exRObj, exrEs, exrIt, exrNl,vars); } if(exrEn.size()>0) chosenReferenceValue = new String(getHighConfidenceReferenceValue(exrEn)); else if(exrEs.size()>0) chosenReferenceValue = new String(getHighConfidenceReferenceValue(exrEs)); else if(exrIt.size()>0) chosenReferenceValue = new String(getHighConfidenceReferenceValue(exrIt)); else chosenReferenceValue = new String(getHighConfidenceReferenceValue(exrNl)); }else if(false){ //don't use it for now, it gets the last externalRef as the chosen one for (ExternalRef exRObj : ((ExternalReferences) generalEntObj) .getExternalRef()) { if (referencesElements < 1) { logDebug("Every entity must contain a 'references' element:not possible to add ExternalRef to null.", vars); continue; } String resourceValue = exRObj.getResource(); String referenceValue = exRObj.getReference(); // choose as referenceValue the one provided by 'vua-type-reranker' if present, otherwise take the first value if (firstTimeFlag) { 
chosenReferenceValue = new String(referenceValue); firstTimeFlag = false; } else if (resourceValue.matches("(?i).*type-reranker.*")) { chosenReferenceValue = new String(referenceValue); } } } URIImpl chosenReferenceURI = new URIImpl(chosenReferenceValue); if (charS != null&&vars.mentionListHash.get(charS)!=null&&vars.mentionListHash.get(charS).get(KS.REFERS_TO).size()==0){ vars.mentionListHash.get(charS).add(KS.REFERS_TO, chosenReferenceURI); //TODO mohammed if there already decided an externalRef don't add new one. deg += "|REFERS_TO:" + chosenReferenceValue; vars.entityMentions.get(charS).add(KS.REFERS_TO, chosenReferenceURI); }else{ if (charS != null&&vars.mentionListHash.get(charS)!=null) deg += "|REFERS_TO:" + vars.mentionListHash.get(charS).get(KS.REFERS_TO); } } } // end of if (generalEntObj instanceof ExternalReferences) { } // end of for (Object generalEntObj : entObj.getReferencesOrExternalReferences()) logDebug(deg,vars); if (referencesElements < 1) { logDebug("Every entity must contain a 'references' element", vars); } } } private static void getAllLayersOfExternalReferences(boolean modeactive, LinkedList<ExternalRef> exrEn, ExternalRef exRObj, LinkedList<ExternalRef> exrEs, LinkedList<ExternalRef> exrIt, LinkedList<ExternalRef> exrNl,processNAFVariables vars) { if(modeactive){ if(exRObj.getReftype()!=null){ switch(exRObj.getReftype()){ case "en": exrEn.addLast(exRObj); break; case "es": exrEs.addLast(exRObj); break; case "it": exrIt.addLast(exRObj); break; case "nl": exrNl.addLast(exRObj); break; } }else if(exRObj.getReference().contains("dbpedia")){ if(exRObj.getReference().contains("://")){ exrEn.addLast(exRObj); }else if(exRObj.getReference().contains("://")){ exrEs.addLast(exRObj); }else if(exRObj.getReference().contains("://")){ exrIt.addLast(exRObj); }else { exrNl.addLast(exRObj); } }else{ logDebug("Every entity must contain a 'references' element with type or DBpedia reference: not possible to add ExternalRef to null.", vars); } } 
if(exRObj!=null&&exRObj.getExternalRef()!=null && exRObj.getExternalRef().size()>0){ for(ExternalRef ff :exRObj.getExternalRef()){ getAllLayersOfExternalReferences( modeactive, exrEn, ff, exrEs, exrIt, exrNl,vars); } }else return; } private static String getHighConfidenceReferenceValue( LinkedList<ExternalRef> exrl) { ExternalRef current=null; Double dt=0.0; for(ExternalRef tmp:exrl){ Double co=Double.parseDouble(tmp.getConfidence()); if(current==null){ current = tmp; dt=co; }else{ if(co>dt){ current = tmp; dt=co; } } } return current.getReference(); } private static void getTimeExpressionsMentions(TimeExpressions obj,processNAFVariables vars) { if (!checkHeaderTextTerms(vars)) { logError("Error: populating interrupted",vars); } else { logDebug("Start mapping the TimeExpressions mentions:",vars); } for (Timex3 tmxObj : ((TimeExpressions) obj).getTimex3()) { Span tmxSpan = tmxObj.getSpan(); String tmxTypeUC = tmxObj.getType().toUpperCase(); boolean keepTimeTypeProvidedByNaf = true; // patch for timex3 without <span> if ((tmxSpan == null) || (tmxSpan.getTarget().size() < 1)) { logDebug("skipping timex3 without span, id is " + tmxObj.getId(), vars); continue; } String deg = ""; Record m = Record.create(); m.add(RDF.TYPE, NWR.TIME_MENTION, NWR.TIME_OR_EVENT_MENTION, NWR.ENTITY_MENTION, KS.MENTION); deg += "|TYPE:TIME_MENTION,TIME_OR_EVENT_MENTION,ENTITY_MENTION,MENTION"; m.add(KS.MENTION_OF, vars.news_file_id); LinkedList<Wf> wordsL = fromSpanGetAllMentionsTmx(((Span) tmxSpan).getTarget(), vars); generateMIDAndSetIdWF(wordsL, m,vars); deg = "MentionId:" + m.getID() + deg; if (keepTimeTypeProvidedByNaf) { URI dynamicTypeUri = ValueFactoryImpl.getInstance().createURI(NWR.NAMESPACE, "timex3_" + tmxTypeUC.toLowerCase()); m.add(NWR.TIME_TYPE, dynamicTypeUri); deg += "|TIME_TYPE:" + dynamicTypeUri; logDebug("ROL1: <timex3> added new mention for id " + tmxObj.getId() + ", type " + dynamicTypeUri,vars); } else { if (vars.timex3TypeMapper.containsKey(tmxTypeUC) && 
vars.timex3TypeMapper.get(tmxTypeUC) != null) { m.add(NWR.TIME_TYPE, vars.timex3TypeMapper.get(tmxTypeUC)); deg += "|TIME_TYPE:" + vars.timex3TypeMapper.get(tmxTypeUC); logDebug("ROL1: <timex3> STRANGE added new mention for id " + tmxObj.getId() + ", type " + vars.timex3TypeMapper.get(tmxTypeUC),vars); } else { logDebug("xpath(//NAF/timeExpressions/timex3/@type), type(" + tmxTypeUC + "), No mapping.", vars); } } if (false&&tmxObj.getBeginPoint() != null) { //TODO m.add(NWR.BEGIN_POINT, tmxObj.getBeginPoint()); deg += "|BEGIN_POINT:" + tmxObj.getBeginPoint(); } if (false&&tmxObj.getEndPoint() != null) { m.add(NWR.END_POINT, tmxObj.getEndPoint()); deg += "|END_POINT:" + tmxObj.getEndPoint(); } if (tmxObj.getQuant() != null) { m.add(NWR.QUANT, tmxObj.getQuant()); deg += "|QUANT:" + tmxObj.getQuant(); } if (tmxObj.getFreq() != null) { m.add(NWR.FREQ, tmxObj.getFreq()); deg += "|FREQ:" + tmxObj.getFreq(); } if (tmxObj.getFunctionInDocument() != null) { m.add(NWR.FUNCTION_IN_DOCUMENT, tmxObj.getFunctionInDocument()); deg += "|FUNCTION_IN_DOCUMENT:" + tmxObj.getFunctionInDocument(); } if (tmxObj.getTemporalFunction() != null) { m.add(NWR.TEMPORAL_FUNCTION, tmxObj.getTemporalFunction()); deg += "|TEMPORAL_FUNCTION:" + tmxObj.getTemporalFunction(); } if (tmxObj.getValue() != null) { m.add(NWR.VALUE, tmxObj.getValue()); deg += "|VALUE:" + tmxObj.getValue(); } if (tmxObj.getValueFromFunction() != null) { m.add(NWR.VALUE_FROM_FUNCTION, tmxObj.getValueFromFunction()); deg += "|VALUE_FROM_FUNCTION:" + tmxObj.getValueFromFunction(); } if (tmxObj.getMod() != null) { m.add(NWR.MOD, tmxObj.getMod()); deg += "|MOD:" + tmxObj.getMod(); } if (tmxObj.getAnchorTimeID() != null) { m.add(NWR.ANCHOR_TIME, tmxObj.getAnchorTimeID()); deg += "|ANCHOR_TIME:" + tmxObj.getAnchorTimeID(); } logDebug(deg,vars); int addedNew = addOrMergeAMention(m,vars); if (addedNew==1){ vars.timeMention2++; vars.timeMention++; } String charS2 = m.getUnique(NIF.BEGIN_INDEX, Integer.class) + "," + 
m.getUnique(NIF.END_INDEX, Integer.class); vars.entityMentions.put(charS2,m); } } private static void getFactualityMentions(Factualitylayer obj,processNAFVariables vars) { if (!checkHeaderTextTerms(vars)) { logError("Error: populating interrupted",vars); } else { logDebug("Start mapping the Factuality mentions:",vars); } for (Factvalue fvObj : ((Factualitylayer) obj).getFactvalue()) { String deg = ""; Record m = Record.create(); m.add(RDF.TYPE, NWR.EVENT_MENTION, NWR.TIME_OR_EVENT_MENTION, NWR.ENTITY_MENTION, KS.MENTION); deg += "|TYPE:EVENT_MENTION,TIME_OR_EVENT_MENTION,ENTITY_MENTION,MENTION"; m.add(KS.MENTION_OF, vars.news_file_id); LinkedList<Target> tarlist = new LinkedList<Target>(); Target tmp = new Target(); tmp.setId(fvObj.getId()); tarlist.addLast(tmp); LinkedList<Wf> wordsL = fromSpanGetAllMentionsTmx(tarlist,vars); generateMIDAndSetIdWF(wordsL, m,vars); deg = "MentionId:" + m.getID() + deg; // fvObj.getPrediction(); //TODO we need to verify the mapping here // m.add(NWR.CERTAINTY, fvObj.getPrediction()); // m.add(NWR.FACTUALITY, fvObj.getPrediction()); if (fvObj.getConfidence() != null) { m.add(NWR.FACTUALITY_CONFIDENCE, fvObj.getConfidence()); deg += "|FACTUALITY_CONFIDENCE:" + fvObj.getConfidence(); } logDebug(deg,vars); int addedNew = addOrMergeAMention(m,vars); if (addedNew==1){ vars.factualityMentions2++; vars.factualityMentions++; } String charS2 = m.getUnique(NIF.BEGIN_INDEX, Integer.class) + "," + m.getUnique(NIF.END_INDEX, Integer.class); vars.entityMentions.put(charS2,m); } } private static void getFactualityMentionsV3(Factualities factualities,processNAFVariables vars) { if (!checkHeaderTextTerms(vars)) { logError("Error: populating interrupted",vars); } else { logDebug("Start mapping the Factualities mentions:",vars); } for (Factuality fvObj : factualities.getFactuality()) { String deg = ""; Record m = Record.create(); m.add(RDF.TYPE, NWR.EVENT_MENTION, NWR.TIME_OR_EVENT_MENTION, NWR.ENTITY_MENTION, KS.MENTION); deg += 
"|TYPE:EVENT_MENTION,TIME_OR_EVENT_MENTION,ENTITY_MENTION,MENTION"; m.add(KS.MENTION_OF, vars.news_file_id); LinkedList<Target> tarlist = new LinkedList<Target>(); for(Target t : fvObj.getSpan().getTarget()){ tarlist.addLast(t); } LinkedList<Wf> wordsL = fromSpanGetAllMentions(tarlist,vars); generateMIDAndSetIdWF(wordsL, m,vars); deg = "MentionId:" + m.getID() + deg; for (FactVal tts : fvObj.getFactVal()) { if(tts.getResource().equalsIgnoreCase("factbank")){ m.add(NWR.FACT_BANK, tts.getValue()); } } logDebug(deg,vars); int addedNew = addOrMergeAMention(m,vars); if (addedNew==1|addedNew==0){ vars.factualityMentions2++; vars.factualityMentions++; } String charS2 = m.getUnique(NIF.BEGIN_INDEX, Integer.class) + "," + m.getUnique(NIF.END_INDEX, Integer.class); vars.entityMentions.put(charS2,m); } } private static void getCLinksMentions(CausalRelations causalRelations,processNAFVariables vars) { if (!checkHeaderTextTerms(vars)) { logError("Error: populating interrupted",vars); } else { logDebug("Start mapping the CLINKS mentions:",vars); } for (Clink fvObj : causalRelations.getClink()) { String deg = ""; Record m = Record.create(); m.add(RDF.TYPE, NWR.CLINK,NWR.RELATION_MENTION, KS.MENTION); deg += "|TYPE:CLINK,RELATION_MENTION,MENTION"; m.add(KS.MENTION_OF, vars.news_file_id); LinkedList<Target> tarlist = new LinkedList<Target>(); List<Target> from = getSpanTermsOfPredicate(fvObj.getFrom(),vars); List<Target> to = getSpanTermsOfPredicate(fvObj.getTo(),vars); tarlist.addAll(from); tarlist.addAll(to); LinkedList<Wf> fromwl = fromSpanGetAllMentions(from,vars); Record mtest1 = Record.create(); generateMIDAndSetIdWF(fromwl, mtest1 ,vars); m.add(NWR.SOURCE, mtest1.getID()); LinkedList<Wf> towl = fromSpanGetAllMentions(to,vars); Record mtest2 = Record.create(); generateMIDAndSetIdWF(towl, mtest2 ,vars); m.add(NWR.TARGET, mtest2.getID()); LinkedList<Wf> wordsL = fromSpanGetAllMentions(tarlist,vars); generateMIDAndSetIdWF(wordsL, m,vars); deg = "MentionId:" + m.getID() + deg; 
// ---- getCLinksMentions, continued: register the CLINK mention and update the counters ----
            logDebug(deg,vars);
            int addedNew = addOrMergeAMention(m,vars);
            if (addedNew==1){
                vars.clinkMentions++;
            }else if (addedNew==-1){
                // This branch used to repeat "addedNew==1" and was therefore unreachable; -1
                // mirrors the discard check in getTLinksMentions.
                vars.clinkMentionsDiscarded++;
            }
        }
    }

    /**
     * Maps the {@code <temporalRelations>} layer to TLINK relation mentions.
     *
     * <p>Endpoints of type "event" are resolved through the SRL predicates, endpoints of type
     * "timex" through the time expressions. A link from or to {@code tmx0} does not create a new
     * mention: its relation type is attached to the mention at the other end instead.
     *
     * @param temporalRelations the temporal-relations layer; {@code null} is skipped
     * @param vars              shared state receiving mentions and counters
     */
    private static void getTLinksMentions(TemporalRelations temporalRelations,processNAFVariables vars) {
        if (!checkHeaderTextTerms(vars)) {
            logError("Error: populating interrupted",vars);
        } else {
            logDebug("Start mapping the TLINKS mentions:",vars);
        }
        if (temporalRelations == null) {
            return;
        }
        if (temporalRelations.getTlink() == null) {
            return;
        }
        for (Tlink fvObj : temporalRelations.getTlink()) {
            String deg = "";
            Record m = Record.create();
            m.add(RDF.TYPE, NWR.TLINK,NWR.RELATION_MENTION, KS.MENTION);
            deg += "|TYPE:TLINK,RELATION_MENTION,MENTION";
            m.add(KS.MENTION_OF, vars.news_file_id);
            LinkedList<Target> tarlist = new LinkedList<Target>();
            List<Target> from = new ArrayList<Target>();
            List<Target> to = new ArrayList<Target>();
            // Constant-first comparisons tolerate a missing type attribute. An unknown predicate
            // id yields null from getSpanTermsOfPredicate(); keep the empty list in that case so
            // addAll() below cannot fail.
            if ("event".equalsIgnoreCase(fvObj.getFromType())) {
                List<Target> resolvedFrom = getSpanTermsOfPredicate(fvObj.getFrom(),vars);
                if (resolvedFrom != null) {
                    from = resolvedFrom;
                } else {
                    logDebug("Tlink: predicate not found for from(" + fvObj.getFrom() + ")", vars);
                }
            }
            if ("event".equalsIgnoreCase(fvObj.getToType())) {
                List<Target> resolvedTo = getSpanTermsOfPredicate(fvObj.getTo(),vars);
                if (resolvedTo != null) {
                    to = resolvedTo;
                } else {
                    logDebug("Tlink: predicate not found for to(" + fvObj.getTo() + ")", vars);
                }
            }
            tarlist.addAll(from);
            tarlist.addAll(to);
            LinkedList<Wf> allEventWF = fromSpanGetAllMentions(tarlist,vars);
            LinkedList<Wf> allWFFrom = fromSpanGetAllMentions(from,vars);
            LinkedList<Wf> allWFTO = fromSpanGetAllMentions(to,vars);
            List<Wf> fromtmx = new ArrayList<Wf>();
            List<Wf> totmx = new ArrayList<Wf>();
            //here if the type is timex, get all its WFs
            if ("timex".equalsIgnoreCase(fvObj.getFromType())) {
                fromtmx=getSpanOfTimex(fvObj.getFrom(),vars);
                allWFFrom.addAll(fromtmx);
                allEventWF.addAll(fromtmx);
            }
            if ("timex".equalsIgnoreCase(fvObj.getToType())) {
                totmx = getSpanOfTimex(fvObj.getTo(),vars);
                allWFTO.addAll(totmx);
                allEventWF.addAll(totmx);
            }
            //reorder the lists
            allEventWF = reorderWFAscending(allEventWF,vars);
            allWFFrom = reorderWFAscending(allWFFrom,vars);
            allWFTO = reorderWFAscending(allWFTO,vars);
            if ("tmx0".equalsIgnoreCase(fvObj.getFrom())) {
                Record mtest2 = Record.create();
generateMIDAndSetIdWF(allWFTO, mtest2 ,vars); int returned=addTlinkRelTypeToMention(NWR.TLINK_FROM_TMX0,vars.tLinkTypeMapper.get(fvObj.getRelType().toUpperCase()),mtest2,vars); if (returned==1) vars.tlinkMentionsEnriched++; else logDebug("Tlink FROM -> tmx0, not found the target mention id:" + fvObj.getTo(), vars); continue; } if(fvObj.getTo().equalsIgnoreCase("tmx0")){ Record mtest1 = Record.create(); generateMIDAndSetIdWF(allWFFrom, mtest1 ,vars); int returned=addTlinkRelTypeToMention(NWR.TLINK_TO_TMX0,vars.tLinkTypeMapper.get(fvObj.getRelType().toUpperCase()),mtest1,vars); if (returned==1) vars.tlinkMentionsEnriched++; else logDebug("Tlink TO -> tmx0, not found the source mention id:" + fvObj.getFrom(), vars); continue; } if(allWFFrom.size()>0){ Record mtest1 = Record.create(); generateMIDAndSetIdWF(allWFFrom, mtest1 ,vars); m.add(NWR.SOURCE, mtest1.getID()); } if(allWFTO.size()>0){ Record mtest2 = Record.create(); generateMIDAndSetIdWF(allWFTO, mtest2 ,vars); m.add(NWR.TARGET, mtest2.getID()); } m.add(NWR.REL_TYPE, vars.tLinkTypeMapper.get(fvObj.getRelType().toUpperCase())); generateMIDAndSetIdWF(allEventWF, m,vars); deg = "MentionId:" + m.getID() + deg; logDebug(deg,vars); int addedNew = addOrMergeAMention(m,vars); if (addedNew==1){ vars.tlinkMentions++; }else if (addedNew==-1){ vars.tlinkMentionsDiscarded++; } } } private static int addTlinkRelTypeToMention(URI key, URI value, Record mention, processNAFVariables vars) { String charS = mention.getUnique(NIF.BEGIN_INDEX, Integer.class) + "," + mention.getUnique(NIF.END_INDEX, Integer.class); if(vars.mentionListHash.containsKey(charS)){ vars.mentionListHash.get(charS).add(key, value); return 1; } return -1; } private static LinkedList<Wf> reorderWFAscending(LinkedList<Wf> list, processNAFVariables vars) { LinkedList<Wf> tmp = new LinkedList<Wf>(); int found = 0; for (Wf wftmp : vars.doc.getText().getWf()) { if (list.contains(wftmp)) { tmp.addLast(wftmp); found++; } if (found >= list.size()) { break; } } if 
(found < list.size()) { logDebug("reorderWFAscending method, inconsistency: returned list less than the input list", vars); } return tmp; } private static List<Wf> getSpanOfTimex(String tmxId, processNAFVariables vars) { List<Wf> tmp = new ArrayList<Wf>(); for(Timex3 tms:vars.doc.getTimeExpressions().getTimex3()){ if(tms.getId().equalsIgnoreCase(tmxId)){ for(Target t: tms.getSpan().getTarget()){ tmp.add((Wf) t.getId()); } break; } } return tmp; } private static List<Target> getSpanTermsOfPredicate(String predicateId, processNAFVariables vars) { for( Predicate pr:vars.doc.getSrl().getPredicate()){ if(pr.getId().equalsIgnoreCase(predicateId)){ return pr.getSpan().getTarget(); } } return null; } private static URIImpl getUriForSrlExternalRefResource(String type, String value) { String prefix = null; if (type.equalsIgnoreCase("PropBank")) { prefix = ""; } else if (type.equalsIgnoreCase("VerbNet")) { prefix = ""; } else if (type.equalsIgnoreCase("FrameNet")) { prefix = ""; } else if (type.equalsIgnoreCase("NomBank")) { prefix = ""; } else if (type.equalsIgnoreCase("ESO")) { prefix = ""; } if (prefix != null) { return new URIImpl(prefix + value); } else { return null; } } private static void getSRLMentions(processNAFVariables vars) { Srl obj = vars.doc.getSrl(); if (!checkHeaderTextTerms(vars)) { logError("Error: populating interrupted",vars); } else { logDebug("Start mapping the Srl mentions:",vars); } if ((obj == null) || (((Srl) obj).getPredicate() == null)) { logError("skipped missing xpath(//NAF/srl)",vars); return; } /* Iterate over the <predicate>s */ for (Predicate prdObj : ((Srl) obj).getPredicate()) { String deg = ""; Record mtmp = null; String predicateID = prdObj.getId(); boolean firstSpanFound = false; int predicatExtRef = 0; String eventMentionId = null; String predicateCharSpan = null; LinkedList<Term> eventTermList = new LinkedList<Term>(); /* create an EVENT_MENTION for the <predicate> */ if (prdObj.getSpan() instanceof Span) { if (firstSpanFound) { 
logDebug("Srl should have one span only! ", vars); } if (!firstSpanFound) { firstSpanFound = true; } predicateCharSpan = getCharSpanFromSpan(prdObj.getSpan(),vars); mtmp = Record.create(); mtmp.add(RDF.TYPE, NWR.EVENT_MENTION, NWR.TIME_OR_EVENT_MENTION, NWR.ENTITY_MENTION, KS.MENTION); deg = "|TYPE:EVENT_MENTION,TIME_OR_EVENT_MENTION,ENTITY_MENTION,MENTION"; mtmp.add(KS.MENTION_OF, vars.news_file_id); for (Target tars : prdObj.getSpan().getTarget()) { tars.getId(); Term eventTerm = getTermfromTermId((Term) tars.getId(),vars); eventTermList.addLast(eventTerm); if (eventTerm.getLemma() != null) { mtmp.add(NWR.PRED, eventTerm.getLemma()); deg += "|PRED:" + eventTerm.getLemma(); } if (eventTerm.getPos() != null) { URI posVal = (eventTerm.getPos().equals("V") || eventTerm.getPos().equals("N")) ? vars.partOfSpeechMapper.get(eventTerm.getPos()) : vars.partOfSpeechMapper.get(""); mtmp.add(NWR.POS, posVal); deg += "|POS:" + posVal; } } generateTheMIdAndSetID(prdObj.getSpan(), mtmp,vars); deg = "MentionId:" + mtmp.getID() + deg; } /* * Assign the predicateAnchor attributes to the predicate Mention */ List<PredicateAnchor> prds= getAllRelativePredicateAnchors(prdObj.getId(),vars); for(PredicateAnchor tprd: prds){ if(tprd.getAnchorTime()!=null){ LinkedList<Wf> wfL = reorderWFAscending(fromSpanGetAllMentionsTmx(((Timex3)tprd.getAnchorTime()).getSpan().getTarget(), vars),vars); if(wfL.size()>0){ Record mtest1 = Record.create(); generateMIDAndSetIdWF(wfL, mtest1 ,vars); mtmp.add(NWR.ANCHOR_TIME, mtest1.getID()); } } if(tprd.getBeginPoint()!=null){ LinkedList<Wf> wfL = reorderWFAscending(fromSpanGetAllMentionsTmx(((Timex3)tprd.getBeginPoint()).getSpan().getTarget(), vars),vars); if(wfL.size()>0){ Record mtest1 = Record.create(); generateMIDAndSetIdWF(wfL, mtest1 ,vars); mtmp.add(NWR.BEGIN_POINT, mtest1.getID()); } } if(tprd.getEndPoint()!=null){ LinkedList<Wf> wfL = reorderWFAscending(fromSpanGetAllMentionsTmx(((Timex3)tprd.getEndPoint()).getSpan().getTarget(), vars),vars); 
if(wfL.size()>0){ Record mtest1 = Record.create(); generateMIDAndSetIdWF(wfL, mtest1 ,vars); mtmp.add(NWR.END_POINT, mtest1.getID()); } } } deg +="| ANCHOR_TIME:"+mtmp.get(NWR.ANCHOR_TIME)+" | BEGIN_POINT:"+mtmp.get(NWR.BEGIN_POINT)+" | END_POINT:"+mtmp.get(NWR.END_POINT); /* iterate over the sub-tags <externalReferences> or <role>s of the <predicate> */ for (Object prdGObj : prdObj.getExternalReferencesOrRole()) { /* process the <externalReferences> sub-tag: enrich the EVENT_MENTION related to <predicate> */ if (prdGObj instanceof ExternalReferences) { boolean eventTypeFound = false; if (predicatExtRef > 1) { logDebug("more than one external ref for predicate:" + predicateID + " size: " + predicatExtRef, vars); } predicatExtRef++; for (ExternalRef exrObj : ((ExternalReferences) prdGObj).getExternalRef()) { if (mtmp != null) { // check 'resource' and 'reference' attributes // String resourceValue = exrObj.getResource(); String referenceValue = exrObj.getReference(); if (resourceValue != null) { // check cases different from resource="EventType" // if (!resourceValue.equalsIgnoreCase("EventType")) { URI resourceMappedValue = vars.srlExternalRefResourceTypeMapper.get(resourceValue); URIImpl valueURI; if (resourceMappedValue != null) { valueURI = getUriForSrlExternalRefResource(resourceValue, referenceValue); } else { // force dynamic URIs // resourceMappedValue = ValueFactoryImpl.getInstance() .createURI(NWR.NAMESPACE, resourceValue.toLowerCase() + "Ref"); valueURI = new URIImpl("" + resourceValue.toLowerCase() + "/" + referenceValue); } mtmp.add(resourceMappedValue, valueURI); deg += "|" + resourceMappedValue + ":" + valueURI; } else { // resource="EventType" => check 'reference' attribute // URI referenceMappedValue = null; if (referenceValue != null) { referenceMappedValue = vars.eventClassMapper.get(referenceValue); } if (referenceMappedValue == null) { // force this default value // referenceMappedValue = NWR.EVENT_SPEECH_COGNITIVE; } mtmp.add(NWR.EVENT_CLASS, 
referenceMappedValue); deg += "|EVENT_CLASS:" + referenceMappedValue; eventTypeFound = true; } } else { // resourceValue == null) logDebug("xpath(//NAF/srl/predicate/externalReferences/externalRef@Resource(NULL)): predicateID(" + predicateID + ")", vars); } } else { logDebug("Mapping error - Mention null - xpath(NAF/srl/predicate/externalReferences/externalRef): predicateID(" + predicateID + ")", vars); } } if (!eventTypeFound) { mtmp.add(NWR.EVENT_CLASS, vars.eventClassMapper.get("contextual")); deg += "|EVENT_CLASS:" + vars.eventClassMapper.get("contextual"); eventTypeFound = true; } if (eventTypeFound) { logDebug(deg,vars); int addedNew = addOrMergeAMention(mtmp,vars); logDebug("ROL1: <srl> <predicate> adding new event mention for id " + predicateID + ", charSpan |" + predicateCharSpan + "|",vars); if (addedNew==1|addedNew==0){ vars.srlMention2++; vars.srlMention++; } String charS2 = mtmp.getUnique(NIF.BEGIN_INDEX, Integer.class) + "," + mtmp.getUnique(NIF.END_INDEX, Integer.class); vars.entityMentions.put(charS2,mtmp); eventMentionId = mtmp.getID().stringValue(); } else { // if eventType not found or no mapping for it // write error and discard this mention logDebug("Mention discarded for predicateID(" + predicateID + ") - ID(" + mtmp.getID().toString() + ")", vars); } } else /* process the <role> sub-tag rules: A) if the semantic role of <role> is temporal expression ("AM-TMP"), then two additional mentions are created: (1) a temporal-expression mention with span <role><span> (if not already extracted) (2) a TLINK between the event mention (<predicate><span>) and the temporal-expression mention (<role><span>) B) else if the <role> <span> COINCIDES EXACTLY with a previously extracted mention then create an single additional mention (a participation mention with span <predicate><span> + <role><span>) */ if (prdGObj instanceof Role) { boolean MenCreated = false; String charS = null; String deg2 = ""; LinkedList<Term> roleTermList = null; Span roleSpan = ((Role) 
prdGObj).getSpan(); String roleCharSpan = getCharSpanFromSpan(roleSpan,vars); boolean createTemporalexpressionMentionFlag = false; boolean createTlinkFlag = false; boolean createParticipationMentionFlag = false; String semRole = ((Role) prdGObj).getSemRole(); if ((semRole != null) && (semRole.equalsIgnoreCase("AM-TMP"))) { createTlinkFlag = true; createTemporalexpressionMentionFlag = true; logDebug(" ROL1: <srl> <role> found TLINK for |" + predicateCharSpan + "|" + roleCharSpan + "|",vars); } else { if (checkAlreadyAcceptedMention(roleCharSpan,vars)) { createParticipationMentionFlag = true; logDebug(" ROL1: <srl> <role> found already existent mention for |" + roleCharSpan + "|",vars); } } /* create a temporal-expression mention with span <role><span> if not already extracted */ if (createTemporalexpressionMentionFlag) { if (! checkAlreadyAcceptedMention(roleCharSpan,vars)) { Record roleM = Record.create(); roleM.add(RDF.TYPE, NWR.TIME_MENTION, NWR.TIME_OR_EVENT_MENTION, NWR.ENTITY_MENTION, KS.MENTION); roleM.add(KS.MENTION_OF, vars.news_file_id); // compute the Term list for the <role> roleTermList = new LinkedList<Term>(); for (Target rspnTar : ((Role) prdGObj).getSpan().getTarget()) { Term ttmp = getTermfromTermId((Term) rspnTar.getId(),vars); roleTermList.addLast(ttmp); } // generate and set ID of the mention generateTheMIdAndSetID(roleSpan, roleM,vars); // try to add the mention if(addOrMergeAMention(roleM,vars) == 1) { vars.timeMention2++; } logDebug(" ROL1: created temporal-expression mention for |" + roleCharSpan + "|",vars); } } /* create the new relation mention (either participation or TLINK) with the span of <predicate> + <role> TODO: - check the enrichment (externalReferences, TYPE, extent, ...) 
- check SYNTACTIC_HEAD */ if (createParticipationMentionFlag || createTlinkFlag) { Record relationM = Record.create(); /* set as NWR.SOURCE of the relation mention the id of the EVENT_MENTION related to <predicate> */ if (eventMentionId != null) { relationM.add(NWR.SOURCE, new URIImpl(eventMentionId)); deg2 += "|SOURCE:" + eventMentionId; } else { logDebug("//NAF/srl/predicate/role/ - a Role without Predicate roleID(" + ((Role) prdGObj).getId() + ")", vars); } /* set as NWR.TARGET of the relation mention the id of the role mention related to <role> */ { URI roleId = getMentionIDFromCharSpan(roleCharSpan,vars); relationM.add(NWR.TARGET, roleId); } /* set the RDF.TYPE */ if (createParticipationMentionFlag) { relationM.add(RDF.TYPE, NWR.PARTICIPATION, NWR.RELATION_MENTION, KS.MENTION); deg2 = "|TYPE:PARTICIPATION,RELATION_MENTION,MENTION|" + deg2; relationM.add(NWR.THEMATIC_ROLE, ((Role) prdGObj).getSemRole()); deg2 += "|THEMATIC_ROLE:" + ((Role) prdGObj).getSemRole(); } else { relationM.add(RDF.TYPE, NWR.TLINK, NWR.RELATION_MENTION, KS.MENTION); deg2 = "|TYPE:TLINK,RELATION_MENTION,MENTION|" + deg2; } relationM.add(KS.MENTION_OF, vars.news_file_id); /* generate and set ID of the relation mention */ // compute the Term list for the <role> if (roleTermList == null) { roleTermList = new LinkedList<Term>(); for (Target rspnTar : ((Role) prdGObj).getSpan().getTarget()) { Term ttmp = getTermfromTermId((Term) rspnTar.getId(),vars); roleTermList.addLast(ttmp); } } generateTheMIdAndSetID_forParticipationMention(eventTermList, roleTermList, relationM,vars); deg2 = "MentionId:" + relationM.getID() + deg2; boolean create = false; /* always add the relation mention (do not check if a previously accepted mention with the span of <role> exists) */ int addedNew = -1; MenCreated = true; charS = relationM.getUnique(NIF.BEGIN_INDEX, Integer.class) + "," + relationM.getUnique(NIF.END_INDEX, Integer.class); if (createParticipationMentionFlag) { logDebug(" ROL1: <srl> <predicate> <role> 
adding new participation mention for |" + charS + "|",vars); } else { logDebug(" ROL1: <srl> <predicate> <role> adding new TLINK mention for |" + charS + "|",vars); } /* try to add the relation mention */ addedNew = addOrMergeAMention(relationM,vars); if (addedNew==1){ vars.rolewithEntity++; vars.rolewithEntity2++; vars.roleMentions++; } else { // if with entity and conflict or enriched, counted as discarded if(create){ vars.rolewithoutEntity++; } } /* add the information from <externalReferences> sub-tag (the relation mention is always created) */ for (ExternalReferences roleGOBJ : ((Role) prdGObj).getExternalReferences()) { for (ExternalRef rexRefObj : roleGOBJ.getExternalRef()) { // check 'resource' and 'reference' attributes // String resourceValue = rexRefObj.getResource(); String referenceValue = rexRefObj.getReference(); if (resourceValue != null) { URI resourceMappedValue = vars.srlExternalRefResourceTypeMapper.get(resourceValue); URIImpl valueURI; if (resourceMappedValue != null) { valueURI = getUriForSrlExternalRefResource(resourceValue, referenceValue); } else { // force dynamic URIs // resourceMappedValue = ValueFactoryImpl.getInstance() .createURI(NWR.NAMESPACE, resourceValue.toLowerCase() + "Ref"); valueURI = new URIImpl("" + resourceValue.toLowerCase() + "/" + referenceValue); } if (charS != null) { vars.mentionListHash.get(charS).add(resourceMappedValue, valueURI); } deg2 += "|" + resourceMappedValue + ":" + valueURI; } else { // resourceValue == null) logDebug("xpath(//NAF/srl/predicate/role/externalReferences/externalRef@Resource(NULL)): RoleID(" + ((Role) prdGObj).getId() + ")", vars); } } } // end of adding info from <externalReferences> within <role> } logDebug(deg2,vars); } // end of <role> processing } // end of iteration over <externalReferences> or <role>s of the <predicate> } // end of iteration over <predicate> } private static List<PredicateAnchor> getAllRelativePredicateAnchors(String id, processNAFVariables vars) { 
// Body of getAllRelativePredicateAnchors(id, vars): collects the predicate anchors whose spans target predicate `id`.
        List<PredicateAnchor> tmp = new ArrayList<PredicateAnchor>();
        // getTemporalRelations() may be null (presumably when the NAF has no such layer - TODO confirm);
        // in that case there is nothing to collect.
        if (vars.doc.getTemporalRelations() == null) {
            return tmp;
        }
        anchors:
        for (PredicateAnchor pas : vars.doc.getTemporalRelations().getPredicateAnchor()) {
            for (Span t : pas.getSpan()) {
                for (Target tm : t.getTarget()) {
                    if (((Predicate) tm.getId()).getId().equalsIgnoreCase(id)) {
                        tmp.add(pas);
                        // One match is enough for this anchor. A plain `break` would only leave the
                        // innermost loop and the anchor would be added again for every other matching span.
                        continue anchors;
                    }
                }
            }
        }
        return tmp;
    }

    /**
     * Builds the two metadata records of the document (the news resource and its NAF annotation file)
     * from the NAF header and stores them in {@code vars.newsFile2} and {@code vars.nafFile2}.
     *
     * <p>The record IDs come from {@link #initURIIDS}. File description fields and the linguistic
     * processors found in the header are copied onto the records; a textual summary is sent to the
     * debug log.
     *
     * @param obj  the NAF header to read
     * @param vars per-document processing state (input document, ID prefix, output records)
     */
    private static void getNAFHEADERMentions(NafHeader obj, processNAFVariables vars) {
        logDebug("Start reading the naf metadata:", vars);
        String deg = "";
        Public publicProp = ((NafHeader) obj).getPublic();
        initURIIDS(publicProp, vars);

        Record newsFile = Record.create();
        Record nafFile = Record.create();
        vars.nafFile2 = nafFile;
        vars.newsFile2 = newsFile;
        newsFile.setID(vars.news_file_id);
        nafFile.setID(vars.NAF_file_id);
        deg += "news_file_id:" + vars.news_file_id;
        newsFile.add(RDF.TYPE, NWR.NEWS);
        deg += "\nNAF_file_id:" + vars.NAF_file_id;
        nafFile.add(RDF.TYPE, NWR.NAFDOCUMENT);
        // link the two records to each other
        nafFile.add(NWR.ANNOTATION_OF, newsFile.getID());
        newsFile.add(NWR.ANNOTATED_WITH, nafFile.getID());
        if (vars.doc.getVersion() != null) {
            nafFile.add(NWR.VERSION, vars.doc.getVersion());
            deg += "\nVERSION:" + vars.doc.getVersion();
        }

        /* set DCTERMS.SOURCE according to the News URI */
        URIImpl sourceURL;
        // NOTE(review): the regular expression and the preStr literal below look truncated in this copy
        // of the file (a URL seems to be missing) - restore them from version control before building.
        if (vars.PREFIX.matches("(?i)*")) {
            // LexisNexis news
            String preStr = "";
            String postStr = "&csi=138620&perma=true";
            String srcUrlstr = new String(preStr + publicProp.getPublicId() + postStr);
            sourceURL = new URIImpl(srcUrlstr);
        } else {
            // non-LexisNexis news
            sourceURL = (URIImpl) vars.news_file_id;
        }
        newsFile.add(DCTERMS.SOURCE, sourceURL);

        if (publicProp.getPublicId() != null) {
            nafFile.add(DCTERMS.IDENTIFIER, publicProp.getPublicId()); // NAF/nafHeader/public@publicId
            deg += "|IDENTIFIER:" + publicProp.getPublicId();
        }
        if (vars.doc.getXmlLang() != null) {
            newsFile.add(DCTERMS.LANGUAGE, Data.languageCodeToURI(vars.doc.getXmlLang()));
            deg += "|LANGUAGE:" + Data.languageCodeToURI(vars.doc.getXmlLang());
        } else {
            logWarn("Language not catched:" + vars.doc.getXmlLang(), vars);
        }

        // copy every available <fileDesc> attribute onto the news record
        FileDesc fileDesc = null;
        if (((NafHeader) obj).getFileDesc() != null) {
            fileDesc = ((NafHeader) obj).getFileDesc();
            if (fileDesc.getTitle() != null) {
                newsFile.add(DCTERMS.TITLE, fileDesc.getTitle());
                deg += "|TITLE:" + fileDesc.getTitle();
            }
            if (fileDesc.getAuthor() != null) {
                newsFile.add(DCTERMS.CREATOR, fileDesc.getAuthor());
                deg += "|Author:" + fileDesc.getAuthor();
            }
            if (fileDesc.getCreationtime() != null) {
                newsFile.add(DCTERMS.CREATED, fileDesc.getCreationtime());
                deg += "|Creationtime:" + fileDesc.getCreationtime();
            }
            if (fileDesc.getSection() != null) {
                newsFile.add(NWR.SECTION, fileDesc.getSection());
                deg += "|SECTION:" + fileDesc.getSection();
            }
            if (fileDesc.getMagazine() != null) {
                newsFile.add(NWR.MAGAZINE, fileDesc.getMagazine());
                deg += "|MAGAZINE:" + fileDesc.getMagazine();
            }
            if (fileDesc.getLocation() != null) {
                newsFile.add(NWR.LOCATION, fileDesc.getLocation());
                deg += "|LOCATION:" + fileDesc.getLocation();
            }
            if (fileDesc.getPublisher() != null) {
                newsFile.add(NWR.PUBLISHER, fileDesc.getPublisher());
                deg += "|PUBLISHER:" + fileDesc.getPublisher();
            }
            if (fileDesc.getFilename() != null) {
                newsFile.add(NWR.ORIGINAL_FILE_NAME, fileDesc.getFilename());
                deg += "|Filename:" + fileDesc.getFilename();
            }
            if (fileDesc.getFiletype() != null) {
                newsFile.add(NWR.ORIGINAL_FILE_FORMAT, fileDesc.getFiletype());
                deg += "|Filetype:" + fileDesc.getFiletype();
            }
            if (fileDesc.getPages() != null) {
                newsFile.add(NWR.ORIGINAL_PAGES, fileDesc.getPages());
                deg += "|Pages:" + fileDesc.getPages();
            }
        } else {
            logWarn("FileDesc: null", vars);
        }

        // one NWR.LAYER per known layer, one NWR.MODULES record per linguistic processor
        for (LinguisticProcessors lpObj : ((NafHeader) obj).getLinguisticProcessors()) {
            deg += "\n";
            if (lpObj.getLayer() != null) {
                if (vars.nafLayerMapper.containsKey(lpObj.getLayer())
                        && vars.nafLayerMapper.get(lpObj.getLayer()) != null) {
                    nafFile.add(NWR.LAYER, vars.nafLayerMapper.get(lpObj.getLayer()));
                    deg += "LAYER:" + vars.nafLayerMapper.get(lpObj.getLayer());
                } else {
                    logDebug("xpath(//NAF/nafHeader/linguisticProcessors/@layer[" + lpObj.getLayer()
                            + "]), unknown layer.", vars);
                }
            }
            for (Lp lpO : lpObj.getLp()) {
                deg += "\n";
                Record r3 = Record.create();
                if (lpO.getName() != null) {
                    r3.add(DCTERMS.TITLE, lpO.getName());
                    deg += "TITLE:" + lpO.getName();
                }
                if (lpO.getVersion() != null) {
                    r3.add(NWR.VERSION, lpO.getVersion());
                    deg += "|VERSION:" + lpO.getVersion();
                }
                // NOTE(review): the initializer of namuri is missing in this copy of the file; it is
                // reproduced as found and must be restored from version control.
                String namuri =;
                String uri = vars.PREFIX + (vars.PREFIX.endsWith("/") ? "" : "/") + "lp/" + namuri + "/"
                        + lpO.getVersion();
                URI rId = new URIImpl(uri);
                r3.setID(rId);
                nafFile.add(NWR.MODULES, r3);
                deg += "|CREATOR:" + r3;
            }
        }
        logDebug(deg, vars);
    }

    /**
     * Maps the {@code <coref>} elements of the coreference layer to mentions.
     *
     * <p>For a coref of type "event" a mention is created for each span. For any other coref, mentions
     * are created only if at least one span coincides with or includes an already accepted mention.
     * Spans that already have an accepted mention are skipped.
     *
     * @param obj  the coreference layer
     * @param vars per-document processing state (accepted mentions, counters)
     */
    private static void getCoreferencesMentions(Coreferences obj, processNAFVariables vars) {
        if (!checkHeaderTextTerms(vars)) {
            logError("Error: populating interrupted", vars);
        } else {
            logDebug("Start mapping the Coreferences mentions:", vars);
        }
        String deg = "\n";

        /* process the <coref> tag */
        for (Coref corefObj : ((Coreferences) obj).getCoref()) {
            deg = "";
            if (corefObj.getSpan().size() < 1) {
                logDebug("Every coref must contain a 'span' element inside 'references'", vars);
            }

            /*
               if type exists and =="event" then create a new mention for each span;
               otherwise create a new mention for each span only if at least one span
               includes (or is) a previously extracted mention.
            */
            boolean addMentionsFlag = false;
            String corefType = corefObj.getType();
            List<Object> typesOfIncludedMention = null;
            boolean eventM = false;
            if (corefType != null && corefType.equalsIgnoreCase("event")) {
                // create new mentions
                addMentionsFlag = true;
                eventM = true;
            } else {
                // check if at least one span includes (or is) a previously extracted mention
                for (Span corefSpan : corefObj.getSpan()) {
                    String corefCharSpan = getCharSpanFromSpan(corefSpan, vars);
                    Object[] retArray = checkSpanIncludesAnAlreadyAcceptedMention(corefCharSpan, vars);
                    int inclFlag = ((Integer) retArray[0]).intValue();
                    if ((inclFlag == 1) || (inclFlag == 2)) {
                        addMentionsFlag = true;
                        String includedMentionCharSpan = (String) retArray[1];
                        typesOfIncludedMention = getMentionTypeFromCharSpan(includedMentionCharSpan, vars);
                        logDebug("ROL1: <coref> id " + corefObj.getId() + ": found included mention for |"
                                + corefCharSpan + "|, included mention |" + includedMentionCharSpan
                                + "|, inclFlag " + inclFlag + ", types "
                                + getTypeasString(typesOfIncludedMention), vars);
                        break;
                    }
                }
            }

            if (addMentionsFlag) {
                for (Span corefSpan : corefObj.getSpan()) {
                    String corefCharSpan = getCharSpanFromSpan(corefSpan, vars);
                    if (checkAlreadyAcceptedMention(corefCharSpan, vars)) {
                        logDebug("ROL1: <coref> id " + corefObj.getId()
                                + ": skipping already existent mention with charSpan |" + corefCharSpan + "|",
                                vars);
                        continue;
                    }
                    deg = "";
                    Record m = Record.create();
                    m.add(KS.MENTION_OF, vars.news_file_id);
                    deg += "MENTION_OF:" + vars.news_file_id;
                    if (corefObj.getSpan().size() > 1) {
                        m.add(NWR.LOCAL_COREF_ID, corefObj.getId());
                        deg += "|LOCAL_COREF_ID:" + corefObj.getId();
                    }
                    if (eventM) {
                        m.add(RDF.TYPE, NWR.EVENT_MENTION, NWR.TIME_OR_EVENT_MENTION, NWR.ENTITY_MENTION,
                                KS.MENTION);
                        deg = "|TYPE:EVENT_MENTION,TIME_OR_EVENT_MENTION,ENTITY_MENTION,ENTITY_MENTION" + deg;
                        eventM = true;
                    } else {
                        m.add(RDF.TYPE, NWR.OBJECT_MENTION, NWR.ENTITY_MENTION, KS.MENTION);
                        deg = "TYPE:,OBJECT_MENTION,ENTITY_MENTION,ENTITY_MENTION|" + deg;
                        // add types of includedMention
                        if (typesOfIncludedMention != null) {
                            m.add(RDF.TYPE, typesOfIncludedMention);
                        }
                    }
                    logDebug("ROL1: <coref> id " + corefObj.getId() + ": adding new mention with charSpan |"
                            + corefCharSpan + "|, and type " + m.get(RDF.TYPE), vars);
                    if (corefSpan.getTarget().size() < 1) {
                        logDebug("Every span in an entity must contain at least one target inside", vars);
                    }
                    for (Target spTar : corefSpan.getTarget()) {
                        if (eventM) {
                            // NOTE(review): getTermfromTermId returns null for an unknown term; that case
                            // is not handled here.
                            Term eventTerm = getTermfromTermId((Term) spTar.getId(), vars);
                            m.add(NWR.PRED, eventTerm.getLemma());
                            deg += "|PRED:" + eventTerm.getLemma();
                            if (eventTerm.getPos() != null) {
                                // only "V" and "N" have their own mapping, everything else uses the "" entry
                                URI posVal = (eventTerm.getPos().equals("V") || eventTerm.getPos().equals("N"))
                                        ? vars.partOfSpeechMapper.get(eventTerm.getPos())
                                        : vars.partOfSpeechMapper.get("");
                                m.add(NWR.POS, posVal);
                                deg += "|POS:" + posVal;
                            } else {
                                logDebug("//NAF/coreferences/coref/span/target/@id/@getPOS[null], id("
                                        + eventTerm.getId() + ")", vars);
                            }
                        }
                        if (!eventM && spTar.getHead() != null && spTar.getHead().equals("yes")) {
                            if (spTar.getId() != null) {
                                m.add(NWR.SYNTACTIC_HEAD, spTar.getId());
                                deg += "|SYNTACTIC_HEAD:" + spTar.getId();
                            } else {
                                logDebug("//NAF/coreferences/coref/span/target[@head='yes']/@id[null], id("
                                        + spTar.getId() + ")", vars);
                            }
                        }
                    }
                    generateTheMIdAndSetID(corefSpan, m, vars);
                    deg = "MentionId:" + m.getID() + deg;
                    logDebug(deg, vars);

                    // statistics are updated only when a brand-new mention was stored
                    int addedNew = addOrMergeAMention(m, vars);
                    if (!eventM) {
                        // eventM == false
                        if (addedNew == 1) {
                            vars.corefMention2++;
                            vars.no_mapping++;
                            vars.corefMentionNotEvent++;
                            vars.corefMention++;
                            // logWarn("xpath(//NAF/coreferences/coref/) add a new object mention, missing type.");
                        }
                    } else {
                        // eventM == true
                        if (addedNew == 1) {
                            vars.srlMention2++;
                            vars.corefMentionEvent++;
                            vars.corefMention++;
                        }
                    }
                    String charS2 = m.getUnique(NIF.BEGIN_INDEX, Integer.class) + ","
                            + m.getUnique(NIF.END_INDEX, Integer.class);
                    vars.entityMentions.put(charS2, m);
                }
            } else {
                logDebug("ROL1: <coref> id " + corefObj.getId() + ": entirely skipped, NO included mentions",
                        vars);
            }
        }
    }

    /**
     * Concatenates two term lists: first the terms of the {@code <predicate>}, then those of the
     * {@code <role>}. The inputs are not modified.
     *
     * @return a new list holding all event terms followed by all role terms
     */
    private static LinkedList<Term> mergeTwoTermLists(LinkedList<Term> eventTermList,
            LinkedList<Term> roleTermList, processNAFVariables vars) {
        LinkedList<Term> merged = new LinkedList<Term>(eventTermList);
        merged.addAll(roleTermList);
        logDebug("Two lists merged: eventTermListSize(" + eventTermList.size() + ") + roleTermListSize("
                + roleTermList.size() + ") = mergedListSize(" + merged.size() + ").", vars);
        return merged;
    }

    /**
     * Tells whether a mention with exactly this character span (e.g. "321,325") was already accepted.
     */
    private static boolean checkAlreadyAcceptedMention(String charSpan, processNAFVariables vars) {
        return vars.mentionListHash.containsKey(charSpan);
    }

    /**
     * Checks whether a character span (e.g. "321,325") coincides with or includes an accepted mention.
     *
     * @return a two-element array {@code {Integer flag, String span}}:
     *         {@code {0, null}} if no accepted mention lies inside the span,
     *         {@code {1, charSpan}} if an accepted mention has exactly this span,
     *         {@code {2, spanOfIncludedMention}} if an accepted mention lies inside the span
     */
    private static Object[] checkSpanIncludesAnAlreadyAcceptedMention(String charSpan,
            processNAFVariables vars) {
        // exact match first
        if (checkAlreadyAcceptedMention(charSpan, vars)) {
            return new Object[] { Integer.valueOf(1), charSpan };
        }

        // otherwise look for any accepted mention whose span lies inside the given one
        String[] fields = charSpan.split(",");
        int spanBeginC = Integer.parseInt(fields[0]);
        int spanEndC = Integer.parseInt(fields[1]);
        Enumeration<?> keys = vars.mentionListHash.keys();
        while (keys.hasMoreElements()) {
            String key = (String) keys.nextElement();
            String[] kfields = key.split(",");
            int kBeginC = Integer.parseInt(kfields[0]);
            int kEndC = Integer.parseInt(kfields[1]);
            if ((kBeginC >= spanBeginC) && (kEndC <= spanEndC)) {
                return new Object[] { Integer.valueOf(2), key };
            }
        }

        // no already-accepted mentions included
        return new Object[] { Integer.valueOf(0), null };
    }

    /**
     * Returns the character span of a {@code Span} as "begin,end" (e.g. "321,325"): the offset of its
     * first word form and the offset plus length of its last one.
     */
    private static String getCharSpanFromSpan(Span sp, processNAFVariables vars) {
        LinkedList<Wf> wordsL = fromSpanGetAllMentions(sp.getTarget(), vars);
        Wf lastW = wordsL.getLast();
        int end = Integer.parseInt(lastW.getOffset()) + Integer.parseInt(lastW.getLength());
        return wordsL.getFirst().getOffset() + "," + end;
    }

    /**
     * Returns the ID of the accepted mention with the given character span, or {@code null} if there
     * is none.
     */
    private static URI getMentionIDFromCharSpan(String charSpan, processNAFVariables vars) {
        Record mention = vars.mentionListHash.get(charSpan);
        return (mention == null) ? null : mention.getID();
    }

    /**
     * Returns the RDF types of the accepted mention with the given character span, or {@code null} if
     * there is none.
     */
    private static List<Object> getMentionTypeFromCharSpan(String charSpan, processNAFVariables vars) {
        Record mention = vars.mentionListHash.get(charSpan);
        return (mention == null) ? null : mention.get(RDF.TYPE);
    }

    /*
       return:
         1 if input mention "m" was added as a new mention
         0 if a previously accepted mention with the same span of input mention m was enriched
        <0 in case of problems (e.g.
due to a conflict with previously accepted mention); input mention m was not added */ private static Integer addOrMergeAMention(Record m,processNAFVariables vars) { String charS = m.getUnique(NIF.BEGIN_INDEX, Integer.class) + "," + m.getUnique(NIF.END_INDEX, Integer.class); if (vars.mentionListHash.containsKey(charS)) { /* there is a previously accepted mention with the same span: try to enrich it */ boolean chk = checkClassCompatibility(vars.mentionListHash.get(charS), m); if (!chk) { /* there is conflict between the input mention and the previously accepted mention with the same span: check if the new mention can replace the old one, otherwise report the error */ if (checkMentionReplaceability(vars.mentionListHash.get(charS), m)){ // replace the old mention with the new one vars.mentionListHash.put(charS, m); logDebug("Replacement with Mention: " + m.getID() + ", class(" + getTypeasString(m.get(RDF.TYPE)) + ")", vars); return 0; } String types =getTypeasString(m.get(RDF.TYPE)); if(types.contains(NWR.PARTICIPATION.stringValue())){ logDebug("Participation collision error, mentionID(" + m.getID() + ") class1(" + getTypeasString(m.get(RDF.TYPE)) + "), class-pre-xtracted(" + getTypeasString(vars.mentionListHash.get(charS).get(RDF.TYPE)) + ")", vars); }else{ logDebug("Generic collision error, mentionID(" + m.getID() + ") class1(" + getTypeasString(m.get(RDF.TYPE)) + "), class-pre-xtracted(" + getTypeasString(vars.mentionListHash.get(charS).get(RDF.TYPE)) + ")", vars); } return -1; } else { /* there is compatibility between the input mention and the previously accepted mention with the same span: enrich old mention with properties from the new one (except participation mentions) */ String types =getTypeasString(m.get(RDF.TYPE)); if(types.contains(NWR.PARTICIPATION.stringValue())){//Rule: no enrichment for participation logDebug("Refused enrichment with participation mention, mentionID(" + m.getID() + ")", vars); return -1; } // enrich mention ListIterator<URI> mit = 
m.getProperties().listIterator(); while (mit.hasNext()) { URI mittmp =; for (Object pit : m.get(mittmp)) { vars.mentionListHash.get(charS).add(mittmp, pit); } } logDebug("Mention enrichment: " + m.getID() + ", class(" + getTypeasString(m.get(RDF.TYPE)) + ")", vars); return 0; } } else { /* the mention is new (there is no previously accepted mention with the same span) */ vars.mentionListHash.put(charS, m); logDebug("Created Mention: " + m.getID(),vars); return 1; } } // apply to all the mentions the following changes: // - add the "extent" attribute as NIF.ANCHOR_OF // private static void fixMentions(processNAFVariables vars) { Enumeration keys = vars.mentionListHash.keys(); while( keys.hasMoreElements() ) { String key = (String) keys.nextElement(); Record m = (Record) vars.mentionListHash.get(key); // get charStartIndex and charEndIndex from the mention charSpan (= the key) // String[] csList = key.split(","); int cStart = Integer.parseInt(csList[0]); int cEnd = Integer.parseInt(csList[1]); String extentStr = vars.rawText.substring(cStart, cEnd); m.add(NIF.ANCHOR_OF, extentStr); } } private static String getTypeasString(List<Object> list) { String tmp=""; for(Object ll :list){ tmp+=ll.toString()+","; } tmp+="\""; return tmp.replace(",\"",""); } private static boolean checkClassCompatibility(Record m, Record m2) { List<Object> types = m.get(RDF.TYPE); List<Object> types1 = m2.get(RDF.TYPE); for (Object tytmp : types) { if (!types1.contains(tytmp)) { return false; } } return true; } private static boolean checkMentionReplaceability(Record oldM, Record newM) { /* return true oldM can be replaced with newM; this happens if oldM is a generic OBJECT_MENTION (= without ENTITY_TYPE) and newM is more specific (an OBJECT_MENTION with ENTITY_TYPE, or a TIME_MENTION, or EVENT_MENTION) */ List<Object> typesOld = oldM.get(RDF.TYPE); List<Object> typesNew = newM.get(RDF.TYPE); boolean isGenericOldM = (typesOld.contains(NWR.OBJECT_MENTION) && (oldM.get(NWR.ENTITY_TYPE) == 
null||oldM.get(NWR.ENTITY_TYPE).size() == 0)); boolean isSpecificNewM = ((typesNew.contains(NWR.OBJECT_MENTION) && (newM.get(NWR.ENTITY_TYPE) != null && oldM.get(NWR.ENTITY_TYPE).size() > 0)) || typesNew.contains(NWR.TIME_MENTION) || typesNew.contains(NWR.EVENT_MENTION)); /* logWarn("ROL3: checkMentionReplaceability: " + getTypeasString(typesOld) + "| " + getTypeasString(typesNew)); logWarn("ROL3: isGenericOldM " + isGenericOldM + "| isSpecificNewM " + isSpecificNewM); */ if (isGenericOldM && isSpecificNewM) { return true; } else { return false; } } private static void initURIIDS(Public publicProp,processNAFVariables vars) { if (publicProp.getPublicId() == null) { logError("Corrupted Naf file: PublicId in the Naf header is missed",vars); System.exit(0); } vars.nafPublicId = publicProp.getPublicId(); String uri = publicProp.getUri(); //TODO remove it @mohammed Sept2014PREFIX //uri = PREFIX+uri; vars.news_file_id = new URIImpl(uri); String nafuri = uri + ".naf"; vars.NAF_file_id = new URIImpl(nafuri); logDebug("news_file_id: " + uri,vars); logDebug("NAF_file_id: " + nafuri,vars); // set the PREFIX given the news uri try { URL nurl = new URL(uri); Path p = Paths.get(nurl.getPath()); vars.PREFIX = nurl.getProtocol() + "://" + nurl.getAuthority() + "/" + p.subpath(0,2); } catch (Exception me) { vars.PREFIX = vars.news_file_id.getNamespace(); } logDebug("PREFIX: " + vars.PREFIX,vars); } static void generateMIDAndSetIdWF(LinkedList<Wf> wordsL, Record m,processNAFVariables vars) { int begin = Integer.parseInt(wordsL.getFirst().getOffset()); int end = (Integer.parseInt(wordsL.getLast().getOffset()) + Integer.parseInt(wordsL .getLast().getLength())); m.add(NIF.BEGIN_INDEX, begin); m.add(NIF.END_INDEX, end); String tmpid = vars.news_file_id + "#char=" + begin + "," + end; URI mId = new URIImpl(tmpid); m.setID(mId); } private static void logError(String error,processNAFVariables vars) { if (vars.logErrorActive) { vars.logger.error(vars.filePath.getName() + " " + error); } if 
(!vars.storePartialInforInCaseOfError) { System.exit(-1); } } private static void logDebug(String error,processNAFVariables vars) { if (vars.logDebugActive) { vars.logger.debug(error); } } private static void logWarn(String error,processNAFVariables vars) { vars.logger.warn(vars.filePath.getName() + " "+error); } private static boolean checkHeaderTextTerms(processNAFVariables vars) { if (vars.globalTerms == null) { logWarn("Error: No term(s) has been catched!",vars); return false; } if (vars.globalText == null) { logWarn("Error: No text(s) has been catched!",vars); return false; } return true; } public static Logger getLogger(processNAFVariables vars) { return vars.logger; } public static void setLogger(final Logger logger,processNAFVariables vars) { vars.logger = logger; } private static void generateTheMIdAndSetID(Span spansObj, Record m,processNAFVariables vars) { LinkedList<Wf> wordsL = fromSpanGetAllMentions(((Span) spansObj).getTarget(),vars); int begin = Integer.parseInt(wordsL.getFirst().getOffset()); int end = (Integer.parseInt(wordsL.getLast().getOffset()) + Integer.parseInt(wordsL .getLast().getLength())); m.add(NIF.BEGIN_INDEX, begin); m.add(NIF.END_INDEX, end); String muri = vars.news_file_id + "#char=" + begin + "," + end; URI mId = new URIImpl(muri); m.setID(mId); } /* similar to generateTheMIdAndSetID() but specific for ParticipationMention */ private static void generateTheMIdAndSetID_forParticipationMention(LinkedList<Term> eventTermList, LinkedList<Term> roleTermList, Record m,processNAFVariables vars) { LinkedList<Wf> eventWordList = getTheWFListByThereTermsFromTargetList(eventTermList,vars); LinkedList<Wf> roleWordList = getTheWFListByThereTermsFromTargetList(roleTermList,vars); int charStartOfEvent = Integer.parseInt(eventWordList.getFirst().getOffset()); int charEndOfEvent = Integer.parseInt(eventWordList.getLast().getOffset()) + Integer.parseInt(eventWordList.getLast().getLength()); int charStartOfRole = 
Integer.parseInt(roleWordList.getFirst().getOffset()); int charEndOfRole = Integer.parseInt(roleWordList.getLast().getOffset()) + Integer.parseInt(roleWordList.getLast().getLength()); int beginIndex, endIndex; if (charStartOfEvent < charStartOfRole) { beginIndex = charStartOfEvent; endIndex = charEndOfRole; } else { beginIndex = charStartOfRole; endIndex = charEndOfEvent; } m.add(NIF.BEGIN_INDEX, beginIndex); m.add(NIF.END_INDEX, endIndex); String muri = vars.news_file_id + "#char=" + beginIndex + "," + endIndex; URI mId = new URIImpl(muri); m.setID(mId); } /* similar to generateTheMIdAndSetID() but specific for ParticipationMention */ private static String getExtentOfParticipationMention(LinkedList<Term> eventTermList, LinkedList<Term> roleTermList,processNAFVariables vars) { LinkedList<Wf> eventWordList = getTheWFListByThereTermsFromTargetList(eventTermList,vars); LinkedList<Wf> roleWordList = getTheWFListByThereTermsFromTargetList(roleTermList,vars); LinkedList<Wf> mergedWordList = new LinkedList<Wf>(); int charStartOfEvent = Integer.parseInt(eventWordList.getFirst().getOffset()); int charStartOfRole = Integer.parseInt(roleWordList.getFirst().getOffset()); LinkedList<Wf> firstWL, secondWL; if (charStartOfEvent <= charStartOfRole) { firstWL = eventWordList; secondWL = roleWordList; } else { firstWL = roleWordList; secondWL = eventWordList; } for (Wf w : firstWL) { if (! mergedWordList.contains(w)) {mergedWordList.add(w);} } for (Wf w : secondWL) { if (! 
mergedWordList.contains(w)) {mergedWordList.add(w);} } StringBuffer extent = new StringBuffer(); for (Wf w : mergedWordList) { extent.append(w.getvalue() + " "); } String sExtent = extent.toString(); return sExtent.substring(0, sExtent.length() - 1); } /* return true if another mention exists with the same span */ private static boolean checkDuplicate(String muri,processNAFVariables vars) { boolean re = false; for(String keys:vars.entityMentions.keySet()){ if (keys.equals(muri)) { //if (mtmp.getID().stringValue().equals(muri)) { re = true; break; } } return re; } private static Term getTermfromTermId(Term termId,processNAFVariables vars) { if (vars.globalTerms != null) { if (vars.globalTerms.getTerm().contains(termId)) return vars.globalTerms.getTerm().get(vars.globalTerms.getTerm().indexOf(termId)); } else { Terms ltmp = vars.doc.getTerms(); if (((Terms) ltmp).getTerm().contains(termId)) return vars.globalTerms.getTerm().get(vars.globalTerms.getTerm().indexOf(termId)); } logWarn("Term is not found, searched TermId(" + termId.getId() + ")",vars); return null; } private static LinkedList<Wf> fromSpanGetAllMentionsTmx(List<Target> list,processNAFVariables vars) { LinkedList<Wf> returned = new LinkedList<Wf>(); LinkedList<Wf> wordsIDL = new LinkedList<Wf>(); for (Target ltmp : list) { wordsIDL.addLast((Wf) ltmp.getId()); } if (vars.globalText != null) { int found = 0; for (Wf wftmp : vars.globalText.getWf()) { if (wordsIDL.contains(wftmp)) { returned.addLast(wftmp); found++; } if (found >= wordsIDL.size()) { break; } } } else { Text prop = vars.doc.getText(); int found = 0; for (Wf wftmp : prop.getWf()) { if (wordsIDL.contains(wftmp)) { returned.addLast(wftmp); found++; } if (found >= wordsIDL.size()) { break; } } } return returned; } private static LinkedList<Wf> getTheWFListByThereTermsFromTargetList(LinkedList<Term> targetTermList,processNAFVariables vars) { LinkedList<Wf> returned = new LinkedList<Wf>(); LinkedList<Wf> wordsIDL = new LinkedList<Wf>(); boolean 
spanTermFound = false; if (vars.globalTerms != null) { for (Term termtmp : vars.globalTerms.getTerm()) { if (targetTermList.contains(termtmp)) { Iterator<Object> spansl = termtmp .getSentimentOrSpanOrExternalReferencesOrComponent().iterator(); while (spansl.hasNext()) { Object spantmp =; if (spantmp instanceof Span) { spanTermFound = true; for (Target targtmp : ((Span) spantmp).getTarget()) { wordsIDL.addLast((Wf) targtmp.getId()); } } } } } } else { Terms prop = vars.doc.getTerms(); for (Term termtmp : prop.getTerm()) { if (targetTermList.contains(termtmp)) { Iterator<Object> spansl = termtmp .getSentimentOrSpanOrExternalReferencesOrComponent() .iterator(); while (spansl.hasNext()) { Object spantmp =; if (spantmp instanceof Span) { spanTermFound = true; for (Target targtmp : ((Span) spantmp).getTarget()) { wordsIDL.addLast((Wf) targtmp.getId()); } } } } } } /* if (!spanTermFound) { logWarn("Inconsistence NAF file(#TS): Every term must contain a span element",vars); }*/ if (vars.globalText != null) { int found = 0; for (Wf wftmp : vars.globalText.getWf()) { if (wordsIDL.contains(wftmp)) { returned.addLast(wftmp); found++; } if (found >= wordsIDL.size()) { break; } } if (found < wordsIDL.size()) { logWarn("Inconsistence NAF file(#SW): Wf(s) arenot found when loading term ",vars); } } else { Text prop = vars.doc.getText(); int found = 0; for (Wf wftmp : prop.getWf()) { if (wordsIDL.contains(wftmp)) { returned.addLast(wftmp); found++; } if (found >= wordsIDL.size()) { break; } } if (found < wordsIDL.size()) { logWarn("Inconsistence NAF file(#SW): Wf(s) arenot found when loading term ",vars); } } return returned; } private static LinkedList<Wf> fromSpanGetAllMentions(List<Target> list,processNAFVariables vars) { LinkedList<Term> targetTermList = new LinkedList<Term>(); Iterator<Target> targetList = list.iterator(); while (targetList.hasNext()) { Target tarm =; targetTermList.add((Term)tarm.getId()); } LinkedList<Wf> corrispondingWf = 
getTheWFListByThereTermsFromTargetList(targetTermList,vars); return corrispondingWf; } public static void readNAFFile(File naf,processNAFVariables vars) { try { JAXBContext jc = JAXBContext.newInstance("eu.fbk.knowledgestore.populator.naf.model"); Unmarshaller unmarshaller = jc.createUnmarshaller(); byte[] bytes = ByteStreams.toByteArray(; vars.doc = (NAF) unmarshaller.unmarshal(new ByteArrayInputStream(bytes)); } catch (UnsupportedEncodingException e) { logError(e.getMessage(),vars); } catch (FileNotFoundException e) { logError(e.getMessage(),vars); } catch (IOException e) { logError(e.getMessage(),vars); } catch (JAXBException e) { logError(e.getMessage(),vars); } } static void calculateMemory() { int mb = 1024 * 1024; // Getting the runtime reference from system Runtime runtime = Runtime.getRuntime(); System.err.println("##### Heap utilization statistics [MB] #####"); // Print used memory System.err.println("Used Memory:" + (runtime.totalMemory() - runtime.freeMemory()) / mb); // Print free memory System.err.println("Free Memory:" + runtime.freeMemory() / mb); // Print total available memory System.err.println("Total Memory:" + runtime.totalMemory() / mb); // Print Maximum available memory System.err.println("Max Memory:" + runtime.maxMemory() / mb); } }