package uk.ac.shef.dcs.jate.feature; import no.uib.cipr.matrix.sparse.FlexCompRowMatrix; import no.uib.cipr.matrix.sparse.SparseVector; import org.apache.log4j.Logger; import java.util.*; /** * */ public class Cooccurrence extends AbstractFeature { private static Logger LOG = Logger.getLogger(Cooccurrence.class.getSimpleName()); protected FlexCompRowMatrix cooccurrence; protected Map<Integer, String> mapIdx2Term = new HashMap<>(); protected Map<String, Integer> mapTerm2Idx = new HashMap<>(); protected Map<Integer, String> mapIdx2RefTerm = new HashMap<>(); protected Map<String, Integer> mapRefTerm2Idx = new HashMap<>(); protected int termCounter =-1; protected int ctxTermCounter =-1; public Cooccurrence(int terms, int refTerms){ cooccurrence =new FlexCompRowMatrix(terms, refTerms); } void deduce(int rowIndex, int colIndex, int value){ double newValue = cooccurrence.get(rowIndex, colIndex); if(newValue==0) { LOG.debug(rowIndex + "|" + colIndex); LOG.debug(lookupTerm(rowIndex) + "|" + lookupRefTerm(colIndex)); } newValue-=value; newValue=newValue<0?0:newValue; cooccurrence.set(rowIndex, colIndex, newValue); } public Set<String> getTerms(){ return mapTerm2Idx.keySet(); } public Set<String> getRefTerms() {return mapRefTerm2Idx.keySet();} protected synchronized int lookupAndIndexTerm(String term){ Integer idx = mapTerm2Idx.get(term); if(idx==null) { termCounter++; mapIdx2Term.put(termCounter, term); mapTerm2Idx.put(term, termCounter); return termCounter; } return idx; } protected int lookupTerm(String term){ Integer index= mapTerm2Idx.get(term); if(index==null) { return -1; } return index; } protected synchronized int lookupAndIndexRefTerm(String refTerm){ Integer idx = mapRefTerm2Idx.get(refTerm); if(idx==null) { ctxTermCounter++; mapIdx2RefTerm.put(ctxTermCounter, refTerm); mapRefTerm2Idx.put(refTerm, ctxTermCounter); return ctxTermCounter; } return idx; } protected int lookupRefTerm(String refTerm){ Integer index= mapRefTerm2Idx.get(refTerm); if(index==null) { return -1; } return index; } protected synchronized void increment(int termIdx, int refTermIdx, int freq){ //try { double newFreq = cooccurrence.get(termIdx, refTermIdx) + freq; cooccurrence.set(termIdx, refTermIdx, newFreq); /*}catch (Exception e){ System.err.println("rows="+cooccurrence.numRows()+", columns="+cooccurrence.numColumns()+". row="+termIdx+"" + " column="+refTermIdx); e.printStackTrace(); System.err.println("\n\n\nTrying again, "); System.out.println("value="+cooccurrence.get(termIdx, refTermIdx)); }*/ } public String lookupTerm(int index){ return mapIdx2Term.get(index); } public String lookupRefTerm(int index){return mapIdx2RefTerm.get(index);} /** * It is possible to have an invalid query term, usually created because of cross-sentence-boundary n-gram, * or phrases matched by POS patterns. In these cases, an empty map is returned * * @param term term string * @return Map the map of term index and co-ocurrence */ public Map<Integer, Integer> getCoocurrence(String term){ int termIdx= lookupTerm(term); if(termIdx==-1) return new HashMap<>(); return getCooccurrence(termIdx); } Map<Integer, Integer> getCooccurrence(int index){ Map<Integer, Integer> result = new HashMap<>(); SparseVector vec=cooccurrence.getRow(index); int[] nonZeroIndexes=vec.getIndex(); for(int i=0; i<nonZeroIndexes.length; i++){ int idx=nonZeroIndexes[i]; double v = cooccurrence.get(index,idx); result.put(idx, (int)v); } return result; } }