package joshua.util; import java.util.List; import java.util.Map; import joshua.corpus.vocab.SymbolTable; public class Ngram { public static void getNgrams(Map<String, Integer> tbl, int startOrder, int endOrder, final List<Integer> wrds){ getNgrams(null, tbl, startOrder, endOrder, wrds); } public static void getNgrams(Map<String, Integer> tbl, int startOrder, int endOrder, final int[] wrds){ getNgrams(null, tbl, startOrder, endOrder, wrds); } public static void getNgrams(Map<String, Integer> tbl, int startOrder, int endOrder, final String[] wrds){ getNgrams(null, tbl, startOrder, endOrder, wrds); } /**if symbolTbl!=null, then convert interger to String */ public static void getNgrams(SymbolTable symbolTbl, Map<String, Integer> tbl, int startOrder, int endOrder, final int[] wrds){ for(int i=0; i<wrds.length; i++) for(int j=startOrder-1; j<endOrder && j+i<wrds.length; j++){//ngram: [i,i+j] StringBuffer ngram = new StringBuffer(); for(int k=i; k<=i+j; k++){ int t_wrd = wrds[k]; if(symbolTbl!=null) ngram.append(symbolTbl.getWord(t_wrd)); else ngram.append(t_wrd); if(k<i+j) ngram.append(" "); } String ngramStr = ngram.toString(); increaseCount(tbl, ngramStr, 1); } } /**if symbolTbl!=null, then convert interger to String */ public static void getNgrams(SymbolTable symbolTbl, Map<String, Integer> tbl, int startOrder, int endOrder, final List<Integer> wrds){ for(int i=0; i<wrds.size(); i++) for(int j=startOrder-1; j<endOrder && j+i<wrds.size(); j++){//ngram: [i,i+j] StringBuffer ngram = new StringBuffer(); for(int k=i; k<=i+j; k++){ int t_wrd = wrds.get(k); if(symbolTbl!=null) ngram.append(symbolTbl.getWord(t_wrd)); else ngram.append(t_wrd); if(k<i+j) ngram.append(" "); } String ngramStr = ngram.toString(); increaseCount(tbl, ngramStr, 1); } } /**if symbolTbl!=null, then convert string to integer */ public static void getNgrams(SymbolTable symbolTbl, Map<String, Integer> tbl, int startOrder, int endOrder, final String[] wrds){ for(int i=0; i<wrds.length; i++) for(int j=startOrder-1; j<endOrder && j+i<wrds.length; j++){//ngram: [i,i+j] StringBuffer ngram = new StringBuffer(); for(int k=i; k<=i+j; k++){ String t_wrd = wrds[k]; if(symbolTbl!=null) ngram.append(symbolTbl.getID(t_wrd)); else ngram.append(t_wrd); if(k<i+j) ngram.append(" "); } String ngramStr = ngram.toString(); increaseCount(tbl, ngramStr, 1); } } static private void increaseCount(Map<String, Integer> tbl, String feat, int increment){ Integer oldCount = tbl.get(feat); if(oldCount!=null) tbl.put(feat, oldCount + increment); else tbl.put(feat, increment); } }