package co.nubetech.hiho.testdata;

import java.util.ArrayList;
import java.util.HashSet;

import org.apache.hadoop.io.Text;

/**
 * Builds word-level n-grams from a line of text.
 *
 * <p>Words are obtained by splitting the line on single space characters, and
 * each n-gram is {@code gramSize} consecutive words joined by a single space.
 */
public class NGramLogic {

	/**
	 * Demo entry point: computes (and thereby prints) the bigrams of the
	 * sample sentence "This is a book".
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		new NGramLogic().getNGrams(new Text("This is a book"), 2);
	}

	/**
	 * Returns the distinct n-grams of {@code line} and prints each of them on
	 * its own line to standard output.
	 *
	 * <p>The line is split on the single space character, so consecutive
	 * spaces produce empty words that take part in the n-grams like any other
	 * word. If the line has fewer than {@code gramSize} words, or
	 * {@code gramSize} is not positive, the result is empty and nothing is
	 * printed.
	 *
	 * @param line     the text to tokenize; must not be {@code null}
	 * @param gramSize the number of consecutive words per n-gram
	 * @return a new set holding every distinct n-gram, in no particular order
	 */
	public HashSet<String> getNGrams(Text line, int gramSize) {
		HashSet<String> nGrams = new HashSet<String>();

		// A non-positive size has no meaningful n-grams; without this guard the
		// window loop below would emit a single empty-string "n-gram".
		if (gramSize <= 0) {
			return nGrams;
		}

		String[] tokens = line.toString().split(" ");

		// Slide a window of gramSize words across the tokens.
		for (int start = 0; start + gramSize <= tokens.length; start++) {
			StringBuilder key = new StringBuilder(tokens[start]);
			for (int j = start + 1; j < start + gramSize; j++) {
				key.append(' ').append(tokens[j]);
			}
			nGrams.add(key.toString());
		}

		// main() relies on this output to display the result.
		for (String ngram : nGrams) {
			System.out.println(ngram);
		}
		return nGrams;
	}

	/**
	 * Does nothing.
	 */
	public void end() {
	}
}