/**
*
*/
package nicetext;
//import crow.utils.WebUtils;
/**
 * Manual driver for {@link NiceText}: passes a fixed list of article URLs to
 * {@link NiceText#extract} and prints each result to standard output as a
 * single line of period-terminated fragments.
 *
 * @author vikasing
 */
public class TestNiceText {

    /** Article URLs handed to the extractor, in the order they are processed. */
    private static final String[] URLS = new String[]{
        "http://www.scientificamerican.com/article/common-parasite-could-manipulate-our-behavior/",
        "http://www.npr.org/2015/03/27/395593337/twist-of-fate-an-accident-brings-beautiful-symmetry-to-two-lives",
        "http://www.oneindia.com/sports/cricket/bangladesh-captain-speaks-on-aleem-dar-s-no-ball-error-at-world-cup-1689394.html",
        "http://www.chinadaily.com.cn/business/2015-03/20/content_19862174.htm",
        "http://www.vox.com/2015/3/16/8225977/dick-vitale-talking",
        "http://economictimes.indiatimes.com/news/politics-and-nation/supreme-court-extends-interim-bail-to-teesta-setalvad-and-husband-javed-anand/articleshow/46628427.cms",
        "http://www.theaustralian.com.au/business/latest/rba-awaits-data-before-more-easing/story-e6frg90f-1227265988415",
        "http://www.sfchronicle.com/crime/article/Man-shot-to-death-by-Napa-police-6134255.php",
        "http://www.cio.com.au/whitepaper/372445/tintri-vmstore-application-aware-storage/?type=section&arg=51236&location=rhs_featured_whitepaper",
        "http://www.newsday.com/business/msg-president-and-ceo-resigns-tad-smith-takes-same-roles-at-sotheby-s-1.10067686#disqus_thread",
        "http://www.theaustralian.com.au/business/news/asic-puts-payday-lenders-on-notice/story-e6frg906-1227265934107",
        "http://www.njherald.com/story/28526788/10-things-to-know-for-today",
        "http://www.jpost.com/International/US-Senate-leader-Obama-on-cusp-of-very-bad-deal-with-Iran-393972",
        "http://www.ndtv.com/karnataka-news/karnataka-governor-walks-off-during-national-anthem-745652",
        "http://www.aninews.in/newsdetail2/story203457/will-bounce-back-after-two-three-years-congress.html",
        "http://edition.cnn.com/2015/03/10/world/afghanistan-violence/index.html",
        "http://economictimes.indiatimes.com/news/politics-and-nation/membership-drive-bjp-turns-to-mps-mlas-for-final-push-to-make-it-worlds-largest-party/articleshow/46449753.cms",
        "http://www.thehindu.com/news/national/andhra-pradesh/tap-aquaculture-potential-to-full/article6943174.ece?homepage=true",
        "http://www.denverpost.com/ci_27602128/charles-koch-working-business-book-scheduled-october",
        "http://www.irishtimes.com/news/world/secretive-bilderberg-group-sets-sights-on-michael-o-leary-1.2119343",
        "http://www.independent.co.uk/news/uk/crime/claudia-lawrence-father-of-missing-chef-says-it-is-dreadful-people-may-have-lied-to-police--as-officers-carry-out-search-of-alleyway-10069547.html",
        "http://www.theaustralian.com.au/national-affairs/health/two-children-tested-for-ebola-in-melbourne-hospital/story-fn59nokw-1227239685887",
        "http://www.thestar.com/business/2015/02/19/oil-slump-could-dip-inflation-into-the-negative-boc.html",
        "http://www.reuters.com/article/2015/02/18/us-health-obesity-idUSKBN0LM2E320150218",
        "http://www.ndtv.com/diaspora/us-lawmaker-tulsi-gabbard-to-marry-in-april-in-vedic-ceremony-740759?pfrom=home-diaspora",
        "http://www.thehindu.com/news/cities/Delhi/kejriwal-seeks-services-of-sanjeev-chaturvedi/article6905600.ece?ref=topnavwidget&utm_source=topnavdd&utm_medium=topnavdropdownwidget&utm_campaign=topnavdropdown",
        "http://www.deccanherald.com/content/458482/karnataka-man-seen-cctv-footage.html",
    };

    /**
     * Runs the extractor over every entry of {@link #URLS} and prints the
     * flattened text followed by a separator line.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Disabled experiment (kept for reference): crawl links from a web page.
        /*WebUtils htmlUtils = new WebUtils();
        Set<String> urlSet = htmlUtils.getLinksFromWebPage("https://news.ycombinator.com/news");
        for (String url : urlSet) {
        System.out.println(url+" +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
        String text = htmlHelper.getText(url).get();
        System.out.println(text);
        }*/
        NiceText niceText = new NTImpl();
        for (String url : URLS) {
            System.out.println(joinAsSentences(niceText.extract(url)));
            System.out.println("==================================");
        }
        // Disabled experiment (kept for reference): n-gram frequency dump.
        /*
        NGramExtracter nExtracter = new NGramExtracter();
        Map<String, SortedSet<Entry<String, Integer>>> nGramMap = nExtracter.extract(text);
        SortedSet<Entry<String, Integer>> bigrams = nGramMap.get("bi");
        for (Entry<String, Integer> entry : bigrams) {
        if (entry.getValue()>1) {
        System.out.println(entry.getKey() +" "+entry.getValue());
        }
        }
        SortedSet<Entry<String, Integer>> trigrams = nGramMap.get("tri");
        for (Entry<String, Integer> entry : trigrams) {
        if (entry.getValue()>1) {
        System.out.println(entry.getKey() +" "+entry.getValue());
        }
        }
        SortedSet<Entry<String, Integer>> monograms = nGramMap.get("mono");
        for (Entry<String, Integer> entry : monograms) {
        if (entry.getValue()>1) {
        System.out.println(entry.getKey() +" "+entry.getValue());
        }
        }*/
    }

    /**
     * Flattens newline-separated text into one line in which every non-blank
     * fragment ends with a period and a single space.
     *
     * @param text the text to flatten, split on {@code "\n"}
     * @return the trimmed, non-blank lines concatenated in order, each followed
     *         by {@code " "} when it already ends with {@code '.'} and by
     *         {@code ". "} otherwise
     */
    private static String joinAsSentences(String text) {
        StringBuilder joined = new StringBuilder();
        for (String line : text.split("\n")) {
            String fragment = line.trim();
            // split() keeps interior empty strings (and yields [""] for empty
            // input); indexing the last character of such a line would throw
            // StringIndexOutOfBoundsException, so blank lines are skipped.
            if (fragment.isEmpty()) {
                continue;
            }
            joined.append(fragment).append(fragment.endsWith(".") ? " " : ". ");
        }
        return joined.toString();
    }
}