package com.manuelmaly.hn.parser;
import com.manuelmaly.hn.App;
import com.manuelmaly.hn.Settings;
import com.manuelmaly.hn.model.HNFeed;
import com.manuelmaly.hn.model.HNPost;
import com.manuelmaly.hn.util.HNHelper;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
public class HNFeedParser extends BaseHTMLParser<HNFeed> {
@Override
public HNFeed parseDocument(Element doc) throws Exception {
if (doc == null)
return new HNFeed();
String currentUser = Settings.getUserName(App.getInstance());
ArrayList<HNPost> posts = new ArrayList<HNPost>();
// clumsy, but hopefully stable query - first element retrieved is the
// top table, we have to skip that:
Elements tableRows = doc.select("table tr table tr");
tableRows.remove(0);
Elements nextPageURLElements = tableRows.select("a:matches(^More$)");
// In case there are multiple "More" elements, select only the one which is a relative link:
if (nextPageURLElements.size() > 1) {
nextPageURLElements = nextPageURLElements.select("a[href^=/]");
}
String nextPageURL = null;
if (nextPageURLElements.size() > 0)
nextPageURL = HNHelper.resolveRelativeHNURL(nextPageURLElements.attr("href"));
String url = null;
String title = null;
String author = null;
int commentsCount = 0;
int points = 0;
String urlDomain = null;
String postID = null;
String upvoteURL = null;
boolean endParsing = false;
for (int row = 0; row < tableRows.size(); row++) {
int rowInPost = row % 3;
Element rowElement = tableRows.get(row);
switch (rowInPost) {
case 0:
Element e1 = rowElement.select("tr > td:eq(2) > a").first();
if (e1 == null) {
endParsing = true;
break;
}
title = e1.text();
url = HNHelper.resolveRelativeHNURL(e1.attr("href"));
urlDomain = getDomainName(url);
Element e4 = rowElement.select("tr > td:eq(1) a").first();
if (e4 != null) {
upvoteURL = e4.attr("href");
if (!upvoteURL.contains("auth=")) // HN changed authentication
upvoteURL = null;
else
upvoteURL = HNHelper.resolveRelativeHNURL(upvoteURL);
}
break;
case 1:
points = getIntValueFollowedBySuffix(rowElement.select("tr > td:eq(1) > span").text(), "p");
author = rowElement.select("tr > td:eq(1) > a[href*=user]").text();
Element e2 = rowElement.select("tr > td:eq(1) > a[href*=item]").last(); // assuming the the last link is the comments link
if (e2 != null) {
commentsCount = getIntValueFollowedBySuffix(e2.text(), "c");
if (commentsCount == BaseHTMLParser.UNDEFINED && e2.text().contains("discuss"))
commentsCount = 0;
postID = getStringValuePrefixedByPrefix(e2.attr("href"), "id=");
}
else
commentsCount = BaseHTMLParser.UNDEFINED;
posts.add(new HNPost(url, title, urlDomain, author, postID, commentsCount, points, upvoteURL));
break;
default:
break;
}
if (endParsing)
break;
}
return new HNFeed(posts, nextPageURL, Settings
.getUserName(App.getInstance()));
}
}