package com.ladjzero.hipda;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;

import java.util.Map;

/**
 * Parses forum HTML pages into {@link Post}/{@link Posts} objects using jsoup.
 *
 * <p>Covers four page shapes: a thread page ({@link #parse(String)}), the post
 * edit form ({@link #parseEditablePost(String)}), the notification feed
 * ({@link #parseMentions(String)}) and the private-message list
 * ({@link #parseMessagesToHtml(String)}).
 *
 * Created by chenzhuo on 16-2-11.
 */
public class PostsParser extends Parser {
    /** Prefix prepended to every relative image source and link target. */
    private static final String FORUM_BASE_URL = "http://www.hi-pda.com/forum/";

    /** Prefix of the {@code id} attribute on a post table; the numeric post id follows it. */
    private static final String POST_ID_PREFIX = "pid";

    /** Label that precedes the timestamp text inside {@code .authorinfo > em}. */
    private static final String TIME_PREFIX = "发表于";

    /**
     * Converts one post table (an element whose id starts with {@code pid}) into a {@link Post}.
     *
     * @param ePost the post element
     * @return the populated post
     * @throws NumberFormatException if the id suffix or the post index text is not an integer
     */
    private static Post toPostObj(Element ePost) {
        Post post = new Post();
        String id = ePost.attr("id").substring(POST_ID_PREFIX.length());

        // The message cells are renamed to <div> so the emitted HTML is not a bare <td>.
        Elements eBody = ePost.select("td.t_msgfont").tagName("div");
        if (eBody.size() != 0) {
            post.setBody(buildBodyHtml(ePost, eBody));
        } else {
            post.setBody("<div class=\"error\">作者被禁止或删除</div>");
        }

        String timeStr = ePost.select(".authorinfo > em").text();
        if (timeStr.startsWith(TIME_PREFIX)) {
            timeStr = timeStr.substring(TIME_PREFIX.length()).trim();
        }

        String postIndex = ePost.select("a[id^=postnum] > em").text().trim();

        post.setId(Integer.valueOf(id))
                .setAuthor(parseAuthor(ePost))
                .setTimeStr(timeStr)
                .setPostIndex(Integer.valueOf(postIndex));
        return post;
    }

    /**
     * Cleans up the message body of a post and returns it as HTML.
     *
     * <p>Marks the signature, drops inline attachment placeholders and download
     * boxes, appends attachment images and attachment names found in
     * {@code .postattachlist}, then normalizes images, links and inline styles.
     *
     * @param ePost the whole post element (searched for the attachment list)
     * @param eBody the non-empty set of message body elements
     * @return outer HTML of the cleaned body followed by the appended attachments
     */
    private static String buildBodyHtml(Element ePost, Elements eBody) {
        Element firstBody = eBody.get(0);
        replaceQuoteLink(firstBody);
        findSig(firstBody);

        Elements imgPlaceHolders = eBody.select("span[id^=attach_]");
        if (imgPlaceHolders.select("> img").size() > 0) {
            imgPlaceHolders.remove();
        }
        eBody.select("div.t_attach").remove();

        Elements newBody = new Elements();
        newBody.addAll(eBody);

        Elements attaches = ePost.select(".postattachlist");
        if (attaches.size() > 0) {
            newBody.addAll(attaches.select(".attachimg p>img"));
            newBody.addAll(attaches.select(".attachname"));
        }

        // hipda wraps an a element around image.
        for (Element a : newBody.select("a[href=javascript:;]")) {
            a.tagName("span");
        }

        normalizeImages(newBody);
        absolutizeLinks(newBody);

        for (Element styled : newBody.select("[style]")) {
            styled.removeAttr("style");
        }
        return newBody.outerHtml();
    }

    /**
     * Rewrites every {@code img} under {@code body}: prefers the {@code file}
     * attribute over {@code src} as the image source, tags non-icon images with
     * the {@code content-image} class, strips sizing and event attributes, and
     * makes the source absolute.
     */
    private static void normalizeImages(Elements body) {
        for (Element img : body.select("img")) {
            String src = img.attr("file");
            if (src.length() == 0) {
                src = img.attr("src");
            }

            // Smilies, the two listed placeholder gifs and 16px-wide images are not content.
            boolean isContentImage = !src.contains("images/smilies/")
                    && !src.endsWith("common/back.gif")
                    && !src.endsWith("default/attachimg.gif")
                    && !img.attr("width").equals("16");
            if (isContentImage) {
                img.addClass("content-image");
            }

            img.removeAttr("file")
                    .removeAttr("width")
                    .removeAttr("height")
                    .removeAttr("onclick")
                    .removeAttr("onload")
                    .removeAttr("onmouseover");

            img.attr("src", src.startsWith("http") ? src : FORUM_BASE_URL + src);
        }
    }

    /** Prefixes every {@code a[href]} that does not start with {@code http} with the forum base URL. */
    private static void absolutizeLinks(Elements body) {
        for (Element a : body.select("a")) {
            String href = a.attr("href");
            if (!href.startsWith("http")) {
                a.attr("href", FORUM_BASE_URL + href);
            }
        }
    }

    /**
     * Reads the author from the first {@code space.php?uid=} link of the post.
     *
     * @return the author, or a user with id 0 and an empty name when the link
     *         is missing or its uid cannot be parsed
     */
    private static User parseAuthor(Element ePost) {
        User user = new User().setId(0).setName("");
        try {
            Element eUinfo = ePost.select("a[href^=space.php?uid=]").get(0);
            String userId = Utils.getUriQueryParameter(eUinfo.attr("href")).get("uid");
            String userName = eUinfo.text();
            user.setId(Integer.valueOf(userId)).setName(userName);
        } catch (Exception ignored) {
            // Best effort: keep the anonymous placeholder when no author link can be read.
        }
        return user;
    }

    /**
     * Replaces the content of the first "find post" link inside a quote block
     * with a short text label.
     */
    private static void replaceQuoteLink(Element eBody) {
        Elements quoteArrowIcon = eBody.select("blockquote > font[size=2] > a[href^=http://www.hi-pda.com/forum/redirect.php?goto=findpost]");
        if (quoteArrowIcon.size() > 0) {
            quoteArrowIcon.get(0).html("查看");
        }
    }

    /**
     * Tags the last child of the body with signature CSS classes when it
     * matches one of the recognized shapes:
     * <ul>
     *   <li>{@code <font size="1">}: {@code sig} plus {@code sig-uzlee} if it
     *       matches {@code font[color=Gray]}, otherwise {@code sig-ios};</li>
     *   <li>{@code <a>} containing {@code font[size=1]}: {@code sig sig-android};</li>
     *   <li>a 16x16 {@code <img>}: moved into a new {@code <span class="sig sig-wp">}
     *       appended to the body.</li>
     * </ul>
     */
    private static void findSig(Element eBody) {
        Elements children = eBody.children();
        if (children.size() == 0) {
            return;
        }

        Element lastChild = children.last();
        String tag = lastChild.tagName();

        if (tag.equalsIgnoreCase("FONT") && lastChild.attr("size").equals("1")) {
            lastChild.addClass("sig");
            if (lastChild.select("font[color=Gray]").size() > 0) {
                lastChild.addClass("sig-uzlee");
            } else {
                lastChild.addClass("sig-ios");
            }
        } else if (tag.equalsIgnoreCase("A") && lastChild.select("font[size=1]").size() > 0) {
            lastChild.addClass("sig").addClass("sig-android");
        } else if (tag.equalsIgnoreCase("IMG")
                && lastChild.attr("width").equals("16")
                && lastChild.attr("height").equals("16")) {
            // Wrap the bare icon so the signature classes have an element to live on.
            Element wrapper = new Element(Tag.valueOf("span"), lastChild.baseUri());
            lastChild.remove();
            wrapper.appendChild(lastChild);
            wrapper.addClass("sig").addClass("sig-wp");
            eBody.appendChild(wrapper);
        }
    }

    /**
     * Reads the current value of {@code #subject} and the text of
     * {@code #e_textarea} from an edit-post page.
     *
     * @param html raw page HTML
     * @return a post carrying only the title and body
     */
    public Post parseEditablePost(String html) {
        Document doc = getDoc(html, new Response.Meta());
        String title = doc.select("#subject").val();
        String editBody = doc.select("#e_textarea").text();
        return new Post().setTitle(title).setBody(editBody);
    }

    /**
     * Parses a thread page into its posts plus paging/forum metadata.
     *
     * <p>The forum id and title are taken from the last link in {@code #nav}
     * and the node following it. When that link is absent the forum id stays
     * {@code -1} and the title is empty.
     *
     * @param html raw page HTML
     * @return a response marked successful, whose data is the parsed {@link Posts}
     * @throws NumberFormatException if a post id, a post index or the current
     *         page marker is not an integer
     */
    public Response parse(String html) {
        Posts posts = new Posts();
        Response.Meta resMeta = new Response.Meta();
        Document doc = getDoc(html, resMeta);

        int fid = -1;
        String title = "";
        Element eFid = doc.select("#nav a").last();
        if (eFid != null) {
            try {
                fid = Integer.parseInt(Utils.getUriQueryParameter(eFid.attr("href")).get("fid"));
            } catch (Exception e) {
                e.printStackTrace();
            }
            if (eFid.nextSibling() != null) {
                title = eFid.nextSibling().toString().replace(" » ", "");
            }
        }

        int totalPage = parseTotalPage(doc);

        for (Element ePost : doc.select("table[id^=pid]")) {
            posts.add(toPostObj(ePost));
        }

        int currPage = parseCurrentPage(doc);

        Posts.Meta meta = posts.getMeta();
        meta.setHasNextPage(hasPageLink(doc, currPage + 1));
        meta.setPage(currPage);
        // The pager may not expose the last page; never report fewer pages than the current one.
        meta.setTotalPage(Math.max(totalPage, currPage));
        meta.setFid(fid);
        meta.setTitle(title);

        Response res = new Response();
        res.setData(posts);
        res.setMeta(resMeta);
        res.setSuccess(true);
        return res;
    }

    /**
     * Derives the total page count from the pager links.
     *
     * <p>Only evaluated when exactly two {@code div.pages} blocks exist
     * (presumably the top and bottom pagers; confirm against real pages).
     * Uses the {@code a.last} link when present, otherwise the last link that
     * is not the "next" link.
     *
     * @return the parsed page count, or 1 when it cannot be determined
     */
    private static int parseTotalPage(Document doc) {
        Elements pages = doc.select("div.pages");
        if (pages.size() != 2) {
            return 1;
        }

        Elements lastPage = pages.select("a.last");
        if (lastPage.size() > 0) {
            return pageParam(lastPage.attr("href"), 1);
        }

        lastPage = pages.select("a:not(.next)");
        if (lastPage.size() > 0) {
            return pageParam(lastPage.last().attr("href"), 1);
        }
        return 1;
    }

    /** Returns the integer {@code page} query parameter of {@code href}, or {@code fallback} if unreadable. */
    private static int pageParam(String href, int fallback) {
        try {
            return Integer.parseInt(Utils.getUriQueryParameter(href).get("page"));
        } catch (Exception ignored) {
            // Best effort: a missing or malformed page parameter falls back to the default.
            return fallback;
        }
    }

    /**
     * Reads the current page number from {@code div.pages > strong}.
     *
     * @return the page number, or 1 when the page has no such element
     * @throws NumberFormatException if the element text is not an integer
     */
    private static int parseCurrentPage(Document doc) {
        Elements page = doc.select("div.pages > strong");
        return page.size() > 0 ? Integer.parseInt(page.first().text()) : 1;
    }

    /** Returns whether the pager contains a link whose href ends with {@code &page=<page>}. */
    private static boolean hasPageLink(Document doc, int page) {
        return doc.select("div.pages > a[href$=&page=" + page + "]").size() > 0;
    }

    /**
     * Parses the notification feed into posts.
     *
     * <p>Any exception stops parsing; its stack trace is printed and whatever
     * was collected up to that point is returned.
     *
     * @param html raw page HTML
     * @return the parsed notices; paging meta is filled in only when the whole page parsed
     */
    public Posts parseMentions(String html) {
        Posts mentions = new Posts();

        try {
            Document doc = getDoc(html, new Response.Meta());

            for (Element eNotice : doc.select("ul.feed > li.s_clear > div")) {
                Elements eSummary = eNotice.select(">dl.summary");
                String title;
                String body;
                String fid = "0";
                String findPostLink;

                if (eSummary.size() > 0) {
                    findPostLink = eNotice.select(">p>a").last().attr("href");
                    title = eNotice.select(">a").last().text();
                    body = eSummary.select("dt").last().text() + eSummary.select("dd").last().text();

                    String viewPostLink = eNotice.select(">p>a").first().attr("href");
                    // NOTE(review): the forum id is filled from the "tid" parameter - looks
                    // suspicious but is kept as-is; confirm against the callers.
                    fid = Utils.getUriQueryParameter(viewPostLink).get("tid");
                } else {
                    // thread watched on
                    // Links and decorations are stripped one by one so the remaining text is the body.
                    Element lastA = eNotice.select(">a").last();
                    findPostLink = lastA.attr("href");
                    lastA.remove();

                    lastA = eNotice.select(">a").last();
                    title = lastA.text();
                    lastA.remove();

                    eNotice.select(">em").last().remove();
                    eNotice.select("dfn").remove();
                    body = eNotice.text();
                }

                Map<String, String> params = Utils.getUriQueryParameter(findPostLink);
                String tid = params.get("ptid");
                String pid = params.get("pid");

                // Missing ids default to 0 so a single incomplete link does not abort the whole feed.
                if (tid == null) {
                    tid = "0";
                }
                if (pid == null) {
                    pid = "0";
                }
                if (fid == null) {
                    fid = "0";
                }

                Post post = new Post().setId(Integer.valueOf(pid))
                        .setTid(Integer.valueOf(tid))
                        .setFid(Integer.valueOf(fid))
                        .setTitle(title).setBody(body);
                mentions.add(post);
            }

            int currPage = parseCurrentPage(doc);

            // TO-DO
            Posts.Meta meta = mentions.getMeta();
            meta.setHasNextPage(hasPageLink(doc, currPage + 1));
            meta.setPage(currPage);
        } catch (Exception e) {
            e.printStackTrace();
        }

        return mentions;
    }

    /**
     * Extracts the private-message list as HTML, switching avatar image
     * sources from the {@code _avatar_small} to the {@code _avatar_middle} variant.
     *
     * @param html raw page HTML
     * @return outer HTML of the first {@code #pmlist > .pm_list} element, or an
     *         empty string when the page has none
     */
    public String parseMessagesToHtml(String html) {
        Document doc = getDoc(html, new Response.Meta());
        Element ePosts = doc.select("#pmlist > .pm_list").first();

        // Must be checked before any use: first() yields null when nothing matched.
        if (ePosts == null) {
            return "";
        }

        for (Element avatar : ePosts.select("a.avatar > img")) {
            avatar.attr("src", avatar.attr("src").replace("_avatar_small", "_avatar_middle"));
        }
        return ePosts.outerHtml();
    }
}