package tk.djcrazy.libCC98; import android.text.Html; import android.util.Log; import com.google.inject.Inject; import com.google.inject.Singleton; import ch.boye.httpclientandroidlib.ParseException; import ch.boye.httpclientandroidlib.client.ClientProtocolException; import java.io.IOException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import tk.djcrazy.libCC98.data.BoardEntity; import tk.djcrazy.libCC98.data.BoardStatus; import tk.djcrazy.libCC98.data.Gender; import tk.djcrazy.libCC98.data.HotTopicEntity; import tk.djcrazy.libCC98.data.InboxInfo; import tk.djcrazy.libCC98.data.LoginType; import tk.djcrazy.libCC98.data.PmInfo; import tk.djcrazy.libCC98.data.PostContentEntity; import tk.djcrazy.libCC98.data.PostEntity; import tk.djcrazy.libCC98.data.SearchResultEntity; import tk.djcrazy.libCC98.data.UserProfileEntity; import tk.djcrazy.libCC98.data.UserStatue; import tk.djcrazy.libCC98.data.UserStatueEntity; import tk.djcrazy.libCC98.exception.NoUserFoundException; import tk.djcrazy.libCC98.exception.ParseContentException; import tk.djcrazy.libCC98.util.DateFormatUtil; import tk.djcrazy.libCC98.util.RegexUtil; import tk.djcrazy.libCC98.util.StringUtil; import static tk.djcrazy.libCC98.CC98ParseRepository.HOT_TOPIC_BOARD_ID_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.HOT_TOPIC_BOARD_NAME_WITH_AUTHOR_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.HOT_TOPIC_CLICK_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.HOT_TOPIC_ID_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.HOT_TOPIC_NAME_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.HOT_TOPIC_POST_TIME_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.HOT_TOPIC_WRAPPER; import static tk.djcrazy.libCC98.CC98ParseRepository.NEW_TOPIC_AUTHOR_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.NEW_TOPIC_BOARD_ID; import static tk.djcrazy.libCC98.CC98ParseRepository.NEW_TOPIC_FACE_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.NEW_TOPIC_ID_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.NEW_TOPIC_TIME; import static tk.djcrazy.libCC98.CC98ParseRepository.NEW_TOPIC_TITLE_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.NEW_TOPIC_TOTAL_POST; import static tk.djcrazy.libCC98.CC98ParseRepository.NEW_TOPIC_WRAPPER_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_CONTENT_GENDER_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_CONTENT_INFO_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_CONTENT_POST_CONTENT_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_CONTENT_POST_FACE_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_CONTENT_POST_TIME_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_CONTENT_POST_TITLE_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_CONTENT_USERNAME_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_CONTENT_USER_AVATAR_LINK_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_CONTENT_WHOLE_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_LIST_LAST_REPLY_AUTHOR_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_LIST_LAST_REPLY_TIME_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_LIST_POST_AUTHOR_NAME_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_LIST_POST_BOARD_ID_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_LIST_POST_ENTITY_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_LIST_POST_ID_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_LIST_POST_NAME_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_LIST_POST_TYPE_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.POST_LIST_REPLY_NUM_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.P_BOARD_ID_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.P_BOARD_LAST_REPLY_AUTHOR_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.P_BOARD_LAST_REPLY_TIME_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.P_BOARD_LAST_REPLY_TOPIC_ID_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.P_BOARD_LAST_REPLY_TOPIC_NAME_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.P_BOARD_NAME_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.P_BOARD_POST_NUMBER_TODAY; import static tk.djcrazy.libCC98.CC98ParseRepository.P_BOARD_SINGLE_BOARD_WRAPPER_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.TODAY_BOARD_ENTITY_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.TODAY_BOARD_ID_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.TODAY_BOARD_NAME_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.TODAY_BOARD_TOPIC_NUM_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.TODAY_POST_NUMBER_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.USER_PROFILE_AVATAR_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.USER_PROFILE_GENERAL_PROFILE_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.USER_PROFILE_ONLINE_INFO_REGEX; import static tk.djcrazy.libCC98.CC98ParseRepository.USER_PROFILE_PERSON_PROFILE_REGEX; import static tk.djcrazy.libCC98.util.DateFormatUtil.convertStringToDateInPostContent; import static tk.djcrazy.libCC98.util.RegexUtil.getMatchedString; import static tk.djcrazy.libCC98.util.RegexUtil.getMatchedStringList; import static tk.djcrazy.libCC98.util.StringUtil.filterHtmlDecode; @Singleton public class NewCC98Parser { private static final String TAG = "CC98ParserImpl"; @Inject private ICC98UrlManager cc98UrlManager; /** * Parse the HTML, and put posts and their poster id in a list of * NameValuePair * * @param html * String (must not be null) * @return A list of the pair of poster id and their posts * @throws tk.djcrazy.libCC98.exception.ParseContentException * @throws java.text.ParseException */ public List<PostContentEntity> parsePostContentList(String html) throws ParseContentException, java.text.ParseException { List<PostContentEntity> list = new ArrayList<PostContentEntity>(); // get some information of the topic List<String> postInfoList = getMatchedStringList(POST_CONTENT_INFO_REGEX, html, 3); PostContentEntity postInfoEntity = new PostContentEntity(); postInfoEntity.setPostTopic(filterHtmlDecode(postInfoList.get(0))); postInfoEntity.setBoardName(Html.fromHtml(postInfoList.get(1)).toString()); postInfoEntity.setTotalPage((int) Math.ceil(Integer.parseInt(postInfoList.get(2)) / 10.0)); list.add(postInfoEntity); // get each reply info List<String> contentHtml = getMatchedStringList(POST_CONTENT_WHOLE_REGEX, html, -1); for (String reply : contentHtml) { PostContentEntity entity = new PostContentEntity(); try { entity.setUserName(Html.fromHtml(getMatchedString(POST_CONTENT_USERNAME_REGEX, reply)) .toString()); entity.setPostContent(getMatchedString(POST_CONTENT_POST_CONTENT_REGEX, reply)); entity.setPostTitle(getMatchedString(POST_CONTENT_POST_TITLE_REGEX, reply)); entity.setPostFace(getMatchedString(POST_CONTENT_POST_FACE_REGEX, reply)); entity.setPostTime(convertStringToDateInPostContent(getMatchedString( POST_CONTENT_POST_TIME_REGEX, reply))); } catch (Exception e) { e.printStackTrace(); } try { String avatarLink = getMatchedString(POST_CONTENT_USER_AVATAR_LINK_REGEX, reply); if (!avatarLink.contains("http://")) { avatarLink = cc98UrlManager.getClientUrl() + avatarLink; } entity.setUserAvatarLink(avatarLink); } catch (ParseContentException e) { entity.setUserAvatarLink("file:///android_asset/pic/no_avatar.jpg"); } { String sex = getMatchedString(POST_CONTENT_GENDER_REGEX, reply); if ("Male".equals(sex)) { entity.setGender(Gender.MALE); } else { entity.setGender(Gender.FEMALE); } } list.add(entity); } return list; } /** * Parse the HTML to obtain posts names and their URL. * * @param html * @return * @throws tk.djcrazy.libCC98.exception.ParseContentException * @throws java.text.ParseException */ public List<PostEntity> parsePostList(String html) throws ParseContentException, java.text.ParseException { List<PostEntity> list = new ArrayList<PostEntity>(); List<String> contentList = getMatchedStringList(POST_LIST_POST_ENTITY_REGEX, html, -1); for (String post : contentList) { PostEntity entity = new PostEntity(); entity.setPostName(Html.fromHtml(getMatchedString(POST_LIST_POST_NAME_REGEX, post)) .toString()); entity.setPostType(getMatchedString(POST_LIST_POST_TYPE_REGEX, post)); entity.setReplyNumber(getMatchedString(POST_LIST_REPLY_NUM_REGEX, post).replaceAll( "<.*?>", "")); entity.setPostAuthorName(Html.fromHtml( getMatchedString(POST_LIST_POST_AUTHOR_NAME_REGEX, post)).toString()); entity.setLastReplyAuthor(Html.fromHtml( getMatchedString(POST_LIST_LAST_REPLY_AUTHOR_REGEX, post)).toString()); entity.setLastReplyTime(DateFormatUtil.convertStringToDateInPostList(getMatchedString( POST_LIST_LAST_REPLY_TIME_REGEX, post))); entity.setPostId(getMatchedString(POST_LIST_POST_ID_REGEX, post)); entity.setBoardId(getMatchedString(POST_LIST_POST_BOARD_ID_REGEX, post)); list.add(entity); } return list; } /** * * @param html * @return * @throws tk.djcrazy.libCC98.exception.ParseContentException * @throws java.text.ParseException */ public List<BoardEntity> parsePersonalBoardList(String html) throws ParseContentException, java.text.ParseException { List<BoardEntity> nList = new ArrayList<BoardEntity>(); String boardinfo = html; List<String> board = getMatchedStringList(P_BOARD_SINGLE_BOARD_WRAPPER_REGEX, boardinfo, 0); for (String string : board) { BoardEntity entity = new BoardEntity(); entity.setBoardID(getMatchedString(P_BOARD_ID_REGEX, string)); try { entity.setChildBoardNumber(Integer.parseInt(getMatchedString(CC98ParseRepository.P_IS_PARENT_BOARD_REGEX, string))); } catch (ParseContentException e) { entity.setChildBoardNumber(0); } try { entity.setPostNumberToday(Integer.parseInt(getMatchedString( P_BOARD_POST_NUMBER_TODAY, string))); entity.setBoardName(Html.fromHtml(getMatchedString(P_BOARD_NAME_REGEX, string)) .toString()); entity.setLastReplyBoardId(getMatchedString(CC98ParseRepository.P_BOARD_LAST_REPLY_BOARDID_REGEX, string)); entity.setLastReplyAuthor(Html.fromHtml( getMatchedString(P_BOARD_LAST_REPLY_AUTHOR_REGEX, string)).toString()); entity.setLastReplyTime(DateFormatUtil.convertStrToDateInPBoard(getMatchedString( P_BOARD_LAST_REPLY_TIME_REGEX, string))); entity.setLastReplyTopicID(getMatchedString(P_BOARD_LAST_REPLY_TOPIC_ID_REGEX, string)); entity.setLastReplyTopicName(Html.fromHtml( getMatchedString(P_BOARD_LAST_REPLY_TOPIC_NAME_REGEX, string)).toString()); } catch (Exception e) { e.printStackTrace(); } nList.add(entity); } return nList; } /** * * @param html * @return * @throws tk.djcrazy.libCC98.exception.ParseContentException * @throws java.text.ParseException */ public List<BoardEntity> parseBoardList(String html) throws ParseContentException, java.text.ParseException { List<BoardEntity> nList = new ArrayList<BoardEntity>(); String boardinfo = html; List<String> board = getMatchedStringList(CC98ParseRepository.LIST_BOARD_SINGLE_BOARD_WRAPPER_REGEX, boardinfo, 0); for (String string : board) { BoardEntity entity = new BoardEntity(); entity.setBoardID(getMatchedString(CC98ParseRepository.LIST_BOARD_ID_REGEX, string)); try { entity.setChildBoardNumber(Integer.parseInt(getMatchedString(CC98ParseRepository.LIST_IS_PARENT_BOARD_REGEX, string))); } catch (ParseContentException e) { entity.setChildBoardNumber(0); } try { entity.setBoardName(Html.fromHtml(getMatchedString(CC98ParseRepository.LIST_BOARD_NAME_REGEX, string).replace("<.*?>", "")) .toString()); entity.setLastReplyAuthor(Html.fromHtml( getMatchedString(CC98ParseRepository.LIST_BOARD_LAST_REPLY_AUTHOR_REGEX, string)).toString()); entity.setLastReplyBoardId(getMatchedString(CC98ParseRepository.LIST_BOARD_LAST_REPLY_BOARDID_REGEX, string)); entity.setLastReplyTime(DateFormatUtil.convertStrToDateInPBoard(getMatchedString( CC98ParseRepository.LIST_BOARD_LAST_REPLY_TIME_REGEX, string))); entity.setLastReplyTopicID(getMatchedString(CC98ParseRepository.LIST_BOARD_LAST_REPLY_TOPIC_ID_REGEX, string)); entity.setLastReplyTopicName(Html.fromHtml( getMatchedString(CC98ParseRepository.LIST_BOARD_LAST_REPLY_TOPIC_NAME_REGEX, string)).toString()); entity.setPostNumberToday(Integer.parseInt(getMatchedString( CC98ParseRepository.LIST_BOARD_POST_NUMBER_TODAY, string))); } catch (Exception e) { Log.e(NewCC98Parser.class.getSimpleName(), "parseBoardList failed", e); } nList.add(entity); } return nList; } public String parseUserAvatar(String html, LoginType loginType, String proxyHost) { try { String url = getMatchedString(USER_PROFILE_AVATAR_REGEX, html); if (!url.startsWith("http") && !url.startsWith("ftp")) { url = cc98UrlManager.getClientUrl(loginType, proxyHost) + url; } return url; } catch (Exception e) { return cc98UrlManager.getClientUrl(loginType, proxyHost) + "PresetFace/male_1.gif"; } } /** * @author DJ * @param html * @return * @throws tk.djcrazy.libCC98.exception.ParseContentException */ public UserProfileEntity parseUserProfile(String html) throws ParseContentException { UserProfileEntity entity = new UserProfileEntity(); // avatar link { String url = getMatchedString(USER_PROFILE_AVATAR_REGEX, html); if (!url.startsWith("http") && !url.startsWith("ftp")) { url = cc98UrlManager.getClientUrl() + url; } entity.setUserAvatarLink(url); } // general profile { String info = getMatchedString(USER_PROFILE_GENERAL_PROFILE_REGEX, html); String[] details = info.split("<br>"); entity.setUserNickName(details[0]); entity.setUserLevel(details[1]); entity.setUserGroup(details[2]); entity.setGoodPosts(details[3]); entity.setTotalPosts(details[4]); entity.setUserPrestige(details[5]); entity.setRegisterTime(details[6]); entity.setLoginTimes(details[7]); entity.setDeletedPosts(details[8]); entity.setDeletedRatio(details[9]); entity.setLastLoginTime(details[10]); } // personal profile try { String info = getMatchedString(USER_PROFILE_PERSON_PROFILE_REGEX, html); String[] details = info.split("<br>"); details[1] = details[1].replaceAll("<.*?>", ""); details[2] = details[2].replaceAll("<.*?>", ""); details[3] = details[3].replaceAll("<.*?>", ""); details[4] = details[4].replaceAll("<.*?>", ""); details[5] = details[5].replaceAll(" ", " "); details[5] = details[5].replaceAll("<.*?>", ""); details[6] = details[6].replaceAll("<.*?>", ""); Pattern pattern = Pattern.compile("(?<=alt=).*?座", Pattern.DOTALL); Matcher matcher = pattern.matcher(details[2]); if (matcher.find()) { details[2] = matcher.group(); } entity.setUserGender(details[0]); entity.setUserBirthday(details[1]); entity.setUserConstellation(details[2]); entity.setUserEmail(details[3]); entity.setUserQQ(details[4]); entity.setUserMSN(details[5]); entity.setUserPage(details[6]); } catch (Exception e) { e.printStackTrace(); } // bbs master info { String string = getMatchedString(USER_PROFILE_AVATAR_REGEX, html); string = string.replaceAll("\t|\n|\r|<br>| |<.*?>| ", ""); entity.setBbsMasterInfo(string); } // online status entity.setOnlineTime(getMatchedString(USER_PROFILE_ONLINE_INFO_REGEX, html)); return entity; } public List<HotTopicEntity> parseHotTopicList(String page) throws ParseContentException { List<HotTopicEntity> list = new ArrayList<HotTopicEntity>(); List<String> topicList = getMatchedStringList(HOT_TOPIC_WRAPPER, page, -1); for (String topic : topicList) { HotTopicEntity entity = new HotTopicEntity(); entity.setTopicName(filterHtmlDecode(getMatchedString(HOT_TOPIC_NAME_REGEX, topic))); entity.setPostId(getMatchedString(HOT_TOPIC_ID_REGEX, topic)); entity.setPostTime(getMatchedString(HOT_TOPIC_POST_TIME_REGEX, topic)); entity.setBoardId(getMatchedString(HOT_TOPIC_BOARD_ID_REGEX, topic)); // click number { List<String> numList = getMatchedStringList(HOT_TOPIC_CLICK_REGEX, topic, 3); entity.setFocusNumber(Integer.parseInt(numList.get(0))); entity.setReplyNumber(Integer.parseInt(numList.get(1))); entity.setClickNumber(Integer.parseInt(numList.get(2))); } // board name, author { List<String> bList = getMatchedStringList(HOT_TOPIC_BOARD_NAME_WITH_AUTHOR_REGEX, topic, -1); entity.setBoardName(bList.get(0)); if (bList.size() < 2) { entity.setPostAuthor("匿名"); } else { entity.setPostAuthor(bList.get(1)); } } list.add(entity); } return list; } /** * Store information of the msgs in a list * * @author zsy * @param html * The html of the inbox page * @return A list of PmInfo */ public InboxInfo parsePmList(String html) { InboxInfo info = new InboxInfo(); String regexString = ""; if (html.indexOf("已发送的消息") > 0) { regexString = "(?<=<img src=pic/m_)\\w+(?=\\.gif>)|(?<=target=_blank>)[^:]+(?=</a>)|(?<=\\s>).*?(?=</a></td>)|" + "(?<=<a href=\"messanger.asp\\?action=(read|outread)&id=)\\d+?(?=&sender)|(?<=target=_blank>).*?(?=</a></td>)"; } else { regexString = "(?<=<img src=pic/m_)\\w+(?=\\.gif>)|(?<=target=\"_blank\">)[^:]+(?=</a>)|(?<=\\s>).*?(?=</a></td>)|" + "(?<=<a href=\"messanger.asp\\?action=(read|outread)&id=)\\d+?(?=&sender)|(?<=target=_blank>).*?(?=</a></td>)|" + "(?<=gray;\">).+(?=</span>)"; } html = html.substring(html.indexOf("新</span>")); List<PmInfo> pmList = new ArrayList<PmInfo>(); Matcher m1 = Pattern.compile(regexString).matcher(html); getInboxList(pmList, m1); // Get total page number Pattern p2 = Pattern.compile("(?<=/<b>)\\d+(?=</b>页)"); Matcher m2 = p2.matcher(html); if (m2.find()) { // Get the total page number of the pm inbox. info.setTotalInPage(Integer.parseInt(m2.group())); } // Get total pm count Pattern p3 = Pattern.compile("(?<=总数<b>)\\d+(?=</b></td>)"); Matcher m3 = p3.matcher(html); if (m3.find()) { info.setTotalPmIn(Integer.parseInt(m3.group())); } info.setPmInfos(pmList); return info; } /** * Get a list of PmInfo. * * @author zsy * * @param pmList * @param m1 */ private void getInboxList(List<PmInfo> pmList, Matcher m1) { while (m1.find()) { String isNewString = m1.group(); boolean isNew = isNewString.equals("olds") || isNewString.equals("issend_1") ? false : true; m1.find(); String sender = m1.group(); m1.find(); String topic = m1.group(); m1.find(); int pmId = Integer.parseInt(m1.group()); m1.find(); String time = m1.group(); pmList.add(new PmInfo.Builder(pmId).fromWho(sender).topicTitle(topic).sendTime(time) .newTopic(isNew).userAvatar("").build()); } } /* * (non-Javadoc) * * @see tk.djcrazy.libCC98.ICC98Parser#parseQueryResult(java.lang.String) */ public List<SearchResultEntity> parseQueryResult(String html) throws ParseContentException, java.text.ParseException { List<SearchResultEntity> list = new ArrayList<SearchResultEntity>(); String totalPost; try { totalPost = getMatchedString(NEW_TOPIC_TOTAL_POST, html); } catch (Exception e) { e.printStackTrace(); totalPost = "0"; return list; } List<String> entityList = getMatchedStringList(NEW_TOPIC_WRAPPER_REGEX, html, -1); for (int i = 0; i < entityList.size(); i++) { String string = entityList.get(i); SearchResultEntity entity = new SearchResultEntity(); entity.setTitle(StringUtil.filterHtmlDecode(getMatchedString(NEW_TOPIC_TITLE_REGEX, string))); try { entity.setAuthorName(getMatchedString(NEW_TOPIC_AUTHOR_REGEX, string)); } catch (Exception e) { entity.setAuthorName("匿名"); } entity.setBoardId(getMatchedString(NEW_TOPIC_BOARD_ID, string)); entity.setFaceId(getMatchedString(NEW_TOPIC_FACE_REGEX, string)); entity.setPostTime(DateFormatUtil.convertStringToDateInQueryResult(getMatchedString( NEW_TOPIC_TIME, string).replaceAll(" ", " ").replaceAll("\n|\t|\r", " ") .trim())); entity.setTotalResult(totalPost); entity.setPostId(getMatchedString(NEW_TOPIC_ID_REGEX, string)); list.add(entity); } return list; } public List<UserStatueEntity> parseUserFriendList(String html) throws ParseException, IOException, ParseContentException { if (html == null) { throw new IllegalArgumentException("Null pointer!"); } List<UserStatueEntity> list = new ArrayList<UserStatueEntity>(); Pattern userRegexPattern = Pattern.compile(" <a href=dispuser\\.asp\\?name=.*?<br>", Pattern.DOTALL); Pattern userNamePattern = Pattern.compile("(?<= >).*?(?=</a>)", Pattern.DOTALL); Pattern userStatuePattern = Pattern.compile("(?<=\\[).*?(?=\\])", Pattern.DOTALL); Matcher matcher = userRegexPattern.matcher(html); while (matcher.find()) { String mString = matcher.group(); UserStatueEntity mEntity = new UserStatueEntity(); Matcher mMatcher = userNamePattern.matcher(mString); if (mMatcher.find()) { mEntity.setUserName(mMatcher.group()); } mMatcher = userStatuePattern.matcher(mString); if (mMatcher.find()) { String string = mMatcher.group().replaceAll("<.*?>", ""); if (string.contains("离线")) { mEntity.setStatue(UserStatue.OFF_LINE); } else { mEntity.setStatue(UserStatue.ON_LINE); mEntity.setOnlineTime(string); } } list.add(mEntity); } return list; } public List<BoardStatus> parseTodayBoardList(String content) throws ParseContentException { int postNum = Integer.parseInt(getMatchedString(TODAY_POST_NUMBER_REGEX, content)); List<BoardStatus> list = new ArrayList<BoardStatus>(); List<String> contentList = getMatchedStringList(TODAY_BOARD_ENTITY_REGEX, content, -1); for (int i = 0; i < contentList.size(); i++) { String string = contentList.get(i); BoardStatus status = new BoardStatus(); status.setBoardId(getMatchedString(TODAY_BOARD_ID_REGEX, string)); status.setBoardName(getMatchedString(TODAY_BOARD_NAME_REGEX, string)); status.setPostNumberToday(Integer.parseInt(getMatchedString( TODAY_BOARD_TOPIC_NUM_REGEX, string))); status.setTotalPostToday(postNum); status.setRating(i + 1); list.add(status); } return list; } public String parseMsgContent(String html) { Pattern p = Pattern.compile("(?<=<span id=\"ubbcode1\" >).*?(?=</span>)"); Matcher m = p.matcher(html); if (!m.find()) { throw new IllegalStateException("can not get msg content"); } return m.group(); } public String parseUploadPicture(String html) throws ParseContentException { return RegexUtil.getMatchedString( CC98ParseRepository.UPLOAD_PIC_ADDRESS_REGEX, html) .replace(",1", ""); } }