/******************************************************************************* * Copyright (c) 2005, 2006 Erkki Lindpere and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Erkki Lindpere - initial API and implementation *******************************************************************************/ package org.eclipse.ecf.internal.provider.vbulletin; import java.net.URL; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.eclipse.ecf.bulletinboard.BBException; import org.eclipse.ecf.bulletinboard.IBBObject; import org.eclipse.ecf.bulletinboard.IMember; import org.eclipse.ecf.core.identity.ID; import org.eclipse.ecf.core.identity.IDCreateException; import org.eclipse.ecf.core.identity.Namespace; import org.eclipse.ecf.internal.bulletinboard.commons.AbstractParser; import org.eclipse.ecf.internal.bulletinboard.commons.IBBObjectFactory; import org.eclipse.ecf.internal.bulletinboard.commons.parsing.DefaultPatternDescriptor; import org.eclipse.ecf.internal.bulletinboard.commons.parsing.IPatternDescriptor; import org.eclipse.ecf.internal.bulletinboard.commons.util.StringUtil; import org.eclipse.ecf.internal.provider.vbulletin.ThreadBrowser2.SkippedStatus; import org.eclipse.ecf.internal.provider.vbulletin.identity.ThreadMessageID; import org.eclipse.ecf.internal.provider.vbulletin.internal.ForumFactory; import org.eclipse.ecf.internal.provider.vbulletin.internal.GuestFactory; import org.eclipse.ecf.internal.provider.vbulletin.internal.MemberFactory; import org.eclipse.ecf.internal.provider.vbulletin.internal.MemberGroupFactory; import org.eclipse.ecf.internal.provider.vbulletin.internal.ThreadMessageFactory; import org.eclipse.ecf.internal.provider.vbulletin.internal.VBException; public class VBParser extends AbstractParser { public VBParser(Namespace namespace, URL baseURL) { super(namespace, baseURL); } private static final Pattern PAT_TITLE = Pattern .compile("<title>(.*?)</title>"); public String parseTitle(CharSequence seq) { Matcher m = PAT_TITLE.matcher(seq); if (m.find()) { return new String(m.group(1)); } return null; } @Override public IBBObjectFactory getMemberFactory() { return new MemberFactory(); } public Pattern getMemberNamePattern() { return Pattern.compile("- View Profile: (.*?)</title>"); } public static final Pattern PAT_FORUM = Pattern .compile("<a href=\"forumdisplay.php?(?:.*?)f=([0-9]+)\">(.*?)</a>"); public Map<ID, Forum> parseForums(final CharSequence seq) { Map<ID, Forum> forums = new LinkedHashMap<ID, Forum>(); Matcher matcher = PAT_FORUM.matcher(seq); while (matcher.find()) { String name = StringUtil.stripHTMLTrim(matcher.group(2)); // String desc = StringUtil.stripHTMLTrim(matcher.group(3)); if (StringUtil.notEmptyStr(name)) { ForumFactory ff = new ForumFactory(); String idStr = matcher.group(1); ID id = null; try { id = ff.createBBObjectId(namespace, baseURL, idStr); } catch (NumberFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IDCreateException e) { // TODO Auto-generated catch block e.printStackTrace(); } Forum forum = (Forum) ff.createBBObject(id, name, null); // forum.setDescription(desc); forums.put(id, forum); } } return forums; } @Override public IPatternDescriptor getThreadPattern() { return DefaultPatternDescriptor .defaultCustom( Pattern .compile( "<a href=\"showthread.php?(?:.*?)t=([0-9]+?)\" id=\"thread_title_(?:[0-9]+?)\">(.*?)</a>(?:.*?)<div class=\"smallfont\">(.*?)</div>", Pattern.DOTALL), new String[] { "id", "name", "authorInfo" }); } @Override public IBBObjectFactory getThreadFactory() { return new ThreadFactory(); } public static final Pattern PAT_MSG_INFORMATION = Pattern .compile( "<div class=\"panel\">(?:.*?)<blockquote>(.*?)</blockquote>(?:.*?)</td>", Pattern.DOTALL); public String parseInformationMessage(CharSequence seq) { String msg = null; Matcher m = PAT_MSG_INFORMATION.matcher(seq); if (m.find()) { msg = "vBulletin: " + m.group(1); } return msg; } public static final Pattern PAT_MSG_POST_ERROR = Pattern .compile( "<!--POSTERROR do not remove this comment-->(.*?)<!--/POSTERROR do not remove this comment-->", Pattern.DOTALL); public String parsePostErrorMessage(CharSequence seq) { String msg = null; Matcher m = PAT_MSG_POST_ERROR.matcher(seq); if (m.find()) { msg = "vBulletin: " + m.group(1); } return msg; } protected BBException createVBException(String msg, CharSequence seq) { String vbmsg = parseInformationMessage(seq); if (vbmsg == null) { vbmsg = parsePostErrorMessage(seq); } if (vbmsg != null) { return new BBException(msg, new VBException(new String(StringUtil .stripHTMLTrim(vbmsg)))); } else { return new BBException(msg); } } public static final Pattern PAT_THEAD_ATTRS = Pattern // .compile("<title>(?:.*?) :: View topic - (.*?)</title>"); .compile( "<td class=\"navbar\"(?:.*?)><a href=\"/showthread.php\\?t=([0-9]+)(?:.*?)\">(.*?)</td>", Pattern.DOTALL); public static final Pattern PAT_THEAD_ATTRS_FORUM = Pattern .compile("<span class=\"navbar\">> <a href=\"forumdisplay.php\\?f=([0-9]+?)\">(.*?)</a></span>"); public static final IPatternDescriptor PD_THREAD_ATTRS = DefaultPatternDescriptor .defaultIdAndName(PAT_THEAD_ATTRS); public static final IPatternDescriptor PD_THREAD_ATTRS_FORUM = DefaultPatternDescriptor .defaultIdAndName(PAT_THEAD_ATTRS_FORUM); public Thread parseThreadPageForThreadAttributes(CharSequence seq) throws BBException { Thread t = (Thread) genericParser.parseSingleIdName(PD_THREAD_ATTRS, seq, new ThreadFactory()); if (t != null) { Map<ID, IBBObject> forums = genericParser.parseMultiIdName( PD_THREAD_ATTRS_FORUM, seq, new ForumFactory(), true); Forum prev = null; Forum f = null; for (IBBObject obj : forums.values()) { f = (Forum) obj; if (prev != null) { prev.subforums.add(f); } f.setParent(prev); prev = f; } t.forum = f; return t; } else { throw new BBException("Failed to parse the thread."); } } public static final Pattern PAT_MSG = Pattern.compile( "<!-- post #([0-9]+) -->(.*)<!-- / post #\\1 -->", Pattern.DOTALL); public List<ThreadMessage> parseMessages2(final CharSequence seq, final ID lastReadId, boolean desc, SkippedStatus skipped) throws BBException { Matcher m; ThreadMessage msg; List<ThreadMessage> messages = new ArrayList<ThreadMessage>(); m = PAT_MSG.matcher(seq); while (m.find()) { ThreadMessageFactory tmf = new ThreadMessageFactory(); ThreadMessageID id = null; try { id = (ThreadMessageID) tmf.createBBObjectId(namespace, baseURL, m.group(1)); } catch (NumberFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IDCreateException e) { // TODO Auto-generated catch block e.printStackTrace(); } if (lastReadId == null || id.getLongValue() > ((ThreadMessageID) lastReadId) .getLongValue()) { String msgSrc = m.group(2); msg = parseMessage2(id, msgSrc); if (msg != null) { if (desc) { messages.add(0, msg); } else { messages.add(msg); } } } else { skipped.messagesSkipped = true; } } return messages; } public static final Pattern PAT_MSG_USER = Pattern .compile("<a class=\"bigusername\" href=\"member.php?(.*)u=([0-9]+)\">(.*)</a>"); public static final Pattern PAT_MSG_TITLE = Pattern.compile( "<!-- icon and title -->(.*)<!-- / icon and title -->", Pattern.DOTALL); public static final Pattern PAT_MSG_MESSAGE = Pattern.compile( "<!-- message -->(.*)<!-- / message -->", Pattern.DOTALL); public static final Pattern PAT_MSG_TIMESTAMP = Pattern.compile( "<!-- status icon and date -->(.*)<!-- / status icon and date -->", Pattern.DOTALL); @Override public Long parseTimestamp(CharSequence seq) { Long l = null; final Locale locale = Locale.ENGLISH; final String dateFormat = "MM-dd-yyyy"; final String timeFormat = "hh:mm aa"; final String dateTimeSeparator = ", "; final DateFormat fmtTimestamp = new SimpleDateFormat(dateFormat + dateTimeSeparator + timeFormat, locale); final DateFormat fmtTime = new SimpleDateFormat(timeFormat, locale); Matcher matcher; matcher = PAT_MSG_TIMESTAMP.matcher(seq); if (matcher.find()) { String timestamp = StringUtil.stripHTMLFullTrim(matcher.group(1)); timestamp = timestamp.replaceAll("1st", "1"); timestamp = timestamp.replaceAll("2nd", "2"); timestamp = timestamp.replaceAll("3rd", "3"); timestamp = timestamp.replaceAll("th", ""); if (timestamp.startsWith("Today") || timestamp.startsWith("Yesterday")) { String[] s = timestamp.split(dateTimeSeparator); try { Calendar now = Calendar.getInstance(fmtTime.getTimeZone()); if ("Yesterday".equals(s[0])) { now.add(Calendar.DATE, -1); } Date d = fmtTime.parse(s[1]); Calendar then = Calendar.getInstance(fmtTime.getTimeZone()); then.setTime(d); then.set(now.get(Calendar.YEAR), now.get(Calendar.MONTH), now.get(Calendar.DATE)); l = new Long(then.getTimeInMillis()); } catch (ParseException e) { e.printStackTrace(); } } else { try { l = new Long(fmtTimestamp.parse(timestamp).getTime()); } catch (ParseException e) { e.printStackTrace(); } } } return l; } private ThreadMessage parseMessage2(final ID id, final CharSequence seq) { ThreadMessage msg = null; ThreadMessageFactory tmf = new ThreadMessageFactory(); msg = (ThreadMessage) tmf.createBBObject(id, null, null); Matcher m; String uname; Long l = parseTimestamp(seq); if (l != null) { msg.timePosted = new Date(l); } m = Pattern.compile( "<div id=\"postmenu_" + ((ThreadMessageID) id).getLongValue() + "\">(.*?)</div>", Pattern.DOTALL).matcher(seq); if (m.find()) { String userInfoStr = m.group(1); m = PAT_MSG_USER.matcher(userInfoStr); if (m.find()) { MemberFactory mf = new MemberFactory(); uname = new String(StringUtil.simpleStripHTML(m.group(3))); ID uid = null; try { uid = mf.createBBObjectId(namespace, baseURL, m.group(2)); } catch (NumberFormatException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IDCreateException e) { // TODO Auto-generated catch block e.printStackTrace(); } msg.author = (IMember) mf.createBBObject(uid, uname, null); } else { // Didn't find a registered author, so the userinfo should // contain only the username. msg.author = new Member(new String(userInfoStr.trim())); } } m = Pattern.compile( "#<a href=\"showpost.php\\?p=" + ((ThreadMessageID) id).getLongValue() + "(?:.*?)><strong>([0-9]+)</strong></a>").matcher(seq); m.find(); msg.number = Integer.parseInt(m.group(1)); m = PAT_MSG_TITLE.matcher(seq); m.find(); msg.setNameInternal(new String(StringUtil.stripHTMLTrim(m.group(1)))); m = PAT_MSG_MESSAGE.matcher(seq); m.find(); String message = StringUtil.stripHTMLFullTrim(m.group(1)); msg.message = message; return msg; } public static final Pattern PAT_PAGES = Pattern .compile("<td class=\"vbmenu_control\"(?:.*?)>Page ([0-9]+) of ([0-9]+)</td>"); public int parseNextPage(CharSequence seq) { Matcher m = PAT_PAGES.matcher(seq); int next = -1; if (m.find()) { int current = Integer.parseInt(m.group(1)); int last = Integer.parseInt(m.group(2)); if (current < last) { next = current + 1; } } return next; } public int parsePrevPage(CharSequence seq) { Matcher m = PAT_PAGES.matcher(seq); int prev = -1; if (m.find()) { int current = Integer.parseInt(m.group(1)); if (current > 1) { prev = current - 1; } } return prev; } @Override public IPatternDescriptor getMemberPattern() { return DefaultPatternDescriptor.defaultIdAndName(Pattern .compile("<a href=\"member.php\\?u=([0-9]+?)\">(.*?)</a>")); } @Override public IPatternDescriptor getAuthorInfoMemberPattern() { return DefaultPatternDescriptor .defaultIdAndName(Pattern .compile("<span(?:.*?)onclick=\"window.open('member.php\\?u=([0-9]+?)', '_self')\">(.*?)</span>")); } @Override public Pattern getMemberGroupContainerPattern() { return Pattern .compile( "<form action=\"profile.php\\?do=joingroup\" method=\"post\">(.*?)</form>", Pattern.DOTALL); } @Override public IBBObjectFactory getMemberGroupFactory() { return new MemberGroupFactory(); } @Override public IPatternDescriptor getMemberGroupPattern() { return DefaultPatternDescriptor .reverseIdAndName(Pattern .compile( "<tr>(?:.*?)<td class=\"alt(?:[12]{1})\">(.*?)<div class=\"smallfont\">(?:.*?)</div>(?:.*?)<label for=\"rb_join_([0-9]+?)\">(?:.*?)</tr>", Pattern.DOTALL)); } @Override public void throwException(final String msg, final CharSequence seq) throws BBException { throw createVBException(msg, seq); } @Override public IBBObjectFactory getGuestFactory() { return new GuestFactory(); } }