package net.krautchan.parser;
/*
* Copyright (C) 2011 Johannes Jander (johannes@jandermail.de)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import net.krautchan.data.KCBoard;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Parses the board list page into {@link KCBoard} objects.
 *
 * <p>Contrary to the Thread and Posting parsers, which are called frequently,
 * this is not a stream parser, but uses JSoup for simplicity.
 */
public class KCBoardListParser {
    /** Prefix of the {@code id} attribute that marks a list item as a board entry. */
    private static final String BOARD_ID_PREFIX = "board_";

    /** Separates the short name from the title inside the link text (e.g. {@code "/b/ - Title"}). */
    private static final String NAME_SEPARATOR_REGEX = "\\s+-\\s+";

    /** Timeout, in milliseconds, used when fetching the board list page. */
    private static final int TIMEOUT_MILLIS = 3000;

    /**
     * Extracts all boards from an already parsed document.
     *
     * <p>Only {@code .boardlist li} elements whose id starts with {@code board_} and that
     * contain at least one link with a non-empty short name are turned into boards;
     * anything else is skipped instead of aborting the whole parse.
     *
     * @param doc parsed board list page
     * @param baseUrl URL against which the link targets are resolved
     * @return boards keyed by short name, in document order
     * @throws MalformedURLException if {@code baseUrl} or a resolved link is not a valid URL
     */
    private static Map<String, KCBoard> getBoardList(Document doc, String baseUrl) throws MalformedURLException {
        Map<String, KCBoard> boards = new LinkedHashMap<String, KCBoard>();
        Elements boardlist = doc.select(".boardlist li");
        URL base = new URL(baseUrl);
        // The id is always derived from the slash-free base so that both public
        // overloads produce the same dbId for the same board.
        String idBase = stripTrailingSlash(baseUrl);
        for (Element elem : boardlist) {
            if (!elem.id().startsWith(BOARD_ID_PREFIX)) {
                continue;
            }
            Elements links = elem.select("a");
            // first() yields null for an empty selection, whereas get(0) would throw.
            Element firstLink = links.first();
            if (null == firstLink) {
                continue;
            }
            String content = firstLink.ownText();
            // Limit of 2 keeps titles that themselves contain " - " intact.
            String[] keyVal = content.split(NAME_SEPARATOR_REGEX, 2);
            String shortName = keyVal[0].trim().replaceAll("/", "");
            if (shortName.length() == 0) {
                // A link without text cannot be mapped to a board.
                continue;
            }
            KCBoard board = new KCBoard();
            board.uri = new URL(base, links.attr("href")).toExternalForm();
            board.shortName = shortName;
            // Without a separator there is no title; fall back to the short name.
            board.name = (keyVal.length > 1) ? keyVal[1].trim() : shortName;
            board.dbId = (long) (idBase + "/" + board.shortName).hashCode();
            boards.put(board.shortName, board);
        }
        return boards;
    }

    /**
     * Removes a single trailing slash, if present.
     *
     * @param url the URL string, may be {@code null}
     * @return {@code url} without one trailing {@code '/'}, or {@code url} itself if there is none
     */
    private static String stripTrailingSlash(String url) {
        if ((null != url) && (url.endsWith("/"))) {
            return url.substring(0, url.length() - 1);
        }
        return url;
    }

    /**
     * Downloads the board list page and parses it.
     *
     * @param boardListUrlStr URL of the page containing the board list
     * @param baseUrl URL against which the link targets are resolved; one trailing slash is ignored
     * @param userAgentName value sent as the HTTP user agent
     * @return boards keyed by short name, in document order
     * @throws IOException if the page cannot be fetched or a URL is malformed
     */
    public static Map<String, KCBoard> getBoardList (String boardListUrlStr, String baseUrl, String userAgentName) throws IOException {
        Document doc = Jsoup.connect(boardListUrlStr)
                .userAgent(userAgentName)
                .cookie("auth", "token")
                .timeout(TIMEOUT_MILLIS)
                .get();
        return getBoardList(doc, stripTrailingSlash(baseUrl));
    }

    /**
     * Parses a board list page that is already available as a string.
     *
     * @param html markup of the board list page
     * @param baseUrl URL against which the link targets are resolved
     * @return boards keyed by short name, in document order
     * @throws IOException if a URL is malformed
     */
    public static Map<String, KCBoard> getBoardList (String html, String baseUrl) throws IOException {
        Document doc = Jsoup.parse(html);
        return getBoardList(doc, baseUrl);
    }
}