Java Examples for org.jsoup.select.Elements
The following Java examples illustrate the usage of org.jsoup.select.Elements. These source code samples are taken from different open source projects.
Example 1
| Project: lavender-master File: RamdomImgParser.java View source code |
/**
 * Extracts the image URL from the photo detail wrapper of a page.
 *
 * @param html raw HTML of the page
 * @return the {@code src} attribute of the first {@code img} inside the first
 *         {@code div} whose id is {@code photo-detail-wrapper}, or {@code null}
 *         when there is no such div or the div contains no image
 */
public static String parserImg(String html) {
    Document document = Jsoup.parse(html);
    for (Element div : document.select("div")) {
        if (!"photo-detail-wrapper".equals(div.attr("id"))) {
            continue;
        }
        // first() yields null for an empty selection; report "not found" instead of throwing.
        Element img = div.select("img").first();
        return img == null ? null : img.attr("src");
    }
    return null;
}
Example 2
| Project: Android-Studio-Project-master File: ContentParser.java View source code |
/**
 * Builds a {@code Content} from the second PNG/JPEG image found in the page.
 *
 * NOTE(review): index 1 skips the first matching image and throws
 * IndexOutOfBoundsException when fewer than two images match; confirm this is intended.
 *
 * @param html raw HTML of the page
 * @return a content object whose URL is the image's {@code src} and whose title is its {@code alt}
 */
public static Content Parser(String html) {
    Elements images = Jsoup.parse(html).select("img[src~=(?i)\\.(png|jpe?g)]");
    Content result = new Content();
    Element image = images.get(1).getElementsByTag("img").first();
    String source = image.attr("src");
    String altText = image.attr("alt");
    result.setUrl(source);
    result.setTitle(altText);
    return result;
}
Example 3
| Project: muzima-android-master File: HTMLConceptParser.java View source code |
/**
 * Collects the distinct concept names referenced by the given HTML.
 *
 * @param html raw HTML to scan
 * @return one entry per distinct result of {@code getConceptName}, in no particular order
 */
public List<String> parse(String html) {
    Document htmlDoc = Jsoup.parse(html);
    // Every element except <div> that carries the data-concept attribute.
    String selector = "*:not(div)[" + DATA_CONCEPT_TAG + "]";
    Set<String> uniqueNames = new HashSet<String>();
    for (Element tagged : htmlDoc.select(selector)) {
        String rawConcept = tagged.attr(DATA_CONCEPT_TAG);
        uniqueNames.add(getConceptName(rawConcept));
    }
    return new ArrayList<String>(uniqueNames);
}
Example 4
| Project: v2ex-android-master File: ContentUtils.java View source code |
/**
 * Reads pagination information from a page body.
 *
 * @param body element to search for {@code page_current} and {@code page_normal} entries
 * @return a two-element array {@code {currentPage, totalPage}}; the current page defaults to 1
 *         and the total is the largest numeric page label found, never less than the current page
 */
public static int[] parsePage(Element body) {
    int currentPage = 1;
    for (Element el : body.getElementsByClass("page_current")) {
        try {
            currentPage = Integer.parseInt(el.text());
            break;
        } catch (NumberFormatException ignored) {
            // Label is not a number; try the next candidate.
        }
    }

    int totalPage = currentPage;
    for (Element el : body.getElementsByClass("page_normal")) {
        try {
            totalPage = Math.max(totalPage, Integer.parseInt(el.text()));
        } catch (NumberFormatException ignored) {
            // Non-numeric labels do not take part in the maximum.
        }
    }
    return new int[] { currentPage, totalPage };
}
Example 5
| Project: validadorAcessibilidade-master File: RecomendacaoDepreciados.java View source code |
/**
 * Reports the markup of deprecated elements present in the document.
 *
 * NOTE(review): "align" is queried as a tag name like the others; if the intent is the
 * deprecated align attribute the selector would need to be "[align]". Confirm before changing.
 *
 * @param doc document to inspect
 * @return an empty string when none of the selectors match; otherwise, for every selector in
 *         order, a newline followed by the string form of its matches (empty for no match)
 */
@Override
public String executa(Document doc) {
    String[] selectors = {
        "frame", "applet", "blink", "marquee", "basefont", "center", "dir",
        "align", "font", "isindex", "menu", "strike", "u"
    };
    Elements[] matches = new Elements[selectors.length];
    boolean anyMatch = false;
    for (int i = 0; i < selectors.length; i++) {
        matches[i] = doc.select(selectors[i]);
        if (!matches[i].isEmpty()) {
            anyMatch = true;
        }
    }
    if (!anyMatch) {
        return "";
    }
    StringBuilder report = new StringBuilder();
    for (Elements found : matches) {
        report.append("\n").append(found.toString());
    }
    return report.toString();
}
Example 6
| Project: yahnac-master File: VoteUrlParser.java View source code |
/**
 * Finds the vote URL of the story identified by {@code storyId}.
 *
 * @return {@code "/"} followed by the vote link's href when that href carries an
 *         {@code auth=} token; {@code EMPTY} when the story's up-vote anchor is absent,
 *         has no vote link, or the link has no auth token
 */
public String parse() {
    Elements links = document.select("a[id=up_" + storyId + "]");
    if (links.isEmpty()) {
        return EMPTY;
    }
    Element voteElement = links.get(0).select("a[href^=vote]").first();
    if (voteElement == null) {
        return EMPTY;
    }
    String href = voteElement.attr("href");
    // Without an auth token the previous code produced the literal "/null".
    if (!href.contains("auth=")) {
        return EMPTY;
    }
    return "/" + href;
}
Example 7
| Project: bashoid-master File: Parser.java View source code |
/**
 * Parses all quotes from a fetched page.
 *
 * Headers (class {@code quote}) and bodies (class {@code qt}) are paired by position
 * inside the first element that has a {@code valign} attribute.
 *
 * @param page fetched page whose content is parsed
 * @return the quotes in document order
 */
static ArrayList<Quote> getQuotes(WebPage page) {
    ArrayList<Quote> quotes = new ArrayList<>();
    Document parsed = Jsoup.parse(page.getContent());
    Element container = parsed.getElementsByAttribute("valign").first();
    Elements headers = container.getElementsByClass("quote");
    Elements bodies = container.getElementsByClass("qt");

    int position = 0;
    for (Element header : headers) {
        String[] lines = bodies.get(position).html().split("<br />");
        position++;

        // The bold tag's text has one leading character before the numeric id.
        String idText = header.getElementsByTag("b").first().text();
        int id = Integer.parseInt(idText.substring(1));

        // The header's own text wraps the score in one character on each side.
        String scoreText = header.ownText();
        int score = Integer.parseInt(scoreText.substring(1, scoreText.length() - 1));

        quotes.add(new Quote(lines, score, id));
    }
    return quotes;
}
Example 8
| Project: deepnighttwo-master File: FirstTry.java View source code |
/**
 * Posts a date-range query to the air news page and prints the own text of every
 * table cell, skipping the first matched row.
 *
 * @param args unused
 * @throws IOException if the request fails
 */
public static void main(String[] args) throws IOException {
    Document doc = Jsoup.connect("http://www.envir.gov.cn/airnews/index.asp").data("Fdate", "2000-6-1").data("Tdate", "2000-6-8").userAgent("I'm jsoup").timeout(3000).post();
    Elements rows = doc.select("table[bordercolor] > tr");
    // Start at index 1 to skip the first row; unlike remove(0) this is safe when nothing matched.
    for (int i = 1; i < rows.size(); i++) {
        for (Element cell : rows.get(i).select("td")) {
            System.out.println(cell.ownText());
        }
    }
}
Example 9
| Project: example-webapp-master File: ExceptionHandlingIntegrationTests.java View source code |
/**
 * Requesting {@code /bad} must redirect to {@code /error/<7-char reference>}, and the
 * page behind that redirect must show the same reference in its {@code #errorRef} element.
 */
@Test
public void shouldSeeErrorReferenceDisplayedOnThePage() throws Exception {
    SpringDispatcherServlet servlet = SpringDispatcherServlet.create();

    MockHttpServletResponse failingResponse = servlet.process(new MockHttpServletRequest("GET", "/bad"));
    String errorPageUrl = failingResponse.getRedirectedUrl();
    assertThat(errorPageUrl, matchesPattern(sequence("/error/", exactly(7, anyCharacterIn("A-Z0-9")))));
    String expectedRef = StringUtils.substringAfterLast(errorPageUrl, "/");

    MockHttpServletResponse errorPage = servlet.process(new MockHttpServletRequest("GET", errorPageUrl));
    Elements refElements = Jsoup.parse(errorPage.getContentAsString()).select("#errorRef");
    assertThat(refElements.size(), equalTo(1));
    assertThat(refElements.first().text(), equalTo(expectedRef));
}
Example 10
| Project: fpcms-master File: JsoupSelectorUtil.java View source code |
/**
 * Returns the matches of the first selector that matches anything.
 *
 * @param doc       element to query
 * @param selectors candidate CSS selectors, tried in order; blank entries are skipped
 * @return the first non-empty selection, or an empty {@code Elements} when
 *         {@code selectors} is null or nothing matches
 */
public static Elements select(Element doc, String... selectors) {
    if (selectors == null) {
        return new Elements();
    }
    for (String candidate : selectors) {
        if (StringUtils.isBlank(candidate)) {
            continue;
        }
        Elements matched = doc.select(candidate);
        if (!matched.isEmpty()) {
            return matched;
        }
    }
    return new Elements();
}
Example 11
| Project: GoVRE-master File: ProxyNetworkTrainMapImage.java View source code |
//METHODS
/**
 * Downloads the configured VRE page and looks for the train map image.
 *
 * @param context used to resolve the {@code urlVREImgMap} string resource
 * @return the absolute URL of the first {@code img} whose source contains
 *         {@code app?action=getimg}; an empty string when no image qualifies;
 *         {@code null} when the page could not be fetched
 */
private static String fetchTrainImageUrlFromVRE(Context context) {
    try {
        String pageUrl = context.getResources().getString(R.string.urlVREImgMap);
        Document page = Jsoup.connect(pageUrl).get();
        // Inspect everything with a src attribute, keeping only real images.
        for (Element candidate : page.select("[src]")) {
            if (!candidate.tagName().equals("img")) {
                continue;
            }
            String absoluteSrc = candidate.attr("abs:src");
            // Only the map image carries the action query string.
            if (absoluteSrc.contains("app?action=getimg")) {
                return absoluteSrc;
            }
        }
        return "";
    } catch (IOException e) {
        return null;
    }
}
Example 12
| Project: IU-master File: ConsumeInfo.java View source code |
/**
 * Appends the consume records found in table {@code GridView1} to {@code list}.
 *
 * The first and last rows of the table are skipped, as are rows that do not have
 * exactly three child cells.
 *
 * NOTE(review): when {@code list} is null a local list is filled and then discarded,
 * so the caller never sees those records; confirm whether that is intended.
 *
 * @param list destination for parsed records
 * @param doc  parsed page, may be null
 * @return 0 when {@code doc} is null or the table has fewer than two rows; otherwise
 *         the number of anchors inside the table rows plus one
 */
public static int parseHtml(List<ConsumeInfo> list, Document doc) {
    if (doc == null) {
        return 0;
    }
    if (list == null) {
        list = new ArrayList<>();
    }
    Elements rows = doc.select("table#GridView1").select("tr");
    int rowCount = rows.size();
    if (rowCount < 2) {
        return 0;
    }
    for (Element row : rows.subList(1, rowCount - 1)) {
        Elements cells = row.children();
        if (cells.size() != 3) {
            continue;
        }
        ConsumeInfo record = new ConsumeInfo();
        record.time = cells.get(1).text();
        record.remain = cells.get(2).text();
        list.add(record);
    }
    return rows.select("a").size() + 1;
}
Example 13
| Project: japicmp-master File: ITReportTitle.java View source code |
/**
 * Verifies that the generated {@code project-reports.html} lists the japicmp report
 * in the left navigation and in the overview table with the expected description.
 */
@Test
public void testReportTitle() throws IOException {
    Path reportPage = Paths.get(System.getProperty("user.dir"), "target", "site", "project-reports.html");
    assertThat(Files.exists(reportPage), is(true));
    Document page = Jsoup.parse(reportPage.toFile(), "UTF-8");
    final String reportLink = "[href=\"japicmp.html\"]";

    // Navigation entry
    Elements navEntry = page.select("#leftColumn " + reportLink);
    assertThat(navEntry.attr("title"), is("japicmp"));
    assertThat(navEntry.text(), is("japicmp"));

    // Overview table row
    Elements reportRow = page.select("#bodyColumn tr:has(" + reportLink + ")");
    assertThat(reportRow.select(reportLink).text(), is("japicmp"));
    Elements descriptionCell = reportRow.select("td:eq(1)");
    String version = System.getProperty("project.version");
    String expectedDescription = "Comparing source compatibility of japicmp-test-v2-" + version + ".jar against japicmp-test-v1-" + version + ".jar";
    assertThat(descriptionCell.text(), is(expectedDescription));
}
Example 14
| Project: JAViewer-master File: BTSOLinkProvider.java View source code |
/**
 * Builds one download link per {@code row} element of the page.
 *
 * Rows that lack any of the expected parts (anchor, {@code file}, {@code size},
 * {@code date}) or otherwise fail to convert are skipped silently.
 *
 * @param htmlContent raw HTML of the result page
 * @return the links that could be parsed, in document order
 */
@Override
public List<DownloadLink> parseDownloadLinks(String htmlContent) {
    ArrayList<DownloadLink> parsed = new ArrayList<>();
    for (Element row : Jsoup.parse(htmlContent).getElementsByClass("row")) {
        try {
            Element anchor = row.getElementsByTag("a").first();
            String file = row.getElementsByClass("file").first().text();
            String size = row.getElementsByClass("size").first().text();
            String date = row.getElementsByClass("date").first().text();
            String href = anchor.attr("href");
            parsed.add(DownloadLink.create(file, size, date, href, null));
        } catch (Exception ignored) {
        }
    }
    return parsed;
}
Example 15
| Project: JianShuApp-master File: HomePageDataPool.java View source code |
/**
 * Parses the article entries of a page and records where to load the next page from.
 *
 * Side effects: sets {@code mLoadMoreUrl} from the first load-more element's
 * {@code data-url}; when no such element exists, clears it and sets {@code mIsAtTheEnd}.
 *
 * @param doc parsed page
 * @return one item per element matching {@code ARTICLE_SELECTOR}; empty when none match
 */
@Override
protected ArticleItem[] getArticleItems(Document doc) {
    Elements loadMoreElements = doc.select(LOAD_MORE_SELECTOR);
    if (loadMoreElements.isEmpty()) {
        mLoadMoreUrl = null;
        mIsAtTheEnd = true;
    } else {
        mLoadMoreUrl = getHtmlUrl(loadMoreElements.get(0).attr("data-url"));
    }

    // select() never returns null, so the former null branch was unreachable.
    Elements articleElements = doc.select(ARTICLE_SELECTOR);
    ArticleItem[] result = new ArticleItem[articleElements.size()];
    int i = 0;
    for (Element el : articleElements) {
        result[i++] = parseElement(el);
    }
    return result;
}
Example 16
| Project: jphp-master File: JsoupExtension.java View source code |
/**
 * Registers the jsoup bindings in the given scope: the {@code WrapJsoup} class, one
 * wrapper class for each jsoup type listed below, and a {@code UrlMemoryOperation}.
 *
 * @param scope compile scope that receives the registrations
 */
@Override
public void onRegister(CompileScope scope) {
    registerClass(scope, WrapJsoup.class);
    // Each call pairs a jsoup type with the wrapper class registered for it.
    registerWrapperClass(scope, Connection.class, WrapConnection.class);
    registerWrapperClass(scope, Connection.Response.class, WrapConnectionResponse.class);
    registerWrapperClass(scope, Connection.Request.class, WrapConnectionRequest.class);
    registerWrapperClass(scope, Document.class, WrapDocument.class);
    registerWrapperClass(scope, Element.class, WrapElement.class);
    registerWrapperClass(scope, Elements.class, WrapElements.class);
    MemoryOperation.register(new UrlMemoryOperation());
    // Binary memory operation is currently disabled.
    //MemoryOperation.register(new BinaryMemoryOperation());
}Example 17
| Project: jspider-master File: BaiduSpiderTest.java View source code |
/**
 * Extracts the page title and, while {@code count} is zero, queues the result links.
 *
 * @param request request that produced the page (unused here)
 * @param page    fetched page
 * @return a result holding the document title under {@code "title"}
 */
@Override
public Result process(Request request, Page page) {
    Result result = new Result();
    Elements anchors = page.document().select(".c-container a");
    boolean shouldFollow = anchors != null && anchors.size() > 0 && count == 0;
    if (shouldFollow) {
        List<String> targets = new ArrayList<String>(anchors.size());
        for (Element anchor : anchors) {
            String absolute = anchor.absUrl("href");
            // Keep only links that resolve to a URL with a host.
            boolean usable = StringUtils.isNotBlank(absolute)
                    && StringUtils.isNotBlank(SpiderUrlUtils.getUrlHost(absolute));
            if (usable) {
                targets.add(absolute);
            }
        }
        page.addTargetRequests(targets);
    }
    result.put("title", page.document().title());
    return result;
}
Example 18
| Project: learn_crawler-master File: HtmlParserTool.java View source code |
/**
 * Fetches a page and returns the link targets accepted by the filter.
 *
 * Anchor hrefs, frame sources and iframe sources are resolved to absolute URLs;
 * every anchor URL is also printed to standard output.
 *
 * @param url    page to fetch (5 second timeout)
 * @param filter decides which absolute URLs are kept
 * @return the accepted URLs; empty when the fetch fails (the stack trace is printed)
 */
public static Set<String> extracLinks(String url, LinkFilter filter) {
    Set<String> accepted = new HashSet<String>();
    try {
        Document doc = Jsoup.connect(url).timeout(5000).get();
        Elements anchors = doc.select("a[href]");
        Elements frames = doc.select("frame[src]");
        Elements iframes = doc.select("iframe[src]");

        for (Element anchor : anchors) {
            String target = anchor.absUrl("href");
            System.out.println(target);
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
        for (Element frame : frames) {
            String target = frame.absUrl("src");
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
        for (Element iframe : iframes) {
            String target = iframe.absUrl("src");
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return accepted;
}
Example 19
| Project: like_googleplus_layout-master File: PhoneKRNewsContentUtils.java View source code |
/**
 * Downloads a news article and flattens its paragraphs into a list.
 *
 * For each {@code p} inside {@code #xs-post}: a paragraph without links contributes its
 * text prefixed with {@code FOUR_BLANK_SPACE}; a paragraph whose first link wraps an image
 * contributes that image's {@code src}; anything else is skipped.
 *
 * @param newsUrl article URL
 * @return the collected entries, or {@code null} when the fetch fails, the post
 *         container is missing, or it holds no paragraphs
 */
public static LinkedList<String> getPhoneKRNewsDataList(String newsUrl) {
    LinkedList<String> data = null;
    try {
        Document document = Jsoup.connect(newsUrl).get();
        Element post = document.getElementById("xs-post");
        if (post == null) {
            // Page layout differs from what is expected; same result as "no paragraphs".
            return null;
        }
        Elements paragraphs = post.getElementsByTag("p");
        if (!paragraphs.isEmpty()) {
            data = new LinkedList<String>();
            for (Element paragraph : paragraphs) {
                String text = null;
                Elements anchors = paragraph.getElementsByTag("a");
                if (anchors.isEmpty()) {
                    text = FOUR_BLANK_SPACE + paragraph.text();
                } else {
                    Elements images = anchors.get(0).getElementsByTag("img");
                    if (!images.isEmpty()) {
                        text = images.get(0).attr("src");
                    }
                }
                if (!TextUtils.isEmpty(text)) {
                    data.add(text);
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return data;
}
Example 20
| Project: mlcomp-master File: TitleMap.java View source code |
/**
 * Emits one output record with the input URL and the texts of all {@code title} tags.
 *
 * Each title text is preceded by a comma, so a non-empty result starts with ",".
 * NOTE(review): the leading comma looks unintended but is preserved; confirm with consumers.
 *
 * @param recordNum index of the input record (unused)
 * @param record    input record: column 0 is the URL, column 1 the HTML
 * @param context   task context used to create and write the output record
 * @throws IOException declared by the interface
 */
@Override
public void map(long recordNum, Record record, TaskContext context) throws IOException {
    String url = (String) record.get(0);
    String html = (String) record.get(1);

    StringBuilder joinedTitles = new StringBuilder();
    for (Element titleTag : Jsoup.parse(html).getElementsByTag("title")) {
        joinedTitles.append(",").append(titleTag.text());
    }

    Record output = context.createOutputRecord();
    output.set("url", url);
    output.set("title", joinedTitles.toString());
    context.write(output);
}
Example 21
| Project: mobile-ycjw-master File: StudentDevelopmentScheduleQuery.java View source code |
/**
 * Fetches the development schedule page and returns the markup of element
 * {@code #DG_GetGrjh}.
 *
 * The response is read line by line and concatenated without line separators before parsing.
 *
 * @param context Android context whose application object supplies the HTTP client
 *                and the selected server address
 * @return string form of the selected element(s); empty when the element is absent
 * @throws Exception wrapping any failure while fetching or parsing
 */
@Override
public String getDevelopmentScheduleQueryInfo(Context context) throws Exception {
    try {
        YCApplication app = (YCApplication) context.getApplicationContext();
        String url = (String) app.get("selectedIp") + Constant.developScheduleQuery;
        HttpGet request = new HttpGet(url);
        HttpResponse response = app.getClient().execute(request);
        InputStream is = response.getEntity().getContent();
        StringBuilder sb = new StringBuilder();
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(is, Constant.ENCODING));
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        } finally {
            // Always release the response stream, even when reading fails.
            is.close();
        }
        Document doc = Jsoup.parse(sb.toString());
        Elements table = doc.select("#DG_GetGrjh");
        return table.toString();
    } catch (Exception e) {
        throw new Exception(e);
    }
}
Example 22
| Project: Muzik-master File: SearchDownloadsNL.java View source code |
/**
 * Searches downloads.nl for songs and resolves each hit to its redirect target.
 *
 * NOTE(review): the query is appended without URL encoding ({@code Uri.parse} does not
 * encode); confirm whether callers pre-encode it.
 *
 * @param query search terms
 * @return one result per hit that answers with a {@code Location} header; hits without
 *         a redirect are skipped. On an I/O error the results gathered so far are returned.
 */
public static ArrayList<SongResult> getSongs(String query) {
    ArrayList<SongResult> results = new ArrayList<SongResult>();
    // Base query URL.
    String searchUrl = "http://www.downloads.nl/results/mp3/1/" + Uri.parse(query);
    try {
        Document document = Jsoup.connect(searchUrl).get();
        for (Element hit : document.select(".tl")) {
            String url = "http://www.downloads.nl" + hit.attr("href");
            //todo add artist string to the name so that result is clearer
            HttpURLConnection ucon = (HttpURLConnection) new URL(url).openConnection();
            String location;
            try {
                ucon.setInstanceFollowRedirects(false);
                location = ucon.getHeaderField("Location");
            } finally {
                ucon.disconnect();
            }
            if (location == null) {
                // No redirect for this hit; previously this aborted the whole search.
                continue;
            }
            URL secondURL = new URL(location);
            String name = hit.select("span").text();
            if (HomescreenActivity.debugMode) {
                Log.d("Play", "Downloads.nl Name=" + name + " url=" + secondURL);
            }
            results.add(new SongResult(name, secondURL.toString()));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return results;
}
Example 23
| Project: pictorial_android_client-master File: ParserImageList.java View source code |
/**
 * Converts the {@code post-inner} blocks of a listing page into image beans.
 *
 * @param mRet raw HTML of the listing page, may be null
 * @return a list bean with one entry per post that contains an {@code img[src]};
 *         empty when {@code mRet} is null or nothing matches
 */
public static ImageListBean parser(String mRet) {
    ImageListBean imageListBean = new ImageListBean();
    if (mRet == null) {
        return imageListBean;
    }
    Document document = Jsoup.parse(mRet);
    for (Element post : document.getElementsByClass("post-inner")) {
        Element image = post.select("img[src]").first();
        if (image == null) {
            // Post without an image: nothing to list, and first() would otherwise cause an NPE.
            continue;
        }
        ImageBean imageBean = new ImageBean();
        imageBean.setAlt(image.attr("alt"));
        imageBean.setDetailurl(post.select("a[title]").attr("href"));
        imageBean.setHeight(image.attr("height"));
        imageBean.setWidth(image.attr("width"));
        imageBean.setImgurl(image.attr("src"));
        imageListBean.add(imageBean);
    }
    return imageListBean;
}
Example 24
| Project: sample-skeleton-projects-master File: Crawler.java View source code |
/**
 * URLs already passed to {@link #processPage(String)} during this run. Pages that do not
 * mention "research" are never stored in {@code ContentList}, so without this set any
 * link cycle between such pages would recurse without end.
 */
private static final java.util.Set<String> VISITED_URLS = new java.util.HashSet<String>();

/**
 * Crawls a page and, recursively, every linked page whose href contains {@code mit.edu}.
 *
 * Pages whose text contains "research" are printed and stored in {@code ContentList}.
 * A URL is fetched at most once per run; failures are reported on standard output
 * and the page is skipped.
 *
 * @param URL page to process
 * @throws SQLException declared for callers; not thrown by the visible code paths
 * @throws IOException  declared for callers; fetch errors are caught and logged instead
 */
public static void processPage(String URL) throws SQLException, IOException {
    // Skip pages that are already stored or were already visited in this run.
    if (ContentList.isContentInMap(URL)) {
        return;
    }
    if (!VISITED_URLS.add(URL)) {
        return;
    }
    try {
        Document doc = Jsoup.connect(URL).timeout(5000).get();
        if (doc.text().contains("research")) {
            System.out.println(URL);
            ContentList.insertKey(URL, URL);
        }
        // Follow all links and recursively call processPage.
        for (Element link : doc.select("a[href]")) {
            if (link.attr("href").contains("mit.edu")) {
                processPage(link.attr("abs:href"));
            }
        }
    } catch (Exception e) {
        System.out.println("skipping .... " + URL);
    }
}
Example 25
| Project: TACIT-master File: SupremCrawlerFilter.java View source code |
/**
 * Lists the filter links offered for a crawl segment.
 *
 * @param segment path segment appended to the crawler URL
 * @return {@code "All"} followed by the trimmed href of every anchor inside the
 *         first {@code .exmenu} element
 * @throws IOException declared for the page fetch
 */
public List<String> filters(String segment) throws IOException {
    String target = URI.create(this.crawlerUrl + "/" + segment).toString();
    Document doc = parseContentFromUrl(target);
    Element menu = doc.select(".exmenu").get(0);

    List<String> filterContents = new ArrayList<String>();
    filterContents.add("All");
    for (Element anchor : menu.select("a")) {
        String href = anchor.attr("href");
        filterContents.add(href.trim());
    }
    return filterContents;
}
Example 26
| Project: WaveTact-master File: Search.java View source code |
/**
 * Runs a web search and replies with one result.
 *
 * If the first argument is an integer it is taken as the 1-based result number and
 * removed from the search terms; otherwise the first result is used. An out-of-range
 * number (including zero or negative) and an empty result set are reported as errors.
 *
 * @param args optional result number followed by the search terms
 * @throws Exception if the search request fails
 */
@Override
public void onCommand(String command, User user, PircBotX network, String prefix, Channel channel, boolean isPrivate, int userPermLevel, String... args) throws Exception {
    int resultIndex = 0;
    // Guard the length first: the command may be issued without any argument.
    if (args.length > 0 && GeneralUtils.isInteger(args[0])) {
        resultIndex = Integer.parseInt(args[0]) - 1;
        args = ArrayUtils.remove(args, 0);
    }
    Document doc = Jsoup.connect("http://www.dogpile.com/dogpilecontrol/search/web?fcoid=417&fcop=topnav&fpid=27&q=thing" + StringUtils.join(args, "%20")).get();
    Elements results = doc.select(".searchResult");
    if (results.isEmpty()) {
        IRCUtils.sendError(user, network, channel, "Search returned no results", prefix);
        return;
    }
    if (resultIndex < 0 || resultIndex >= results.size()) {
        IRCUtils.sendError(user, network, channel, "Search #" + (resultIndex + 1) + " does not exist", prefix);
        return;
    }
    Element result = results.get(resultIndex);
    String title = result.select(".resultTitlePane").text();
    String url = result.select(".resultDisplayUrl").text();
    String content = result.select(".resultDescription").text();
    IRCUtils.sendMessage(user, network, channel, "[" + title + "] " + content + " - " + GeneralUtils.shortenURL(url), prefix);
}
Example 27
| Project: webmagic-master File: CharsetUtils.java View source code |
/**
 * Determines the charset of a response.
 *
 * Lookup order: (1) the charset named in the Content-Type header; (2) the first
 * {@code meta} tag that names one, either in its {@code content} attribute (HTML 4 style)
 * or its {@code charset} attribute (HTML5 style).
 *
 * @param contentType  value of the Content-Type header
 * @param contentBytes raw response body, decoded with the platform default charset
 *                     for the meta-tag scan
 * @return the detected charset, or whatever {@code UrlUtils.getCharset} returned when
 *         no meta tag names one
 * @throws IOException declared for callers
 */
public static String detectCharset(String contentType, byte[] contentBytes) throws IOException {
    // 1. Encoding in the HTTP header Content-Type.
    String charset = UrlUtils.getCharset(contentType);
    if (StringUtils.isNotBlank(contentType) && StringUtils.isNotBlank(charset)) {
        logger.debug("Auto get charset: {}", charset);
        return charset;
    }
    // Use the default charset to decode the first time.
    Charset defaultCharset = Charset.defaultCharset();
    String content = new String(contentBytes, defaultCharset);
    // 2. Charset in a meta tag.
    if (StringUtils.isNotEmpty(content)) {
        Document document = Jsoup.parse(content);
        for (Element meta : document.select("meta")) {
            String metaContent = meta.attr("content");
            String metaCharset = meta.attr("charset");
            int charsetAt = metaContent.indexOf("charset");
            if (charsetAt != -1) {
                // 2.1 HTML 4.01: <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
                String[] parts = metaContent.substring(charsetAt).split("=");
                if (parts.length > 1) {
                    charset = parts[1];
                    break;
                }
                // "charset" without a value: ignore this tag instead of failing on parts[1].
                continue;
            }
            // 2.2 HTML5: <meta charset="UTF-8" />
            if (StringUtils.isNotEmpty(metaCharset)) {
                charset = metaCharset;
                break;
            }
        }
    }
    logger.debug("Auto get charset: {}", charset);
    // 3. TODO: use a tool such as cpdetector to detect the charset from the content.
    return charset;
}
Example 28
| Project: ache-master File: GoogleSearch.java View source code |
/**
 * Runs a Google search and returns the result links of one page.
 *
 * @param query search query, appended to the URL as given
 * @param page  zero-based result page; the start offset is {@code page * docsPerPage}
 * @return one entry (href and link text) per result link found under {@code div#search .r}
 * @throws IOException if the page cannot be downloaded or parsed
 */
public List<BackLinkNeighborhood> submitQuery(String query, int page) throws IOException {
    timer.waitMinimumDelayIfNecesary();
    // 21 -> max number allowed by google... decreases after
    String queryUrl = "https://www.google.com/search?q=" + query + "&num=" + docsPerPage + "&start=" + page * docsPerPage;
    System.out.println("URL:" + queryUrl);
    try {
        FetchedResult result = fetcher.get(queryUrl);
        Document doc;
        try (InputStream is = new ByteArrayInputStream(result.getContent())) {
            doc = Jsoup.parse(is, "UTF-8", query);
        }
        Elements searchItems = doc.select("div#search");
        Elements linkHeaders = searchItems.select(".r");
        Elements linksUrl = linkHeaders.select("a[href]");
        List<BackLinkNeighborhood> links = new ArrayList<>();
        for (Element link : linksUrl) {
            String title = link.text();
            String url = link.attr("href");
            links.add(new BackLinkNeighborhood(url, title));
        }
        System.out.println(getClass().getSimpleName() + " hits: " + links.size());
        return links;
    } catch (IOException | BaseFetchException e) {
        // Keep the cause so callers can see why the download failed.
        throw new IOException("Failed to download backlinks from Google.", e);
    }
}
Example 29
| Project: any-video-master File: PandaCrawler.java View source code |
/**
 * Converts the live-stream list items of the page into video DTOs and stores them
 * under the home-live key for {@code TAG}.
 *
 * Collection stops once the list holds more than 48 entries.
 *
 * @param document parsed listing page
 */
private void savePandaLivesToRedis(Document document) {
    List<VideoDTO> lives = new ArrayList<>();
    for (Element item : document.select("li.video-list-item.video-no-tag")) {
        String category = item.select("div.video-info span.video-cate").text();
        String nickname = item.select("div.video-info span.video-nickname").text();
        String cover = item.select("img.video-img").attr("data-original");
        String link = PANDA + item.attr("data-id");

        VideoDTO live = new VideoDTO();
        live.setAvailable(true);
        live.setTitle("[" + category + "] " + nickname);
        live.setImage(cover);
        live.setValue(link);
        lives.add(live);

        if (lives.size() > 48) {
            break;
        }
    }
    String key = redisSourceManager.VIDEO_PREFIx_HOME_LIVE_KEY + "_" + TAG;
    redisSourceManager.saveVideos(key, lives);
}
Example 30
| Project: asoiaf-master File: FetchUrls.java View source code |
/**
 * Fetches a page and picks the thumbnail and original image links from its outlinks.
 *
 * A link labelled {@code 200} supplies the thumbnail URL; a link labelled
 * {@code original} supplies the original URL.
 *
 * @param url page to fetch (5 second timeout)
 * @return an {@code ImageUrl} with whichever links were found; left unfilled when the
 *         fetch fails (the stack trace is printed)
 */
public static ImageUrl FetchImageUrl(String url) {
    ImageUrl iu = new ImageUrl();
    try {
        Document doc = Jsoup.connect(url).timeout(5000).get();
        for (Element outlink : doc.select("li.outlink a")) {
            String label = outlink.text();
            if (label.equals("200")) {
                String thumbHref = outlink.select("a[href]").attr("href");
                iu.setThumbUrl(thumbHref);
            }
            if (label.equals("original")) {
                String originalHref = outlink.select("a[href]").attr("href");
                iu.setOringinUrl(originalHref);
            }
        }
    } catch (Exception failure) {
        failure.printStackTrace();
    }
    return iu;
}
Example 31
| Project: baleen-master File: CommentArea.java View source code |
/**
 * Wraps comment areas of the document in {@code ASIDE}.
 *
 * For every paragraph whose text contains {@code COMMENT_END}: if its own text also
 * contains {@code COMMENT_START} the paragraph alone is wrapped; otherwise the code
 * searches backwards for a paragraph whose own text contains {@code COMMENT_START} and,
 * if one is found, wraps the siblings from it up to and including the end paragraph.
 *
 * @param document document that is modified in place
 */
@Override
public void manipulate(Document document) {
    document.select("p:contains(" + COMMENT_END + ")").forEach( last -> {
        if (last.ownText().contains(COMMENT_START)) {
            // Start and end marker in the same paragraph.
            last.wrap(ASIDE);
        } else {
            int index = last.elementSiblingIndex();
            Elements allSiblings = last.siblingElements();
            Elements pSiblings = allSiblings.select("p");
            Element startSibling = null;
            // NOTE(review): index comes from elementSiblingIndex() but is applied to pSiblings,
            // a list produced by select("p") on the siblings; the positions may not line up
            // when non-paragraph siblings exist. Verify against real documents.
            for (int i = index - 1; i >= 0; i--) {
                Element e = pSiblings.get(i);
                if (e.ownText().contains(COMMENT_START)) {
                    startSibling = e;
                    break;
                }
            }
            if (startSibling != null) {
                // Wrap everything between the start marker and the end paragraph, then the end itself.
                for (int i = startSibling.elementSiblingIndex(); i < index; i++) {
                    allSiblings.get(i).wrap(ASIDE);
                }
                last.wrap(ASIDE);
            }
        }
    });
}Example 32
| Project: bank-importer-master File: ItauPoupancaImportador.java View source code |
/**
 * Loads the savings account statement and converts its rows into records.
 *
 * Rows are the {@code div.rowPar} / {@code div.rowImpar} entries of the statement
 * fieldset; cells 1, 2 and 3 supply date, description and value. Rows whose trimmed
 * description is listed in {@code descricoesIgnorar} are left out.
 *
 * @return the statement records in page order
 */
@Override
public List<BancoRegistro> carregarLancamentosExtrato() {
    carregarOpcoesMenu();
    String html = carregarHtml("https://ww70.itau.com.br/M/SaldoPoupanca.aspx", 200);
    Document doc = carregarHtmlDeLink(html, "a[href^=SaldoPoupanca]", "Últimos 30 dias");
    Element statement = doc.getElementById("ctl00_ContentPlaceHolder1_Fieldset2");

    List<BancoRegistro> records = new ArrayList<BancoRegistro>();
    for (Element row : statement.select("div.rowPar, div.rowImpar")) {
        Elements cells = row.select("td");
        String data = cells.get(1).text();
        String desc = cells.get(2).text().trim();
        String val = cells.get(3).text();
        if (descricoesIgnorar.contains(desc)) {
            continue;
        }
        records.add(gerarRegistro(data, desc, val));
    }
    return records;
}
Example 33
| Project: bennu-master File: Component.java View source code |
/**
 * Expands component placeholders in an HTML fragment.
 *
 * Every element with a {@code bennu-component} attribute whose value is a key of
 * {@code COMPONENTS} is replaced by the output of that component; unknown keys are left as is.
 *
 * @param origin HTML to process
 * @return the string form of the resulting document
 */
public static String process(String origin) {
    Document doc = Jsoup.parse(origin);
    doc.select("[bennu-component]").forEach(placeholder -> {
        String key = placeholder.attr("bennu-component");
        Optional.ofNullable(COMPONENTS.get(key))
                .ifPresent(handler -> placeholder.replaceWith(handler.process(placeholder)));
    });
    return doc.toString();
}
Example 34
| Project: CarHome-master File: FetcherSLFilter.java View source code |
/**
 * Reads the brand and type option lists from the page and hands them to {@code parseAjax}.
 *
 * The first {@code select} element supplies the brands, the second the types; each map
 * goes from option value to option text in page order.
 *
 * @param url page to load (UTF-8)
 */
public static void fetchRawData(String url) {
    Map<String, String> brandMap = Maps.newLinkedHashMap();
    Map<String, String> typeMap = Maps.newLinkedHashMap();
    Elements dropdowns = getDocument(url, "UTF-8").select("select");
    for (Element brandOption : dropdowns.get(0).select("option")) {
        brandMap.put(brandOption.attr("value"), brandOption.text());
    }
    for (Element typeOption : dropdowns.get(1).select("option")) {
        typeMap.put(typeOption.attr("value"), typeOption.text());
    }
    parseAjax(brandMap, typeMap);
}
Example 35
| Project: clicker-master File: CNProxyGetter.java View source code |
/**
 * Scrapes proxy pages 1 to 10 of cnproxy.com.
 *
 * Each row after the first of the third table yields a proxy: the host is the first
 * cell's text and the port is decoded from the cell's inline script by stripping the
 * {@code document.write} scaffolding and applying {@code REPLACEMENTS}. Pages and rows
 * that cannot be read or decoded are skipped silently.
 *
 * @return all proxies that could be parsed
 */
@Override
public Set<Proxy> find() {
    final Set<Proxy> found = new HashSet<Proxy>();
    for (int pageNo = 1; pageNo <= 10; pageNo++) {
        try {
            final URL pageUrl = new URL("http://www.cnproxy.com/proxy" + pageNo + ".html");
            final Document page = Jsoup.parse(pageUrl, TIMEOUT);
            final Element proxyTable = page.getElementsByTag("table").get(2);
            final Elements rows = proxyTable.getElementsByTag("tr");
            int rowNo = 1;
            while (rowNo < rows.size()) {
                final Element row = rows.get(rowNo++);
                try {
                    final Element firstCell = row.getElementsByTag("td").get(0);
                    final String host = firstCell.text();
                    String port = firstCell.getElementsByTag("script").get(0).data()
                            .replace("document.write(", "")
                            .replace("\"", "")
                            .replace(")", "")
                            .replace("+", "")
                            .replace(":", "");
                    for (final Map.Entry<String, String> rule : REPLACEMENTS.entrySet()) {
                        port = port.replaceAll(rule.getKey(), rule.getValue());
                    }
                    found.add(new Proxy(host, Integer.valueOf(port), this.properties));
                } catch (final Exception ignoredRow) {
                }
            }
        } catch (final Exception ignoredPage) {
        }
    }
    return found;
}
Example 36
| Project: CN1ML-NetbeansModule-master File: ListLinks.java View source code |
/**
 * Fetches the URL given as the single argument and prints its media elements,
 * link imports and anchors.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    String url = args[0];
    print("Fetching %s...", url);

    Document page = Jsoup.connect(url).get();
    Elements anchors = page.select("a[href]");
    Elements media = page.select("[src]");
    Elements imports = page.select("link[href]");

    print("\nMedia: (%d)", media.size());
    for (Element item : media) {
        boolean isImage = item.tagName().equals("img");
        if (!isImage) {
            print(" * %s: <%s>", item.tagName(), item.attr("abs:src"));
        } else {
            print(" * %s: <%s> %sx%s (%s)", item.tagName(), item.attr("abs:src"), item.attr("width"), item.attr("height"), trim(item.attr("alt"), 20));
        }
    }

    print("\nImports: (%d)", imports.size());
    for (Element imported : imports) {
        print(" * %s <%s> (%s)", imported.tagName(), imported.attr("abs:href"), imported.attr("rel"));
    }

    print("\nLinks: (%d)", anchors.size());
    for (Element anchor : anchors) {
        print(" * a: <%s> (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
    }
}
Example 37
| Project: crawler-master File: DefaultAssetsParser.java View source code |
/**
 * Collects the assets referenced by a document.
 *
 * @param doc     parsed page
 * @param referer referer recorded on every returned URL
 * @return the absolute targets of all {@code link[href]} elements and of all
 *         elements with a {@code src} attribute
 */
@Override
public Set<CrawlerURL> getAssets(Document doc, String referer) {
    Elements sources = doc.select("[src]");
    Elements linked = doc.select("link[href]");
    int expectedSize = sources.size() + linked.size();
    Set<CrawlerURL> assets = new HashSet<CrawlerURL>(expectedSize);
    for (Element element : linked) {
        String target = element.attr("abs:href");
        assets.add(new CrawlerURL(target, referer));
    }
    for (Element element : sources) {
        String target = element.attr("abs:src");
        assets.add(new CrawlerURL(target, referer));
    }
    return assets;
}
Example 38
| Project: curiosity-maps-master File: LocationCrawler.java View source code |
/**
 * Parses the list of {@link RoverLocation}s from the retrieved {@code document}.
 *
 * Only one location is kept per {@code endSol}: the first one encountered in document order.
 * NOTE(review): the original comment said "last location"; whether first-encountered equals
 * last-in-time depends on the feed's ordering, so confirm against the data source.
 *
 * Entries that fail to parse are reported via a stack trace and skipped.
 */
private List<RoverLocation> parseLocations(Document document) {
    Map<Integer, RoverLocation> bySol = new HashMap<>();
    for (Element entry : document.select("location")) {
        try {
            int sol = Integer.parseInt(entry.select("endSol").text());
            if (!bySol.containsKey(sol)) {
                double latitude = Double.parseDouble(entry.select("lat").text());
                double longitude = Double.parseDouble(entry.select("lon").text());
                String arrivalTime = entry.select("arrivalTime").text();
                bySol.put(sol, new RoverLocation(sol, latitude, longitude, arrivalTime));
            }
        } catch (Throwable problem) {
            problem.printStackTrace();
        }
    }
    return ImmutableList.copyOf(bySol.values());
}
Example 39
| Project: en-webmagic-master File: CssSelector.java View source code |
/**
 * Applies the configured CSS selector to the text and extracts a value per match.
 *
 * @param text HTML to parse
 * @return the non-empty values returned by {@code getValue} for the matched elements,
 *         in document order; empty when nothing matches
 */
@Override
public List<String> selectList(String text) {
    List<String> values = new ArrayList<String>();
    Elements matched = Jsoup.parse(text).select(selectorText);
    if (!CollectionUtils.isNotEmpty(matched)) {
        return values;
    }
    for (Element match : matched) {
        String extracted = getValue(match);
        if (StringUtils.isEmpty(extracted)) {
            continue;
        }
        values.add(extracted);
    }
    return values;
}
Example 40
| Project: EventApp-master File: ConferenceSessionLoader.java View source code |
/**
 * Parses a track page into conference sessions and passes them to the listener.
 *
 * The track name comes from {@code h1.entry-title}; each row of the first
 * {@code table.track} becomes one session. Speaker profile, description and room are
 * only set when the row contains the corresponding span.
 *
 * @param body raw HTML of the track page
 */
@Override
public void onResponse(String body) {
    List<ConferenceSession> sessions = new ArrayList<ConferenceSession>();
    Document document = Jsoup.parse(body);
    // Track name
    Elements h1 = document.select("h1.entry-title");
    String trackName = null;
    if (!h1.isEmpty()) {
        trackName = h1.first().text();
    }
    Elements elements = document.select("table.track");
    if (!elements.isEmpty()) {
        Elements trs = elements.first().select("tr");
        for (Element tr : trs) {
            ConferenceSession session = new ConferenceSession();
            session.setTrackName(trackName);
            session.setSessionTitle(text(tr.select("span.session_title")));
            session.setSpeakerName(text(tr.select("span.speaker_name")));
            Elements profile = tr.select("span.speaker_profile");
            if (!profile.isEmpty()) {
                session.setSpeakerProfile(text(profile));
            }
            session.setBeginTime(text(tr.select("span.starttime")));
            Elements description = tr.select("span.session_description");
            if (!description.isEmpty()) {
                // NOTE(review): the first character of this literal is a replacement character
                // in the source as received (probably a lost opening bracket); kept unchanged.
                session.setDescription(description.first().text().replace("� 講演内容 】<br />", ""));
            }
            // Rows without a room span (e.g. header rows) previously caused an NPE here.
            Elements room = tr.select("span.roomname");
            if (!room.isEmpty()) {
                session.setRoom(room.first().text());
            }
            sessions.add(session);
        }
    }
    listener.onSuccess(sessions);
}
Example 41
| Project: extentreports-java-master File: SystemAttributeTests.java View source code |
/**
 * Flushes the report and asserts that both the key and the value appear as the text
 * of some {@code .environment td} cell in the generated HTML.
 *
 * @param key   expected cell text for the attribute name
 * @param value expected cell text for the attribute value
 */
private void performAssertForKVPairs(String key, String value) {
    extent.flush();
    Document report = Jsoup.parse(Reader.readAllText(htmlFilePath));

    boolean keySeen = false;
    boolean valueSeen = false;
    for (Element cell : report.select(".environment td")) {
        String cellText = cell.text();
        keySeen |= cellText.equals(key);
        valueSeen |= cellText.equals(value);
    }

    Assert.assertTrue(keySeen);
    Assert.assertTrue(valueSeen);
}
Example 42
| Project: FakeWeather-master File: MzituZiPaiFragment.java View source code |
/**
 * Downloads {@code url} and collects one {@code Girl} per list item of the first
 * {@code div.postlist}, using the {@code src} of the item's first {@code img}.
 *
 * A page without a post list, or a list item without an image, is skipped instead
 * of failing with a NullPointerException.
 *
 * @param url page to fetch
 * @return the images found; empty when the page has none or the request failed
 */
@Override
public List<Girl> call(String url) {
    List<Girl> girls = new ArrayList<>();
    try {
        Document doc = Jsoup.connect(url).timeout(10000).get();
        Element postList = doc.select("div.postlist").first();
        if (postList == null) {
            // Layout changed or an error page was served: nothing to collect.
            return girls;
        }
        for (Element item : postList.select("li")) {
            Element image = item.select("img").first();
            if (image != null) {
                girls.add(new Girl(image.attr("src")));
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return girls;
}Example 43
| Project: FudanBBS-master File: ListLinks.java View source code |
/**
 * Fetches the URL given as the single argument and prints its media elements,
 * {@code link} imports and anchors.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    String url = args[0];
    print("Fetching %s...", url);
    Document page = Jsoup.connect(url).get();
    Elements anchors = page.select("a[href]");
    Elements mediaItems = page.select("[src]");
    Elements importLinks = page.select("link[href]");

    print("\nMedia: (%d)", mediaItems.size());
    for (Element item : mediaItems) {
        if (!item.tagName().equals("img")) {
            print(" * %s: <%s>", item.tagName(), item.attr("abs:src"));
            continue;
        }
        // Images additionally report their dimensions and a shortened alt text.
        print(" * %s: <%s> %sx%s (%s)", item.tagName(), item.attr("abs:src"), item.attr("width"), item.attr("height"), trim(item.attr("alt"), 20));
    }

    print("\nImports: (%d)", importLinks.size());
    for (Element importLink : importLinks) {
        print(" * %s <%s> (%s)", importLink.tagName(), importLink.attr("abs:href"), importLink.attr("rel"));
    }

    print("\nLinks: (%d)", anchors.size());
    for (Element anchor : anchors) {
        print(" * a: <%s> (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
    }
}Example 44
| Project: Gazetti_Newspaper_Reader-master File: hindu.java View source code |
/**
 * Downloads the article at {@code mArticleURL} and extracts its title, header image
 * URL and body text.
 *
 * The page is requested once: the response returned by {@code execute()} is parsed
 * directly rather than calling {@code get()} on the connection, which would issue a
 * second HTTP request for the same page.
 *
 * Side effects: assigns {@code titleText}, {@code mImageURL} and {@code bodyText},
 * which are not declared in this method. Body paragraphs are appended to whatever
 * {@code bodyText} already holds.
 *
 * @return {@code {title, imageUrl, body}}, or {@code null} when the request fails,
 *         the status is not 200, or the expected elements are missing
 */
public String[] getHinduArticleContent() {
    Document doc;
    String[] result = new String[3];
    String url = mArticleURL;
    try {
        Connection connection = Jsoup.connect(url).userAgent("Mozilla").timeout(10 * 1000);
        Response response = connection.execute();
        if (response == null) {
            Crashlytics.log("Is response null ? " + (null == response));
            return null;
        } else if (response.statusCode() != 200) {
            Crashlytics.log("Received response - " + response.statusCode() + " -- " + response.statusMessage());
            Crashlytics.log("Received response - " + response.body());
            return null;
        }
        // Reuse the response already received instead of fetching the page again.
        doc = response.parse();
        // get Body
        Element bodyElement = doc.body();
        // get Title
        String HinduTitleXPath = ConfigService.getInstance().getTheHinduHead();
        Elements titleElements = bodyElement.select(HinduTitleXPath);
        // first() is null when the selector matches nothing; the NPE handler below
        // turns that into a null result.
        titleText = titleElements.first().text();
        // get HeaderImageUrl
        mImageURL = getImageURL(bodyElement);
        String HinduArticleXPath = ConfigService.getInstance().getTheHinduBody();
        Elements bodyArticleElements = bodyElement.select(HinduArticleXPath);
        for (Element textArticleElement : bodyArticleElements) {
            bodyText += textArticleElement.text() + "\n\n";
        }
        result[0] = titleText;
        result[1] = mImageURL;
        result[2] = bodyText;
    } catch (IOException e) {
        Crashlytics.logException(e);
        return null;
    } catch (NullPointerException npe) {
        bodyText = null;
        Crashlytics.logException(npe);
        return null;
    } catch (Exception e) {
        Crashlytics.logException(e);
        return null;
    }
    return result;
}Example 45
| Project: HackerNews-master File: UserParser.java View source code |
/**
 * Loads the profile page of {@code username} over an anonymous connection and reads
 * the "created", "karma", "avg" and "about" cells into a {@code User}.
 *
 * A missing or unparsable "avg" value is stored as {@code -1.0f}.
 *
 * @param username id of the user to load
 * @return the populated user, or {@code null} when the request fails, the karma is
 *         not a number, or an expected cell is missing
 */
public static User parseUser(String username) {
    try {
        User parsed = new User();
        parsed.username = username;
        // don't use user cookie so that "about" text appears correctly
        Document profile = ConnectionManager.anonConnect("/user?id=" + username).get();
        Elements rows = profile.select("form > table > tbody > tr");

        String createdText = rows.select("td:containsOwn(created:) + td").first().text();
        parsed.created = createdText;

        String karmaText = rows.select("td:containsOwn(karma:) + td").first().text();
        parsed.karma = Integer.parseInt(karmaText);

        float average;
        try {
            average = Float.parseFloat(rows.select("td:containsOwn(avg:) + td").first().text());
        } catch (Exception e) {
            average = -1.0f;
        }
        parsed.avg = average;

        String aboutMarkup = rows.select("td:containsOwn(about:) + td").first().html();
        parsed.aboutHtml = aboutMarkup;
        return parsed;
    } catch (IOException e) {
        e.printStackTrace();
        Log.d(TAG, "IOException parsing UserModel for: " + username);
        return null;
    } catch (NumberFormatException e) {
        e.printStackTrace();
        Log.d(TAG, "NumberFormatException parsing UserModel for: " + username);
        return null;
    } catch (NullPointerException e) {
        e.printStackTrace();
        Log.d(TAG, "NullPointerException parsing UserModel for: " + username);
        return null;
    }
}Example 46
| Project: html-exporter-master File: StyleParser.java View source code |
/**
 * Parses the data content of every given element as CSS and merges the resulting
 * rules into a single map via {@code mapStyles}.
 *
 * @param elements elements whose data is CSS text
 * @return the collected styles
 * @throws RuntimeException wrapping any exception raised while parsing or mapping
 */
public Map<String, Style> parseStyles(Elements elements) {
    Map<String, Style> collected = new HashMap<>();
    for (Element styleElement : elements) {
        try {
            mapStyles(CSSParser.parse(styleElement.data()), collected);
        } catch (Exception failure) {
            throw new RuntimeException(failure);
        }
    }
    return collected;
}Example 47
| Project: janglipse-master File: KeywordDocParser.java View source code |
/**
 * Builds one {@code KeywordDocumentation} per {@code table.detailHeader}: the name
 * is the text of the table's first {@code td.detailHeaderName}, the description is
 * the outer HTML of the node that follows the table.
 *
 * @param doc parsed documentation page
 * @return the keywords found, in document order (empty when there are none)
 */
private List<KeywordDocumentation> parse(Document doc) {
    List<KeywordDocumentation> keywords = new ArrayList<KeywordDocumentation>();
    // Iterating an empty selection is a no-op, so no explicit size check is needed.
    for (Element headerTable : doc.select("table.detailHeader")) {
        KeywordDocumentation entry = new KeywordDocumentation();
        entry.setName(headerTable.select("td.detailHeaderName").get(0).text());
        entry.setDescription(headerTable.nextSibling().outerHtml());
        keywords.add(entry);
    }
    return keywords;
}Example 48
| Project: java-manga-reader-master File: MangaUtil.java View source code |
/**
 * Fetches the titles listed on Anime News Network's licensed list.
 *
 * Each title is normalised: a leading "(The)" becomes "The", and anything from the
 * last "(" onwards is cut off.
 *
 * @return the normalised titles, in page order
 * @throws IOException If it cannot complete the request.
 */
public static List<String> getLicensedManga() throws IOException {
    String requestUrl = "http://www.animenewsnetwork.com/encyclopedia/anime-list.php"
            + "?licensed=1"
            + "&sort=title"
            + "&showG=1";
    // maxBodySize(0) lifts jsoup's body size limit for this request.
    Document page = Jsoup.connect(requestUrl).maxBodySize(0).get();
    Elements entries = page.getElementsByClass("HOVERLINE");
    List<String> titles = new ArrayList<String>(entries.size());
    for (Element entry : entries) {
        String name = entry.text();
        if (name.startsWith("(The)")) {
            name = name.replace("(The)", "The");
        }
        int lastParen = name.lastIndexOf('(');
        if (lastParen >= 0) {
            name = name.substring(0, lastParen).trim();
        }
        titles.add(name);
    }
    return titles;
}Example 49
| Project: JCommons-master File: DownloaderTest.java View source code |
/**
 * Downloads the papers linked as "[PDF]" or "[arXiv]" on a fixed Stack Exchange
 * meta page into {@code D:/dl}. Each file is named after the first
 * {@code <strong>} element of the link's parent.
 *
 * Links whose stream cannot be opened are skipped. Both streams are closed even
 * when the copy fails, and data is copied in chunks rather than one byte at a time
 * from the unbuffered network stream.
 *
 * @param args unused
 * @throws IOException if the index page cannot be fetched or a file cannot be written
 */
public static void main(String[] args) throws IOException {
    Document doc = Jsoup.connect("http://meta.stackexchange.com/questions/134495/academic-papers-using-stack-exchange-data").get();
    Elements eles = doc.getElementsContainingText("[PDF]");
    eles.addAll(doc.getElementsContainingText("[arXiv]"));
    String folderName = "D:/dl";
    for (Element ele : eles) {
        String src = ele.attr("href");
        if (src == null || src.trim().equals(""))
            continue;
        URL url = new URL(src);
        Element parent = ele.parent();
        Elements eles1 = parent.getElementsByTag("strong");
        Element nameEle = eles1.get(0);
        // Strip characters that are not allowed in Windows file names.
        String fileName = nameEle.text().replace(":", " ").replace("\"", "").replace("'", "").replace("?", "");
        if (fileName.contains("Fit or"))
            continue;
        if (!fileName.endsWith("."))
            fileName = fileName.concat(".");
        fileName = fileName.concat("pdf");
        System.out.println(fileName);
        InputStream in;
        try {
            in = url.openStream();
        } catch (Exception e) {
            // Best effort: an unreachable link is skipped.
            continue;
        }
        // try-with-resources closes both streams on every path, including when the
        // output file cannot be created or the copy is interrupted.
        try (InputStream source = in;
                OutputStream out = new BufferedOutputStream(new FileOutputStream(folderName + "/" + fileName))) {
            byte[] buffer = new byte[8192];
            for (int read; (read = source.read(buffer)) != -1; ) {
                out.write(buffer, 0, read);
            }
        }
    }
}Example 50
| Project: jeboorker-master File: DNBMetadataDownloader.java View source code |
/**
 * Parses each non-null HTML payload as UTF-8 (base URI {@code MAIN_URL}) and wraps
 * the document together with its {@code td} elements in a download entry.
 *
 * @param metadataHtmlContent raw HTML pages; {@code null} items are skipped
 * @return one entry per non-null page, in input order
 * @throws IOException if a page cannot be read
 */
private List<MetadataDownloadEntry> getMetadataDownloadEntries(List<byte[]> metadataHtmlContent) throws IOException {
    List<MetadataDownloadEntry> entries = new ArrayList<>(metadataHtmlContent.size());
    for (byte[] rawPage : metadataHtmlContent) {
        if (rawPage == null) {
            continue;
        }
        Document parsedPage = Jsoup.parse(new ByteArrayInputStream(rawPage), StringUtil.UTF_8, MAIN_URL);
        entries.add(new DNBMetadataDownloadEntry(parsedPage, parsedPage.getElementsByTag("td")));
    }
    return entries;
}Example 51
| Project: jsoup-master File: ListLinks.java View source code |
/**
 * Lists what a page references: elements with a {@code src}, {@code link}
 * imports, and anchors. The page URL is the single command-line argument.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    String target = args[0];
    print("Fetching %s...", target);

    Document fetched = Jsoup.connect(target).get();
    Elements hrefAnchors = fetched.select("a[href]");
    Elements srcElements = fetched.select("[src]");
    Elements linkElements = fetched.select("link[href]");

    print("\nMedia: (%d)", srcElements.size());
    for (Element srcElement : srcElements) {
        boolean isImage = srcElement.tagName().equals("img");
        if (isImage) {
            print(" * %s: <%s> %sx%s (%s)", srcElement.tagName(), srcElement.attr("abs:src"), srcElement.attr("width"), srcElement.attr("height"), trim(srcElement.attr("alt"), 20));
        } else {
            print(" * %s: <%s>", srcElement.tagName(), srcElement.attr("abs:src"));
        }
    }

    print("\nImports: (%d)", linkElements.size());
    for (Element linkElement : linkElements) {
        print(" * %s <%s> (%s)", linkElement.tagName(), linkElement.attr("abs:href"), linkElement.attr("rel"));
    }

    print("\nLinks: (%d)", hrefAnchors.size());
    for (Element hrefAnchor : hrefAnchors) {
        print(" * a: <%s> (%s)", hrefAnchor.attr("abs:href"), trim(hrefAnchor.text(), 35));
    }
}Example 52
| Project: jubula.core-master File: HtmlImageLister.java View source code |
/**
 * Prints to stdout the path of every image referenced by the HTML files found
 * under the directory given as the first argument. Each distinct path is printed
 * once. A path is the file's parent directory, a "/", and the {@code src} value.
 *
 * @param args first element: the directory to scan
 */
public static void main(String[] args) {
    try {
        if (args.length == 0) {
            System.err.println("Please provide a path!\n Usage: java -jar HtmlImageLister.jar <path>");
            System.exit(-1);
        }
        Set<String> imagePaths = new HashSet<>();
        for (File htmlFile : HtmlImageLister.getFilesRecursive(args[0])) {
            String directoryPrefix = htmlFile.getParentFile() + "/";
            for (Element image : Jsoup.parse(htmlFile, "UTF-8").getElementsByTag("img")) {
                imagePaths.add(directoryPrefix.concat(image.attr("src")));
            }
        }
        for (String imagePath : imagePaths) {
            System.out.println(imagePath);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}Example 53
| Project: karma-exchange-master File: SalesforceUtil.java View source code |
/**
 * Rewrites the {@code src} of every {@code img} served from the image CDN
 * ({@code IMG_CDN_DOMAIN}) so that it points at the source organisation's own
 * domain over https, keeping path and query.
 *
 * Images whose {@code src} is not a valid URI, or has no host (relative URLs), are
 * left untouched. The query is only appended when the original URI has one.
 *
 * @param doc        document whose images are rewritten in place
 * @param sourceInfo supplies the domain the images are redirected to
 */
private static void updateSalesforceCdnImgLinks(Document doc, EventSourceInfo sourceInfo) {
    Elements imgs = doc.getElementsByTag("img");
    for (Element img : imgs) {
        URI uri;
        try {
            uri = new URI(img.attr("src"));
        } catch (URISyntaxException ignored) {
            // Malformed src: leave the image as it is.
            continue;
        }
        // getHost() is null for relative or opaque URIs; such images are not CDN links.
        String domain = uri.getHost();
        if (domain == null || !domain.toLowerCase().endsWith(IMG_CDN_DOMAIN)) {
            continue;
        }
        String rewritten = "https://" + sourceInfo.getDomain() + uri.getPath();
        String query = uri.getQuery();
        if (query != null) {
            // Avoid appending the literal text "?null" when there is no query.
            rewritten += "?" + query;
        }
        img.attr("src", rewritten);
    }
}Example 54
| Project: ManalithBot-master File: TranslatorPlugin.java View source code |
/**
 * Translates {@code message} into the language {@code to} through the Bing
 * translator endpoint and returns the translated text.
 *
 * Both values are URL-encoded before being placed in the query string, so messages
 * containing {@code &}, {@code '}, {@code #} or non-ASCII text no longer corrupt
 * the request. The Korean literals were mis-decoded in this copy of the source and
 * are restored to their UTF-8 form.
 *
 * @param to      target language code
 * @param message text to translate
 * @return the translated text, or a fixed "nothing to translate" message when the
 *         request fails
 */
@BotCommand("번역")
public String translate(@Option(name = "ko|en...", help = "번역할 대상 언어") String to, @Option(name = "메시지", help = "번역할 메시지") String message) {
    final String url = "https://api.datamarket.azure.com/Bing/MicrosoftTranslator/v1/Translate?Text='%s'&To='%s'";
    String login = "USER_ID_IGNORED:" + clientSecret;
    String base64login = new String(Base64.encodeBase64(login.getBytes()));
    try {
        // UnsupportedEncodingException is an IOException and is handled below.
        String encodedMessage = java.net.URLEncoder.encode(message, "UTF-8");
        String encodedTarget = java.net.URLEncoder.encode(to, "UTF-8");
        Document doc = Jsoup.connect(String.format(url, encodedMessage, encodedTarget)).header("Authorization", "Basic " + base64login).ignoreContentType(true).get();
        // Without a placeholder the document argument was never rendered.
        logger.debug("response: {}", doc);
        Elements elem = doc.select("d|text[m:type=Edm.String]");
        return elem.text();
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
    }
    return "번역할 내용이 없습니다.";
}Example 55
| Project: medium-textview-master File: Utils.java View source code |
/**
 * Adds one child view to {@code elementView} for every element that is a
 * blockquote, header, iframe, paragraph, image or div. The checks run in that
 * order and the first match wins. Elements of any other kind are ignored.
 *
 * @param elementView container receiving the views
 * @param elements    elements to render, in order
 */
public static void appendView(ElementView elementView, Elements elements) {
    for (Element node : elements) {
        if (JsoupUtils.isBlockquote(node)) {
            elementView.addView(new BlockquoteView(elementView.getContext(), node));
        } else if (JsoupUtils.isHeader(node)) {
            elementView.addView(new HeaderView(elementView.getContext(), node));
        } else if (JsoupUtils.isIFrame(node)) {
            elementView.addView(new IFrameView(elementView.getContext(), node));
        } else if (JsoupUtils.isParagraph(node)) {
            elementView.addView(new ParagraphView(elementView.getContext(), node));
        } else if (JsoupUtils.isImage(node)) {
            elementView.addView(new ImageView(elementView.getContext(), node));
        } else if (JsoupUtils.isDiv(node)) {
            elementView.addView(new DivView(elementView.getContext(), node));
        }
        // anything else has no view representation and is skipped
    }
}Example 56
| Project: memorabilia-master File: PostTag.java View source code |
/**
 * Collects the elements matched by this tag's selector ({@code xpath}), followed by
 * every {@code script} element whose HTML contains {@code pattern}.
 *
 * @param doc document to search
 * @return selector matches first, then the matching scripts
 */
public Elements selectFrom(Document doc) {
    Elements matches = new Elements();
    matches.addAll(doc.select(xpath));
    for (Element scriptElement : doc.getElementsByTag("script")) {
        if (!scriptElement.html().contains(pattern)) {
            continue;
        }
        matches.add(scriptElement);
    }
    return matches;
}
Example 57
| Project: mensaapp-master File: PreviewMenuParser.java View source code |
/**
 * Reads the first four columns of the menu table into a {@code Menu}. Row 1 holds
 * the meal names, row 2 the second argument passed to {@code addMeal}, and row 3
 * the cells handed to {@code parseMealTypes}. The price is the default for the name.
 *
 * @param weeklyMenu menu of the week the new menu belongs to
 * @param menuTable  table element to read
 * @return the populated menu
 */
@Override
protected Menu parseMenu(WeeklyMenu weeklyMenu, Element menuTable) {
    Menu parsedMenu = new Menu(weeklyMenu);
    Elements tableRows = menuTable.select("tr");
    Elements nameCells = tableRows.get(1).children();
    Elements detailCells = tableRows.get(2).children();
    Elements typeCells = tableRows.get(3).children();
    for (int column = 0; column < 4; column++) {
        String mealName = nameCells.get(column).text();
        addMeal(parsedMenu, mealName, detailCells.get(column).text(), parseMealTypes(typeCells.get(column)), getDefaultPrice(mealName));
    }
    return parsedMenu;
}Example 58
| Project: mini-blog-master File: Ku6VideoHandler.java View source code |
/**
 * Resolves a Ku6 video.
 *
 * URLs that do not contain "v.ku6.com", and Ku6 URLs whose page cannot be
 * processed, are passed on to the next handler ({@code successor}).
 *
 * @param url
 *            video URL
 * @return the video with title, flash address and thumbnail set, or whatever the
 *         successor returns
 */
public Video getVideo(String url) {
    if (!url.contains("v.ku6.com")) {
        return this.successor.getVideo(url);
    }
    try {
        Document page = VideoUtil.getURLContent(url);
        String markup = page.html();
        // video title
        String title = page.title();
        // video (flash) address: "value" attribute of the first "text_A" element
        String flash = page.getElementsByClass("text_A").attr("value");
        // thumbnail: last "http://" URL inside the "A.VideoInfo =" script section
        int sectionStart = markup.indexOf("A.VideoInfo =");
        int sectionEnd = markup.indexOf("\", data: {");
        String section = markup.substring(sectionStart, sectionEnd);
        String pic = section.substring(section.lastIndexOf("http://"));
        Video video = new Video();
        video.setPic(pic);
        video.setFlash(flash);
        video.setTitle(title);
        return video;
    } catch (Exception e) {
        logger.error("---------------->error is " + e.getMessage());
        e.printStackTrace();
    }
    return this.successor.getVideo(url);
}Example 59
| Project: moulder-j-master File: MoulderShop.java View source code |
/**
 * Applies every registered templator configuration to the document. Each element
 * matched by a configuration's selector is replaced by the nodes its moulders
 * produce.
 *
 * @param doc document modified in place
 */
public void process(Document doc) {
    for (TemplatorConfig config : registry.getConfig()) {
        for (Element target : doc.select(config.selector)) {
            Collection<Node> replacements = MouldersApplier.applyMoulders(config.templators, Arrays.<Node>asList(target));
            // Insert the produced nodes before the target, then drop the target.
            for (Node replacement : replacements) {
                target.before(replacement.outerHtml());
            }
            target.remove();
        }
    }
}Example 60
| Project: myrobotlab-master File: JSoupExtractor.java View source code |
/**
 * Parses every value of {@code htmlField} as HTML and adds each element matching
 * {@code jSoupSelector} to {@code outputField} of the same document.
 *
 * @param doc document read from and written to
 * @return always {@code null}
 */
@Override
public List<Document> processDocument(Document doc) {
    for (Object htmlValue : doc.getField(htmlField)) {
        Elements matches = Jsoup.parse(htmlValue.toString()).select(jSoupSelector);
        for (Element match : matches) {
            doc.addToField(outputField, match);
        }
    }
    return null;
}Example 61
| Project: NewsStats-master File: NewYorkTimesContentHandler.java View source code |
/**
 * Extracts the article from a crawled page and appends it to {@code articles}.
 *
 * Pages that are not HTML, have no element with id "story", or whose content is
 * rejected by {@code filterArticles} add nothing. A dateline that cannot be parsed
 * leaves the created date {@code null}.
 *
 * @param page crawled page
 * @return the {@code articles} collection, with the new article added when found
 */
@Override
public List extractArticles(Page page) {
    if (!(page.getParseData() instanceof HtmlParseData)) {
        return articles;
    }
    System.out.println("Current URL: " + page.getWebURL());
    HtmlParseData parseData = (HtmlParseData) page.getParseData();
    Document parsed = Jsoup.parseBodyFragment(parseData.getHtml());
    Element story = parsed.getElementById("story");
    if (story == null) {
        // if no article can be found
        return articles;
    }
    String heading = story.getElementById("story-heading").ownText();
    String dateline = story.getElementsByClass("dateline").first().attr("datetime");
    Date created = null;
    try {
        created = new SimpleDateFormat("yyyy-MM-dd").parse(dateline);
    } catch (ParseException e) {
        e.printStackTrace();
    }
    String byline = story.getElementsByClass("byline-author").first().ownText();
    StringBuilder bodyText = new StringBuilder();
    for (Element paragraph : story.select("p.story-body-text.story-content")) {
        bodyText.append(paragraph.ownText());
    }
    String content = bodyText.toString();
    if (!filterArticles(content)) {
        // ignore the article if filter does not approve
        return articles;
    }
    Article article = new NewYorkTimesArticle();
    article.setTitle(heading);
    article.setCreatedDate(created);
    article.setAuthor(byline);
    article.setContent(content);
    articles.add(article);
    return articles;
}Example 62
| Project: nocket-master File: AbstractHtmlVisitor.java View source code |
/**
 * Appends a {@code <ul wicket:id="groupTabbedPanel">} to the first form of the
 * document when no element with that wicket id exists yet.
 *
 * Nothing happens unless the context's naming strategy is a
 * {@code GroupNameFileAndClassNameStrategy} that reports both a domain object with
 * group annotations and that it is the strategy for the main object.
 *
 * @param document document modified in place
 */
protected void checkAndAddGroupTabbedPanel(Document document) {
    if (!(getContext().getFileAndClassNameStrategy() instanceof GroupNameFileAndClassNameStrategy)) {
        return;
    }
    GroupNameFileAndClassNameStrategy groupStrategy = (GroupNameFileAndClassNameStrategy) getContext().getFileAndClassNameStrategy();
    boolean applies = groupStrategy.isDomainObjectWithGroupAnnotations() && groupStrategy.isStrategyForMainObject();
    if (!applies) {
        return;
    }
    if (!document.getElementsByAttributeValue("wicket:id", "groupTabbedPanel").isEmpty()) {
        // panel already present
        return;
    }
    Elements forms = document.getElementsByTag("form");
    if (forms.isEmpty()) {
        return;
    }
    forms.first().appendElement("ul").attr("wicket:id", "groupTabbedPanel");
}Example 63
| Project: pack-master File: PttGossiping.java View source code |
/**
 * Prints the links of the most recent posts on the Gossiping board.
 *
 * The newest index page number is derived from the "previous page" link, then index
 * pages are read backwards until at least {@code loadLastPosts} links are collected.
 *
 * @param argv unused
 */
public static void main(String[] argv) {
    // The Gossiping board requires the "over18" cookie; the remote data is HTML.
    // The second link of the action bar's right-hand group is the "previous page" link.
    Elements actionLinks = CrawlerPack.start().addCookie("over18", "1").getFromHtml(gossipMainPage).select(".action-bar .pull-right > a");
    String prevHref = actionLinks.get(1).attr("href");
    String prevPage = prevHref.replaceAll("/bbs/Gossiping/index([0-9]+).html", "$1");
    // index number of the current last page
    int lastPage = Integer.valueOf(prevPage) + 1;
    List<String> lastPostsLink = new ArrayList<String>();
    while (loadLastPosts > lastPostsLink.size()) {
        String currPage = String.format(gossipIndexPage, lastPage--);
        Elements titleLinks = CrawlerPack.start().addCookie("over18", "1").getFromHtml(currPage).select(".title > a");
        for (Element titleLink : titleLinks) {
            lastPostsLink.add(titleLink.attr("href"));
        }
    }
    // show the result
    for (String postUrl : lastPostsLink) {
        System.out.println(postUrl);
    }
}Example 64
| Project: qiushi_baike-master File: KJFMUtils.java View source code |
/**
 * Fetches page {@code pageNo} of the feed at {@code KE_JI_FENG_MANG_URL} and turns
 * every {@code xs-entry} element inside {@code #xs-main} into a {@code KJFMItem}.
 *
 * A link that starts with "www" now gets the "http://" scheme prepended. The
 * previous code appended it, producing values like "www.example.comhttp://".
 *
 * @param pageNo page number appended to the base URL
 * @return the parsed items, or {@code null} when the page has no entries
 * @throws IOException if the page cannot be fetched
 */
public static LinkedList<KJFMItem> handleKJFMItems(String pageNo) throws IOException {
    LinkedList<KJFMItem> items = null;
    String url = KE_JI_FENG_MANG_URL + pageNo + "/";
    System.out.println("url = " + url);
    Document document = Jsoup.connect(url).timeout(20000).get();
    Element divTag = document.getElementById("xs-main");
    if (null != divTag) {
        Elements entryTags = divTag.getElementsByClass("xs-entry");
        if (null != entryTags && entryTags.size() > 0) {
            items = new LinkedList<KJFMItem>();
            for (Element e : entryTags) {
                KJFMItem item = new KJFMItem();
                Elements aTags = e.getElementsByTag("a");
                if (null != aTags && aTags.size() > 0) {
                    String url1 = aTags.get(0).attr("href");
                    System.out.println("url1 = " + url1);
                    if (url1 != null && url1.startsWith("www")) {
                        // Scheme-less link: put the scheme in front of it.
                        url1 = "http://" + url1;
                    }
                    // NOTE(review): the link is stored in `title` and is overwritten
                    // below whenever the entry has an image; it probably belongs in a
                    // dedicated link field - confirm against KJFMItem.
                    item.title = url1;
                }
                Elements imgTags = e.getElementsByTag("img");
                if (null != imgTags && imgTags.size() > 0) {
                    String img = imgTags.get(0).attr("src");
                    String title = imgTags.get(0).attr("alt");
                    item.img = img;
                    item.title = title;
                }
                Elements pTags = e.getElementsByTag("p");
                if (null != pTags && pTags.size() > 0) {
                    String content = pTags.get(0).text();
                    item.content = content;
                }
                items.add(item);
            }
        }
    }
    return items;
}Example 65
| Project: ripme-master File: YoupornRipper.java View source code |
/**
 * Loads the page at {@code url}, takes the {@code src} of its first {@code video}
 * element, queues it for download and waits for the download threads.
 *
 * @throws IOException if the page cannot be loaded or contains no video element
 */
@Override
public void rip() throws IOException {
    logger.info(" Retrieving " + this.url);
    Elements videoTags = Http.url(this.url).get().select("video");
    if (videoTags.size() == 0) {
        throw new IOException("Could not find Embed code at " + url);
    }
    String videoSource = videoTags.get(0).attr("src");
    addURLToDownload(new URL(videoSource), HOST + "_" + getGID(this.url));
    waitForThreads();
}Example 66
| Project: sagan-master File: ApiDocumentMapper.java View source code |
/**
 * Converts one API documentation page into an {@code ApiDoc} search entry.
 *
 * The content is the text of all {@code .block} elements, or of all paragraphs
 * when there are none. The summary is its first 500 characters. The package name
 * is only set when exactly one {@code .header .subTitle} element exists.
 *
 * @param document parsed page
 * @return the entry, or {@code null} for the "allclasses-frame.html" index page
 */
public ApiDoc map(Document document) {
    if (document.baseUri().endsWith("allclasses-frame.html")) {
        return null;
    }
    Elements descriptionBlocks = document.select(".block");
    String content = descriptionBlocks.size() > 0
            ? descriptionBlocks.text()
            : document.select("p").text();
    Elements packageHeader = document.select(".header .subTitle");

    ApiDoc apiDoc = new ApiDoc();
    apiDoc.setClassName(findClassName(document));
    if (packageHeader.size() == 1) {
        apiDoc.setPackageName(packageHeader.text());
    }
    apiDoc.setRawContent(content);
    int summaryLength = Math.min(content.length(), 500);
    apiDoc.setSummary(content.substring(0, summaryLength));
    apiDoc.setTitle(document.title());
    apiDoc.setSubTitle(String.format("%s (%s API)", project.getName(), version.getVersion()));
    apiDoc.setPath(document.baseUri());
    apiDoc.setCurrent(version.isCurrent());
    apiDoc.setVersion(version.getVersion());
    apiDoc.setProjectId(project.getId());
    String projectFacet = "Projects/" + project.getName();
    apiDoc.addFacetPaths("Projects", "Projects/Api", projectFacet, projectFacet + "/" + version.getVersion());
    return apiDoc;
}Example 67
| Project: seldon-server-master File: AllElementsTextListValueDynamicExtractor.java View source code |
/**
 * Builds a comma-separated, lower-cased list of the texts of all elements matching
 * the CSS selector given as the first extractor argument. Blank texts are skipped.
 *
 * The selector is validated before it is used. Previously the document was queried
 * first, so a blank selector made the query throw instead of producing the intended
 * {@code null}.
 *
 * @param attributeDetail supplies the selector in {@code extractor_args.get(0)}
 * @param url             not used by this extractor
 * @param articleDoc      document to query
 * @return the joined texts (possibly empty), or {@code null} when no usable
 *         selector is configured
 * @throws Exception if the selector cannot be evaluated
 */
@Override
public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception {
    if ((attributeDetail.extractor_args == null) || (attributeDetail.extractor_args.size() < 1)) {
        return null;
    }
    String cssSelector = attributeDetail.extractor_args.get(0);
    if (StringUtils.isBlank(cssSelector)) {
        // Nothing to select with; querying with a blank selector would throw.
        return null;
    }
    Elements elements = articleDoc.select(cssSelector);
    StringBuilder sb = new StringBuilder();
    boolean isFirstInList = true;
    for (Element e : elements) {
        String eText = StringUtils.strip(e.text());
        if (StringUtils.isBlank(eText)) {
            continue;
        }
        eText = eText.toLowerCase();
        if (isFirstInList) {
            isFirstInList = false;
        } else {
            sb.append(",");
        }
        sb.append(eText);
    }
    return sb.toString();
}Example 68
| Project: SimpleFunctionalTest-master File: UsingFixturesHelper.java View source code |
/**
 * Asserts that the first two {@code div.instruction span} elements of the
 * generated report contain the texts of the delegated fixture calls.
 */
private void thenAllFixturesOfThisFixturesHelperClassAreSeenAsFixturesOfTheUseCaseClassAndCanBeUsedAsItsOwn() throws IOException {
    Elements instructionSpans = jUnitHtmlHelper.html.select("div.instruction span");
    String firstCall = instructionSpans.get(0).text();
    Assert.assertEquals("First fixture", firstCall);
    String secondCall = instructionSpans.get(1).text();
    Assert.assertEquals("Second fixture with parameter ABCDEFGHIJKLMNOPQRSTUVWXYZ and 99", secondCall);
}Example 69
| Project: SocialConnect-master File: OpenTableSocialCrawlerTest.java View source code |
/**
 * Extracts review data from the crawler's document for customer 1 and checks the
 * number of results and the fixed fields of the first message.
 */
@Test
public void testExtractReviewData() throws Exception {
    Element pageBody = jsoupCrawler.getDocument().body();
    Elements container = jsoupCrawler.getReviewDataContainer(pageBody);
    List<Messages> messages = jsoupCrawler.extractReviewDataFromHtml(container, jsoupCrawler.getDocument().head(), 1L);
    assertTrue(messages.size() >= 10);
    Messages first = messages.get(0);
    // assertTrue(first.getMessage().startsWith("Even though Vapiano is one of my favorite places"));
    assertEquals("n/a", first.getNetworkUser());
    assertEquals("n/a", first.getNetworkUserId());
    assertEquals("en-US", first.getLanguage());
    assertEquals("OPENTABLE", first.getNetworkId());
    assertEquals(1, first.getCustomerId().longValue());
    // assertEquals("4.0", first.getNetworkUserRating());
}Example 70
| Project: SpiderJackson-master File: YouDaiLiPage1.java View source code |
/**
 * Collects every link found inside the {@code div.chunlist} blocks of the response
 * and stores them as GET URLs to be handled by {@code YouDaiLiPage2}, with a
 * priority one above that of the current URL.
 *
 * @param url     URL that produced this response; supplies the base priority
 * @param content response body as HTML
 * @return always {@code true}
 */
@Override
public boolean responseHandle(Proxy ip, ProxyController proxyController, Url url, UrlService urlService, ContextSrc contextSrc, HttpRequestBase request, CloseableHttpResponse response, String content) {
    ArrayList<Url> discovered = new ArrayList<>();
    Elements listBlocks = Jsoup.parse(content).select("div.chunlist");
    for (Element listBlock : listBlocks) {
        for (Element anchor : listBlock.select("a[href]")) {
            Url next = Url.newHttpGetUrl(anchor.attr("href"), YouDaiLiPage2.class);
            next.setPriority(url.getPriority() + 1);
            discovered.add(next);
        }
    }
    urlService.insert(discovered);
    return true;
}Example 71
| Project: StartupNews-master File: SNCommentsParserV1.java View source code |
/**
 * Parses a comments page. {@code span.comment} and {@code span.comhead} elements
 * are consumed pairwise. The four links of each header give the user id, the
 * comment link, the parent link and the discussion link and title. The href of
 * the links matching "More", if any, becomes the "more" URL.
 *
 * @param doc parsed page, may be {@code null}
 * @return the comments; empty when {@code doc} is {@code null}
 * @throws Exception if the page does not have the expected structure
 */
@Override
public SNComments parseDocument(Document doc) throws Exception {
    SNComments comments = new SNComments();
    if (doc == null) {
        return comments;
    }
    Element body = doc.body();
    Elements commentSpans = body.select("span.comment");
    Elements comHeadSpans = body.select("span.comhead");
    if (!commentSpans.isEmpty()) {
        Iterator<Element> textIterator = commentSpans.iterator();
        Iterator<Element> headIterator = comHeadSpans.iterator();
        while (headIterator.hasNext() && textIterator.hasNext()) {
            String commentText = textIterator.next().text();
            Elements headLinks = headIterator.next().getElementsByTag("a");

            SNUser author = new SNUser();
            author.setId(headLinks.get(0).text());
            String link = headLinks.get(1).attr("href");
            String parent = headLinks.get(2).attr("href");
            String discuss = headLinks.get(3).attr("href");
            String title = headLinks.get(3).text();

            SNComment parsedComment = new SNComment();
            parsedComment.setUser(author);
            parsedComment.setLinkURL(resolveRelativeSNURL(link));
            parsedComment.setParentURL(resolveRelativeSNURL(parent));
            parsedComment.setDiscussURL(resolveRelativeSNURL(discuss));
            parsedComment.setText(commentText);
            parsedComment.setArtistTitle(title);
            comments.addComment(parsedComment);
        }
    }
    Elements moreLinks = body.select("a:matches(More)");
    String moreURL = moreLinks.size() > 0 ? resolveRelativeSNURL(moreLinks.attr("href")) : null;
    comments.setMoreURL(moreURL);
    return comments;
}Example 72
| Project: StatusParser-master File: vkOld.java View source code |
/**
 * Loads one batch of wall posts of the VK community {@code id}, starting at offset
 * {@code firstPost}, and stores the post texts in {@code statuses}.
 *
 * The first eight slots of {@code statuses} are cleared first. Each stored text is
 * printed. The previous code printed the slot after the one just written, which is
 * always the cleared next entry and overran the array on the last slot. Posts beyond
 * the capacity of {@code statuses} are ignored instead of overrunning the array.
 *
 * @param id        community id (sent as a negative owner id)
 * @param firstPost offset of the first post to load
 * @return the shared {@code statuses} array
 */
public String[] vkPart(int id, int firstPost) {
    for (int i = 0; i < 8; i++) {
        statuses[i] = null;
    }
    int iter = 0;
    String partURL = "http://vk.com/al_wall.php?act=get_wall&al=1&fixed=&offset=" + firstPost + "&owner_id=-" + id + "&type=all";
    try {
        String partSource = getUrlSource(partURL);
        // Skip the first four characters of the response before parsing.
        partSource = partSource.substring(4);
        Document partDom = Jsoup.parse(partSource);
        try {
            Elements postTexts = partDom.select("div.wall_post_text");
            for (Element postText : postTexts) {
                if (iter >= statuses.length) {
                    // No room left for further posts.
                    break;
                }
                statuses[iter] = postText.text();
                System.out.println(statuses[iter]);
                iter++;
            }
        } catch (NullPointerException eText) {
            eText.printStackTrace();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return statuses;
}Example 73
| Project: SuZhouTong-client-for-android-master File: HtmlResolving.java View source code |
/*
 * Parses the news content.
 *
 * Every <span> becomes a text entry. Then every <p> with text becomes a text entry.
 * A <p> without text but with an "align" attribute consumes the next element of
 * the "[src]" list (starting at index 1) and, when that element is an <img>,
 * becomes an image entry holding its src.
 */
public ArrayList<NewsContentVo> getNewsContent(String news_detaiol) {
    ArrayList<NewsContentVo> entries = new ArrayList<NewsContentVo>();
    Document parsed = Jsoup.parse(news_detaiol);
    for (Element span : parsed.getElementsByTag("span")) {
        NewsContentVo textEntry = new NewsContentVo();
        textEntry.setIsImg(0);
        textEntry.setContentList(span.text());
        entries.add(textEntry);
    }
    Elements paragraphs = parsed.getElementsByTag("p");
    Elements sourced = parsed.select("[src]");
    int nextSourced = 1;
    for (Element paragraph : paragraphs) {
        if (paragraph.hasText()) {
            NewsContentVo textEntry = new NewsContentVo();
            textEntry.setIsImg(0);
            textEntry.setContentList(paragraph.text());
            entries.add(textEntry);
            continue;
        }
        // The index is at least 1, so "size > index" also covers the non-empty check.
        if (!paragraph.hasAttr("align") || sourced.size() <= nextSourced) {
            continue;
        }
        Element candidate = sourced.get(nextSourced);
        if (candidate.tagName().equals("img")) {
            NewsContentVo imageEntry = new NewsContentVo();
            imageEntry.setIsImg(1);
            imageEntry.setContentList(candidate.attr("src"));
            entries.add(imageEntry);
        }
        nextSourced++;
    }
    return entries;
}Example 74
| Project: Tanaguru-master File: MarkupChecker.java View source code |
/**
 * Currently performs no check: the body contains only disabled code, so no test
 * solution is added to {@code testSolutionHandler}.
 *
 * The disabled code below validated the page's DOM with {@code MarkupValidator}
 * and recorded PASSED or FAILED accordingly.
 */
@Override
protected void doCheck(SSPHandler sspHandler, Elements elements, TestSolutionHandler testSolutionHandler) {
// boolean isHtmlValid = MarkupValidator.getInstance().isHtmlValid(
// sspHandler.getPage().getId(),
// sspHandler.getSSP().getDOM());
// if (isHtmlValid) {
// testSolutionHandler.addTestSolution(TestSolution.PASSED);
// } else {
// testSolutionHandler.addTestSolution(TestSolution.FAILED);
// }
}Example 75
| Project: tradeframework-master File: HtmlSelectorMsgParser.java View source code |
/**
 * Parses the stream as HTML and feeds a message to {@code handler} for every
 * element matching {@code selector}. The charset is taken from {@code contentType}
 * when {@code charsetPattern} finds one, otherwise jsoup detects it.
 *
 * @param input       HTML content
 * @param length      not used
 * @param contentType content type header value, searched for a charset
 * @param handler     receives the created messages
 * @return {@code false} as soon as the handler rejects a message, else {@code true}
 * @throws IOException if the stream cannot be read
 */
public boolean parseContent(InputStream input, long length, String contentType, MsgHandler handler) throws IOException {
    Matcher charsetMatch = charsetPattern.matcher(contentType);
    String charsetName = null;
    if (charsetMatch.find()) {
        charsetName = charsetMatch.group(1);
    }
    Document parsed = Jsoup.parse(input, charsetName, "");
    for (Element matched : parsed.select(selector)) {
        Msg created = createMsg(matched);
        if (created == null) {
            continue;
        }
        if (!handler.newMsg(created)) {
            return false;
        }
    }
    return true;
}Example 76
| Project: UnisaConnect-master File: Esse3CheckErrorMessage.java View source code |
/**
 * Loads the home page and looks for an element whose own text contains
 * "Messaggio". When that element has a following sibling, the sibling's text is
 * stored in {@code errorMessage}.
 *
 * @return {@code ESSE3_PROBLEM} when a message was found or the server answered
 *         with an HTTP error status, {@code WRONG_DATA} when no message is present,
 *         {@code UNKNOWN_PROBLEM} on any other failure
 */
@Override
public LoadStates startScraper() {
    try {
        Elements messageLabels = scraperGetUrl(homeUrl).getElementsContainingOwnText("Messaggio");
        boolean hasMessage = !messageLabels.isEmpty() && messageLabels.first().nextElementSibling() != null;
        if (hasMessage) {
            errorMessage = messageLabels.first().nextElementSibling().text();
            return LoadStates.ESSE3_PROBLEM;
        }
        return LoadStates.WRONG_DATA;
    } catch (HttpStatusException e) {
        Log.w(Utils.TAG, "ERROR ", e);
        errorMessage = "Il servizio ESSE3 è temporaneamente non disponibile, puoi verificare il problema andando su: esse3web.unisa.it";
        return LoadStates.ESSE3_PROBLEM;
    } catch (Exception e) {
        Log.w(Utils.TAG, "ERROR ", e);
        return LoadStates.UNKNOWN_PROBLEM;
    }
}Example 77
| Project: uzlee-master File: UserParser.java View source code |
/**
 * Parses a user profile page into a {@code User} and returns it as the data of a
 * successful {@code Response}.
 *
 * The previous code built the user but then stored the response in itself
 * ({@code res.setData(res)}), so callers never received the parsed profile.
 *
 * The fixed {@code substring} offsets skip the label text that precedes each value
 * on the page.
 *
 * @param html profile page HTML
 * @return response carrying the page meta information and the parsed user
 */
public Response parse(String html) {
    Response.Meta resMeta = new Response.Meta();
    Document doc = getDoc(html, resMeta);
    String img = doc.select("div.avatar>img").attr("src");
    String uidLink = doc.select("li.searchpost a").attr("href");
    String uid = Utils.getUriQueryParameter(uidLink).get("srchuid");
    String name = doc.select("div.itemtitle.s_clear h1").text().trim();
    Element eProfile = doc.select("#profilecontent").first();
    Elements eBaseProfile = eProfile.select("> #baseprofile > table");
    String sex = eBaseProfile.first().select("td[width=70]").text().trim();
    String qq = eBaseProfile.get(1).select("a[href^=http://wpa.qq.com]").text().trim();
    String point = eProfile.select("> h3").get(1).text().trim().substring(4);
    eProfile = eProfile.select("> div.s_clear").get(1);
    String registerDate = eProfile.select("> .right > li").first().text().trim().substring(6);
    Elements eSubProfiles = eProfile.select("> ul").get(1).select("> li");
    String level = eSubProfiles.get(0).text().substring(7);
    String totalThreads = eSubProfiles.get(2).text().trim().substring(4);
    totalThreads = totalThreads.substring(0, totalThreads.indexOf("篇") - 1);
    User user = new User().setId(Integer.valueOf(uid)).setImage(img).setName(name).setRegisterDateStr(registerDate).setQq(qq).setSex(sex).setPoints(point).setLevel(level).setTotalThreads(totalThreads);
    Response res = new Response();
    res.setMeta(resMeta);
    // Attach the parsed user, not the response itself.
    res.setData(user);
    res.setSuccess(true);
    return res;
}Example 78
| Project: VileBot-master File: Ttc.java View source code |
/**
 * Collects the {@code p[class=veh-replace]} paragraphs of every
 * {@code div[class=alert-content]} whose text does not mention "elevator"
 * (case-insensitive).
 *
 * @param content page HTML
 * @return the matching paragraphs, in document order
 */
private Elements parseContent(String content) throws Exception {
    Elements collected = new Elements();
    Elements alertBlocks = Jsoup.parse(content).select("div[class=alert-content]");
    for (Element alertBlock : alertBlocks) {
        boolean aboutElevator = alertBlock.text().toLowerCase().contains("elevator");
        if (aboutElevator) {
            continue;
        }
        collected.addAll(alertBlock.select("p[class=veh-replace]"));
    }
    return collected;
}
Example 79
| Project: w3act-master File: MetadataExtractor.java View source code |
/**
 * Splits a personal name into first name and the rest.
 *
 * @return a two-element array; the second element is empty for single-word names
 */
private static String[] splitAuthorName(String rawName) {
    String[] parts = rawName.trim().split("\\s+", 2);
    return new String[] { parts[0], parts.length > 1 ? parts[1] : "" };
}

/**
 * Fills title, publication date and year, and up to three authors of
 * {@code document} from the HTML page {@code doc}, using the configured selectors.
 *
 * Nothing is extracted (and an error is logged) when the name selector matches
 * nothing. An unparsable publication date is left unset.
 *
 * The author list is split on commas and on the word "and" only when it stands
 * between spaces. The previous pattern {@code ",|and"} also split inside names
 * such as "Sandra" or "Alexander". Single-word names no longer throw
 * {@code ArrayIndexOutOfBoundsException}; their last name is set to an empty string.
 *
 * @param document target record, modified in place
 * @param doc      parsed HTML page
 */
public void extract(Document document, org.jsoup.nodes.Document doc) {
    Elements name = doc.select(nameSelector);
    if (!name.isEmpty()) {
        document.title = name.get(0).text();
        if (datePublishedSelector != null) {
            Elements datePublished = doc.select(datePublishedSelector);
            if (!datePublished.isEmpty()) {
                try {
                    document.publicationDate = new SimpleDateFormat("yyyy-MM-dd").parse(datePublished.get(0).attr("content"));
                    Calendar calendar = Calendar.getInstance();
                    calendar.setTime(document.publicationDate);
                    document.publicationYear = calendar.get(Calendar.YEAR);
                } catch (ParseException ignored) {
                    // Deliberately best effort: an unparsable date leaves the
                    // publication fields as they were.
                }
            }
        }
        if (authorSelector != null) {
            Elements author = doc.select(authorSelector);
            if (!author.isEmpty()) {
                String authorsString = author.get(0).text();
                // Separators: "," optionally followed by "and", or a standalone "and".
                String[] authors = authorsString.split("\\s*,\\s*(?:and\\s+)?|\\s+and\\s+");
                if (authors.length >= 1) {
                    String[] a = splitAuthorName(authors[0]);
                    document.author1Fn = a[0];
                    document.author1Ln = a[1];
                }
                if (authors.length >= 2) {
                    String[] a = splitAuthorName(authors[1]);
                    document.author2Fn = a[0];
                    document.author2Ln = a[1];
                }
                if (authors.length >= 3) {
                    String[] a = splitAuthorName(authors[2]);
                    document.author3Fn = a[0];
                    document.author3Ln = a[1];
                }
            }
        }
    } else {
        Logger.error("No " + nameSelector + " found!");
    }
}Example 80
| Project: weishijie-develop-master File: PicModel.java View source code |
/**
 * Downloads the page built from the {@code url} and {@code page} entries of {@code params},
 * turns every {@code div[class^=views-row views-row]} that contains a {@code chromeimg}
 * element into a {@code Juzimi} and emits the resulting list through {@code onNext}.
 * When the download fails with an {@link IOException}, the stack trace is printed and
 * {@code null} is emitted instead.
 *
 * @param subscriber receiver of the parsed list
 */
@Override
public void call(Subscriber<? super List<Juzimi>> subscriber) {
    final String pageUrl = params.get("url") + "?page=" + params.get("page");
    try {
        Document page = Jsoup.connect(pageUrl).get();
        List<Juzimi> parsed = new ArrayList<Juzimi>();
        for (Element row : page.select("div[class^=views-row views-row]")) {
            Juzimi item = new Juzimi();
            Elements images = row.getElementsByClass("chromeimg");
            if (images == null || images.isEmpty()) {
                // Rows without an image are skipped entirely.
                continue;
            }
            item.url = images.get(0).attr("src");

            Elements contents = row.getElementsByClass("xlistju");
            if (contents != null && !contents.isEmpty()) {
                item.content = contents.get(0).text();
            }

            Elements senders = row.getElementsByClass("xqusernpop");
            if (senders != null && !senders.isEmpty()) {
                item.sender = senders.get(0).text();
            }
            parsed.add(item);
        }
        subscriber.onNext(parsed);
    } catch (IOException e) {
        e.printStackTrace();
        subscriber.onNext(null);
    }
}Example 81
| Project: xmpp-master File: addPingLun.java View source code |
/**
 * Reads {@code index.html} as UTF-8, walks the elements with class {@code comment_item}
 * from last to first, prints each comment's fields tab-separated and stores it through
 * {@code News_pinglunDaoImpl.save}.
 *
 * @param args unused
 */
public static void main(String[] args) {
    File in = new File("index.html");
    News_pinglunDaoImpl ndi = new News_pinglunDaoImpl();
    try {
        Document doc = Jsoup.parse(in, "UTF-8", "");
        Elements comments = doc.getElementsByClass("comment_item");
        for (int i = comments.size() - 1; i >= 0; i--) {
            org.jsoup.nodes.Element comment = comments.get(i);
            // The marker stripped from the time text is the two characters U+53D1 U+8868.
            // It is written with Unicode escapes because the previous literal was a
            // mis-decoded (GBK read as Latin-1) copy that could never match UTF-8 input.
            String ptime = comment.getElementsByClass("ptime").text().replace("\u53d1\u8868", "");
            String username = comment.getElementsByClass("username").text();
            String avatar = comment.getElementsByTag("img").attr("src");
            String pcontent = comment.getElementsByClass("comment_body").text();
            System.out.println(ptime + "\t" + username + "\t" + avatar + "\t" + pcontent);
            // article id
            int id = 30;
            String user = username + ";" + avatar;
            String plocation = "";
            String zan = "0";
            News_pinglun news = new News_pinglun(id, user, plocation, ptime, pcontent, zan);
            // The result of save() was never acted upon; keep the call, drop the empty branch.
            ndi.save(news);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}Example 82
| Project: yyl-master File: ZGYWikiHelper.java View source code |
/**
 * Parses the response body as HTML, converts every {@code tr} except the first into an
 * account via {@code parseAccount}, replaces the contents of {@code AccountDBHelper.helper}
 * with the resulting list and delivers the list to {@code onNextAction} on the Android
 * main thread. If the body cannot be read, the stack trace is printed and nothing is emitted.
 *
 * @param response     HTTP response whose body holds the page
 * @param onNextAction receiver of the parsed account list
 */
private void parseResponse(Response response, Action1<List<Account>> onNextAction) {
    final Document page;
    try {
        page = Jsoup.parse(response.getBody().in(), "UTF-8", "https://wiki.zhenguanyu.com/");
    } catch (IOException e) {
        e.printStackTrace();
        return;
    }
    Elements rows = page.getElementsByTag("tr");
    Observable.from(rows)
            // The first item is the header row.
            .skip(1)
            .map(row -> row.getElementsByTag("td").text().split(" "))
            .map(this::parseAccount)
            .toList()
            .doOnNext(accounts -> {
                AccountDBHelper.helper.clear();
                AccountDBHelper.helper.save(accounts);
            })
            .observeOn(AndroidSchedulers.mainThread())
            .subscribe(onNextAction);
}Example 83
| Project: zafu_jwc-master File: ListLinks.java View source code |
/**
 * Fetches the URL given as the single argument and prints three listings: every element
 * with a {@code src} attribute, every {@code link[href]} import and every {@code a[href]} link.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    final String target = args[0];
    print("Fetching %s...", target);

    final Document page = Jsoup.connect(target).get();
    final Elements anchors = page.select("a[href]");
    final Elements mediaElements = page.select("[src]");
    final Elements importElements = page.select("link[href]");

    print("\nMedia: (%d)", mediaElements.size());
    for (Element item : mediaElements) {
        if (!item.tagName().equals("img")) {
            print(" * %s: <%s>", item.tagName(), item.attr("abs:src"));
        } else {
            // Images additionally report their dimensions and a shortened alt text.
            print(" * %s: <%s> %sx%s (%s)", item.tagName(), item.attr("abs:src"), item.attr("width"), item.attr("height"), trim(item.attr("alt"), 20));
        }
    }

    print("\nImports: (%d)", importElements.size());
    for (Element item : importElements) {
        print(" * %s <%s> (%s)", item.tagName(), item.attr("abs:href"), item.attr("rel"));
    }

    print("\nLinks: (%d)", anchors.size());
    for (Element anchor : anchors) {
        print(" * a: <%s> (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
    }
}Example 84
| Project: 4pdaClient-plus-master File: UsersApi.java View source code |
/**
 * Administration: admins, super-moderators, moderators.
 *
 * <p>Downloads the forum "leaders" statistics page and builds one {@code LeadUser} per
 * table row whose first {@code td.row1} cell links to a user profile
 * ({@code showuser=<id>}). Each user gets the title of the enclosing group block and the
 * forums listed in the second cell.
 *
 * @param client HTTP client used to download the page
 * @return the users found on the page, in page order
 * @throws IOException if the page cannot be downloaded
 */
public static ArrayList<LeadUser> getLeaders(IHttpClient client) throws IOException {
    // Cell text meaning "all forums" (Cyrillic). Written with Unicode escapes because the
    // previous literal was double-encoded and could never equal the text on the page.
    final String allForums = "\u0412\u0441\u0435 \u0444\u043e\u0440\u0443\u043c\u044b";

    String page = client.performGet("http://4pda.ru/forum/index.php?act=Stats&CODE=leaders");
    Document doc = Jsoup.parse(page);
    ArrayList<LeadUser> res = new ArrayList<>();
    Pattern p = Pattern.compile("showuser=(\\d+)", Pattern.CASE_INSENSITIVE);
    for (Element groupElement : doc.select("div.borderwrap")) {
        Element titleElement = groupElement.select("div.maintitle").first();
        Element tableElement = groupElement.select("table.ipbtable").first();
        if (titleElement == null || tableElement == null) {
            // Not a leaders block; previously this caused a NullPointerException.
            continue;
        }
        String group = titleElement.text().trim();
        for (Element trElement : tableElement.select("tr")) {
            Elements tds = trElement.select("td.row1");
            if (tds.size() == 0) {
                continue;
            }
            Element el = tds.get(0).select("a").first();
            if (el == null) {
                // First cell has no link, so there is no user to extract.
                continue;
            }
            Matcher m = p.matcher(el.attr("href"));
            if (!m.find()) {
                continue;
            }
            LeadUser user = new LeadUser(m.group(1), el.text());
            user.setGroup(group);
            // The forums cell is optional: a row with a single cell yields a user without forums.
            if (tds.size() > 1) {
                Element forumsCell = tds.get(1);
                Elements forumElements = forumsCell.select("option");
                if (forumElements.size() == 0 && allForums.equals(forumsCell.text())) {
                    user.getForums().add(new Forum("-1", allForums));
                } else {
                    for (Element forumEl : forumElements) {
                        if ("-1".equals(forumEl.attr("value"))) {
                            continue;
                        }
                        user.getForums().add(new Forum(forumEl.attr("value"), forumEl.text()));
                    }
                }
            }
            res.add(user);
        }
    }
    return res;
}Example 85
| Project: ambra-master File: HtmlChecker.java View source code |
/**
 * For every {@code p} element carrying an {@code xpathlocation} attribute (other than
 * "noSelect"), asserts that exactly one element has the derived anchor id as its
 * {@code id} and exactly one has it as its {@code name}. The anchor id is the attribute
 * value with square brackets removed, slashes turned into dots and the first character dropped.
 *
 * @param html parsed document to verify
 */
void check(Document html) {
    for (Element candidate : html.getElementsByAttribute("xpathlocation")) {
        if (!candidate.nodeName().equalsIgnoreCase("p")) {
            continue;
        }
        String location = candidate.attr("xpathlocation");
        String anchorId = location.replaceAll("\\[", "").replaceAll("\\]", "").replaceAll("/", ".").substring(1);
        if (location.equalsIgnoreCase("noSelect")) {
            continue;
        }
        Elements byId = html.getElementsByAttributeValue("id", anchorId);
        assertEquals(byId.size(), 1, "Did not find the anchor tag for the given paragraph");
        Elements byName = html.getElementsByAttributeValue("name", anchorId);
        assertEquals(byName.size(), 1, "Did not find the anchor tag for the given paragraph");
    }
}Example 86
| Project: android-opensource-library-56-master File: RssLoader.java View source code |
/**
 * Walks every {@code item} element of the document, copies the text of its first
 * {@code title} and {@code link} children (when present) into a new {@code Item} and adds
 * it to {@code mList}, creating the list on first use.
 *
 * @param document parsed feed document
 */
private void parseDomTraverse(Document document) {
    for (Element entry : document.getElementsByTag("item")) {
        Item parsed = new Item();
        Elements titles = entry.getElementsByTag("title");
        Elements links = entry.getElementsByTag("link");
        if (titles.size() > 0) {
            parsed.title = titles.first().text();
        }
        if (links.size() > 0) {
            parsed.url = links.first().text();
        }
        // The list is created lazily, only once an item has actually been seen.
        if (mList == null) {
            mList = new RssList();
        }
        mList.addItem(parsed);
    }
}Example 87
| Project: AndroidOpenTextbook-master File: MainActivity.java View source code |
/**
 * Parses the given text with jsoup's XML parser, builds one {@code Item} per {@code item}
 * element from the text of its first {@code title} and {@code link} descendants, and
 * passes the list to {@code showRss}.
 *
 * @param rssBody raw feed text
 */
private void parseRss(String rssBody) {
    Document feed = Jsoup.parse(rssBody, "", Parser.xmlParser());
    Elements entries = feed.select("item");
    List<Item> parsedItems = new ArrayList<Item>();
    for (int i = 0; i < entries.size(); i++) {
        Element entry = entries.get(i);
        Item parsed = new Item();
        Element titleNode = entry.select("title").first();
        parsed.setTitle(titleNode.text());
        Element linkNode = entry.select("link").first();
        parsed.setUrl(linkNode.text());
        parsedItems.add(parsed);
    }
    showRss(parsedItems);
}Example 88
| Project: asta4d-master File: ElementUtil.java View source code |
/**
 * Removes every element under {@code target} that matches {@code selector}. The target
 * itself and elements whose {@code ownerDocument()} is {@code null} are left alone.
 *
 * @param target         root of the search
 * @param selector       selector identifying the elements to remove
 * @param pullupChildren when {@code true}, {@code pullupChildren(element)} is invoked on
 *                       each element before it is removed
 */
public static final void removeNodesBySelector(Element target, String selector, boolean pullupChildren) {
    for (Element candidate : target.select(selector)) {
        if (candidate == target || candidate.ownerDocument() == null) {
            continue;
        }
        if (pullupChildren) {
            pullupChildren(candidate);
        }
        candidate.remove();
    }
}Example 89
| Project: blade.tools-master File: MarkdownParser.java View source code |
/**
 * Maps the {@code href} of every {@code a[href]} that directly contains an {@code h3} to
 * the concatenated markup of sibling nodes, starting at position {@code siblingIndex()}
 * within {@code siblingNodes()} and stopping before the first node whose markup starts
 * with {@code <hr}.
 *
 * @param html HTML text to parse
 * @return map from link target to collected markup; later duplicates overwrite earlier ones
 */
private static Map<String, String> parseHtml(String html) {
    Map<String, String> sectionsByHref = new HashMap<>();
    for (Element heading : Jsoup.parse(html).select("a[href] > h3")) {
        Element anchor = heading.parent();
        int start = anchor.siblingIndex();
        List<Node> siblings = anchor.siblingNodes();
        StringBuilder section = new StringBuilder();
        for (int i = start; i < siblings.size(); i++) {
            String rendered = siblings.get(i).toString();
            if (rendered.startsWith("<hr")) {
                break;
            }
            section.append(rendered);
        }
        sectionsByHref.put(anchor.attr("href"), section.toString());
    }
    return sectionsByHref;
}Example 90
| Project: calcite-master File: FileEnumerator.java View source code |
/**
 * Advances to the next row of the underlying iterator.
 *
 * <p>When a row is available it is converted with {@code converter.toRow} and stored in
 * {@code current}; otherwise {@code current} is cleared.
 *
 * @return {@code true} if a row was read, {@code false} when the iterator is exhausted
 * @throws RuntimeException wrapping any checked exception raised while reading or
 *                          converting; unchecked exceptions and errors are rethrown unchanged
 */
public boolean moveNext() {
    try {
        if (this.iterator.hasNext()) {
            final Elements row = this.iterator.next();
            current = this.converter.toRow(row, this.fields);
            return true;
        } else {
            current = null;
            return false;
        }
    } catch (RuntimeException | Error e) {
        // Already unchecked: propagate as is instead of wrapping a second time.
        throw e;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}Example 91
| Project: CodeUtils-master File: Xedtt.java View source code |
/**
 * Downloads the listing page at {@code appUrl}, then for each {@code li} inside the first
 * {@code miao-con} element downloads the linked detail page ({@code hostUrl} + href) and
 * reads the first image inside its first {@code content} element.
 *
 * @param appUrl URL of the listing page
 * @return one {@code GifInfo} per list entry with title, thumbnail URL, image URL and
 *         dimensions (0 when the width/height attributes are not integers)
 * @throws Exception whatever the download helper or the element look-ups throw
 */
private static List<GifInfo> getAllGif(String appUrl) throws Exception {
    System.out.println("get gif ... url = " + appUrl);
    List<GifInfo> gifs = new ArrayList<>();
    Document listing = Jsoup.parse(HttpUtils.getString(appUrl));
    Elements entries = listing.getElementsByClass("miao-con").get(0).getElementsByTag("li");
    for (Element entry : entries) {
        String title = entry.text();
        String thumbnail = entry.getElementsByTag("img").get(0).attr("src");
        String detailUrl = hostUrl + entry.getElementsByTag("a").get(0).attr("href");

        GifInfo gif = new GifInfo();
        gif.title = title;
        gif.thumbnailImgUrl = thumbnail;

        Document detailPage = Jsoup.parse(HttpUtils.getString(detailUrl));
        Element fullImage = detailPage.getElementsByClass("content").get(0).getElementsByTag("img").get(0);
        gif.imgUrl = fullImage.attr("src");

        int width = 0;
        int height = 0;
        try {
            width = Integer.parseInt(fullImage.attr("width"));
            height = Integer.parseInt(fullImage.attr("height"));
        } catch (Exception ignored) {
            // Dimensions are optional; whatever was parsed so far is kept.
        }
        gif.width = width;
        gif.height = height;
        gifs.add(gif);
    }
    return gifs;
}Example 92
| Project: coding2017-master File: Struts.java View source code |
/**
 * Looks up {@code actionName} in the remote {@code struts.xml}, instantiates the configured
 * action class reflectively, injects {@code parameters} into its declared fields, invokes
 * {@code execute()} and builds a {@code View} from the action's getters and the matching
 * {@code result} element.
 *
 * @param actionName name of the action to run
 * @param parameters field name to value pairs set on the action instance
 * @return the populated view, or {@code null} when the configuration cannot be loaded, no
 *         action with that name is found, or every matching action fails
 */
public static View runAction(String actionName, Map<String, String> parameters) {
    // 0. Read the struts.xml configuration file, using Jsoup.
    Document document;
    try {
        document = Jsoup.connect("http://my.977996067.cn/2017_2/struts.xml").get();
    } catch (IOException e) {
        // Message text restored ("xml parsing failed"); the previous literal contained a
        // U+FFFD replacement character left over from an encoding error.
        System.err.println("xml\u89e3\u6790\u5931\u8d25");
        return null;
    }
    // Fetch all <action> tags.
    Elements actions = document.select("action");
    for (Element actionElement : actions) {
        String name = actionElement.attr("name");
        // Instantiate the action via reflection.
        if (actionName.equals(name)) {
            try {
                Class<?> actionClass = Class.forName(actionElement.attr("class"));
                Object o = actionClass.getConstructor().newInstance();
                parameters.forEach((k, v) -> {
                    try {
                        Field field = actionClass.getDeclaredField(k);
                        field.setAccessible(true);
                        // Assign the parameter value.
                        field.set(o, v);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                });
                // Invoke execute() and capture its return value.
                Method method = actionClass.getMethod("execute");
                String returnValue = (String) method.invoke(o);
                View view = new View();
                Map map = new HashMap();
                Arrays.stream(actionClass.getMethods()).forEach(((Method m) -> {
                    try {
                        String methodName = m.getName();
                        if (methodName.startsWith("get"))
                            map.put(methodName.substring(methodName.indexOf("get") + 3).toLowerCase(), m.invoke(o));
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }));
                view.setParameters(map);
                // Resolve the name of the view to return.
                Elements children = actionElement.children().select("result");
                for (Element aChildren : children) {
                    if (returnValue.equals(aChildren.attr("name"))) {
                        view.setJsp(aChildren.text());
                    }
                }
                System.out.println(view);
                return view;
            } catch (Exception e) {
                // A failing action is reported and the search continues with the next match.
                e.printStackTrace();
            }
        }
    }
    return null;
}Example 93
| Project: dd4t-2-java-master File: RichTextUtils.java View source code |
/**
 * This method takes care of:
 * 1. Formatting RTF from Tridion. This basically means
 * stripping out the xhtml and xlink namespaces.
 * 2. Resolving any component links found in the field, when requested.
 * 3. Prefixing image sources (and resolved links) with the context path, when one is given.
 *
 * <p>Empty values are replaced by an empty string. The processed values are written back
 * with {@code setTextValues}.
 *
 * @param xhtmlFields  the DD4T Xhtml Field
 * @param resolveLinks resolve links as well as stripping namespaces
 * @param linkResolver a concrete link resolver; links are left alone when {@code null}
 * @param contextPath  prefix for resolved links and image sources; {@code null} means none
 */
public static void resolveXhtmlField(final XhtmlField xhtmlFields, final boolean resolveLinks, final LinkResolver linkResolver, final String contextPath) throws ItemNotFoundException, SerializationException {
    List<Object> xhtmlValues = xhtmlFields.getValues();
    List<String> newValues = new ArrayList<>();
    String contextPathToUse = contextPath == null ? "" : contextPath;
    for (Object xhtmlField : xhtmlValues) {
        if (StringUtils.isEmpty((String) xhtmlField)) {
            newValues.add("");
            continue;
        }
        // Wrap the fragment in a synthetic root so it can be located again after parsing.
        Document document = Jsoup.parseBodyFragment("<" + XHTMLBODYROOT + ">" + xhtmlField + "</" + XHTMLBODYROOT + ">");
        Element xhtmlBodyRoot = document.getElementsByTag(XHTMLBODYROOT).first();
        if (xhtmlBodyRoot == null) {
            // The null check used to come after the root had already been dereferenced.
            // As before, a value without a root contributes nothing to the result.
            continue;
        }
        if (resolveLinks && linkResolver != null) {
            Elements links = xhtmlBodyRoot.getElementsByAttributeValueMatching(XLINK_HREF, TCM);
            for (Element link : links) {
                String resolvedLink = linkResolver.resolve(link.attr(XLINK_HREF));
                if (StringUtils.isNotEmpty(resolvedLink)) {
                    link.attr(XLINK_HREF, contextPathToUse + resolvedLink);
                } else {
                    // Unresolvable links are blanked.
                    link.attr(XLINK_HREF, "");
                }
            }
        }
        if (StringUtils.isNotEmpty(contextPathToUse)) {
            Elements images = xhtmlBodyRoot.getElementsByTag(IMG_TAG);
            for (Element image : images) {
                String src = image.attr(SRC_ATTR);
                image.attr(SRC_ATTR, contextPathToUse + src);
            }
        }
        // Strip the "xlink:" prefix and the xhtml/xlink namespace declarations.
        newValues.add(xhtmlBodyRoot.html().replaceAll("(?ims)xlink:|xmlns(=\"http://www\\.w3\\.org/1999/xhtml\"\\s*|:xlink=\"http://www\\.w3\\.org/1999/xlink\"\\s*)", ""));
    }
    xhtmlFields.setTextValues(newValues);
}Example 94
| Project: Devtf_APP-master File: HtmlInputRequest.java View source code |
/**
 * Parses the response bytes as UTF-8 HTML and converts each {@code tr} under the
 * {@code tbody} of the first table into an {@code EmploymentItem}: cell 0 is the company
 * name, the link in cell 1 gives job name and address, cell 2 and cell 3 are passed to
 * {@code setPostTempt} and {@code setEmail}.
 *
 * @param response raw network response
 * @return a success response with the parsed list, or an error response wrapping any
 *         exception raised while parsing
 */
@Override
protected Response<List<EmploymentItem>> parseNetworkResponse(NetworkResponse response) {
    List<EmploymentItem> parsedItems = new ArrayList<EmploymentItem>();
    try {
        Document page = Jsoup.parse(new ByteArrayInputStream(response.data), "UTF-8", WebAPI.BASE_URL);
        Elements rows = page.getElementsByTag("table").get(0).select("tbody").select("tr");
        for (Element row : rows) {
            Elements cells = row.select("td");
            EmploymentItem item = new EmploymentItem();
            item.setCompanyName(cells.get(0).text());
            Elements jobLink = cells.get(1).select("a");
            item.setJobName(jobLink.text());
            item.setJobDescAddress(jobLink.attr("href"));
            item.setPostTempt(cells.get(2).text());
            item.setEmail(cells.get(3).text());
            parsedItems.add(item);
        }
    } catch (Exception e) {
        return Response.error(new ParseError(e));
    }
    return Response.success(parsedItems, HttpHeaderParser.parseCacheHeaders(response));
}Example 95
| Project: EhViewer-master File: WhatsHotParser.java View source code |
/**
 * Extracts gallery entries from the element with id {@code pp}: for every {@code id1}
 * element the link inside its {@code id3} child supplies gid and token, and the image
 * inside that link supplies thumbnail and title.
 *
 * @param body HTML text to parse
 * @return the parsed galleries in page order
 * @throws ParseException if anything goes wrong while parsing, including missing elements
 */
@SuppressWarnings("ConstantConditions")
public static List<GalleryInfo> parse(String body) throws ParseException {
    try {
        List<GalleryInfo> galleries = new ArrayList<>(15);
        Element container = Jsoup.parse(body).getElementById("pp");
        for (Element id1 : container.getElementsByClass("id1")) {
            GalleryInfo info = new GalleryInfo();
            Element id3 = JsoupUtils.getElementByClass(id1, "id3");
            Element link = JsoupUtils.getElementByTag(id3, "a");
            GalleryDetailUrlParser.Result parsedUrl = GalleryDetailUrlParser.parse(link.attr("href"));
            info.gid = parsedUrl.gid;
            info.token = parsedUrl.token;
            Element image = JsoupUtils.getElementByTag(link, "img");
            info.thumb = EhUtils.handleThumbUrlResolution(image.attr("src"));
            info.title = image.attr("title");
            info.generateSLang();
            galleries.add(info);
        }
        return galleries;
    } catch (Exception e) {
        throw new ParseException("Parse whats hot error", body);
    }
}Example 96
| Project: firing-range-master File: Expression.java View source code |
/**
 * Parses the {@code q} parameter as an HTML body fragment and echoes back, via
 * {@code Responses.sendXssed}, the markup of every element that has no attribute whose
 * name starts with "on" or equals "href"/"src", and no {@code style} attribute containing
 * "expression". Responds with a 400 error when {@code q} is missing or yields no elements.
 *
 * @param request  incoming request; reads parameter {@code q}
 * @param response response to write to
 * @throws IOException if writing the response fails
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
    String q = request.getParameter("q");
    if (q == null) {
        Responses.sendError(response, "Missing q parameter", 400);
        return;
    }
    Element body = Jsoup.parseBodyFragment(q).body();
    Elements candidates = body.getAllElements();
    // getAllElements() includes the body itself; only the nested elements are of interest.
    candidates.remove(body);
    if (candidates.isEmpty()) {
        Responses.sendError(response, "Invalid input, no tags", 400);
        return;
    }
    StringBuilder echoed = new StringBuilder();
    for (Element candidate : candidates) {
        boolean allowed = true;
        for (Attribute attribute : candidate.attributes()) {
            String key = attribute.getKey().toLowerCase();
            boolean blockedKey = key.startsWith("on") || key.equals("href") || key.equals("src");
            boolean expressionStyle = key.equals("style") && attribute.getValue().toLowerCase().contains("expression");
            if (blockedKey || expressionStyle) {
                allowed = false;
            }
        }
        if (allowed) {
            echoed.append(candidate.toString());
        }
    }
    Responses.sendXssed(response, echoed.toString());
}Example 97
| Project: Girls-master File: TypeImageListModelImpl.java View source code |
/**
 * Downloads {@code url}, reads every link inside the element with id {@code gallery-1}
 * and emits one {@code TypeImageDomain} per link (image width, height, source and the
 * absolute link target) as a single list.
 *
 * <p>On an {@link IOException} the error is reported through {@code onError} and nothing
 * else is emitted. A page without the gallery element produces an empty list.
 *
 * @param subscriber receiver of the parsed list
 */
@Override
public void call(Subscriber<? super List<TypeImageDomain>> subscriber) {
    List<TypeImageDomain> typeImageDomains = new ArrayList<TypeImageDomain>();
    try {
        Document document = Jsoup.connect(url).get();
        Element element = document.getElementById("gallery-1");
        if (element != null) {
            Elements elementsA = element.getElementsByTag("a");
            for (Element a : elementsA) {
                String linkUrl = a.attr("abs:href");
                Elements img = a.getElementsByTag("img");
                String src = img.attr("src");
                String width = img.attr("width");
                String height = img.attr("height");
                typeImageDomains.add(new TypeImageDomain(Integer.valueOf(width), Integer.valueOf(height), src, linkUrl));
            }
        }
    } catch (IOException e) {
        subscriber.onError(e);
        // onError is terminal: falling through would index an empty list and then call
        // onNext after the error had already been delivered.
        return;
    }
    if (!typeImageDomains.isEmpty()) {
        System.out.print(typeImageDomains.get(0).getHeight());
    }
    subscriber.onNext(typeImageDomains);
}Example 98
| Project: HabReader-master File: UsersLoader.java View source code |
/**
 * Downloads {@code url} and converts every {@code div.user} block into a {@code UsersData}
 * holding name, profile URL, rating, karma, avatar source and lifetime text.
 *
 * @return the users parsed so far; empty when the page could not be downloaded
 */
@Override
public ArrayList<UsersData> loadInBackground() {
    ArrayList<UsersData> data = new ArrayList<UsersData>();
    try {
        Log.i(TAG, "Loading a page: " + url);
        Document document = Jsoup.connect(url).get();
        Elements users = document.select("div.user");
        for (Element user : users) {
            UsersData usersData = new UsersData();
            Element rating = user.select("div.rating").first();
            Element karma = user.select("div.karma").first();
            Element avatar = user.select("div.avatar > a > img").first();
            Element name = user.select("div.userlogin > div.username > a").first();
            Element lifetime = user.select("div.info > div.lifetime").first();
            usersData.setName(name.text());
            usersData.setUrl(name.attr("abs:href"));
            usersData.setRating(rating.text());
            usersData.setKarma(karma.text());
            usersData.setAvatar(avatar.attr("src"));
            usersData.setLifetime(lifetime.text());
            data.add(usersData);
        }
    } catch (IOException e) {
        // Still best effort (the caller gets what was parsed), but no longer silent.
        Log.e(TAG, "Failed to load page: " + url, e);
    }
    return data;
}Example 99
| Project: halgnu-master File: GoogleSearchListener.java View source code |
/**
 * Runs a Google web search for {@code query} and describes the first hit.
 *
 * <p>The first {@code li.g>h3>a} link supplies the title. When its absolute URL has the
 * redirect form {@code ...=<target>&...}, the target between the first {@code '='} and the
 * following {@code '&'} is URL-decoded and used instead.
 *
 * @param query search terms
 * @return {@code "<title>: <url>"} for the first hit, or {@code "No results found"} when
 *         the request fails or yields no links
 */
private String getGoogleResult(String query) {
    final String google = "http://www.google.com/search?q=";
    final String charset = "UTF-8";
    final String userAgent = "HalGNU 1.0 (+https://github.com/R4stl1n/halgnu)";

    Elements links = null;
    try {
        links = Jsoup.connect(google + URLEncoder.encode(query, charset)).userAgent(userAgent).get().select("li.g>h3>a");
    } catch (IOException e) {
        e.printStackTrace();
    }
    if (links == null || links.isEmpty()) {
        // Previously the fallback text was always overwritten with ": ".
        return "No results found";
    }

    String title = links.first().text();
    String url = links.first().absUrl("href");
    int start = url.indexOf('=') + 1;
    int end = url.indexOf('&', start);
    // Unwrap only when both delimiters are present; substring() used to throw otherwise.
    if (start > 0 && end > start) {
        try {
            url = URLDecoder.decode(url.substring(start, end), charset);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }
    System.out.println("Title: " + title);
    System.out.println("URL: " + url);
    return title + ": " + url;
}Example 100
| Project: IntelliDict-master File: PonsDictionary.java View source code |
/**
 * Loads the document and converts each element returned by
 * {@code extractPonsEntryInHtmlForSpecifiedLanguage("pl")} into a map built from its first
 * {@code tbody}.
 *
 * @param entry not read by this method
 * @return one map per sub-entry, in document order
 */
public static List<Map> getPonsEntry(String entry) {
    loadDocument();
    Elements subEntryTables = extractPonsEntryInHtmlForSpecifiedLanguage("pl");
    List<Map> subEntries = new ArrayList<Map>();
    for (Element table : subEntryTables) {
        // The header text is read but not used afterwards.
        String headerText = table.getElementsByTag("thead").first().text();
        Element tableBody = table.getElementsByTag("tbody").first();
        Map<String, String> subEntry = extractPonsSubEntryFromTBodyElement(tableBody);
        subEntries.add(subEntry);
    }
    return subEntries;
}Example 101
| Project: jinjava-master File: BatchFilterTest.java View source code |
/**
 * Renders the {@code batch-filter} template with six items and checks that the output has
 * two rows of three cells each, holding "1" to "6" in order.
 */
@Test
public void batchFilterNoBackfill() {
    Map<String, Object> context = ImmutableMap.of("items", (Object) Lists.newArrayList("1", "2", "3", "4", "5", "6"));
    Document dom = Jsoup.parseBodyFragment(render("batch-filter", context));
    assertThat(dom.select("tr")).hasSize(2);
    Elements rows = dom.select("tr");
    int expected = 1;
    for (int r = 0; r < 2; r++) {
        Elements cells = rows.get(r).select("td");
        assertThat(cells).hasSize(3);
        for (int c = 0; c < 3; c++) {
            assertThat(cells.get(c).text()).isEqualTo(String.valueOf(expected));
            expected++;
        }
    }
}