Java Examples for org.jsoup.select.Elements

The following Java examples will help you understand the usage of org.jsoup.select.Elements. These source code samples are taken from different open source projects.

Example 1
Project: lavender-master  File: RamdomImgParser.java View source code
/**
 * Extracts the image URL from the photo-detail wrapper of a page.
 *
 * @param html raw HTML of the page
 * @return the {@code src} attribute of the first {@code <img>} inside the first
 *         {@code <div>} whose id is {@code photo-detail-wrapper}, or {@code null}
 *         when there is no such div or it contains no image
 */
public static String parserImg(String html) {
    Document document = Jsoup.parse(html);
    for (Element div : document.select("div")) {
        if (!"photo-detail-wrapper".equals(div.attr("id"))) {
            continue;
        }
        // first() yields null when the wrapper holds no <img>; report "not found"
        // the same way as a missing wrapper instead of throwing NullPointerException.
        Element img = div.select("img").first();
        return img == null ? null : img.attr("src");
    }
    return null;
}
Example 2
Project: Android-Studio-Project-master  File: ContentParser.java View source code
/**
 * Builds a {@link Content} from the second PNG/JPEG image found in the markup.
 *
 * @param html raw HTML to scan
 * @return a Content whose url and title come from that image's {@code src} and
 *         {@code alt} attributes; an unpopulated Content when the markup holds
 *         fewer than two matching images
 */
public static Content Parser(String html) {
    Document doc = Jsoup.parse(html);
    Elements images = doc.select("img[src~=(?i)\\.(png|jpe?g)]");
    Content content = new Content();
    // Index 1 (the second match) is what the original code used; guard it so pages
    // with fewer matches no longer fail with IndexOutOfBoundsException.
    if (images.size() < 2) {
        return content;
    }
    // The selector already yields <img> elements, so no further tag lookup is needed.
    Element image = images.get(1);
    content.setUrl(image.attr("src"));
    content.setTitle(image.attr("alt"));
    return content;
}
Example 3
Project: muzima-android-master  File: HTMLConceptParser.java View source code
/**
 * Collects the distinct concept names referenced in the given HTML.
 *
 * @param html markup to scan
 * @return the de-duplicated results of {@code getConceptName} for every matching element
 */
public List<String> parse(String html) {
    // Every element that is not a div and carries the DATA_CONCEPT_TAG attribute.
    String selector = "*:not(div)[" + DATA_CONCEPT_TAG + "]";
    Elements tagged = Jsoup.parse(html).select(selector);

    Set<String> uniqueNames = new HashSet<String>();
    for (Element node : tagged) {
        String rawConcept = node.attr(DATA_CONCEPT_TAG);
        uniqueNames.add(getConceptName(rawConcept));
    }

    List<String> result = new ArrayList<String>();
    result.addAll(uniqueNames);
    return result;
}
Example 4
Project: v2ex-android-master  File: ContentUtils.java View source code
/**
 * Reads pagination information from a page body.
 *
 * @param body element searched for {@code page_current} and {@code page_normal} markers
 * @return a two-element array {@code {currentPage, totalPage}}. The current page is the
 *         first numeric {@code page_current} text (1 if none); the total is the largest
 *         numeric {@code page_normal} text, never smaller than the current page
 */
public static int[] parsePage(Element body) {
    int currentPage = 1;
    for (Element el : body.getElementsByClass("page_current")) {
        try {
            currentPage = Integer.parseInt(el.text());
            break;
        } catch (NumberFormatException ignored) {
            // Marker text is not a number; try the next marker.
        }
    }

    int totalPage = currentPage;
    for (Element el : body.getElementsByClass("page_normal")) {
        try {
            totalPage = Math.max(totalPage, Integer.parseInt(el.text()));
        } catch (NumberFormatException ignored) {
            // Non-numeric page links are skipped.
        }
    }
    return new int[] { currentPage, totalPage };
}
Example 5
Project: validadorAcessibilidade-master  File: RecomendacaoDepreciados.java View source code
/**
 * Reports the deprecated elements present in the document.
 *
 * @param doc document to inspect
 * @return an empty string when none of the listed selectors match; otherwise the
 *         markup of every selector's matches, each preceded by a newline, in list order
 */
@Override
public String executa(Document doc) {
    // NOTE(review): "align" is queried as a tag name here - confirm that is intended.
    String[] seletores = { "frame", "applet", "blink", "marquee", "basefont", "center", "dir", "align", "font", "isindex", "menu", "strike", "u" };

    Elements[] encontrados = new Elements[seletores.length];
    boolean algumEncontrado = false;
    for (int i = 0; i < seletores.length; i++) {
        encontrados[i] = doc.select(seletores[i]);
        if (!encontrados[i].isEmpty()) {
            algumEncontrado = true;
        }
    }
    if (!algumEncontrado) {
        return "";
    }

    StringBuilder relatorio = new StringBuilder();
    for (Elements grupo : encontrados) {
        relatorio.append("\n").append(grupo.toString());
    }
    return relatorio.toString();
}
Example 6
Project: yahnac-master  File: VoteUrlParser.java View source code
/**
 * Extracts the authenticated vote URL for the story identified by {@code storyId}.
 *
 * @return the vote link's href prefixed with {@code "/"}, or {@code EMPTY} when the
 *         up-vote anchor is missing, has no vote link, or the link carries no
 *         {@code auth=} token
 */
public String parse() {
    Elements links = document.select("a[id=up_" + storyId + "]");
    if (links.isEmpty()) {
        return EMPTY;
    }
    Element voteElement = links.get(0).select("a[href^=vote]").first();
    if (voteElement == null) {
        return EMPTY;
    }
    String href = voteElement.attr("href");
    // Previously a link without an auth token produced the literal string "/null".
    if (!href.contains("auth=")) {
        return EMPTY;
    }
    return "/" + href;
}
Example 7
Project: bashoid-master  File: Parser.java View source code
/**
 * Parses the quotes contained in a fetched page.
 *
 * @param page page whose content is parsed as HTML
 * @return one {@code Quote} per header/body pair found under the first element that has
 *         a {@code valign} attribute; an empty list when no such element exists
 * @throws NumberFormatException if a quote id or score is not numeric
 */
static ArrayList<Quote> getQuotes(WebPage page) {
    ArrayList<Quote> quotes = new ArrayList<>();
    Element container = Jsoup.parse(page.getContent()).getElementsByAttribute("valign").first();
    if (container == null) {
        // No quote container on this page; nothing to parse.
        return quotes;
    }
    Elements headers = container.getElementsByClass("quote");
    Elements bodies = container.getElementsByClass("qt");
    // Pair headers with bodies only as far as both exist, to avoid indexing past the shorter list.
    final int count = Math.min(headers.size(), bodies.size());
    for (int i = 0; i < count; ++i) {
        String[] body = bodies.get(i).html().split("<br />");
        Element header = headers.get(i);
        // The id text carries one leading character that is dropped before parsing.
        String quoteId = header.getElementsByTag("b").first().text().substring(1);
        int id = Integer.parseInt(quoteId);
        // The score is the header's own text without its first and last character.
        String ownText = header.ownText();
        String quoteScore = ownText.substring(1, ownText.length() - 1);
        int score = Integer.parseInt(quoteScore);
        quotes.add(new Quote(body, score, id));
    }
    return quotes;
}
Example 8
Project: deepnighttwo-master  File: FirstTry.java View source code
/**
 * Posts a date-range query to the air-news page and prints the own text of every
 * table cell in the result rows.
 *
 * @param args unused
 * @throws IOException if the request fails
 */
public static void main(String[] args) throws IOException {
    Document doc = Jsoup.connect("http://www.envir.gov.cn/airnews/index.asp").data("Fdate", "2000-6-1").data("Tdate", "2000-6-8").userAgent("I'm jsoup").timeout(3000).post();
    Elements rows = doc.select("table[bordercolor] > tr");
    // The first matched row is skipped (the original removed it from the list);
    // starting at index 1 also copes with an empty result instead of throwing.
    for (int i = 1; i < rows.size(); i++) {
        for (Element cell : rows.get(i).select("td")) {
            System.out.println(cell.ownText());
        }
    }
}
Example 9
Project: example-webapp-master  File: ExceptionHandlingIntegrationTests.java View source code
/**
 * A failing request must redirect to an error page whose URL ends in a 7-character
 * reference, and that page must display the same reference in {@code #errorRef}.
 */
@Test
public void shouldSeeErrorReferenceDisplayedOnThePage() throws Exception {
    SpringDispatcherServlet dispatcher = SpringDispatcherServlet.create();

    // Trigger the failure and inspect where we are redirected to.
    MockHttpServletResponse failure = dispatcher.process(new MockHttpServletRequest("GET", "/bad"));
    String errorPageUrl = failure.getRedirectedUrl();
    assertThat(errorPageUrl, matchesPattern(sequence("/error/", exactly(7, anyCharacterIn("A-Z0-9")))));
    String expectedRef = StringUtils.substringAfterLast(errorPageUrl, "/");

    // Follow the redirect and check the rendered reference.
    MockHttpServletResponse errorPage = dispatcher.process(new MockHttpServletRequest("GET", errorPageUrl));
    Elements refNodes = Jsoup.parse(errorPage.getContentAsString()).select("#errorRef");
    assertThat(refNodes.size(), equalTo(1));
    assertThat(refNodes.first().text(), equalTo(expectedRef));
}
Example 10
Project: fpcms-master  File: JsoupSelectorUtil.java View source code
/**
 * Returns the matches of the first selector that yields any element.
 *
 * @param doc       element to query
 * @param selectors candidate selectors, tried in order; blank entries are skipped
 * @return the first non-empty match set, or an empty {@code Elements} when
 *         {@code selectors} is null or nothing matches
 */
public static Elements select(Element doc, String... selectors) {
    if (selectors == null) {
        return new Elements();
    }
    for (String candidate : selectors) {
        if (!StringUtils.isBlank(candidate)) {
            Elements hits = doc.select(candidate);
            if (!hits.isEmpty()) {
                return hits;
            }
        }
    }
    return new Elements();
}
Example 11
Project: GoVRE-master  File: ProxyNetworkTrainMapImage.java View source code
//METHODS	
/**
 * Looks up the absolute URL of the train map image on the configured page.
 *
 * @param context used to resolve the {@code urlVREImgMap} string resource
 * @return the absolute {@code src} of the first {@code img} whose URL contains
 *         {@code app?action=getimg}; an empty string when no image matches;
 *         {@code null} when fetching the page fails with an IOException
 */
private static String fetchTrainImageUrlFromVRE(Context context) {
    try {
        String pageUrl = context.getResources().getString(R.string.urlVREImgMap);
        Document page = Jsoup.connect(pageUrl).get();
        // Walk everything that has a src attribute and keep only images.
        for (Element candidate : page.select("[src]")) {
            if (!candidate.tagName().equals("img")) {
                continue;
            }
            String absoluteSrc = candidate.attr("abs:src");
            // Only the map image carries the action query string.
            if (absoluteSrc.contains("app?action=getimg")) {
                return absoluteSrc;
            }
        }
        // Page loaded but no matching image was found.
        return "";
    } catch (IOException e) {
        // Fetch failure is reported to the caller as null below.
    }
    return null;
}
Example 12
Project: IU-master  File: ConsumeInfo.java View source code
/**
 * Parses the rows of {@code table#GridView1} into {@code ConsumeInfo} entries.
 *
 * @param list destination for parsed entries
 * @param doc  document holding the table; may be null
 * @return 0 when {@code doc} is null or the table has fewer than two rows; otherwise the
 *         number of {@code a} elements inside the table rows plus one
 */
public static int parseHtml(List<ConsumeInfo> list, Document doc) {
    if (doc == null) {
        return 0;
    }
    // NOTE(review): when list is null the entries go into a throwaway list the caller
    // never sees - confirm whether callers rely on passing null.
    List<ConsumeInfo> target = (list != null) ? list : new ArrayList<ConsumeInfo>();

    Elements rows = doc.select("table#GridView1").select("tr");
    int rowCount = rows.size();
    if (rowCount < 2) {
        return 0;
    }
    // The first and the last row are not treated as data rows.
    for (int idx = 1; idx < rowCount - 1; idx++) {
        Elements cells = rows.get(idx).children();
        if (cells.size() == 3) {
            ConsumeInfo entry = new ConsumeInfo();
            entry.time = cells.get(1).text();
            entry.remain = cells.get(2).text();
            target.add(entry);
        }
    }
    return rows.select("a").size() + 1;
}
Example 13
Project: japicmp-master  File: ITReportTitle.java View source code
/**
 * Verifies that the generated project-reports page lists the japicmp report both in the
 * left navigation and in the overview table, with the expected description.
 */
@Test
public void testReportTitle() throws IOException {
    Path reportIndex = Paths.get(System.getProperty("user.dir"), "target", "site", "project-reports.html");
    assertThat(Files.exists(reportIndex), is(true));
    Document page = Jsoup.parse(reportIndex.toFile(), "UTF-8");

    // Left-hand navigation entry.
    Elements navEntry = page.select("#leftColumn [href=\"japicmp.html\"]");
    assertThat(navEntry.attr("title"), is("japicmp"));
    assertThat(navEntry.text(), is("japicmp"));

    // Overview table row that links to the report.
    Elements reportRow = page.select("#bodyColumn tr:has([href=\"japicmp.html\"])");
    assertThat(reportRow.select("[href=\"japicmp.html\"]").text(), is("japicmp"));

    String version = System.getProperty("project.version");
    String expectedDescription = "Comparing source compatibility of japicmp-test-v2-" + version + ".jar against japicmp-test-v1-" + version + ".jar";
    assertThat(reportRow.select("td:eq(1)").text(), is(expectedDescription));
}
Example 14
Project: JAViewer-master  File: BTSOLinkProvider.java View source code
/**
 * Turns every element of class {@code row} into a {@code DownloadLink}.
 *
 * @param htmlContent markup of the result page
 * @return the links that could be built; rows for which any step throws are skipped
 */
@Override
public List<DownloadLink> parseDownloadLinks(String htmlContent) {
    ArrayList<DownloadLink> parsed = new ArrayList<>();
    for (Element entry : Jsoup.parse(htmlContent).getElementsByClass("row")) {
        try {
            Element anchor = entry.getElementsByTag("a").first();
            String file = entry.getElementsByClass("file").first().text();
            String size = entry.getElementsByClass("size").first().text();
            String date = entry.getElementsByClass("date").first().text();
            String href = anchor.attr("href");
            parsed.add(DownloadLink.create(file, size, date, href, null));
        } catch (Exception ignored) {
            // Best effort: a row missing any expected part is left out.
        }
    }
    return parsed;
}
Example 15
Project: JianShuApp-master  File: HomePageDataPool.java View source code
/**
 * Parses the article entries of a page and records the "load more" state.
 *
 * <p>Side effects: sets {@code mLoadMoreUrl} from the first load-more element's
 * {@code data-url}, or clears it and sets {@code mIsAtTheEnd} when there is none.
 *
 * @param doc page to parse
 * @return one item per element matching {@code ARTICLE_SELECTOR}; empty when none match
 */
@Override
protected ArticleItem[] getArticleItems(Document doc) {
    Elements loadMoreElements = doc.select(LOAD_MORE_SELECTOR);
    if (loadMoreElements.isEmpty()) {
        mLoadMoreUrl = null;
        mIsAtTheEnd = true;
    } else {
        mLoadMoreUrl = getHtmlUrl(loadMoreElements.get(0).attr("data-url"));
    }

    // select() never returns null, so the former null branch was unreachable.
    Elements articleElements = doc.select(ARTICLE_SELECTOR);
    ArticleItem[] result = new ArticleItem[articleElements.size()];
    int i = 0;
    for (Element el : articleElements) {
        result[i++] = parseElement(el);
    }
    return result;
}
Example 16
Project: jphp-master  File: JsoupExtension.java View source code
/**
 * Registers the Jsoup wrapper classes with the given compile scope.
 *
 * @param scope scope that receives the class registrations
 */
@Override
public void onRegister(CompileScope scope) {
    registerClass(scope, WrapJsoup.class);
    // Each jsoup type is paired with the wrapper class registered for it.
    registerWrapperClass(scope, Connection.class, WrapConnection.class);
    registerWrapperClass(scope, Connection.Response.class, WrapConnectionResponse.class);
    registerWrapperClass(scope, Connection.Request.class, WrapConnectionRequest.class);
    registerWrapperClass(scope, Document.class, WrapDocument.class);
    registerWrapperClass(scope, Element.class, WrapElement.class);
    registerWrapperClass(scope, Elements.class, WrapElements.class);
    // Registered on MemoryOperation directly rather than through the scope.
    MemoryOperation.register(new UrlMemoryOperation());
//MemoryOperation.register(new BinaryMemoryOperation());
}
Example 17
Project: jspider-master  File: BaiduSpiderTest.java View source code
/**
 * Queues the links found in {@code .c-container a} (only while {@code count} is 0)
 * and returns the page title.
 *
 * @param request the request that produced the page
 * @param page    fetched page
 * @return a result holding the document title under the key {@code "title"}
 */
@Override
public Result process(Request request, Page page) {
    Result result = new Result();
    Elements anchors = page.document().select(".c-container a");
    boolean shouldFollow = anchors != null && anchors.size() > 0 && count == 0;
    if (shouldFollow) {
        List<String> targets = new ArrayList<String>(anchors.size());
        for (Element anchor : anchors) {
            String absolute = anchor.absUrl("href");
            // Keep only links that resolve to a non-blank URL with a non-blank host.
            if (!StringUtils.isNotBlank(absolute)) {
                continue;
            }
            if (!StringUtils.isNotBlank(SpiderUrlUtils.getUrlHost(absolute))) {
                continue;
            }
            targets.add(absolute);
        }
        page.addTargetRequests(targets);
    }
    result.put("title", page.document().title());
    return result;
}
Example 18
Project: learn_crawler-master  File: HtmlParserTool.java View source code
/**
 * Fetches a page and collects the absolute URLs of its anchors, frames and iframes
 * that the filter accepts.
 *
 * @param url    page to fetch (5 second timeout)
 * @param filter decides which URLs are kept
 * @return the accepted URLs; empty when fetching fails (the stack trace is printed)
 */
public static Set<String> extracLinks(String url, LinkFilter filter) {
    Set<String> accepted = new HashSet<String>();
    try {
        Document page = Jsoup.connect(url).timeout(5000).get();

        for (Element anchor : page.select("a[href]")) {
            String target = anchor.absUrl("href");
            System.out.println(target);
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
        for (Element frame : page.select("frame[src]")) {
            String target = frame.absUrl("src");
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
        for (Element iframe : page.select("iframe[src]")) {
            String target = iframe.absUrl("src");
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return accepted;
}
Example 19
Project: like_googleplus_layout-master  File: PhoneKRNewsContentUtils.java View source code
/**
 * Downloads a news page and flattens the paragraphs of its {@code #xs-post} element
 * into a list of text lines and image URLs.
 *
 * <p>A paragraph without links contributes its text prefixed with
 * {@code FOUR_BLANK_SPACE}; a paragraph whose first link wraps an image contributes
 * that image's {@code src}; other paragraphs contribute nothing.
 *
 * @param newsUrl address of the news page
 * @return the collected entries, or {@code null} when the download fails, the post
 *         element is missing, or it contains no paragraphs
 */
public static LinkedList<String> getPhoneKRNewsDataList(String newsUrl) {
    LinkedList<String> data = null;
    try {
        Document document = Jsoup.connect(newsUrl).get();
        Element post = document.getElementById("xs-post");
        // getElementById returns null when the id is absent; previously this was an NPE.
        if (post == null) {
            return null;
        }
        Elements paragraphs = post.getElementsByTag("p");
        if (paragraphs.isEmpty()) {
            return null;
        }
        data = new LinkedList<String>();
        for (Element paragraph : paragraphs) {
            String text = null;
            Elements anchors = paragraph.getElementsByTag("a");
            if (anchors.isEmpty()) {
                text = FOUR_BLANK_SPACE + paragraph.text();
            } else {
                Elements images = anchors.get(0).getElementsByTag("img");
                if (!images.isEmpty()) {
                    text = images.get(0).attr("src");
                }
            }
            if (!TextUtils.isEmpty(text)) {
                data.add(text);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return data;
}
Example 20
Project: mlcomp-master  File: TitleMap.java View source code
/**
 * Emits one output record holding the input URL and the texts of all {@code title}
 * elements found in the input HTML.
 *
 * @param recordNum index of the record (unused)
 * @param record    input record: column 0 is the URL, column 1 the HTML
 * @param context   task context used to create and write the output record
 * @throws IOException declared by the mapper contract
 */
@Override
public void map(long recordNum, Record record, TaskContext context) throws IOException {
    String pageUrl = (String) record.get(0);
    String markup = (String) record.get(1);

    // Each title text is preceded by a comma, so a non-empty result starts with ",".
    StringBuilder joinedTitles = new StringBuilder();
    for (Element titleTag : Jsoup.parse(markup).getElementsByTag("title")) {
        joinedTitles.append(",").append(titleTag.text());
    }

    Record output = context.createOutputRecord();
    output.set("url", pageUrl);
    output.set("title", joinedTitles.toString());
    context.write(output);
}
Example 21
Project: mobile-ycjw-master  File: StudentDevelopmentScheduleQuery.java View source code
/**
 * Downloads the development-schedule page and returns the markup of the
 * {@code #DG_GetGrjh} element.
 *
 * @param context context whose application object supplies the server address and HTTP client
 * @return the outer HTML of the elements matching {@code #DG_GetGrjh} (empty if none)
 * @throws Exception wrapping any failure raised while requesting or parsing the page
 */
@Override
public String getDevelopmentScheduleQueryInfo(Context context) throws Exception {
    try {
        YCApplication app = (YCApplication) context.getApplicationContext();
        String url = (String) app.get("selectedIp") + Constant.developScheduleQuery;
        HttpGet request = new HttpGet(url);
        HttpResponse response = app.getClient().execute(request);
        InputStream is = response.getEntity().getContent();
        StringBuilder sb = new StringBuilder();
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(is, Constant.ENCODING));
            String line;
            // Lines are concatenated without separators, as before.
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        } finally {
            // Always release the response stream, even when reading fails.
            is.close();
        }
        Document doc = Jsoup.parse(sb.toString());
        Elements table = doc.select("#DG_GetGrjh");
        return table.toString();
    } catch (Exception e) {
        throw new Exception(e);
    }
}
Example 22
Project: Muzik-master  File: SearchDownloadsNL.java View source code
/**
 * Searches downloads.nl for songs and resolves each hit to its redirect target.
 *
 * @param query search text appended to the results URL
 * @return one {@code SongResult} per hit that answers with a {@code Location} header;
 *         results gathered so far are returned if an I/O error interrupts the search
 */
public static ArrayList<SongResult> getSongs(String query) {
    ArrayList<SongResult> temp = new ArrayList<SongResult>();
    //base query url.
    String u = "http://www.downloads.nl/results/mp3/1/" + Uri.parse(query);
    try {
        Document document = Jsoup.connect(u).get();
        for (Element x : document.select(".tl")) {
            String url = "http://www.downloads.nl" + x.attr("href");
            //todo add artist string to the name so that result is clearer
            HttpURLConnection ucon = (HttpURLConnection) new URL(url).openConnection();
            String location;
            try {
                // Only the redirect target is wanted, so the redirect is not followed.
                ucon.setInstanceFollowRedirects(false);
                location = ucon.getHeaderField("Location");
            } finally {
                ucon.disconnect();
            }
            if (location == null) {
                // No redirect for this hit; skip it instead of aborting the whole search.
                continue;
            }
            URL secondURL = new URL(location);
            String name = x.select("span").text();
            if (HomescreenActivity.debugMode) {
                Log.d("Play", "Downloads.nl Name=" + name + " url=" + secondURL);
            }
            temp.add(new SongResult(name, secondURL.toString()));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return temp;
}
Example 23
Project: pictorial_android_client-master  File: ParserImageList.java View source code
/**
 * Builds the image list from the {@code post-inner} blocks of a page.
 *
 * @param mRet page markup; may be null
 * @return one {@code ImageBean} per post that contains an {@code img[src]}; an empty
 *         list when {@code mRet} is null or nothing matches
 */
public static ImageListBean parser(String mRet) {
    ImageListBean imageListBean = new ImageListBean();
    if (mRet == null) {
        return imageListBean;
    }
    for (Element post : Jsoup.parse(mRet).getElementsByClass("post-inner")) {
        Element image = post.select("img[src]").first();
        if (image == null) {
            // A post without an image used to raise NullPointerException; skip it.
            continue;
        }
        ImageBean imageBean = new ImageBean();
        imageBean.setAlt(image.attr("alt"));
        imageBean.setDetailurl(post.select("a[title]").attr("href"));
        imageBean.setHeight(image.attr("height"));
        imageBean.setWidth(image.attr("width"));
        imageBean.setImgurl(image.attr("src"));
        imageListBean.add(imageBean);
    }
    return imageListBean;
}
Example 24
Project: sample-skeleton-projects-master  File: Crawler.java View source code
/**
 * Crawls a page: records it when its text mentions "research", then recurses into
 * every link whose href contains "mit.edu".
 *
 * @param URL address of the page to process
 * @throws SQLException declared for callers; failures inside the crawl are caught and logged
 * @throws IOException  declared for callers; failures inside the crawl are caught and logged
 */
public static void processPage(String URL) throws SQLException, IOException {
    // Nothing to do for a URL that is already recorded.
    if (ContentList.isContentInMap(URL)) {
        return;
    }
    // NOTE(review): only pages mentioning "research" are inserted, so other pages may be
    // fetched again when linked repeatedly - confirm against ContentList.
    try {
        Document page = Jsoup.connect(URL).timeout(5000).get();
        boolean mentionsResearch = page.text().contains("research");
        if (mentionsResearch) {
            System.out.println(URL);
            ContentList.insertKey(URL, URL);
        }
        // Follow every matching link recursively.
        for (Element anchor : page.select("a[href]")) {
            String href = anchor.attr("href");
            if (href.contains("mit.edu")) {
                processPage(anchor.attr("abs:href"));
            }
        }
    } catch (Exception e) {
        System.out.println("skipping .... " + URL);
    }
}
Example 25
Project: TACIT-master  File: SupremCrawlerFilter.java View source code
/**
 * Lists the filter options offered for a crawl segment.
 *
 * @param segment path segment appended to the crawler URL
 * @return {@code "All"} followed by the trimmed href of every link in the first
 *         {@code .exmenu} element
 * @throws IOException if the page cannot be loaded
 */
public List<String> filters(String segment) throws IOException {
    String target = URI.create(this.crawlerUrl + "/" + segment).toString();
    Elements anchors = parseContentFromUrl(target).select(".exmenu").get(0).select("a");

    List<String> options = new ArrayList<String>();
    options.add("All");
    for (Element anchor : anchors) {
        String href = anchor.attr("href");
        options.add(href.trim());
    }
    return options;
}
Example 26
Project: WaveTact-master  File: Search.java View source code
/**
 * Runs a web search and replies with one result.
 *
 * <p>If the first argument is an integer it selects the 1-based result number and is
 * removed from the query terms; otherwise the first result is used.
 *
 * @param args optional result number followed by the search terms
 * @throws Exception if the search page cannot be fetched
 */
@Override
public void onCommand(String command, User user, PircBotX network, String prefix, Channel channel, boolean isPrivate, int userPermLevel, String... args) throws Exception {
    int resultIndex = 0;
    // Guard the length so a command without arguments no longer fails on args[0].
    if (args.length > 0 && GeneralUtils.isInteger(args[0])) {
        resultIndex = Integer.parseInt(args[0]) - 1;
        args = ArrayUtils.remove(args, 0);
    }
    Document doc = Jsoup.connect("http://www.dogpile.com/dogpilecontrol/search/web?fcoid=417&fcop=topnav&fpid=27&q=thing" + StringUtils.join(args, "%20")).get();
    Elements results = doc.select(".searchResult");
    if (results.isEmpty()) {
        IRCUtils.sendError(user, network, channel, "Search returned no results", prefix);
        return;
    }
    // Reject numbers below 1 as well as numbers past the last result.
    if (resultIndex < 0 || resultIndex >= results.size()) {
        IRCUtils.sendError(user, network, channel, "Search #" + (resultIndex + 1) + " does not exist", prefix);
        return;
    }
    Element result = results.get(resultIndex);
    String title = result.select(".resultTitlePane").text();
    String url = result.select(".resultDisplayUrl").text();
    String content = result.select(".resultDescription").text();
    IRCUtils.sendMessage(user, network, channel, "[" + title + "] " + content + " - " + GeneralUtils.shortenURL(url), prefix);
}
Example 27
Project: webmagic-master  File: CharsetUtils.java View source code
/**
 * Determines the charset of a downloaded page.
 *
 * <p>The HTTP {@code Content-Type} header is consulted first; if it yields nothing the
 * bytes are decoded with the platform default charset and the {@code <meta>} tags are
 * inspected.
 *
 * @param contentType  value of the Content-Type header; may be blank
 * @param contentBytes raw page bytes
 * @return the detected charset name, or whatever {@code UrlUtils.getCharset} returned
 *         when no {@code <meta>} tag declares one
 * @throws IOException declared for callers
 */
public static String detectCharset(String contentType, byte[] contentBytes) throws IOException {
    // 1. encoding declared in the HTTP header Content-Type
    String charset = UrlUtils.getCharset(contentType);
    if (StringUtils.isNotBlank(contentType) && StringUtils.isNotBlank(charset)) {
        logger.debug("Auto get charset: {}", charset);
        return charset;
    }
    // Decode with the default charset first, just to be able to read the meta tags.
    String content = new String(contentBytes, Charset.defaultCharset());
    // 2. charset declared in a meta tag
    if (StringUtils.isNotEmpty(content)) {
        Document document = Jsoup.parse(content);
        for (Element meta : document.select("meta")) {
            // 2.1 HTML 4.01: <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
            String metaContent = meta.attr("content");
            int charsetAt = metaContent.indexOf("charset");
            if (charsetAt != -1) {
                String[] parts = metaContent.substring(charsetAt).split("=");
                // A "charset" token without a value used to overrun the array.
                if (parts.length > 1) {
                    String candidate = parts[1].trim();
                    if (!candidate.isEmpty()) {
                        charset = candidate;
                        break;
                    }
                }
            }
            // 2.2 HTML5: <meta charset="UTF-8" />
            String metaCharset = meta.attr("charset");
            if (StringUtils.isNotEmpty(metaCharset)) {
                charset = metaCharset;
                break;
            }
        }
    }
    logger.debug("Auto get charset: {}", charset);
    // 3. todo use tools such as cpdetector for content decode
    return charset;
}
Example 28
Project: ache-master  File: GoogleSearch.java View source code
public List<BackLinkNeighborhood> submitQuery(String query, int page) throws IOException {
    timer.waitMinimumDelayIfNecesary();
    // 21 -> max number allowed by google... decreases after
    String queryUrl = "https://www.google.com/search?q=" + query + "&num=" + docsPerPage + "&start=" + page * docsPerPage;
    System.out.println("URL:" + queryUrl);
    try {
        FetchedResult result = fetcher.get(queryUrl);
        InputStream is = new ByteArrayInputStream(result.getContent());
        Document doc = Jsoup.parse(is, "UTF-8", query);
        is.close();
        Elements searchItems = doc.select("div#search");
        Elements linkHeaders = searchItems.select(".r");
        Elements linksUrl = linkHeaders.select("a[href]");
        List<BackLinkNeighborhood> links = new ArrayList<>();
        for (Element link : linksUrl) {
            String title = link.text();
            String url = link.attr("href");
            links.add(new BackLinkNeighborhood(url, title));
        }
        System.out.println(getClass().getSimpleName() + " hits: " + links.size());
        return links;
    } catch (IOExceptionBaseFetchException |  e) {
        throw new IOException("Failed to download backlinks from Google.", e);
    }
}
Example 29
Project: any-video-master  File: PandaCrawler.java View source code
/**
 * Extracts the live-stream entries from the page and stores them through
 * {@code redisSourceManager}.
 *
 * @param document page listing the live videos
 */
private void savePandaLivesToRedis(Document document) {
    List<VideoDTO> lives = new ArrayList<>();
    for (Element item : document.select("li.video-list-item.video-no-tag")) {
        VideoDTO live = new VideoDTO();
        String category = item.select("div.video-info span.video-cate").text();
        String nickname = item.select("div.video-info span.video-nickname").text();
        String cover = item.select("img.video-img").attr("data-original");
        String link = PANDA + item.attr("data-id");

        live.setAvailable(true);
        live.setTitle("[" + category + "] " + nickname);
        live.setImage(cover);
        live.setValue(link);
        lives.add(live);

        // Collection stops once the list holds more than 48 entries.
        if (lives.size() > 48) {
            break;
        }
    }
    redisSourceManager.saveVideos(redisSourceManager.VIDEO_PREFIx_HOME_LIVE_KEY + "_" + TAG, lives);
}
Example 30
Project: asoiaf-master  File: FetchUrls.java View source code
/**
 * Fetches a page and picks the thumbnail and original image links from its
 * {@code li.outlink a} anchors.
 *
 * @param url page to fetch (5 second timeout)
 * @return an {@code ImageUrl} filled from the anchors labelled "200" and "original";
 *         left unpopulated when the fetch fails (the stack trace is printed)
 */
public static ImageUrl FetchImageUrl(String url) {
    ImageUrl result = new ImageUrl();
    try {
        Elements outlinks = Jsoup.connect(url).timeout(5000).get().select("li.outlink a");
        for (Element link : outlinks) {
            String label = link.text();
            if (label.equals("200")) {
                result.setThumbUrl(link.select("a[href]").attr("href"));
            } else if (label.equals("original")) {
                result.setOringinUrl(link.select("a[href]").attr("href"));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}
Example 31
Project: baleen-master  File: CommentArea.java View source code
/**
 * Wraps comment paragraphs in {@code ASIDE}.
 *
 * <p>For every {@code p} matched by {@code p:contains(COMMENT_END)}: if its own text also
 * contains {@code COMMENT_START} only that paragraph is wrapped; otherwise earlier
 * sibling paragraphs are searched backwards for {@code COMMENT_START} and, when one is
 * found, the siblings from there up to and including the end paragraph are wrapped.
 *
 * @param document document modified in place
 */
@Override
public void manipulate(Document document) {
    document.select("p:contains(" + COMMENT_END + ")").forEach( last -> {
        if (last.ownText().contains(COMMENT_START)) {
            // Start and end markers sit in the same paragraph.
            last.wrap(ASIDE);
        } else {
            int index = last.elementSiblingIndex();
            Elements allSiblings = last.siblingElements();
            Elements pSiblings = allSiblings.select("p");
            Element startSibling = null;
            // NOTE(review): index is a position among all element siblings but is used to
            // index pSiblings (p elements only); these may not line up when non-p siblings
            // exist - confirm.
            for (int i = index - 1; i >= 0; i--) {
                Element e = pSiblings.get(i);
                if (e.ownText().contains(COMMENT_START)) {
                    startSibling = e;
                    break;
                }
            }
            // Nothing is wrapped when no start marker precedes the end marker.
            if (startSibling != null) {
                for (int i = startSibling.elementSiblingIndex(); i < index; i++) {
                    allSiblings.get(i).wrap(ASIDE);
                }
                last.wrap(ASIDE);
            }
        }
    });
}
Example 32
Project: bank-importer-master  File: ItauPoupancaImportador.java View source code
/**
 * Loads the savings-account statement and converts its rows into records.
 *
 * @return one record per {@code div.rowPar}/{@code div.rowImpar} row whose description
 *         is not listed in {@code descricoesIgnorar}
 */
@Override
public List<BancoRegistro> carregarLancamentosExtrato() {
    carregarOpcoesMenu();
    String saldoHtml = carregarHtml("https://ww70.itau.com.br/M/SaldoPoupanca.aspx", 200);
    Document extratoDoc = carregarHtmlDeLink(saldoHtml, "a[href^=SaldoPoupanca]", "Últimos 30 dias");
    Element extrato = extratoDoc.getElementById("ctl00_ContentPlaceHolder1_Fieldset2");

    List<BancoRegistro> registros = new ArrayList<BancoRegistro>();
    for (Element linha : extrato.select("div.rowPar, div.rowImpar")) {
        Elements colunas = linha.select("td");
        // Cells 1, 2 and 3 hold date, description and value respectively.
        String data = colunas.get(1).text();
        String descricao = colunas.get(2).text().trim();
        String valor = colunas.get(3).text();
        if (descricoesIgnorar.contains(descricao)) {
            continue;
        }
        registros.add(gerarRegistro(data, descricao, valor));
    }
    return registros;
}
Example 33
Project: bennu-master  File: Component.java View source code
/**
 * Expands every element carrying a {@code bennu-component} attribute.
 *
 * @param origin source markup
 * @return the document markup after each element whose attribute value has an entry in
 *         {@code COMPONENTS} has been replaced by that entry's output
 */
public static String process(String origin) {
    Document parsed = Jsoup.parse(origin);
    parsed.select("[bennu-component]").forEach(placeholder ->
        // Elements whose key has no registered entry are left untouched.
        Optional.ofNullable(COMPONENTS.get(placeholder.attr("bennu-component")))
                .ifPresent(handler -> placeholder.replaceWith(handler.process(placeholder))));
    return parsed.toString();
}
Example 34
Project: CarHome-master  File: FetcherSLFilter.java View source code
/**
 * Reads the options of the first two {@code select} elements of a page and passes
 * them on to {@code parseAjax}.
 *
 * @param url page to load (UTF-8)
 */
public static void fetchRawData(String url) {
    Elements dropdowns = getDocument(url, "UTF-8").select("select");

    // First dropdown: option value -> option text.
    Map<String, String> brandMap = Maps.newLinkedHashMap();
    for (Element option : dropdowns.get(0).select("option")) {
        brandMap.put(option.attr("value"), option.text());
    }

    // Second dropdown, same mapping.
    Map<String, String> typeMap = Maps.newLinkedHashMap();
    for (Element option : dropdowns.get(1).select("option")) {
        typeMap.put(option.attr("value"), option.text());
    }

    parseAjax(brandMap, typeMap);
}
Example 35
Project: clicker-master  File: CNProxyGetter.java View source code
/**
 * Scrapes proxy list pages 1 to 10 of cnproxy.com.
 *
 * @return every proxy that could be parsed; pages or rows that fail are skipped silently
 */
@Override
public Set<Proxy> find() {
    final Set<Proxy> found = new HashSet<Proxy>();
    for (int page = 1; page <= 10; page++) {
        try {
            final URL pageUrl = new URL("http://www.cnproxy.com/proxy" + page + ".html");
            // The third table of the page is the one that is read.
            final Elements rows = Jsoup.parse(pageUrl, TIMEOUT).getElementsByTag("table").get(2).getElementsByTag("tr");
            // Row 0 is skipped.
            for (int r = 1; r < rows.size(); r++) {
                try {
                    final Element cell = rows.get(r).getElementsByTag("td").get(0);
                    final String host = cell.text();
                    // The port is taken from the cell's script: strip the document.write
                    // wrapper, then apply the REPLACEMENTS substitutions.
                    String port = cell.getElementsByTag("script").get(0).data()
                            .replace("document.write(", "")
                            .replace("\"", "")
                            .replace(")", "")
                            .replace("+", "")
                            .replace(":", "");
                    for (final Map.Entry<String, String> rule : REPLACEMENTS.entrySet()) {
                        port = port.replaceAll(rule.getKey(), rule.getValue());
                    }
                    found.add(new Proxy(host, Integer.valueOf(port), this.properties));
                } catch (final Exception e) {
                    // Best effort: an unparsable row is skipped.
                }
            }
        } catch (final Exception e) {
            // Best effort: an unreachable or malformed page is skipped.
        }
    }
    return found;
}
Example 36
Project: CN1ML-NetbeansModule-master  File: ListLinks.java View source code
/**
 * Fetches the page at the URL given as the single command-line argument and prints its
 * {@code [src]} elements, its {@code link[href]} elements and its {@code a[href]} links.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    final String target = args[0];
    print("Fetching %s...", target);

    final Document page = Jsoup.connect(target).get();
    final Elements anchors = page.select("a[href]");
    final Elements sources = page.select("[src]");
    final Elements linkTags = page.select("link[href]");

    print("\nMedia: (%d)", sources.size());
    for (Element source : sources) {
        if (!source.tagName().equals("img")) {
            print(" * %s: <%s>", source.tagName(), source.attr("abs:src"));
            continue;
        }
        // Images additionally report their dimensions and a shortened alt text.
        print(" * %s: <%s> %sx%s (%s)", source.tagName(), source.attr("abs:src"), source.attr("width"), source.attr("height"), trim(source.attr("alt"), 20));
    }

    print("\nImports: (%d)", linkTags.size());
    for (Element linkTag : linkTags) {
        print(" * %s <%s> (%s)", linkTag.tagName(), linkTag.attr("abs:href"), linkTag.attr("rel"));
    }

    print("\nLinks: (%d)", anchors.size());
    for (Element anchor : anchors) {
        print(" * a: <%s>  (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
    }
}
Example 37
Project: crawler-master  File: DefaultAssetsParser.java View source code
/**
 * Collects the asset URLs referenced by a document: the {@code abs:href} of every
 * {@code link[href]} element followed by the {@code abs:src} of every {@code [src]} element.
 *
 * @param doc the parsed document to inspect
 * @param referer passed unchanged to every created {@link CrawlerURL}
 * @return the set of asset URLs
 */
@Override
public Set<CrawlerURL> getAssets(Document doc, String referer) {
    final Elements linkTags = doc.select("link[href]");
    final Elements sourceTags = doc.select("[src]");
    final int expectedSize = sourceTags.size() + linkTags.size();
    final Set<CrawlerURL> assets = new HashSet<CrawlerURL>(expectedSize);
    for (Element linkTag : linkTags) {
        final String href = linkTag.attr("abs:href");
        assets.add(new CrawlerURL(href, referer));
    }
    for (Element sourceTag : sourceTags) {
        final String src = sourceTag.attr("abs:src");
        assets.add(new CrawlerURL(src, referer));
    }
    return assets;
}
Example 38
Project: curiosity-maps-master  File: LocationCrawler.java View source code
/**
 * Parses the list of {@link RoverLocation}s from the retrieved {@code document}.
 *
 * <p>At most one location is kept per {@code endSol}: the first one encountered in document
 * order. Entries that fail to parse are logged to stderr and skipped.
 *
 * @param document the document containing {@code location} elements
 * @return an immutable copy of the retained locations
 */
private List<RoverLocation> parseLocations(Document document) {
    // NOTE(review): the original comment said "only use the last location from each endSol",
    // but later duplicates are the ones discarded - presumably the feed lists the newest
    // entry first; confirm.
    Map<Integer, RoverLocation> bySol = new HashMap<>();
    for (Element entry : document.select("location")) {
        try {
            int sol = Integer.parseInt(entry.select("endSol").text());
            if (!bySol.containsKey(sol)) {
                double lat = Double.parseDouble(entry.select("lat").text());
                double lon = Double.parseDouble(entry.select("lon").text());
                String arrival = entry.select("arrivalTime").text();
                bySol.put(sol, new RoverLocation(sol, lat, lon, arrival));
            }
        } catch (Throwable t) {
            t.printStackTrace();
        }
    }
    return ImmutableList.copyOf(bySol.values());
}
Example 39
Project: en-webmagic-master  File: CssSelector.java View source code
/**
 * Parses {@code text} as HTML, applies this selector and returns the non-empty values
 * extracted from the matching elements, in document order.
 *
 * @param text the HTML to select from
 * @return the extracted values; empty when nothing matches
 */
@Override
public List<String> selectList(String text) {
    List<String> values = new ArrayList<String>();
    Elements matches = Jsoup.parse(text).select(selectorText);
    if (!CollectionUtils.isNotEmpty(matches)) {
        return values;
    }
    for (Element match : matches) {
        String extracted = getValue(match);
        if (StringUtils.isEmpty(extracted)) {
            continue;
        }
        values.add(extracted);
    }
    return values;
}
Example 40
Project: EventApp-master  File: ConferenceSessionLoader.java View source code
/**
 * Parses a conference track page and reports the sessions found to the listener.
 *
 * <p>The track name is taken from the first {@code h1.entry-title} (or {@code null} when
 * absent) and one session is created per {@code tr} of the first {@code table.track}.
 * Speaker profile, description and room are only set when the corresponding span exists.
 *
 * @param body the HTML of the track page
 */
@Override
public void onResponse(String body) {
    List<ConferenceSession> sessions = new ArrayList<ConferenceSession>();
    Document document = Jsoup.parse(body);
    // track name
    Elements h1 = document.select("h1.entry-title");
    String trackName = h1.isEmpty() ? null : h1.first().text();
    Elements tables = document.select("table.track");
    if (!tables.isEmpty()) {
        for (Element tr : tables.first().select("tr")) {
            ConferenceSession session = new ConferenceSession();
            session.setTrackName(trackName);
            session.setSessionTitle(text(tr.select("span.session_title")));
            session.setSpeakerName(text(tr.select("span.speaker_name")));
            Elements profile = tr.select("span.speaker_profile");
            if (!profile.isEmpty()) {
                session.setSpeakerProfile(text(profile));
            }
            session.setBeginTime(text(tr.select("span.starttime")));
            Elements description = tr.select("span.session_description");
            if (!description.isEmpty()) {
                // Strip the "【 講演内容 】" heading marker. The opening bracket had been
                // corrupted to U+FFFD, so the marker could never match.
                session.setDescription(description.first().text().replace("【 講演内容 】<br />", ""));
            }
            // Rows without a room span used to throw a NullPointerException here.
            Elements room = tr.select("span.roomname");
            if (!room.isEmpty()) {
                session.setRoom(room.first().text());
            }
            sessions.add(session);
        }
    }
    listener.onSuccess(sessions);
}
Example 41
Project: extentreports-java-master  File: SystemAttributeTests.java View source code
/**
 * Flushes the report, re-reads the generated HTML file and asserts that some
 * {@code .environment td} cell has exactly the text {@code key} and some cell has exactly
 * the text {@code value}.
 *
 * @param key expected cell text for the attribute name
 * @param value expected cell text for the attribute value
 */
private void performAssertForKVPairs(String key, String value) {
    extent.flush();
    Document report = Jsoup.parse(Reader.readAllText(htmlFilePath));
    boolean sawKey = false;
    boolean sawValue = false;
    for (Element cell : report.select(".environment td")) {
        String cellText = cell.text();
        sawKey = sawKey || cellText.equals(key);
        sawValue = sawValue || cellText.equals(value);
    }
    Assert.assertTrue(sawKey);
    Assert.assertTrue(sawValue);
}
Example 42
Project: FakeWeather-master  File: MzituZiPaiFragment.java View source code
/**
 * Downloads the page at {@code url} and builds one {@link Girl} per list item of the first
 * {@code div.postlist}, using the {@code src} of the item's first image.
 *
 * <p>A page without {@code div.postlist} yields an empty list and list items without an
 * image are skipped; both cases previously ended in a NullPointerException.
 *
 * @param url the page to download
 * @return the parsed entries; empty when nothing was found or the download failed
 */
@Override
public List<Girl> call(String url) {
    List<Girl> girls = new ArrayList<>();
    try {
        Document doc = Jsoup.connect(url).timeout(10000).get();
        Element postList = doc.select("div.postlist").first();
        if (postList == null) {
            return girls;
        }
        for (Element item : postList.select("li")) {
            Element image = item.select("img").first();
            if (image == null) {
                continue;
            }
            girls.add(new Girl(image.attr("src")));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return girls;
}
Example 43
Project: FudanBBS-master  File: ListLinks.java View source code
/**
 * Fetches the page at the URL given as the single command-line argument and prints its
 * {@code [src]} elements, its {@code link[href]} elements and its {@code a[href]} links.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    final String target = args[0];
    print("Fetching %s...", target);

    final Document page = Jsoup.connect(target).get();
    final Elements anchors = page.select("a[href]");
    final Elements sources = page.select("[src]");
    final Elements linkTags = page.select("link[href]");

    print("\nMedia: (%d)", sources.size());
    for (Element source : sources) {
        if (!source.tagName().equals("img")) {
            print(" * %s: <%s>", source.tagName(), source.attr("abs:src"));
            continue;
        }
        // Images additionally report their dimensions and a shortened alt text.
        print(" * %s: <%s> %sx%s (%s)", source.tagName(), source.attr("abs:src"), source.attr("width"), source.attr("height"), trim(source.attr("alt"), 20));
    }

    print("\nImports: (%d)", linkTags.size());
    for (Element linkTag : linkTags) {
        print(" * %s <%s> (%s)", linkTag.tagName(), linkTag.attr("abs:href"), linkTag.attr("rel"));
    }

    print("\nLinks: (%d)", anchors.size());
    for (Element anchor : anchors) {
        print(" * a: <%s>  (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
    }
}
Example 44
Project: Gazetti_Newspaper_Reader-master  File: hindu.java View source code
/**
 * Downloads the article at {@code mArticleURL} and extracts its title, header image URL and
 * body text using the selectors supplied by {@code ConfigService}.
 *
 * <p>The page is requested once: the document is parsed from the response that was already
 * checked for its status code, instead of issuing a second request.
 *
 * @return a three-element array {@code [title, imageUrl, bodyText]}, or {@code null} when the
 *         response is missing or not 200, or when any exception occurs (it is reported to
 *         Crashlytics)
 */
public String[] getHinduArticleContent() {
    Document doc;
    String[] result = new String[3];
    String url = mArticleURL;
    try {
        Connection connection = Jsoup.connect(url).userAgent("Mozilla").timeout(10 * 1000);
        Response response = connection.execute();
        if (response == null) {
            Crashlytics.log("Is response null ? " + (null == response));
            return null;
        } else if (response.statusCode() != 200) {
            Crashlytics.log("Received response - " + response.statusCode() + " -- " + response.statusMessage());
            Crashlytics.log("Received response - " + response.body());
            return null;
        }
        // Reuse the response that was just validated; connection.get() would fetch again.
        doc = response.parse();
        // get Body
        Element bodyElement = doc.body();
        // get Title
        String titleSelector = ConfigService.getInstance().getTheHinduHead();
        Elements titleElements = bodyElement.select(titleSelector);
        titleText = titleElements.first().text();
        // get HeaderImageUrl
        mImageURL = getImageURL(bodyElement);
        String articleSelector = ConfigService.getInstance().getTheHinduBody();
        Elements bodyArticleElements = bodyElement.select(articleSelector);
        for (Element textArticleElement : bodyArticleElements) {
            bodyText += textArticleElement.text() + "\n\n";
        }
        result[0] = titleText;
        result[1] = mImageURL;
        result[2] = bodyText;
    } catch (IOException e) {
        Crashlytics.logException(e);
        return null;
    } catch (NullPointerException npe) {
        // e.g. no element matched the title selector
        bodyText = null;
        Crashlytics.logException(npe);
        return null;
    } catch (Exception e) {
        Crashlytics.logException(e);
        return null;
    }
    return result;
}
Example 45
Project: HackerNews-master  File: UserParser.java View source code
/**
 * Loads the profile page of {@code username} over an anonymous connection and reads the
 * created, karma, avg and about fields from it.
 *
 * <p>{@code avg} falls back to {@code -1.0f} when it cannot be read.
 *
 * @param username the user to look up
 * @return the populated user, or {@code null} when an {@link IOException},
 *         {@link NumberFormatException} or {@link NullPointerException} occurs
 */
public static User parseUser(String username) {
    try {
        final User parsed = new User();
        parsed.username = username;
        // anonymous connection (no user cookie) so that the "about" text appears correctly
        final Document profile = ConnectionManager.anonConnect("/user?id=" + username).get();
        final Elements rows = profile.select("form > table > tbody > tr");

        final Element createdCell = rows.select("td:containsOwn(created:) + td").first();
        parsed.created = createdCell.text();

        final Element karmaCell = rows.select("td:containsOwn(karma:) + td").first();
        parsed.karma = Integer.parseInt(karmaCell.text());

        float average;
        try {
            average = Float.parseFloat(rows.select("td:containsOwn(avg:) + td").first().text());
        } catch (Exception e) {
            average = -1.0f;
        }
        parsed.avg = average;

        final Element aboutCell = rows.select("td:containsOwn(about:) + td").first();
        parsed.aboutHtml = aboutCell.html();
        return parsed;
    } catch (IOException io) {
        io.printStackTrace();
        Log.d(TAG, "IOException parsing UserModel for: " + username);
        return null;
    } catch (NumberFormatException nfe) {
        nfe.printStackTrace();
        Log.d(TAG, "NumberFormatException parsing UserModel for: " + username);
        return null;
    } catch (NullPointerException npe) {
        npe.printStackTrace();
        Log.d(TAG, "NullPointerException parsing UserModel for: " + username);
        return null;
    }
}
Example 46
Project: html-exporter-master  File: StyleParser.java View source code
/**
 * Parses the data of each given element as CSS and merges the resulting rules into one map
 * via {@code mapStyles}.
 *
 * @param elements the elements whose data is parsed as CSS
 * @return the accumulated styles
 * @throws RuntimeException wrapping any exception raised while parsing or mapping an element
 */
public Map<String, Style> parseStyles(Elements elements) {
    final Map<String, Style> collected = new HashMap<>();
    for (int index = 0; index < elements.size(); index++) {
        final Element styleElement = elements.get(index);
        try {
            mapStyles(CSSParser.parse(styleElement.data()), collected);
        } catch (Exception cause) {
            throw new RuntimeException(cause);
        }
    }
    return collected;
}
Example 47
Project: janglipse-master  File: KeywordDocParser.java View source code
/**
 * Builds one {@link KeywordDocumentation} per {@code table.detailHeader} in the document.
 *
 * <p>The name is the text of the table's first {@code td.detailHeaderName}; the description is
 * the outer HTML of the node that follows the table. Tables without a name cell are skipped,
 * and a table with no following sibling gets no description. Both cases previously threw.
 *
 * @param doc the parsed documentation page
 * @return the keywords found, in document order; empty when there are none
 */
private List<KeywordDocumentation> parse(Document doc) {
    List<KeywordDocumentation> list = new ArrayList<KeywordDocumentation>();
    for (Element table : doc.select("table.detailHeader")) {
        Elements header = table.select("td.detailHeaderName");
        if (header.isEmpty()) {
            continue;
        }
        KeywordDocumentation keyword = new KeywordDocumentation();
        keyword.setName(header.get(0).text());
        // nextSibling() is null when the table is the last node of its parent.
        if (table.nextSibling() != null) {
            keyword.setDescription(table.nextSibling().outerHtml());
        }
        list.add(keyword);
    }
    return list;
}
Example 48
Project: java-manga-reader-master  File: MangaUtil.java View source code
/**
 * Retrieves a list of licensed Manga from Anime News Network.
 *
 * <p>Each title has a leading {@code "(The)"} rewritten to {@code "The"} and everything from
 * its last opening parenthesis onwards removed.
 *
 * @return A list of Manga licensed in English.
 * @throws IOException If it cannot complete the request.
 */
public static List<String> getLicensedManga() throws IOException {
    final String listUrl = "http://www.animenewsnetwork.com/encyclopedia/anime-list.php"
            + "?licensed=1"
            + "&sort=title"
            + "&showG=1";
    // maxBodySize(0) lifts jsoup's response size limit for this request.
    final Document doc = Jsoup.connect(listUrl).maxBodySize(0).get();
    final Elements entries = doc.getElementsByClass("HOVERLINE");
    final List<String> licensed = new ArrayList<String>(entries.size());
    for (Element entry : entries) {
        String title = entry.text();
        if (title.startsWith("(The)")) {
            title = title.replace("(The)", "The");
        }
        final int lastParen = title.lastIndexOf('(');
        if (lastParen >= 0) {
            title = title.substring(0, lastParen).trim();
        }
        licensed.add(title);
    }
    return licensed;
}
Example 49
Project: JCommons-master  File: DownloaderTest.java View source code
/**
 * Downloads the documents linked from elements whose text contains "[PDF]" or "[arXiv]" on a
 * fixed Meta Stack Exchange question into {@code D:/dl}.
 *
 * <p>The file name is derived from the first {@code <strong>} inside the link's parent.
 * Links with an empty or malformed href, without a parent or a {@code <strong>} title, or
 * whose target cannot be opened are skipped. Streams are closed even when copying fails, and
 * the copy uses a buffer instead of single-byte reads.
 *
 * @param args unused
 * @throws IOException if the question page cannot be fetched or a file cannot be written
 */
public static void main(String[] args) throws IOException {
    Document doc = Jsoup.connect("http://meta.stackexchange.com/questions/134495/academic-papers-using-stack-exchange-data").get();
    Elements eles = doc.getElementsContainingText("[PDF]");
    eles.addAll(doc.getElementsContainingText("[arXiv]"));
    String folderName = "D:/dl";
    for (Element ele : eles) {
        String src = ele.attr("href");
        if (src == null || src.trim().equals("")) {
            continue;
        }
        URL url;
        try {
            url = new URL(src);
        } catch (IOException malformed) {
            // e.g. a relative href: skip this link instead of aborting the whole run
            continue;
        }
        Element parent = ele.parent();
        if (parent == null) {
            continue;
        }
        Elements strongTags = parent.getElementsByTag("strong");
        if (strongTags.isEmpty()) {
            continue;
        }
        String fileName = strongTags.get(0).text().replace(":", " ").replace("\"", "").replace("'", "").replace("?", "");
        if (fileName.contains("Fit or")) {
            continue;
        }
        if (!fileName.endsWith(".")) {
            fileName = fileName.concat(".");
        }
        fileName = fileName.concat("pdf");
        System.out.println(fileName);
        InputStream in;
        try {
            in = url.openStream();
        } catch (Exception e) {
            continue;
        }
        try {
            OutputStream out = new BufferedOutputStream(new FileOutputStream(folderName + "/" + fileName));
            try {
                byte[] buffer = new byte[8192];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }
}
Example 50
Project: jeboorker-master  File: DNBMetadataDownloader.java View source code
/**
 * Parses each non-null HTML payload as UTF-8 and wraps the document, together with all of its
 * {@code td} elements, in a {@link DNBMetadataDownloadEntry}.
 *
 * @param metadataHtmlContent raw HTML pages; {@code null} items are ignored
 * @return one entry per non-null page, in input order
 * @throws IOException if a page cannot be parsed
 */
private List<MetadataDownloadEntry> getMetadataDownloadEntries(List<byte[]> metadataHtmlContent) throws IOException {
    final List<MetadataDownloadEntry> entries = new ArrayList<>(metadataHtmlContent.size());
    for (byte[] page : metadataHtmlContent) {
        if (page == null) {
            continue;
        }
        final Document parsed = Jsoup.parse(new ByteArrayInputStream(page), StringUtil.UTF_8, MAIN_URL);
        final Elements cells = parsed.getElementsByTag("td");
        entries.add(new DNBMetadataDownloadEntry(parsed, cells));
    }
    return entries;
}
Example 51
Project: jsoup-master  File: ListLinks.java View source code
/**
 * Fetches the page at the URL given as the single command-line argument and prints its
 * {@code [src]} elements, its {@code link[href]} elements and its {@code a[href]} links.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    final String target = args[0];
    print("Fetching %s...", target);

    final Document page = Jsoup.connect(target).get();
    final Elements anchors = page.select("a[href]");
    final Elements sources = page.select("[src]");
    final Elements linkTags = page.select("link[href]");

    print("\nMedia: (%d)", sources.size());
    for (Element source : sources) {
        if (!source.tagName().equals("img")) {
            print(" * %s: <%s>", source.tagName(), source.attr("abs:src"));
            continue;
        }
        // Images additionally report their dimensions and a shortened alt text.
        print(" * %s: <%s> %sx%s (%s)", source.tagName(), source.attr("abs:src"), source.attr("width"), source.attr("height"), trim(source.attr("alt"), 20));
    }

    print("\nImports: (%d)", linkTags.size());
    for (Element linkTag : linkTags) {
        print(" * %s <%s> (%s)", linkTag.tagName(), linkTag.attr("abs:href"), linkTag.attr("rel"));
    }

    print("\nLinks: (%d)", anchors.size());
    for (Element anchor : anchors) {
        print(" * a: <%s>  (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
    }
}
Example 52
Project: jubula.core-master  File: HtmlImageLister.java View source code
/**
 * Searches the HTML files found recursively under the given path for {@code img} elements and
 * prints, once each, the image path formed by the file's parent directory and the {@code src}
 * attribute.
 *
 * @param args the root path as first element; the process exits with -1 when it is missing
 */
public static void main(String[] args) {
    if (args.length == 0) {
        System.err.println("Please provide a path!\n Usage: java -jar HtmlImageLister.jar <path>");
        System.exit(-1);
    }
    try {
        final Set<String> imagePaths = new HashSet<>();
        final List<File> htmlFiles = HtmlImageLister.getFilesRecursive(args[0]);
        for (File htmlFile : htmlFiles) {
            final Document parsed = Jsoup.parse(htmlFile, "UTF-8");
            for (Element image : parsed.getElementsByTag("img")) {
                imagePaths.add(htmlFile.getParentFile() + "/" + image.attr("src"));
            }
        }
        for (String imagePath : imagePaths) {
            System.out.println(imagePath);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Example 53
Project: karma-exchange-master  File: SalesforceUtil.java View source code
/**
 * Rewrites the {@code src} of every image served from the image CDN domain so that it points
 * at the source's own domain over HTTPS, keeping path and query.
 *
 * <p>Images whose {@code src} is not a valid URI, or has no host (for example a relative
 * path), are left untouched. The query is only appended when one is present, and the raw
 * (still percent-encoded) path and query are reused so the rewritten URL stays valid.
 *
 * @param doc the document whose images are rewritten in place
 * @param sourceInfo supplies the domain that replaces the CDN host
 */
private static void updateSalesforceCdnImgLinks(Document doc, EventSourceInfo sourceInfo) {
    for (Element img : doc.getElementsByTag("img")) {
        URI uri;
        try {
            uri = new URI(img.attr("src"));
        } catch (URISyntaxException ignored) {
            // not a parseable URI: nothing to rewrite
            continue;
        }
        // getHost() is null for relative or opaque URIs; the old code dereferenced it blindly.
        String host = uri.getHost();
        if (host == null || !host.toLowerCase(java.util.Locale.ROOT).endsWith(IMG_CDN_DOMAIN)) {
            continue;
        }
        StringBuilder rewritten = new StringBuilder("https://").append(sourceInfo.getDomain());
        if (uri.getRawPath() != null) {
            rewritten.append(uri.getRawPath());
        }
        // Previously a missing query produced a literal "?null" suffix.
        if (uri.getRawQuery() != null) {
            rewritten.append('?').append(uri.getRawQuery());
        }
        img.attr("src", rewritten.toString());
    }
}
Example 54
Project: ManalithBot-master  File: TranslatorPlugin.java View source code
/**
 * Bot command that translates {@code message} into the language {@code to} through the
 * Microsoft Translator endpoint, authenticating with HTTP Basic and the client secret.
 *
 * <p>Both values are URL-encoded before being placed in the query string, so messages that
 * contain characters such as {@code &}, {@code #} or {@code %} no longer corrupt the request.
 *
 * @param to target language code
 * @param message text to translate
 * @return the translated text, or a fixed "nothing to translate" message when the request
 *         fails
 */
@BotCommand("번역")
public String translate(@Option(name = "ko|en...", help = "번역할 대상 언어") String to, @Option(name = "메시지", help = "번역할 메시지") String message) {
    final String url = "https://api.datamarket.azure.com/Bing/MicrosoftTranslator/v1/Translate?Text='%s'&To='%s'";
    String login = "USER_ID_IGNORED:" + clientSecret;
    String base64login = new String(Base64.encodeBase64(login.getBytes()));
    try {
        // UnsupportedEncodingException is an IOException and is handled by the catch below.
        String requestUrl = String.format(url, java.net.URLEncoder.encode(message, "UTF-8"), java.net.URLEncoder.encode(to, "UTF-8"));
        Document doc = Jsoup.connect(requestUrl).header("Authorization", "Basic " + base64login).ignoreContentType(true).get();
        // The placeholder was missing, so the response was never actually logged.
        logger.debug("response: {}", doc);
        Elements elem = doc.select("d|text[m:type=Edm.String]");
        return elem.text();
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
    }
    return "번역할 내용이 없습니다.";
}
Example 55
Project: medium-textview-master  File: Utils.java View source code
/**
 * Adds one child view to {@code elementView} for every element that is recognised as a
 * blockquote, header, iframe, paragraph, image or div. The checks run in that order and the
 * first match wins. Unrecognised elements are ignored.
 *
 * @param elementView the container receiving the child views
 * @param elements the elements to render, in order
 */
public static void appendView(ElementView elementView, Elements elements) {
    for (Element element : elements) {
        if (JsoupUtils.isBlockquote(element)) {
            elementView.addView(new BlockquoteView(elementView.getContext(), element));
            continue;
        }
        if (JsoupUtils.isHeader(element)) {
            elementView.addView(new HeaderView(elementView.getContext(), element));
            continue;
        }
        if (JsoupUtils.isIFrame(element)) {
            elementView.addView(new IFrameView(elementView.getContext(), element));
            continue;
        }
        if (JsoupUtils.isParagraph(element)) {
            elementView.addView(new ParagraphView(elementView.getContext(), element));
            continue;
        }
        if (JsoupUtils.isImage(element)) {
            elementView.addView(new ImageView(elementView.getContext(), element));
            continue;
        }
        if (JsoupUtils.isDiv(element)) {
            elementView.addView(new DivView(elementView.getContext(), element));
        }
    }
}
Example 56
Project: memorabilia-master  File: PostTag.java View source code
/**
 * Selects the elements matching this tag's {@code xpath} selector, followed by every
 * {@code script} element whose HTML contains {@code pattern}.
 *
 * @param doc the document to search
 * @return the selector matches first, then the matching scripts
 */
public Elements selectFrom(Document doc) {
    final Elements matches = new Elements();
    matches.addAll(doc.select(xpath));
    for (Element script : doc.getElementsByTag("script")) {
        if (!script.html().contains(pattern)) {
            continue;
        }
        matches.add(script);
    }
    return matches;
}
Example 57
Project: mensaapp-master  File: PreviewMenuParser.java View source code
/**
 * Builds a {@link Menu} from the first four columns of a menu table: the meal name comes from
 * the second row, the text passed to {@code addMeal} from the third row and the meal types
 * from the fourth row. The price is the default price for the name.
 *
 * @param weeklyMenu the weekly menu the new menu belongs to
 * @param menuTable the table element to read
 * @return the populated menu
 */
@Override
protected Menu parseMenu(WeeklyMenu weeklyMenu, Element menuTable) {
    final Menu menu = new Menu(weeklyMenu);
    final Elements rows = menuTable.select("tr");
    final Elements secondRowCells = rows.get(1).children();
    final Elements thirdRowCells = rows.get(2).children();
    final Elements fourthRowCells = rows.get(3).children();
    for (int column = 0; column < 4; column++) {
        final String name = secondRowCells.get(column).text();
        addMeal(menu, name, thirdRowCells.get(column).text(), parseMealTypes(fourthRowCells.get(column)), getDefaultPrice(name));
    }
    return menu;
}
Example 58
Project: mini-blog-master  File: Ku6VideoHandler.java View source code
/**
 * Fetches a Ku6 video.
 *
 * <p>URLs that do not contain {@code v.ku6.com}, and URLs whose page cannot be parsed, are
 * delegated to the successor handler.
 *
 * @param url
 *            the video URL
 */
public Video getVideo(String url) {
    if (!url.contains("v.ku6.com")) {
        return this.successor.getVideo(url);
    }
    try {
        Document page = VideoUtil.getURLContent(url);
        String html = page.html();
        // video title
        String title = page.title();
        // video address: the "value" attribute of the elements with class text_A
        Elements flashFields = page.getElementsByClass("text_A");
        String flash = (flashFields != null) ? flashFields.attr("value") : null;
        // video thumbnail: the last http:// URL between "A.VideoInfo =" and "\", data: {"
        int infoStart = html.indexOf("A.VideoInfo =");
        int infoEnd = html.indexOf("\", data: {");
        String info = html.substring(infoStart, infoEnd);
        String pic = info.substring(info.lastIndexOf("http://"));
        Video video = new Video();
        video.setPic(pic);
        video.setFlash(flash);
        video.setTitle(title);
        return video;
    } catch (Exception e) {
        logger.error("---------------->error is " + e.getMessage());
        e.printStackTrace();
    }
    return this.successor.getVideo(url);
}
Example 59
Project: moulder-j-master  File: MoulderShop.java View source code
/**
 * Applies every registered templator configuration to the document: each element matching a
 * configuration's selector is replaced by the nodes its moulders produce.
 *
 * @param doc the document to transform in place
 */
public void process(Document doc) {
    for (TemplatorConfig config : registry.getConfig()) {
        for (Element target : doc.select(config.selector)) {
            Collection<Node> replacements = MouldersApplier.applyMoulders(config.templators, Arrays.<Node>asList(target));
            // Insert the generated nodes ahead of the target, then drop the target itself.
            for (Node replacement : replacements) {
                target.before(replacement.outerHtml());
            }
            target.remove();
        }
    }
}
Example 60
Project: myrobotlab-master  File: JSoupExtractor.java View source code
/**
 * Parses every value of {@code htmlField} as HTML, applies the configured jsoup selector and
 * adds each matching element to {@code outputField} of the same document.
 *
 * @param doc the document to enrich in place
 * @return always {@code null}
 */
@Override
public List<Document> processDocument(Document doc) {
    for (Object rawHtml : doc.getField(htmlField)) {
        org.jsoup.nodes.Document parsed = Jsoup.parse(rawHtml.toString());
        for (Element match : parsed.select(jSoupSelector)) {
            doc.addToField(outputField, match);
        }
    }
    return null;
}
Example 61
Project: NewsStats-master  File: NewYorkTimesContentHandler.java View source code
/**
 * Extracts the article contained in a crawled page, if any, and appends it to
 * {@code articles}.
 *
 * <p>Nothing is added when the page is not HTML, when it has no {@code #story} element, or
 * when {@code filterArticles} rejects the content. The creation date stays {@code null} when
 * the dateline cannot be parsed as {@code yyyy-MM-dd}.
 *
 * @param page the crawled page
 * @return the {@code articles} list, with the new article appended when one was accepted
 */
@Override
public List extractArticles(Page page) {
    if (!(page.getParseData() instanceof HtmlParseData)) {
        return articles;
    }
    System.out.println("Current URL: " + page.getWebURL());
    HtmlParseData parseData = (HtmlParseData) page.getParseData();
    Document doc = Jsoup.parseBodyFragment(parseData.getHtml());
    Element story = doc.getElementById("story");
    if (story == null) {
        // if no article can be found
        return articles;
    }
    String title = story.getElementById("story-heading").ownText();
    String dateString = story.getElementsByClass("dateline").first().attr("datetime");
    Date date = null;
    try {
        date = new SimpleDateFormat("yyyy-MM-dd").parse(dateString);
    } catch (ParseException e) {
        e.printStackTrace();
    }
    String author = story.getElementsByClass("byline-author").first().ownText();
    StringBuilder text = new StringBuilder();
    for (Element paragraph : story.select("p.story-body-text.story-content")) {
        text.append(paragraph.ownText());
    }
    String content = text.toString();
    if (!filterArticles(content)) {
        // ignore the article if filter does not approve
        return articles;
    }
    Article article = new NewYorkTimesArticle();
    article.setTitle(title);
    article.setCreatedDate(date);
    article.setAuthor(author);
    article.setContent(content);
    articles.add(article);
    return articles;
}
Example 62
Project: nocket-master  File: AbstractHtmlVisitor.java View source code
/**
 * Appends a {@code <ul wicket:id="groupTabbedPanel">} to the first form of the document when
 * none exists yet.
 *
 * <p>Only acts when the context's file/class-name strategy is a
 * {@link GroupNameFileAndClassNameStrategy} for the main object of a domain object with group
 * annotations.
 *
 * @param document the document to extend in place
 */
protected void checkAndAddGroupTabbedPanel(Document document) {
    if (!(getContext().getFileAndClassNameStrategy() instanceof GroupNameFileAndClassNameStrategy)) {
        return;
    }
    GroupNameFileAndClassNameStrategy groupStrategy = (GroupNameFileAndClassNameStrategy) getContext().getFileAndClassNameStrategy();
    boolean applicable = groupStrategy.isDomainObjectWithGroupAnnotations() && groupStrategy.isStrategyForMainObject();
    if (!applicable) {
        return;
    }
    if (!document.getElementsByAttributeValue("wicket:id", "groupTabbedPanel").isEmpty()) {
        // panel already present
        return;
    }
    Elements forms = document.getElementsByTag("form");
    if (forms.isEmpty()) {
        return;
    }
    org.jsoup.nodes.Element panel = forms.first().appendElement("ul");
    panel.attr("wicket:id", "groupTabbedPanel");
}
Example 63
Project: pack-master  File: PttGossiping.java View source code
/**
 * Prints the links of the most recent posts of the board, walking backwards from the newest
 * index page until at least {@code loadLastPosts} links have been collected.
 *
 * @param argv unused
 */
public static void main(String[] argv) {
    // Entering the board requires the "over18" cookie. The remote data is HTML. The second
    // link in the top-right action bar ("previous page") carries the index number of the page
    // before the newest one.
    String previousPageNumber = CrawlerPack.start()
            .addCookie("over18", "1")
            .getFromHtml(gossipMainPage)
            .select(".action-bar .pull-right > a")
            .get(1)
            .attr("href")
            .replaceAll("/bbs/Gossiping/index([0-9]+).html", "$1");
    // index number of the current last page
    int pageNumber = Integer.valueOf(previousPageNumber) + 1;
    List<String> postLinks = new ArrayList<String>();
    while (postLinks.size() < loadLastPosts) {
        String pageUrl = String.format(gossipIndexPage, pageNumber);
        pageNumber--;
        Elements titles = CrawlerPack.start().addCookie("over18", "1").getFromHtml(pageUrl).select(".title > a");
        for (Element title : titles) {
            postLinks.add(title.attr("href"));
        }
    }
    // show the result
    for (String postLink : postLinks) {
        System.out.println(postLink);
    }
}
Example 64
Project: qiushi_baike-master  File: KJFMUtils.java View source code
/**
 * Downloads one listing page and converts every {@code .xs-entry} inside {@code #xs-main}
 * into a {@link KJFMItem}.
 *
 * <p>For each entry the first link's href, the first image's {@code src}/{@code alt} and the
 * first paragraph's text are read. An href starting with {@code www} now gets the
 * {@code http://} scheme prepended; previously the scheme was appended to the end.
 *
 * @param pageNo page number appended to the base URL
 * @return the items of the page, or {@code null} when the page has no {@code #xs-main} or no
 *         entries
 * @throws IOException if the page cannot be downloaded
 */
public static LinkedList<KJFMItem> handleKJFMItems(String pageNo) throws IOException {
    LinkedList<KJFMItem> items = null;
    String url = KE_JI_FENG_MANG_URL + pageNo + "/";
    System.out.println("url = " + url);
    Document document = Jsoup.connect(url).timeout(20000).get();
    Element divTag = document.getElementById("xs-main");
    if (null != divTag) {
        Elements entryTags = divTag.getElementsByClass("xs-entry");
        if (null != entryTags && entryTags.size() > 0) {
            items = new LinkedList<KJFMItem>();
            for (Element e : entryTags) {
                KJFMItem item = new KJFMItem();
                Elements aTags = e.getElementsByTag("a");
                if (null != aTags && aTags.size() > 0) {
                    String url1 = aTags.get(0).attr("href");
                    System.out.println("url1 = " + url1);
                    if (url1 != null && url1.startsWith("www")) {
                        // scheme-less link: put the scheme in front
                        url1 = "http://" + url1;
                    }
                    // NOTE(review): the link is stored in title and is overwritten below
                    // whenever the entry has an image - confirm the intended field.
                    item.title = url1;
                }
                Elements imgTags = e.getElementsByTag("img");
                if (null != imgTags && imgTags.size() > 0) {
                    item.img = imgTags.get(0).attr("src");
                    item.title = imgTags.get(0).attr("alt");
                }
                Elements pTags = e.getElementsByTag("p");
                if (null != pTags && pTags.size() > 0) {
                    item.content = pTags.get(0).text();
                }
                items.add(item);
            }
        }
    }
    return items;
}
Example 65
Project: ripme-master  File: YoupornRipper.java View source code
/**
 * Downloads the page behind {@code this.url}, queues the {@code src} of its first
 * {@code video} element for download and waits for the download threads.
 *
 * @throws IOException if the page cannot be retrieved or contains no {@code video} element
 */
@Override
public void rip() throws IOException {
    logger.info("    Retrieving " + this.url);
    final Elements videoTags = Http.url(this.url).get().select("video");
    if (videoTags.isEmpty()) {
        throw new IOException("Could not find Embed code at " + url);
    }
    final String videoSrc = videoTags.get(0).attr("src");
    final String fileName = HOST + "_" + getGID(this.url);
    addURLToDownload(new URL(videoSrc), fileName);
    waitForThreads();
}
Example 66
Project: sagan-master  File: ApiDocumentMapper.java View source code
/**
 * Maps an API documentation page to an {@link ApiDoc} search entry.
 *
 * <p>The raw content is the text of all {@code .block} elements, or of all paragraphs when
 * there are none; the summary is its first 500 characters at most. The package name is only
 * set when exactly one {@code .header .subTitle} element exists.
 *
 * @param document the parsed page
 * @return the entry, or {@code null} for the {@code allclasses-frame.html} index page
 */
public ApiDoc map(Document document) {
    if (document.baseUri().endsWith("allclasses-frame.html")) {
        return null;
    }
    Elements blocks = document.select(".block");
    String apiContent = blocks.size() > 0 ? blocks.text() : document.select("p").text();
    Elements subTitle = document.select(".header .subTitle");

    ApiDoc entry = new ApiDoc();
    entry.setClassName(findClassName(document));
    if (subTitle.size() == 1) {
        entry.setPackageName(subTitle.text());
    }
    entry.setRawContent(apiContent);
    int summaryLength = Math.min(apiContent.length(), 500);
    entry.setSummary(apiContent.substring(0, summaryLength));
    entry.setTitle(document.title());
    entry.setSubTitle(String.format("%s (%s API)", project.getName(), version.getVersion()));
    entry.setPath(document.baseUri());
    entry.setCurrent(version.isCurrent());
    entry.setVersion(version.getVersion());
    entry.setProjectId(project.getId());
    entry.addFacetPaths("Projects", "Projects/Api", "Projects/" + project.getName(), "Projects/" + project.getName() + "/" + version.getVersion());
    return entry;
}
Example 67
Project: seldon-server-master  File: AllElementsTextListValueDynamicExtractor.java View source code
/**
 * Extracts a comma-separated, lower-cased list of the texts of all elements matching the CSS
 * selector given as first extractor argument. Elements whose stripped text is blank are
 * omitted.
 *
 * <p>The selector is validated before it is used: a missing or blank selector now yields
 * {@code null} instead of failing inside {@code select}, which rejects empty queries.
 *
 * @param attributeDetail carries the extractor arguments; the first one is the CSS selector
 * @param url not used by this extractor
 * @param articleDoc the parsed article
 * @return the joined texts (possibly empty), or {@code null} when no usable selector is
 *         configured
 * @throws Exception if the selector cannot be applied
 */
@Override
public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception {
    if ((attributeDetail.extractor_args == null) || (attributeDetail.extractor_args.size() < 1)) {
        return null;
    }
    String cssSelector = attributeDetail.extractor_args.get(0);
    if (!StringUtils.isNotBlank(cssSelector)) {
        return null;
    }
    Elements elements = articleDoc.select(cssSelector);
    if (elements == null) {
        return null;
    }
    StringBuilder sb = new StringBuilder();
    boolean isFirstInList = true;
    for (Element e : elements) {
        String eText = StringUtils.strip(e.text());
        if (StringUtils.isBlank(eText)) {
            continue;
        }
        if (isFirstInList) {
            isFirstInList = false;
        } else {
            sb.append(",");
        }
        sb.append(eText.toLowerCase());
    }
    return sb.toString();
}
Example 68
Project: SimpleFunctionalTest-master  File: UsingFixturesHelper.java View source code
/**
 * Asserts that the first two {@code div.instruction span} elements of the generated HTML
 * report carry the texts of the two delegated fixture calls.
 *
 * @throws IOException declared by the original signature
 */
private void thenAllFixturesOfThisFixturesHelperClassAreSeenAsFixturesOfTheUseCaseClassAndCanBeUsedAsItsOwn() throws IOException {
    final Elements instructionSpans = jUnitHtmlHelper.html.select("div.instruction span");
    final String firstCall = instructionSpans.get(0).text();
    Assert.assertEquals("First fixture", firstCall);
    final String secondCall = instructionSpans.get(1).text();
    Assert.assertEquals("Second fixture with parameter ABCDEFGHIJKLMNOPQRSTUVWXYZ and 99", secondCall);
}
Example 69
Project: SocialConnect-master  File: OpenTableSocialCrawlerTest.java View source code
/**
 * Extracts the review data from the crawler's document and checks that at least ten messages
 * are produced and that the first one carries the expected user, language, network and
 * customer values.
 */
@Test
public void testExtractReviewData() throws Exception {
    Elements reviewContainer = jsoupCrawler.getReviewDataContainer(jsoupCrawler.getDocument().body());
    List<Messages> result = jsoupCrawler.extractReviewDataFromHtml(reviewContainer, jsoupCrawler.getDocument().head(), 1L);
    assertTrue(result.size() >= 10);

    Messages first = result.get(0);
    // assertTrue(first.getMessage().startsWith("Even though Vapiano is one of my favorite places"));
    assertEquals("n/a", first.getNetworkUser());
    assertEquals("n/a", first.getNetworkUserId());
    assertEquals("en-US", first.getLanguage());
    assertEquals("OPENTABLE", first.getNetworkId());
    assertEquals(1, first.getCustomerId().longValue());
    // assertEquals("4.0", first.getNetworkUserRating());
}
Example 70
Project: SpiderJackson-master  File: YouDaiLiPage1.java View source code
/**
 * Collects every {@code a[href]} link found inside the page's {@code div.chunlist} blocks,
 * turns each into a GET {@code Url} handled by {@code YouDaiLiPage2} with a priority one
 * higher than the current url, and hands the batch to {@code urlService}.
 *
 * @return always {@code true}
 */
@Override
public boolean responseHandle(Proxy ip, ProxyController proxyController, Url url, UrlService urlService, ContextSrc contextSrc, HttpRequestBase request, CloseableHttpResponse response, String content) {
    ArrayList<Url> discovered = new ArrayList<>();
    Document page = Jsoup.parse(content);
    for (Element listBlock : page.select("div.chunlist")) {
        for (Element anchor : listBlock.select("a[href]")) {
            Url next = Url.newHttpGetUrl(anchor.attr("href"), YouDaiLiPage2.class);
            next.setPriority(url.getPriority() + 1);
            discovered.add(next);
        }
    }
    urlService.insert(discovered);
    return true;
}
Example 71
Project: StartupNews-master  File: SNCommentsParserV1.java View source code
/**
 * Builds an {@code SNComments} from a comments page by pairing each {@code span.comment}
 * with the {@code span.comhead} at the same position, then records the "More" link if any.
 *
 * @param doc parsed page; {@code null} yields an empty result
 * @return the parsed comments, never {@code null}
 * @throws Exception as declared by the overridden method
 */
@Override
public SNComments parseDocument(Document doc) throws Exception {
    SNComments result = new SNComments();
    if (doc == null) {
        return result;
    }

    Element body = doc.body();
    Iterator<Element> textIt = body.select("span.comment").iterator();
    Iterator<Element> headIt = body.select("span.comhead").iterator();

    // Walk both lists in lock-step; stops at the shorter one (and immediately when empty).
    while (headIt.hasNext() && textIt.hasNext()) {
        String commentText = textIt.next().text();
        Elements anchors = headIt.next().getElementsByTag("a");

        // NOTE(review): relies on every header holding at least four links
        // (user, link, parent, discuss); fewer raises IndexOutOfBoundsException.
        SNUser author = new SNUser();
        author.setId(anchors.get(0).text());
        String linkHref = anchors.get(1).attr("href");
        String parentHref = anchors.get(2).attr("href");
        Element discussAnchor = anchors.get(3);
        String discussHref = discussAnchor.attr("href");
        String discussTitle = discussAnchor.text();

        SNComment entry = new SNComment();
        entry.setUser(author);
        entry.setLinkURL(resolveRelativeSNURL(linkHref));
        entry.setParentURL(resolveRelativeSNURL(parentHref));
        entry.setDiscussURL(resolveRelativeSNURL(discussHref));
        entry.setText(commentText);
        entry.setArtistTitle(discussTitle);
        result.addComment(entry);
    }

    Elements moreAnchors = body.select("a:matches(More)");
    String moreURL = moreAnchors.size() > 0 ? resolveRelativeSNURL(moreAnchors.attr("href")) : null;
    result.setMoreURL(moreURL);
    return result;
}
Example 72
Project: StatusParser-master  File: vkOld.java View source code
/**
 * Downloads one slice of a VK wall and stores the text of each {@code div.wall_post_text}
 * element into the {@code statuses} array, in document order.
 *
 * <p>The first eight entries of {@code statuses} are cleared before fetching. Posts beyond
 * the capacity of {@code statuses} are ignored.
 *
 * @param id owner id; sent negated in the {@code owner_id} query parameter
 * @param firstPost value of the {@code offset} query parameter
 * @return the shared {@code statuses} array
 */
public String[] vkPart(int id, int firstPost) {
    for (int i = 0; i < 8; i++) {
        statuses[i] = null;
    }
    String partURL = "http://vk.com/al_wall.php?act=get_wall&al=1&fixed=&offset=" + firstPost + "&owner_id=-" + id + "&type=all";
    try {
        String partSource = getUrlSource(partURL);
        // Drops the first four characters of the response before parsing it as HTML.
        partSource = partSource.substring(4);
        Document partDom = Jsoup.parse(partSource);
        try {
            Elements postTexts = partDom.select("div.wall_post_text");
            int iter = 0;
            for (Element postText : postTexts) {
                // Stop once the array is full instead of running past its end.
                if (iter >= statuses.length) {
                    break;
                }
                statuses[iter] = postText.text();
                // Print the entry just stored; incrementing first printed the next (empty)
                // slot and indexed out of bounds on the last one.
                System.out.println(statuses[iter]);
                iter++;
            }
        } catch (NullPointerException eText) {
            eText.printStackTrace();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return statuses;
}
Example 73
Project: SuZhouTong-client-for-android-master  File: HtmlResolving.java View source code
/**
 * Parses the news content.
 *
 * <p>Every {@code span} contributes a text entry. Then every {@code p} is visited: one with
 * text contributes a text entry; one without text but with an {@code align} attribute
 * consumes the next element of the {@code [src]} list (starting at index 1) and, when that
 * element is an {@code img}, contributes an image entry holding its {@code src}.
 *
 * @param news_detaiol HTML of the news detail
 * @return the entries in the order described above
 */
public ArrayList<NewsContentVo> getNewsContent(String news_detaiol) {
    ArrayList<NewsContentVo> contents = new ArrayList<NewsContentVo>();
    Document document = Jsoup.parse(news_detaiol);

    for (Element span : document.getElementsByTag("span")) {
        NewsContentVo spanEntry = new NewsContentVo();
        spanEntry.setIsImg(0);
        spanEntry.setContentList(span.text());
        contents.add(spanEntry);
    }

    Elements paragraphs = document.getElementsByTag("p");
    Elements media = document.select("[src]");
    // Index into media; starts at 1, so the first [src] element is never used.
    int mediaIndex = 1;
    for (Element paragraph : paragraphs) {
        if (paragraph.hasText()) {
            NewsContentVo textEntry = new NewsContentVo();
            textEntry.setIsImg(0);
            textEntry.setContentList(paragraph.text());
            contents.add(textEntry);
            continue;
        }
        // mediaIndex is always >= 1, so "size > mediaIndex" also covers "size > 0".
        boolean mediaLeft = media != null && media.size() > mediaIndex;
        if (!paragraph.hasAttr("align") || !mediaLeft) {
            continue;
        }
        Element candidate = media.get(mediaIndex);
        if (candidate.tagName().equals("img")) {
            NewsContentVo imageEntry = new NewsContentVo();
            imageEntry.setIsImg(1);
            imageEntry.setContentList(candidate.attr("src"));
            contents.add(imageEntry);
        }
        mediaIndex++;
    }
    return contents;
}
Example 74
Project: Tanaguru-master  File: MarkupChecker.java View source code
/**
 * Currently performs no check: the whole body is commented out, so nothing is read from
 * {@code sspHandler} or {@code elements} and no solution is added to
 * {@code testSolutionHandler}.
 *
 * <p>The disabled code below validated the page markup through {@code MarkupValidator} and
 * reported PASSED or FAILED accordingly.
 */
@Override
protected void doCheck(SSPHandler sspHandler, Elements elements, TestSolutionHandler testSolutionHandler) {
//         boolean isHtmlValid = MarkupValidator.getInstance().isHtmlValid(
//                        sspHandler.getPage().getId(), 
//                        sspHandler.getSSP().getDOM());
//         if (isHtmlValid) {
//             testSolutionHandler.addTestSolution(TestSolution.PASSED);
//         } else {
//             testSolutionHandler.addTestSolution(TestSolution.FAILED);
//         }
}
Example 75
Project: tradeframework-master  File: HtmlSelectorMsgParser.java View source code
/**
 * Parses an HTML stream, creates a message for every node matching {@code selector} and
 * passes each non-null message to the handler.
 *
 * @param input HTML content
 * @param length not read by this parser
 * @param contentType content-type header value used to look up the charset; may be
 *     {@code null}, in which case no charset is passed to jsoup
 * @param handler receives each created message; returning {@code false} stops parsing
 * @return {@code false} if the handler rejected a message, {@code true} otherwise
 * @throws IOException if the stream cannot be read
 */
public boolean parseContent(InputStream input, long length, String contentType, MsgHandler handler) throws IOException {
    // A missing content type used to fail in matcher(); treat it as "charset unknown".
    String charsetName = null;
    if (contentType != null) {
        Matcher charset = charsetPattern.matcher(contentType);
        if (charset.find()) {
            charsetName = charset.group(1);
        }
    }
    Document doc = Jsoup.parse(input, charsetName, "");
    for (Element node : doc.select(selector)) {
        Msg msg = createMsg(node);
        if (msg != null && !handler.newMsg(msg)) {
            return false;
        }
    }
    return true;
}
Example 76
Project: UnisaConnect-master  File: Esse3CheckErrorMessage.java View source code
/**
 * Loads the home page and looks for an element whose own text contains "Messaggio".
 * The text of that element's next sibling is stored in {@code errorMessage}.
 *
 * @return {@code WRONG_DATA} when no such element or sibling exists, {@code ESSE3_PROBLEM}
 *     when a message was found or the request failed with an HTTP status error,
 *     {@code UNKNOWN_PROBLEM} on any other exception
 */
@Override
public LoadStates startScraper() {
    try {
        Elements labels = scraperGetUrl(homeUrl).getElementsContainingOwnText("Messaggio");
        Element messageNode = labels.isEmpty() ? null : labels.first().nextElementSibling();
        if (messageNode == null) {
            return LoadStates.WRONG_DATA;
        }
        errorMessage = messageNode.text();
        return LoadStates.ESSE3_PROBLEM;
    } catch (HttpStatusException httpError) {
        Log.w(Utils.TAG, "ERROR ", httpError);
        errorMessage = "Il servizio ESSE3 è temporaneamente non disponibile, puoi verificare il problema andando su: esse3web.unisa.it";
        return LoadStates.ESSE3_PROBLEM;
    } catch (Exception unexpected) {
        Log.w(Utils.TAG, "ERROR ", unexpected);
        return LoadStates.UNKNOWN_PROBLEM;
    }
}
Example 77
Project: uzlee-master  File: UserParser.java View source code
/**
 * Parses a user profile page and returns a successful {@code Response} whose data is the
 * extracted {@code User}.
 *
 * <p>Fields are read from fixed positions in the page; a page with a different layout makes
 * the element lookups fail with an exception.
 *
 * @param html profile page markup
 * @return response carrying the meta filled by {@code getDoc} and the parsed user
 */
public Response parse(String html) {
    Response.Meta resMeta = new Response.Meta();
    Document doc = getDoc(html, resMeta);
    String img = doc.select("div.avatar>img").attr("src");
    String uidLink = doc.select("li.searchpost a").attr("href");
    String uid = Utils.getUriQueryParameter(uidLink).get("srchuid");
    String name = doc.select("div.itemtitle.s_clear h1").text().trim();
    Element eProfile = doc.select("#profilecontent").first();
    Elements eBaseProfile = eProfile.select("> #baseprofile > table");
    String sex = eBaseProfile.first().select("td[width=70]").text().trim();
    String qq = eBaseProfile.get(1).select("a[href^=http://wpa.qq.com]").text().trim();
    // The substring offsets below presumably skip fixed-length labels — confirm against the page.
    String point = eProfile.select("> h3").get(1).text().trim().substring(4);
    eProfile = eProfile.select("> div.s_clear").get(1);
    String registerDate = eProfile.select("> .right > li").first().text().trim().substring(6);
    Elements eSubProfiles = eProfile.select("> ul").get(1).select("> li");
    String level = eSubProfiles.get(0).text().substring(7);
    String totalThreads = eSubProfiles.get(2).text().trim().substring(4);
    totalThreads = totalThreads.substring(0, totalThreads.indexOf("篇") - 1);
    User user = new User().setId(Integer.valueOf(uid)).setImage(img).setName(name).setRegisterDateStr(registerDate).setQq(qq).setSex(sex).setPoints(point).setLevel(level).setTotalThreads(totalThreads);
    Response res = new Response();
    res.setMeta(resMeta);
    // The payload is the parsed user; previously the response was stored into itself and
    // the user object was discarded.
    res.setData(user);
    res.setSuccess(true);
    return res;
}
Example 78
Project: VileBot-master  File: Ttc.java View source code
/**
 * Gathers the {@code p[class=veh-replace]} paragraphs of every
 * {@code div[class=alert-content]} whose text does not mention "elevator"
 * (case-insensitively).
 *
 * @param content HTML to parse
 * @return the collected paragraphs, possibly empty
 */
private Elements parseContent(String content) throws Exception {
    Elements collected = new Elements();
    Elements alertBlocks = Jsoup.parse(content).select("div[class=alert-content]");
    for (Element alertBlock : alertBlocks) {
        boolean mentionsElevator = alertBlock.text().toLowerCase().contains("elevator");
        if (mentionsElevator) {
            continue;
        }
        collected.addAll(alertBlock.select("p[class=veh-replace]"));
    }
    return collected;
}
Example 79
Project: w3act-master  File: MetadataExtractor.java View source code
/**
 * Copies title, publication date/year and up to three author names from a parsed page into
 * {@code document}.
 *
 * <p>Nothing is extracted (and an error is logged) when {@code nameSelector} matches no
 * element. The date is read from the {@code content} attribute of the first
 * {@code datePublishedSelector} match in {@code yyyy-MM-dd} format. Authors are read from the
 * text of the first {@code authorSelector} match, separated by commas or the word "and".
 *
 * @param document target whose fields are filled
 * @param doc parsed page to read from
 */
public void extract(Document document, org.jsoup.nodes.Document doc) {
    Elements name = doc.select(nameSelector);
    if (name.isEmpty()) {
        Logger.error("No " + nameSelector + " found!");
        return;
    }
    document.title = name.get(0).text();

    if (datePublishedSelector != null) {
        Elements datePublished = doc.select(datePublishedSelector);
        if (!datePublished.isEmpty()) {
            String rawDate = datePublished.get(0).attr("content");
            try {
                document.publicationDate = new SimpleDateFormat("yyyy-MM-dd").parse(rawDate);
                Calendar calendar = Calendar.getInstance();
                calendar.setTime(document.publicationDate);
                document.publicationYear = calendar.get(Calendar.YEAR);
            } catch (ParseException e) {
                // Best effort: leave the date unset, but do not hide the bad value.
                Logger.error("Unparseable publication date: " + rawDate);
            }
        }
    }

    if (authorSelector != null) {
        Elements author = doc.select(authorSelector);
        if (!author.isEmpty()) {
            // \b keeps "and" from splitting inside names such as "Sandra" or "Alexander".
            String[] authors = author.get(0).text().split(",|\\band\\b");
            int slot = 0;
            for (String rawAuthor : authors) {
                String trimmed = rawAuthor.trim();
                // Skip empty pieces, e.g. between ", " and "and" in "A, B, and C".
                if (trimmed.isEmpty()) {
                    continue;
                }
                String[] a = trimmed.split("\\s+", 2);
                String firstName = a[0];
                // A single-word name has no second part; reading a[1] used to throw.
                String lastName = a.length > 1 ? a[1] : null;
                slot++;
                if (slot == 1) {
                    document.author1Fn = firstName;
                    document.author1Ln = lastName;
                } else if (slot == 2) {
                    document.author2Fn = firstName;
                    document.author2Ln = lastName;
                } else {
                    document.author3Fn = firstName;
                    document.author3Ln = lastName;
                    break;
                }
            }
        }
    }
}
Example 80
Project: weishijie-develop-master  File: PicModel.java View source code
/**
 * Fetches the page at {@code params["url"] + "?page=" + params["page"]}, converts each
 * {@code views-row} block that contains a {@code chromeimg} element into a {@code Juzimi}
 * and emits the list to the subscriber.
 *
 * <p>On an I/O failure the stack trace is printed and {@code null} is emitted instead.
 */
@Override
public void call(Subscriber<? super List<Juzimi>> subscriber) {
    try {
        String pageUrl = params.get("url") + "?page=" + params.get("page");
        Document page = Jsoup.connect(pageUrl).get();
        Elements rows = page.select("div[class^=views-row views-row]");
        List<Juzimi> quotes = new ArrayList<Juzimi>();
        for (Element row : rows) {
            Juzimi quote = new Juzimi();

            // Rows without an image are skipped entirely.
            Elements images = row.getElementsByClass("chromeimg");
            if (images == null || images.isEmpty()) {
                continue;
            }
            quote.url = images.get(0).attr("src");

            Elements texts = row.getElementsByClass("xlistju");
            if (texts != null && !texts.isEmpty()) {
                quote.content = texts.get(0).text();
            }

            Elements senders = row.getElementsByClass("xqusernpop");
            if (senders != null && !senders.isEmpty()) {
                quote.sender = senders.get(0).text();
            }

            quotes.add(quote);
        }
        subscriber.onNext(quotes);
    } catch (IOException ioError) {
        ioError.printStackTrace();
        subscriber.onNext(null);
    }
}
Example 81
Project: xmpp-master  File: addPingLun.java View source code
/**
 * Reads {@code index.html}, walks its {@code comment_item} elements from last to first,
 * prints each comment and saves it through {@code News_pinglunDaoImpl}.
 */
public static void main(String[] args) {
    File in = new File("index.html");
    News_pinglunDaoImpl ndi = new News_pinglunDaoImpl();
    try {
        Elements items = Jsoup.parse(in, "UTF-8", "").getElementsByClass("comment_item");
        for (int i = items.size() - 1; i >= 0; i--) {
            Element item = items.get(i);

            // NOTE(review): the pattern literal looks mis-encoded; kept exactly as found.
            String ptime = item.getElementsByClass("ptime").text().replaceAll("·¢±í", "");
            String username = item.getElementsByClass("username").text();
            String avatar = item.getElementsByTag("img").attr("src");
            String pcontent = item.getElementsByClass("comment_body").text();
            System.out.println(ptime + "\t" + username + "\t" + avatar + "\t" + pcontent);

            // article id
            int id = 30;
            String user = username + ";" + avatar;
            String plocation = "";
            String zan = "0";
            News_pinglun news = new News_pinglun(id, user, plocation, ptime, pcontent, zan);
            // The result of save() is not acted upon.
            ndi.save(news);
        }
    } catch (IOException ioError) {
        ioError.printStackTrace();
    }
}
Example 82
Project: yyl-master  File: ZGYWikiHelper.java View source code
/**
 * Parses the response body as HTML, converts every table row except the first into an
 * {@code Account}, replaces the stored accounts with the new list and delivers the list to
 * {@code onNextAction} on the Android main thread.
 *
 * <p>If the body cannot be read the stack trace is printed and nothing is delivered.
 */
private void parseResponse(Response response, Action1<List<Account>> onNextAction) {
    Document document;
    try {
        document = Jsoup.parse(response.getBody().in(), "UTF-8", "https://wiki.zhenguanyu.com/");
    } catch (IOException ioError) {
        ioError.printStackTrace();
        return;
    }
    Elements rows = document.getElementsByTag("tr");
    Observable.from(rows)
            // The first row is the header.
            .skip(1)
            .map(row -> row.getElementsByTag("td").text().split(" "))
            .map(this::parseAccount)
            .toList()
            .doOnNext(accounts -> {
                AccountDBHelper.helper.clear();
                AccountDBHelper.helper.save(accounts);
            })
            .observeOn(AndroidSchedulers.mainThread())
            .subscribe(onNextAction);
}
Example 83
Project: zafu_jwc-master  File: ListLinks.java View source code
/**
 * Fetches the URL given as the single argument and prints its media ({@code [src]}),
 * imports ({@code link[href]}) and links ({@code a[href]}), in that order.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    String url = args[0];
    print("Fetching %s...", url);

    Document page = Jsoup.connect(url).get();
    Elements anchors = page.select("a[href]");
    Elements media = page.select("[src]");
    Elements imports = page.select("link[href]");

    print("\nMedia: (%d)", media.size());
    for (Element item : media) {
        if (!item.tagName().equals("img")) {
            print(" * %s: <%s>", item.tagName(), item.attr("abs:src"));
            continue;
        }
        // Images additionally report their dimensions and a shortened alt text.
        print(" * %s: <%s> %sx%s (%s)", item.tagName(), item.attr("abs:src"), item.attr("width"), item.attr("height"), trim(item.attr("alt"), 20));
    }

    print("\nImports: (%d)", imports.size());
    for (Element imported : imports) {
        print(" * %s <%s> (%s)", imported.tagName(), imported.attr("abs:href"), imported.attr("rel"));
    }

    print("\nLinks: (%d)", anchors.size());
    for (Element anchor : anchors) {
        print(" * a: <%s>  (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
    }
}
Example 84
Project: 4pdaClient-plus-master  File: UsersApi.java View source code
/**
 * Administration: admins, supers, moderators.
 *
 * <p>Downloads the forum leaders page and creates one {@code LeadUser} for every table row
 * whose first {@code td.row1} cell links to a {@code showuser=<id>} profile. Each user gets
 * the title of the surrounding group and the forums listed in the row's second cell.
 *
 * @param client HTTP client used to download the page
 * @return the users found, in page order
 * @throws IOException as declared for the page download
 */
public static ArrayList<LeadUser> getLeaders(IHttpClient client) throws IOException {
    String page = client.performGet("http://4pda.ru/forum/index.php?act=Stats&CODE=leaders");
    Document leadersPage = Jsoup.parse(page);
    ArrayList<LeadUser> leaders = new ArrayList<>();
    Pattern userIdPattern = Pattern.compile("showuser=(\\d+)", Pattern.CASE_INSENSITIVE);
    // NOTE(review): this label looks mis-encoded (probably the Russian for "All forums");
    // kept exactly as found — confirm against the real page text.
    String allForumsLabel = "Ð’Ñ?е форумы";

    for (Element groupBlock : leadersPage.select("div.borderwrap")) {
        String groupTitle = groupBlock.select("div.maintitle").first().text().trim();
        Elements rows = groupBlock.select("table.ipbtable").first().select("tr");
        for (Element row : rows) {
            Elements cells = row.select("td.row1");
            if (cells.isEmpty()) {
                continue;
            }
            Element profileLink = cells.get(0).select("a").first();
            Matcher idMatcher = userIdPattern.matcher(profileLink.attr("href"));
            if (!idMatcher.find()) {
                continue;
            }

            LeadUser leader = new LeadUser(idMatcher.group(1), profileLink.text());
            leader.setGroup(groupTitle);

            Elements forumOptions = cells.get(1).select("option");
            if (forumOptions.size() == 0 && allForumsLabel.equals(cells.get(1).text())) {
                leader.getForums().add(new Forum("-1", allForumsLabel));
            } else {
                for (Element option : forumOptions) {
                    String forumId = option.attr("value");
                    if ("-1".equals(forumId)) {
                        continue;
                    }
                    leader.getForums().add(new Forum(forumId, option.text()));
                }
            }
            leaders.add(leader);
        }
    }
    return leaders;
}
Example 85
Project: ambra-master  File: HtmlChecker.java View source code
/**
 * For every {@code p} element carrying an {@code xpathlocation} attribute other than
 * "noSelect", asserts that exactly one element has the derived anchor id as its {@code id}
 * and exactly one has it as its {@code name}.
 *
 * <p>The anchor id is the attribute value with brackets removed, slashes turned into dots
 * and the first character dropped.
 */
void check(Document html) {
    for (Element candidate : html.getElementsByAttribute("xpathlocation")) {
        if (!candidate.nodeName().equalsIgnoreCase("p")) {
            continue;
        }
        String xpathLocation = candidate.attr("xpathlocation");
        String anchorId = xpathLocation.replace("[", "").replace("]", "").replace("/", ".").substring(1);
        if (xpathLocation.equalsIgnoreCase("noSelect")) {
            continue;
        }
        int matchesById = html.getElementsByAttributeValue("id", anchorId).size();
        assertEquals(matchesById, 1, "Did not find the anchor tag for the given paragraph");
        int matchesByName = html.getElementsByAttributeValue("name", anchorId).size();
        assertEquals(matchesByName, 1, "Did not find the anchor tag for the given paragraph");
    }
}
Example 86
Project: android-opensource-library-56-master  File: RssLoader.java View source code
/**
 * Adds one {@code Item} to {@code mList} for every {@code item} element of the document,
 * taking title and url from the first {@code title} and {@code link} descendants when
 * present. {@code mList} is created on first use.
 */
private void parseDomTraverse(Document document) {
    for (Element entry : document.getElementsByTag("item")) {
        Item item = new Item();
        Elements titles = entry.getElementsByTag("title");
        Elements links = entry.getElementsByTag("link");
        if (titles.size() > 0) {
            item.title = titles.first().text();
        }
        if (links.size() > 0) {
            item.url = links.first().text();
        }
        if (mList == null) {
            mList = new RssList();
        }
        mList.addItem(item);
    }
}
Example 87
Project: AndroidOpenTextbook-master  File: MainActivity.java View source code
/**
 * Parses the RSS body with jsoup's XML parser, builds an {@code Item} (title and url) for
 * each {@code item} element and passes the list to {@code showRss}.
 *
 * <p>NOTE(review): an {@code item} without a {@code title} or {@code link} makes
 * {@code first()} return null and the following {@code text()} call fail.
 */
private void parseRss(String rssBody) {
    Document feed = Jsoup.parse(rssBody, "", Parser.xmlParser());
    List<Item> parsedItems = new ArrayList<Item>();
    for (Element entry : feed.select("item")) {
        Item item = new Item();
        String title = entry.select("title").first().text();
        item.setTitle(title);
        String link = entry.select("link").first().text();
        item.setUrl(link);
        parsedItems.add(item);
    }
    showRss(parsedItems);
}
Example 88
Project: asta4d-master  File: ElementUtil.java View source code
/**
 * Removes every element under {@code target} that matches {@code selector}.
 *
 * <p>The target itself and elements that no longer belong to a document are left alone.
 *
 * @param target root of the search
 * @param selector CSS selector of the elements to remove
 * @param pullupChildren when {@code true}, {@code pullupChildren(element)} is called on each
 *     element before it is removed
 */
public static final void removeNodesBySelector(Element target, String selector, boolean pullupChildren) {
    for (Element candidate : target.select(selector)) {
        if (candidate == target || candidate.ownerDocument() == null) {
            continue;
        }
        if (pullupChildren) {
            pullupChildren(candidate);
        }
        candidate.remove();
    }
}
Example 89
Project: blade.tools-master  File: MarkdownParser.java View source code
/**
 * Maps the {@code href} of every link that directly contains an {@code h3} to the markup
 * of the link's siblings, starting at the link's own sibling index within
 * {@code siblingNodes()} and stopping before the first node whose markup starts with
 * {@code <hr}.
 *
 * @param html markup to parse
 * @return href-to-markup map; a repeated href keeps the last section
 */
private static Map<String, String> parseHtml(String html) {
    Map<String, String> retval = new HashMap<>();
    for (Element heading : Jsoup.parse(html).select("a[href] > h3")) {
        Element anchor = heading.parent();
        int start = anchor.siblingIndex();
        List<Node> siblings = anchor.siblingNodes();
        StringBuilder section = new StringBuilder();
        for (int i = start; i < siblings.size(); i++) {
            String markup = siblings.get(i).toString();
            if (markup.startsWith("<hr")) {
                break;
            }
            section.append(markup);
        }
        retval.put(anchor.attr("href"), section.toString());
    }
    return retval;
}
Example 90
Project: calcite-master  File: FileEnumerator.java View source code
public boolean moveNext() {
    try {
        if (this.iterator.hasNext()) {
            final Elements row = this.iterator.next();
            current = this.converter.toRow(row, this.fields);
            return true;
        } else {
            current = null;
            return false;
        }
    } catch (RuntimeExceptionError |  e) {
        throw e;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Example 91
Project: CodeUtils-master  File: Xedtt.java View source code
/**
 * Downloads a listing page and, for each {@code li} of its first {@code miao-con} element,
 * downloads the linked detail page to collect the full image URL and size.
 *
 * @param appUrl URL of the listing page
 * @return one {@code GifInfo} per list entry
 * @throws Exception if a download fails or an expected element is missing
 */
private static List<GifInfo> getAllGif(String appUrl) throws Exception {
    System.out.println("get gif ... url = " + appUrl);
    List<GifInfo> infos = new ArrayList<>();
    Document listing = Jsoup.parse(HttpUtils.getString(appUrl));
    Elements entries = listing.getElementsByClass("miao-con").get(0).getElementsByTag("li");
    for (Element entry : entries) {
        String title = entry.text();
        String thumbnailImgUrl = entry.getElementsByTag("img").get(0).attr("src");
        String detailUrl = hostUrl + entry.getElementsByTag("a").get(0).attr("href");

        GifInfo info = new GifInfo();
        info.title = title;
        info.thumbnailImgUrl = thumbnailImgUrl;

        Document detailPage = Jsoup.parse(HttpUtils.getString(detailUrl));
        Element image = detailPage.getElementsByClass("content").get(0).getElementsByTag("img").get(0);
        String imgUrl = image.attr("src");

        int width = 0;
        int height = 0;
        try {
            width = Integer.parseInt(image.attr("width"));
            height = Integer.parseInt(image.attr("height"));
        } catch (Exception ignored) {
            // A missing or non-numeric size leaves the unparsed value(s) at 0.
        }

        info.imgUrl = imgUrl;
        info.width = width;
        info.height = height;
        infos.add(info);
    }
    return infos;
}
Example 92
Project: coding2017-master  File: Struts.java View source code
/**
 * Looks up {@code actionName} in a remote {@code struts.xml}, instantiates the configured
 * action class reflectively, injects the parameters, invokes {@code execute()} and builds
 * the resulting {@code View}.
 *
 * @param actionName value of the {@code name} attribute of the action to run
 * @param parameters field name to value; each value is written to the declared field of
 *     that name on the action instance
 * @return the view for the matching action, or {@code null} when the configuration could
 *     not be loaded, no action matched, or running the action raised an exception
 */
public static View runAction(String actionName, Map<String, String> parameters) {
    // 0. Read the struts.xml configuration file using Jsoup.
    Document document = null;
    try {
        document = Jsoup.connect("http://my.977996067.cn/2017_2/struts.xml").get();
    } catch (IOException e) {
        System.err.println("xml解�失败");
    }
    // Get all action tags (null when the document could not be loaded).
    Elements actions = document != null ? document.select("action") : null;
    if (actions == null)
        return null;
    for (Element actionElement : actions) {
        String name = actionElement.attr("name");
        // Instantiate the matching action through reflection.
        if (actionName.equals(name)) {
            try {
                Class<?> actionClass = Class.forName(actionElement.attr("class"));
                Object o = actionClass.getConstructor().newInstance();
                parameters.forEach(( k,  v) -> {
                    try {
                        Field field = actionClass.getDeclaredField(k);
                        field.setAccessible(true);
                        // Assign the parameter value to the field.
                        field.set(o, v);
                    } catch (Exception e) {
                        // A parameter that cannot be set is reported and skipped.
                        e.printStackTrace();
                    }
                });
                // Invoke the execute method and keep its return value.
                Method method = actionClass.getMethod("execute");
                String returnValue = (String) method.invoke(o);
                View view = new View();
                Map map = new HashMap();
                // Call every public method whose name starts with "get" without arguments and
                // store the result under the lower-cased remainder of the method name.
                Arrays.stream(actionClass.getMethods()).forEach(((Method m) -> {
                    try {
                        String methodName = m.getName();
                        if (methodName.startsWith("get"))
                            map.put(methodName.substring(methodName.indexOf("get") + 3).toLowerCase(), m.invoke(o));
                    } catch (Exception e) {
                        // A getter that fails is reported and skipped.
                        e.printStackTrace();
                    }
                }));
                view.setParameters(map);
                // Pick the view name from the result element whose name equals the return value.
                Elements children = actionElement.children().select("result");
                for (Element aChildren : children) {
                    if (returnValue.equals(aChildren.attr("name"))) {
                        view.setJsp(aChildren.text());
                    }
                }
                System.out.println(view);
                return view;
            } catch (Exception e) {
                // The failure is printed and the search continues with the next action element.
                e.printStackTrace();
            }
        }
    }
    return null;
}
Example 93
Project: dd4t-2-java-master  File: RichTextUtils.java View source code
/**
 * Rewrites the values of a DD4T XHTML field:
 * <ol>
 *   <li>strips the xhtml and xlink namespace declarations and the {@code xlink:} prefix;</li>
 *   <li>optionally resolves component links (attributes {@code XLINK_HREF} matching
 *       {@code TCM}) through the link resolver, prefixing resolved links with the context
 *       path and blanking links that cannot be resolved;</li>
 *   <li>prefixes image sources with the context path when one is given.</li>
 * </ol>
 * The rewritten values replace the field's text values.
 *
 * @param xhtmlFields the DD4T Xhtml Field
 * @param resolveLinks resolve links as well as stripping namespaces
 * @param linkResolver a concrete link resolver; links are left untouched when {@code null}
 * @param contextPath prefix for resolved links and image sources; {@code null} means none
 * @throws ItemNotFoundException as declared; presumably raised by the link resolver
 * @throws SerializationException as declared; presumably raised by the link resolver
 */
public static void resolveXhtmlField(final XhtmlField xhtmlFields, final boolean resolveLinks, final LinkResolver linkResolver, final String contextPath) throws ItemNotFoundException, SerializationException {
    List<Object> xhtmlValues = xhtmlFields.getValues();
    List<String> newValues = new ArrayList<>();
    String contextPathToUse = contextPath == null ? "" : contextPath;
    for (Object xhtmlField : xhtmlValues) {
        if (StringUtils.isEmpty((String) xhtmlField)) {
            newValues.add("");
            continue;
        }
        Document document = Jsoup.parseBodyFragment("<" + XHTMLBODYROOT + ">" + xhtmlField + "</" + XHTMLBODYROOT + ">");
        Element xhtmlBodyRoot = document.getElementsByTag(XHTMLBODYROOT).first();
        // Check for a missing root BEFORE using it. The old check came after the link and
        // image handling had already dereferenced it. As before, such a value contributes
        // no entry.
        if (xhtmlBodyRoot == null) {
            continue;
        }
        if (resolveLinks && linkResolver != null) {
            Elements links = xhtmlBodyRoot.getElementsByAttributeValueMatching(XLINK_HREF, TCM);
            for (Element link : links) {
                String resolvedLink = linkResolver.resolve(link.attr(XLINK_HREF));
                if (StringUtils.isNotEmpty(resolvedLink)) {
                    link.attr(XLINK_HREF, contextPathToUse + resolvedLink);
                } else {
                    link.attr(XLINK_HREF, "");
                }
            }
        }
        if (StringUtils.isNotEmpty(contextPathToUse)) {
            Elements images = xhtmlBodyRoot.getElementsByTag(IMG_TAG);
            for (Element image : images) {
                String src = image.attr(SRC_ATTR);
                image.attr(SRC_ATTR, contextPathToUse + src);
            }
        }
        newValues.add(xhtmlBodyRoot.html().replaceAll("(?ims)xlink:|xmlns(=\"http://www\\.w3\\.org/1999/xhtml\"\\s*|:xlink=\"http://www\\.w3\\.org/1999/xlink\"\\s*)", ""));
    }
    xhtmlFields.setTextValues(newValues);
}
Example 94
Project: Devtf_APP-master  File: HtmlInputRequest.java View source code
/**
 * Parses the response bytes as UTF-8 HTML and turns each body row of the first table into
 * an {@code EmploymentItem} (company, job name and link, post time, e-mail).
 *
 * @return a success response with the items and cache headers, or an error response
 *     wrapping any exception raised while parsing
 */
@Override
protected Response<List<EmploymentItem>> parseNetworkResponse(NetworkResponse response) {
    List<EmploymentItem> eiList = null;
    try {
        Document page = Jsoup.parse(new ByteArrayInputStream(response.data), "UTF-8", WebAPI.BASE_URL);
        Elements rows = page.getElementsByTag("table").get(0).select("tbody").select("tr");
        eiList = new ArrayList<EmploymentItem>();
        for (Element row : rows) {
            Elements cells = row.select("td");
            EmploymentItem job = new EmploymentItem();
            job.setCompanyName(cells.get(0).text());
            // The second cell holds the job link: its text is the name, its href the address.
            Elements jobLink = cells.get(1).select("a");
            job.setJobName(jobLink.text());
            job.setJobDescAddress(jobLink.attr("href"));
            job.setPostTempt(cells.get(2).text());
            job.setEmail(cells.get(3).text());
            eiList.add(job);
        }
    } catch (Exception parseFailure) {
        return Response.error(new ParseError(parseFailure));
    }
    return Response.success(eiList, HttpHeaderParser.parseCacheHeaders(response));
}
Example 95
Project: EhViewer-master  File: WhatsHotParser.java View source code
/**
 * Parses the "what's hot" block: every {@code id1} element inside the element with id
 * {@code pp} becomes a {@code GalleryInfo} built from its {@code id3} link and thumbnail.
 *
 * @param body page markup
 * @return the galleries in page order
 * @throws ParseException if anything goes wrong while parsing; the original exception is
 *     not attached
 */
@SuppressWarnings("ConstantConditions")
public static List<GalleryInfo> parse(String body) throws ParseException {
    try {
        List<GalleryInfo> galleries = new ArrayList<>(15);
        Element container = Jsoup.parse(body).getElementById("pp");
        for (Element id1 : container.getElementsByClass("id1")) {
            GalleryInfo gallery = new GalleryInfo();
            Element id3 = JsoupUtils.getElementByClass(id1, "id3");
            Element link = JsoupUtils.getElementByTag(id3, "a");
            GalleryDetailUrlParser.Result result = GalleryDetailUrlParser.parse(link.attr("href"));
            gallery.gid = result.gid;
            gallery.token = result.token;
            Element thumbnail = JsoupUtils.getElementByTag(link, "img");
            gallery.thumb = EhUtils.handleThumbUrlResolution(thumbnail.attr("src"));
            gallery.title = thumbnail.attr("title");
            gallery.generateSLang();
            galleries.add(gallery);
        }
        return galleries;
    } catch (Exception e) {
        throw new ParseException("Parse whats hot error", body);
    }
}
Example 96
Project: firing-range-master  File: Expression.java View source code
/**
 * Parses the {@code q} parameter as an HTML fragment and echoes back, via
 * {@code Responses.sendXssed}, every element that has no attribute starting with "on", no
 * {@code href}/{@code src} attribute and no {@code style} attribute containing "expression".
 *
 * <p>Responds with a 400 error when {@code q} is missing or contains no tags.
 *
 * <p>NOTE(review): the filtering looks intentionally incomplete for a test bed — confirm
 * before hardening it.
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
    String q = request.getParameter("q");
    if (q == null) {
        Responses.sendError(response, "Missing q parameter", 400);
        return;
    }

    Element body = Jsoup.parseBodyFragment(q).body();
    Elements candidates = body.getAllElements();
    // getAllElements() includes the body itself; only its descendants are of interest.
    candidates.remove(body);
    if (candidates.isEmpty()) {
        Responses.sendError(response, "Invalid input, no tags", 400);
        return;
    }

    StringBuilder echoed = new StringBuilder();
    for (Element candidate : candidates) {
        boolean accepted = true;
        for (Attribute attribute : candidate.attributes()) {
            String key = attribute.getKey().toLowerCase();
            boolean blockedKey = key.startsWith("on") || key.equals("href") || key.equals("src");
            boolean styleExpression = key.equals("style") && attribute.getValue().toLowerCase().contains("expression");
            if (blockedKey || styleExpression) {
                accepted = false;
            }
        }
        if (accepted) {
            echoed.append(candidate.toString());
        }
    }
    Responses.sendXssed(response, echoed.toString());
}
Example 97
Project: Girls-master  File: TypeImageListModelImpl.java View source code
/**
 * Fetches the page at {@code url}, reads every link inside the element with id
 * {@code gallery-1}, and emits the resulting list to the subscriber.
 *
 * <p>For each {@code a} element the absolute {@code href} is combined with the
 * {@code src}, {@code width} and {@code height} attributes read from the {@code img}
 * elements under it. {@code Integer.valueOf} throws {@code NumberFormatException} if
 * a width or height is missing or non-numeric; that is not caught here.
 *
 * @param subscriber receives the parsed list through {@code onNext}, or the
 *                   {@code IOException} through {@code onError} if the fetch fails
 */
@Override
public void call(Subscriber<? super List<TypeImageDomain>> subscriber) {
    List<TypeImageDomain> typeImageDomains = new ArrayList<TypeImageDomain>();
    try {
        Document document = Jsoup.connect(url).get();
        Element element = document.getElementById("gallery-1");
        Elements elementsA = element.getElementsByTag("a");
        for (Element a : elementsA) {
            String linkUrl = a.attr("abs:href");
            Elements img = a.getElementsByTag("img");
            String src = img.attr("src");
            String width = img.attr("width");
            String height = img.attr("height");
            typeImageDomains.add(new TypeImageDomain(Integer.valueOf(width), Integer.valueOf(height), src, linkUrl));
        }
    } catch (IOException e) {
        subscriber.onError(e);
        // onError is terminal: stop here instead of falling through to onNext.
        return;
    }
    // The former debug print of typeImageDomains.get(0) was removed: it threw
    // IndexOutOfBoundsException whenever the gallery produced no entries.
    subscriber.onNext(typeImageDomains);
}
Example 98
Project: HabReader-master  File: UsersLoader.java View source code
/**
 * Downloads the page at {@code url} and converts every {@code div.user} element into
 * a {@code UsersData} entry.
 *
 * <p>Each entry takes its name and absolute profile link from the username anchor,
 * and its rating, karma, avatar {@code src} and lifetime from the matching child
 * elements. The first match of each selector is used.
 *
 * @return the users parsed so far; empty if the page could not be fetched
 */
@Override
public ArrayList<UsersData> loadInBackground() {
    ArrayList<UsersData> data = new ArrayList<UsersData>();
    try {
        Log.i(TAG, "Loading a page: " + url);
        Document document = Jsoup.connect(url).get();
        Elements users = document.select("div.user");
        for (Element user : users) {
            UsersData usersData = new UsersData();
            Element rating = user.select("div.rating").first();
            Element karma = user.select("div.karma").first();
            Element avatar = user.select("div.avatar > a > img").first();
            Element name = user.select("div.userlogin > div.username > a").first();
            Element lifetime = user.select("div.info > div.lifetime").first();
            usersData.setName(name.text());
            usersData.setUrl(name.attr("abs:href"));
            usersData.setRating(rating.text());
            usersData.setKarma(karma.text());
            usersData.setAvatar(avatar.attr("src"));
            usersData.setLifetime(lifetime.text());
            data.add(usersData);
        }
    } catch (IOException e) {
        // Still best effort (the caller gets whatever was collected), but the
        // failure is no longer silent.
        Log.e(TAG, "Failed to load page: " + url, e);
    }
    return data;
}
Example 99
Project: halgnu-master  File: GoogleSearchListener.java View source code
/**
 * Runs a Google search for {@code query} and formats the first matching link.
 *
 * <p>The link's {@code href} is resolved to an absolute URL. When it contains a
 * {@code key=value&...} query part, the value between the first {@code '='} and the
 * following {@code '&'} is URL-decoded and used as the target; otherwise the
 * absolute URL is used unchanged.
 *
 * @param query search terms; URL-encoded as UTF-8 before being sent
 * @return {@code "<title>: <url>"} for the first link matching {@code li.g>h3>a}, or
 *         {@code "No results found"} when the request fails or nothing matches
 */
private String getGoogleResult(String query) {
    String google = "http://www.google.com/search?q=";
    String charset = "UTF-8";
    String userAgent = "HalGNU 1.0 (+https://github.com/R4stl1n/halgnu)";
    String result = "No results found";
    Elements links = null;
    try {
        links = Jsoup.connect(google + URLEncoder.encode(query, charset)).userAgent(userAgent).get().select("li.g>h3>a");
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Previously the default was always overwritten, yielding ": " on failure.
    if (links == null || links.isEmpty()) {
        return result;
    }

    String title = links.first().text();
    String url = links.first().absUrl("href");

    // Only unwrap the target when both delimiters exist in the expected order;
    // an unguarded substring threw StringIndexOutOfBoundsException otherwise.
    int valueStart = url.indexOf('=') + 1;
    int valueEnd = url.indexOf('&', valueStart);
    if (valueStart > 0 && valueEnd >= 0) {
        try {
            url = URLDecoder.decode(url.substring(valueStart, valueEnd), charset);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }
    System.out.println("Title: " + title);
    System.out.println("URL: " + url);

    result = title + ": " + url;
    return result;
}
Example 100
Project: IntelliDict-master  File: PonsDictionary.java View source code
/**
 * Builds the list of sub-entries found in the loaded document for language
 * {@code "pl"}.
 *
 * <p>Calls {@code loadDocument()}, then converts the first {@code tbody} of every
 * element returned by {@code extractPonsEntryInHtmlForSpecifiedLanguage("pl")} into
 * a map via {@code extractPonsSubEntryFromTBodyElement}.
 *
 * @param entry not read by this method ({@code loadDocument()} is called without
 *              arguments); NOTE(review): confirm whether it should drive the lookup
 * @return one map per extracted element, in iteration order
 */
public static List<Map> getPonsEntry(String entry) {
    loadDocument();
    Elements ponsEntryInHtml = extractPonsEntryInHtmlForSpecifiedLanguage("pl");
    List<Map> ponsEntry = new ArrayList<Map>();
    for (Element ponsSubEntryInHtml : ponsEntryInHtml) {
        // The thead text used to be read into an unused local here; it was dropped
        // because it threw NullPointerException for any element without a thead.
        Element ponsSubEntryInTBodyElement = ponsSubEntryInHtml.getElementsByTag("tbody").first();
        Map<String, String> ponsSubEntry = extractPonsSubEntryFromTBodyElement(ponsSubEntryInTBodyElement);
        ponsEntry.add(ponsSubEntry);
    }
    return ponsEntry;
}
Example 101
Project: jinjava-master  File: BatchFilterTest.java View source code
/**
 * Checks that the output of {@code render("batch-filter", ...)} for six items
 * contains two {@code tr} rows of three {@code td} cells each, holding "1".."6" in
 * order.
 */
@Test
public void batchFilterNoBackfill() {
    Object items = Lists.newArrayList("1", "2", "3", "4", "5", "6");
    Map<String, Object> context = ImmutableMap.of("items", items);
    Document dom = Jsoup.parseBodyFragment(render("batch-filter", context));

    // Expected cell text, one inner array per table row.
    String[][] expectedRows = { { "1", "2", "3" }, { "4", "5", "6" } };

    Elements trs = dom.select("tr");
    assertThat(trs).hasSize(expectedRows.length);
    for (int row = 0; row < expectedRows.length; row++) {
        Elements cells = trs.get(row).select("td");
        assertThat(cells).hasSize(expectedRows[row].length);
        for (int col = 0; col < expectedRows[row].length; col++) {
            assertThat(cells.get(col).text()).isEqualTo(expectedRows[row][col]);
        }
    }
}