Java Examples for org.jsoup.Jsoup

The following Java examples will help you understand how org.jsoup.Jsoup is used. These source code samples are taken from various open source projects.

Example 1
Project: lavender-master  File: RamdomImgParser.java View source code
/**
 * Extracts the image URL from the photo detail wrapper of a page.
 *
 * <p>Only the first {@code div} whose {@code id} is exactly
 * {@code photo-detail-wrapper} is examined.
 *
 * @param html the HTML markup to parse
 * @return the {@code src} attribute of the first {@code img} inside that div, or
 *         {@code null} when there is no such div or it contains no image
 */
public static String parserImg(String html) {
    Document document = Jsoup.parse(html);
    for (Element div : document.select("div")) {
        if (!"photo-detail-wrapper".equals(div.attr("id"))) {
            continue;
        }
        // first() yields null for an empty selection, so check before reading the attribute.
        Element image = div.select("img").first();
        return image == null ? null : image.attr("src");
    }
    return null;
}
Example 2
Project: sagan-master  File: QuestionsIndexTests.java View source code
/**
 * Requests {@code /questions}, expects an HTML-compatible 200 response and checks that the
 * text of the page body contains the header title, project names and tags.
 */
@Ignore
@Test
public void showsQuestionsIndex() throws Exception {
    MvcResult result = mockMvc.perform(get("/questions"))
            .andExpect(status().isOk())
            .andExpect(content().contentTypeCompatibleWith("text/html"))
            .andReturn();
    String pageText = Jsoup.parse(result.getResponse().getContentAsString()).select("body").text();

    String[] expectedFragments = {
        // header title
        "Spring at StackOverflow",
        // latest spring-* questions pulled into the left 2/3 of the page
        // tags on the right 1/3 of the page. see seed data in
        // sagan-common/src/main/resources/database/V4__stackoverflow_tags.sql
        "Spring Framework",
        "[spring-framework]",
        "[spring-core]",
        "[dependency-injection]",
        "Spring Data",
        "[spring-data]",
        "[spring-data-mongodb]",
        "[spring-data-neo4j]"
    };
    for (String fragment : expectedFragments) {
        assertThat(pageText, containsString(fragment));
    }
}
Example 3
Project: android-opensource-library-56-master  File: SanitizeActivity.java View source code
/**
 * Sets up the sanitize screen: pre-fills the input field with sample markup and wires the
 * button so that a click writes the cleaned markup into the output field.
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_sanitize);

    final EditText source = (EditText) findViewById(R.id.input_text);
    // Sample markup carrying an inline onclick handler.
    source.setText("<p><a href='http://example.com/' onclick='doAttack()'>Link</a></p>");
    final EditText target = (EditText) findViewById(R.id.sanitized_text);

    findViewById(R.id.sanitize_button).setOnClickListener(new OnClickListener() {

        @Override
        public void onClick(View v) {
            // Clean the current input with the basic whitelist and show the result.
            String rawMarkup = source.getText().toString();
            target.setText(Jsoup.clean(rawMarkup, Whitelist.basic()));
        }
    });
}
Example 4
Project: Android-Studio-Project-master  File: ContentParser.java View source code
/**
 * Builds a {@link Content} from the second PNG/JPEG image found in the given markup.
 *
 * @param html the HTML markup to parse
 * @return a content object whose URL and title are taken from that image's {@code src}
 *         and {@code alt} attributes
 */
public static Content Parser(String html) {
    Elements images = Jsoup.parse(html).select("img[src~=(?i)\\.(png|jpe?g)]");
    Content result = new Content();
    // Index 1 picks the second match; fewer than two matches makes get(1) fail.
    Element picked = images.get(1).getElementsByTag("img").first();
    String source = picked.attr("src");
    String altText = picked.attr("alt");
    result.setUrl(source);
    result.setTitle(altText);
    return result;
}
Example 5
Project: coolreader-master  File: DownloadPageTask.java View source code
/**
 * Downloads and parses the page at the first URL argument.
 *
 * @param arg0 the URLs passed to the task; only the first one is used
 * @return a result wrapping the parsed document, or wrapping the exception if anything fails
 */
@Override
protected AsyncTaskResult<Document> doInBackground(URL... arg0) {
    try {
        final String address = arg0[0].toString();
        Log.d("DownloadPageTask", "Downloading: " + address);
        // 7000 ms timeout for the request
        Response fetched = Jsoup.connect(address).timeout(7000).execute();
        Log.d("DownloadPageTask", "Complete: " + address);
        return new AsyncTaskResult<Document>(fetched.parse());
    } catch (Exception failure) {
        return new AsyncTaskResult<Document>(failure);
    }
}
Example 6
Project: jinjava-master  File: GroupByFilterTest.java View source code
/**
 * Renders the {@code filter/groupby-attr.jinja} template with five persons and checks the
 * resulting list structure: two groups, three entries under "male" and two under "female".
 */
@Test
public void testGroupByAttr() throws Exception {
    String template = Resources.toString(Resources.getResource("filter/groupby-attr.jinja"), StandardCharsets.UTF_8);
    Object persons = Lists.newArrayList(
            new Person("male", "jared", "stehler"),
            new Person("male", "foo", "bar"),
            new Person("female", "sarah", "jones"),
            new Person("male", "jim", "jones"),
            new Person("female", "barb", "smith"));
    String rendered = jinjava.render(template, ImmutableMap.of("persons", persons));
    Document dom = Jsoup.parseBodyFragment(rendered);

    assertThat(dom.select("ul.root > li")).hasSize(2);
    assertThat(dom.select("ul.root > li.male > ul > li")).hasSize(3);
    assertThat(dom.select("ul.root > li.female > ul > li")).hasSize(2);
}
Example 7
Project: jooby-master  File: Issue624d.java View source code
/**
 * Checks that the form served for {@code /saved-url} posts to the form-client auth URL,
 * and that authenticating afterwards yields {@code /saved-url}.
 */
@Test
public void shouldForceARedirect() throws Exception {
    request().get("/saved-url").expect(rsp -> {
        String formAction = Jsoup.parse(rsp).select("form").attr("action");
        assertEquals("/auth?client_name=FormClient", formAction);
    });
    request().get("/auth?username=test&password=test").expect("/saved-url");
}
Example 8
Project: LNReader-Android-master  File: DownloadPageTask.java View source code
/**
 * Downloads and parses the page at the first URL argument.
 *
 * @param arg0 the URLs passed to the task; only the first one is used
 * @return a result carrying the parsed document, or a result carrying {@code null} and the
 *         exception if anything fails
 */
@Override
protected AsyncTaskResult<Document> doInBackground(URL... arg0) {
    try {
        final String address = arg0[0].toString();
        Log.d("DownloadPageTask", "Downloading: " + address);
        // 7000 ms timeout for the request
        Response fetched = Jsoup.connect(address).timeout(7000).execute();
        Log.d("DownloadPageTask", "Complete: " + address);
        return new AsyncTaskResult<Document>(fetched.parse(), Document.class);
    } catch (Exception failure) {
        return new AsyncTaskResult<Document>(null, Document.class, failure);
    }
}
Example 9
Project: moulder-j-master  File: TexterTest.java View source code
/**
 * Processes an {@code <outer>} element with a {@link Texter} backed by a mocked value and
 * checks that the element's text is replaced by the value's content.
 */
@Test
public void testRegularText() throws Exception {
    Value<String> value = mock(Value.class);
    when(value.get()).thenReturn("text");
    Texter texter = new Texter(value);

    Document document = Jsoup.parseBodyFragment("<html><body><outer>test</outer></body></html>");
    Element outer = document.getElementsByTag("outer").first();
    List<Node> processed = texter.process(outer);

    // verify that get() was called on the mocked value
    InOrder order = inOrder(value);
    order.verify(value).get();

    StringReader expectedXml = new StringReader("<body><outer>text</outer></body>");
    StringReader actualXml = new StringReader(html(processed));
    assertXMLEqual(expectedXml, actualXml);
}
Example 10
Project: muzima-android-master  File: HTMLConceptParser.java View source code
/**
 * Collects the distinct concept names referenced by the given markup.
 *
 * @param html the HTML markup to parse
 * @return the concept names of all non-{@code div} elements carrying the
 *         {@code DATA_CONCEPT_TAG} attribute, without duplicates and in no particular order
 */
public List<String> parse(String html) {
    // Every element that has the concept attribute and is not a div.
    final String selector = "*:not(div)[" + DATA_CONCEPT_TAG + "]";
    Set<String> uniqueNames = new HashSet<String>();
    for (Element tagged : Jsoup.parse(html).select(selector)) {
        String attributeValue = tagged.attr(DATA_CONCEPT_TAG);
        uniqueNames.add(getConceptName(attributeValue));
    }
    return new ArrayList<String>(uniqueNames);
}
Example 11
Project: NiceText-master  File: NTImpl.java View source code
/**
 * Fetches the page at the given URL and runs the document-based {@code extract} on it.
 *
 * @param url the address to download
 * @return the extraction result, or {@code null} when an {@link IOException} occurs
 *         (its stack trace is printed)
 */
public String extract(String url) {
    try {
        Connection.Response response = Jsoup.connect(url)
                .userAgent(Constants.USER_AGENT)
                .header("Accept", "text/html,application/xhtml+xml,application/xml")
                .header("Accept-Encoding", "gzip,deflate,sdch")
                .followRedirects(true)
                .timeout(Constants.CONN_TIMEOUT)
                .execute();
        return extract(response.parse());
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
Example 12
Project: pictorial_android_client-master  File: ParserImageList.java View source code
/**
 * Parses a listing page into an {@link ImageListBean}, one {@link ImageBean} per element of
 * class {@code post-inner} that contains an image.
 *
 * @param mRet the HTML markup of the listing page, may be {@code null}
 * @return the list of images found; empty when {@code mRet} is {@code null} or nothing matches
 */
public static ImageListBean parser(String mRet) {
    ImageListBean imageListBean = new ImageListBean();
    if (mRet == null) {
        return imageListBean;
    }
    Document document = Jsoup.parse(mRet);
    for (Element post : document.getElementsByClass("post-inner")) {
        Element image = post.select("img[src]").first();
        if (image == null) {
            // first() is null for an empty selection; skip such posts instead of
            // failing the whole page with a NullPointerException.
            continue;
        }
        ImageBean imageBean = new ImageBean();
        imageBean.setAlt(image.attr("alt"));
        // The detail link is the href of the post's titled anchor.
        imageBean.setDetailurl(post.select("a[title]").attr("href"));
        imageBean.setHeight(image.attr("height"));
        imageBean.setWidth(image.attr("width"));
        imageBean.setImgurl(image.attr("src"));
        imageListBean.add(imageBean);
    }
    return imageListBean;
}
Example 13
Project: playconf-master  File: IndexViewTest.java View source code
/**
 * Renders the index view for a sample proposal and checks the title and speaker name
 * shown in the resulting page.
 */
@Override
public void run() {
    Context.current.set(testHttpContext());

    Proposal proposal = sampleProposal();
    Speaker presenter = sampleSpeaker();
    proposal.speaker = presenter;

    Html rendered = views.html.index.render(proposal);
    Document page = Jsoup.parse(contentAsString(rendered));

    String shownTitle = page.select("#title").text();
    assertThat(shownTitle).isEqualTo("Keynote - " + proposal.title);
    String shownSpeaker = page.select("#speakerName").text();
    assertThat(shownSpeaker).isEqualTo(presenter.name);
}
Example 14
Project: SimpleFunctionalTest-master  File: HtmlBreadcrumb.java View source code
/**
 * Produces the HTML report for a use case and appends a breadcrumb list to the elements
 * matching {@code .page-header .text-center}.
 *
 * @param useCaseResult the use case result to report on
 * @param parameters not read by this method
 * @return the report markup including the breadcrumb
 */
@Override
public String applyOnUseCase(UseCaseResult useCaseResult, String... parameters) {
    final Document report = Jsoup.parse(getHtmlReport().applyOnUseCase(useCaseResult));
    Elements header = report.select(".page-header .text-center");
    String crumbs = printFirstUseCase(useCaseResult.useCase, useCaseResult.useCase);
    header.append("<ol class=\"breadcrumb\">" + crumbs + "</ol>");
    return report.toString();
}
Example 15
Project: TPPIBot-master  File: HTML.java View source code
/**
 * Downloads the page named by the first argument, pastes its HTML through {@code poster}
 * and reports the paste result.
 *
 * <p>When no argument is given, or the download fails with an {@link IOException}, a
 * single explanatory line is added instead.
 */
@Override
public void onCommand(PircBotX bot, User user, Channel channel, List<String> lines, String... args) {
    if (args.length < 1) {
        lines.add("This command requires 1 arg.");
        return;
    }
    final String target = args[0];
    String source;
    try {
        source = Jsoup.connect(target).get().html();
    } catch (IOException e) {
        // Only the exception class name is reported back.
        lines.add(e.getClass().getName());
        return;
    }
    String pasteResult = poster.pasteData(source);
    lines.add("HTML Source of " + target + " :  " + pasteResult);
}
Example 16
Project: ulti-master  File: UtilsDemo.java View source code
/**
 * Parses a small fixed HTML document, logs a few of its properties and returns one line per
 * element describing it.
 *
 * @return for every element of the document a line with its tag name, node name, child
 *         count, data and text, separated by three spaces
 */
public static String TestJsoup() {
    String html1 = "<html><head><title>First parse</title></head>" + "<body><p>Parsed HTML into a doc.</p></body></html>";
    // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder();
    Document doc = Jsoup.parse(html1);
    Logs.d("docs---" + doc.title() + "   " + doc.getAllElements().size());
    Logs.d("docs---" + doc.children().size() + "   " + doc.location());
    for (Element element : doc.getAllElements()) {
        // Chained appends avoid building a temporary string per element.
        sb.append(element.tagName()).append("   ")
                .append(element.nodeName()).append("   ")
                .append(element.children().size()).append("   ")
                .append(element.data()).append("   ")
                .append(element.text()).append("\n");
        Logs.d(element.text() + "   ");
    }
    return sb.toString();
}
Example 17
Project: UltimateAndroid-master  File: UtilsDemo.java View source code
/**
 * Parses a small fixed HTML document, logs a few of its properties and returns one line per
 * element describing it.
 *
 * @return for every element of the document a line with its tag name, node name, child
 *         count, data and text, separated by three spaces
 */
public static String TestJsoup() {
    String html1 = "<html><head><title>First parse</title></head>" + "<body><p>Parsed HTML into a doc.</p></body></html>";
    // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder();
    Document doc = Jsoup.parse(html1);
    Logs.d("docs---" + doc.title() + "   " + doc.getAllElements().size());
    Logs.d("docs---" + doc.children().size() + "   " + doc.location());
    for (Element element : doc.getAllElements()) {
        // Chained appends avoid building a temporary string per element.
        sb.append(element.tagName()).append("   ")
                .append(element.nodeName()).append("   ")
                .append(element.children().size()).append("   ")
                .append(element.data()).append("   ")
                .append(element.text()).append("\n");
        Logs.d(element.text() + "   ");
    }
    return sb.toString();
}
Example 18
Project: v2ex-android-master  File: NotificationListModel.java View source code
/**
 * Fills this list from a notifications page and updates the paging fields.
 *
 * @param responseBody the HTML markup of the page
 * @throws Exception declared by the signature; propagated from the parsing helpers
 */
public void parse(String responseBody) throws Exception {
    Element body = Jsoup.parse(responseBody).body();

    // Each element whose class attribute is "cell" is offered to a fresh model;
    // only models that report a successful parse are kept.
    for (Element cell : body.getElementsByAttributeValue("class", "cell")) {
        NotificationModel candidate = new NotificationModel();
        if (candidate.parse(cell)) {
            add(candidate);
        }
    }

    int[] pageInfo = ContentUtils.parsePage(body);
    currentPage = pageInfo[0];
    totalPage = pageInfo[1];
}
Example 19
Project: webmagic-master  File: LinksSelectorTest.java View source code
/**
 * Prints the links selected from the current {@code html} fixture, then from a three-link
 * snippet parsed with {@code http://whatever.com/} as base URI.
 */
@Test
public void testLinks() throws Exception {
    LinksSelector selector = new LinksSelector();

    List<String> fromFixture = selector.selectList(html);
    System.out.println(fromFixture);

    html = "<div><a href='aaa'></a></div><div><a href='http://whatever.com/bbb'></a></div><div><a href='http://other.com/bbb'></a></div>";
    List<String> fromSnippet = selector.selectList(Jsoup.parse(html, "http://whatever.com/"));
    System.out.println(fromSnippet);
}
Example 20
Project: Android_RssReader-master  File: DescriptionFormatter.java View source code
/**
 * Returns the blog description when it can be used as-is, or an empty string otherwise.
 *
 * <p>The description is rejected (empty string returned) when the blog or its description
 * is missing or empty, when it contains an {@code embed} element, an {@code iframe} or link
 * pointing at a video/flash source, or an image whose {@code src} does not start with
 * {@code prefix}.
 *
 * @param blog the blog whose description is checked, may be {@code null}
 * @return the unchanged description, or {@code ""} when it is rejected
 */
@Override
protected String LoadFromCache(Blog blog) {
    // Reject a missing blog up front; the old "blog != null &&" guard let null fall
    // through to blog.Description below.
    if (blog == null || blog.Description == null || blog.Description.length() == 0) {
        return "";
    }
    Document doc = Jsoup.parse(blog.Description);
    if (!doc.getElementsByTag("embed").isEmpty()) {
        return "";
    }
    for (Element frame : doc.getElementsByTag("iframe")) {
        if (frame.hasAttr("src") && containsVideoMarker(frame.attr("src"))) {
            return "";
        }
    }
    for (Element link : doc.getElementsByTag("a")) {
        if (link.hasAttr("href") && containsVideoMarker(link.attr("href"))) {
            return "";
        }
    }
    for (Element img : doc.getElementsByTag("img")) {
        if (img.hasAttr("src") && !img.attr("src").startsWith(prefix)) {
            return "";
        }
    }
    return blog.Description;
}

/** Tells whether the URL contains one of the substrings treated as a video/flash source. */
private boolean containsVideoMarker(String url) {
    return url.contains("swf") || url.contains("youku") || url.contains("sohu")
            || url.contains("tudou") || url.contains("youtube") || url.contains("ku6");
}
Example 21
Project: bashoid-master  File: Parser.java View source code
/**
 * Extracts the quotes contained in a page.
 *
 * <p>Quotes are read from the first element that has a {@code valign} attribute: elements
 * of class {@code quote} supply id and score, elements of class {@code qt} supply the body
 * lines. Headers and bodies are paired by position.
 *
 * @param page the downloaded page
 * @return the parsed quotes; empty when the page has no such container
 * @throws NumberFormatException if an id or score is not a valid integer
 */
static ArrayList<Quote> getQuotes(WebPage page) {
    ArrayList<Quote> quotes = new ArrayList<>();
    Element container = Jsoup.parse(page.getContent()).getElementsByAttribute("valign").first();
    if (container == null) {
        // first() is null for an empty selection: nothing to parse on this page.
        return quotes;
    }
    Elements headers = container.getElementsByClass("quote");
    Elements bodies = container.getElementsByClass("qt");
    // Pair only as many entries as both lists provide, so a mismatch cannot index out of range.
    final int count = Math.min(headers.size(), bodies.size());
    for (int i = 0; i < count; ++i) {
        String[] body = bodies.get(i).html().split("<br />");
        Element header = headers.get(i);
        // The id is the bold text without its first character.
        String quoteId = header.getElementsByTag("b").first().text().substring(1);
        int id = Integer.parseInt(quoteId);
        // The score is the header's own text without its first and last character.
        String ownText = header.ownText();
        String quoteScore = ownText.substring(1, ownText.length() - 1);
        int score = Integer.parseInt(quoteScore);
        quotes.add(new Quote(body, score, id));
    }
    return quotes;
}
Example 22
Project: cms-ce-master  File: HtmlExtractor.java View source code
/**
 * Extracts the text content of an HTML stream.
 *
 * @param mimeType the MIME type of the content
 * @param inputStream the stream to read the markup from
 * @param encoding the character encoding handed to the parser
 * @return the concatenated text of all text nodes, or {@code null} when
 *         {@code canHandle(mimeType)} is false
 * @throws IOException if parsing the stream fails
 */
@Override
public String extractText(final String mimeType, final InputStream inputStream, final String encoding) throws IOException {
    if (!canHandle(mimeType)) {
        return null;
    }
    final StringBuilder collected = new StringBuilder();
    final Document parsed = Jsoup.parse(inputStream, encoding, "");
    for (Element current : parsed.getAllElements()) {
        for (TextNode node : current.textNodes()) {
            final String fragment = node.text();
            collected.append(fragment);
            appendWhitespaceAfterTextIfNotThere(collected, fragment);
        }
    }
    return collected.toString();
}
Example 23
Project: CrashMonkey4Android_tradefederation-master  File: JsonHelper.java View source code
/**
 * Requests the given URL, ignoring the response content type, and returns the raw body.
 *
 * @param url the address to request
 * @return the response body, or {@code null} when an {@link IOException} occurs (the
 *         failure is logged)
 */
public static String getJsonString(String url) {
    Connection request = Jsoup.connect(url);
    request.ignoreContentType(true);
    try {
        return request.execute().body();
    } catch (IOException e) {
        CLog.i("failed to get json result for %s, %s", url, e.getMessage());
        return null;
    }
}
Example 24
Project: deepnighttwo-master  File: FirstTry.java View source code
/**
 * Posts a date range to the air-news page and prints the own text of every table cell,
 * skipping the first matched row.
 *
 * @param args not used
 * @throws IOException if the request fails
 */
public static void main(String[] args) throws IOException {
    Document doc = Jsoup.connect("http://www.envir.gov.cn/airnews/index.asp").data("Fdate", "2000-6-1").data("Tdate", "2000-6-8").userAgent("I'm jsoup").timeout(3000).post();
    Elements tableRows = doc.select("table[bordercolor] > tr");
    // Start at index 1 to skip the first row; unlike removing element 0 this is also
    // safe when the selection is empty.
    for (int i = 1; i < tableRows.size(); i++) {
        for (Element cell : tableRows.get(i).select("td")) {
            System.out.println(cell.ownText());
        }
    }
}
Example 25
Project: downlords-faf-client-master  File: LastNewsController.java View source code
/**
 * Shows the first fetched news item (author line, title, plain-text content), if there is
 * one, and always sets the default news image.
 */
@PostConstruct
void postConstruct() {
    List<NewsItem> fetched = newsService.fetchNews();
    if (!fetched.isEmpty()) {
        NewsItem latest = fetched.get(0);
        String authoredLine = i18n.get("news.authoredFormat", latest.getAuthor(), latest.getDate());
        authoredLabel.setText(authoredLine);
        titleLabel.setText(latest.getTitle());
        // Strip the markup from the content; only its text is displayed.
        textLabel.setText(Jsoup.parse(latest.getContent()).text());
    }
    // TODO only use this if there's no thumbnail. However, there's never a thumbnail ATM.
    imageView.setImage(themeService.getThemeImage(ThemeService.DEFAULT_NEWS_IMAGE));
}
Example 26
Project: dungproxy-master  File: WaitProxyTest.java View source code
/**
 * Initializes the proxy pool, then fetches {@code http://ip.cn/} from five background
 * threads (five requests each) and ten times from the main thread, printing the text of
 * the {@code #result} element of every non-empty response.
 *
 * @param args not used
 */
public static void main(String[] args) {
    // Enable the proxy IP pool and wait when no proxy is available.
    DungProxyContext proxyContext = DungProxyContext.create().setWaitIfNoAvailableProxy(true).setPoolEnabled(true);
    IpPoolHolder.init(proxyContext);

    for (int worker = 0; worker < 5; worker++) {
        new Thread() {

            @Override
            public void run() {
                for (int attempt = 0; attempt < 5; attempt++) {
                    String page = HttpInvoker.get("http://ip.cn/");
                    if (!StringUtils.isEmpty(page)) {
                        System.out.println(Jsoup.parse(page).select("#result").text());
                    }
                }
            }
        }.start();
    }

    for (int attempt = 0; attempt < 10; attempt++) {
        String page = HttpInvoker.get("http://ip.cn/");
        if (!StringUtils.isEmpty(page)) {
            System.out.println(Jsoup.parse(page).select("#result").text());
        }
    }
}
Example 27
Project: EhViewer-master  File: ProfileParser.java View source code
/**
 * Parses a forum profile page into a {@link Result}.
 *
 * @param body the HTML markup of the profile page
 * @return the result with the display name set and, when it can be read, the avatar URL
 * @throws ParseException if the display name cannot be extracted
 */
public static Result parse(String body) throws ParseException {
    try {
        Result parsed = new Result();
        Element nameBlock = Jsoup.parse(body).getElementById("profilename");
        parsed.displayName = nameBlock.child(0).text();
        try {
            // The avatar is read from the first child of the second sibling after the name block.
            String src = nameBlock.nextElementSibling().nextElementSibling().child(0).attr("src");
            if (TextUtils.isEmpty(src)) {
                parsed.avatar = null;
            } else if (src.startsWith("http")) {
                parsed.avatar = src;
            } else {
                // Anything not starting with "http" is prefixed with the forums URL.
                parsed.avatar = EhUrl.URL_FORUMS + src;
            }
        } catch (Exception e) {
            // A missing avatar is not an error; the field is left untouched.
            Log.i(TAG, "No avatar");
        }
        return parsed;
    } catch (Exception e) {
        throw new ParseException("Parse forums error", body);
    }
}
Example 28
Project: email-master  File: UriParserTestHelper.java View source code
/**
 * Asserts that the given markup contains an anchor whose text and {@code href} both equal
 * the expected value. Only the first anchor is checked.
 *
 * @param expected the expected link text and target
 * @param actual the linkified markup to inspect
 */
public static void assertContainsLink(String expected, StringBuffer actual) {
    Element anchor = Jsoup.parseBodyFragment(actual.toString()).select("a").first();
    assertNotNull("No <a> element found", anchor);
    assertEquals(expected, anchor.text());
    assertEquals(expected, anchor.attr("href"));
}
Example 29
Project: example-webapp-master  File: ExceptionHandlingIntegrationTests.java View source code
/**
 * Requests a failing URL, follows the redirect to the error page and checks that the
 * error reference from the redirect URL is shown exactly once in {@code #errorRef}.
 */
@Test
public void shouldSeeErrorReferenceDisplayedOnThePage() throws Exception {
    SpringDispatcherServlet servlet = SpringDispatcherServlet.create();

    MockHttpServletResponse failing = servlet.process(new MockHttpServletRequest("GET", "/bad"));
    String errorPageUrl = failing.getRedirectedUrl();
    // The redirect target is /error/ followed by a 7-character reference of A-Z and 0-9.
    assertThat(errorPageUrl, matchesPattern(sequence("/error/", exactly(7, anyCharacterIn("A-Z0-9")))));
    String expectedRef = StringUtils.substringAfterLast(errorPageUrl, "/");

    MockHttpServletResponse errorPage = servlet.process(new MockHttpServletRequest("GET", errorPageUrl));
    Elements refElements = Jsoup.parse(errorPage.getContentAsString()).select("#errorRef");
    assertThat(refElements.size(), equalTo(1));
    assertThat(refElements.first().text(), equalTo(expectedRef));
}
Example 30
Project: GameRaven-master  File: DocumentParser.java View source code
/**
 * Reads everything from the emitter and turns it into a {@link FinalDoc} holding both the
 * raw bytes and the document parsed from them.
 *
 * @param emitter the data source to read from
 * @return a future completing with the resulting {@link FinalDoc}
 */
@Override
public Future<FinalDoc> parse(DataEmitter emitter) {
    return new ByteBufferListParser().parse(emitter).then(new TransformFuture<FinalDoc, ByteBufferList>() {

        @Override
        protected void transform(ByteBufferList result) throws Exception {
            byte[] raw = result.getAllByteArray();
            // Decode with CHARSET before handing the text to the HTML parser.
            String markup = new String(raw, CHARSET);
            FinalDoc finalDoc = new FinalDoc(raw, Jsoup.parse(markup));
            setComplete(finalDoc);
        }
    });
}
Example 31
Project: GoVRE-master  File: ProxyNetworkTrainMapImage.java View source code
//METHODS	
/**
 * Downloads the page configured as {@code R.string.urlVREImgMap} and looks for the image
 * whose absolute URL contains {@code app?action=getimg}.
 *
 * @param context used to resolve the page URL from the string resources
 * @return the absolute image URL, an empty string when no image matches, or {@code null}
 *         when the download fails with an {@link IOException}
 */
private static String fetchTrainImageUrlFromVRE(Context context) {
    try {
        String pageUrl = context.getResources().getString(R.string.urlVREImgMap);
        // Look at every element that carries a src attribute.
        for (Element candidate : Jsoup.connect(pageUrl).get().select("[src]")) {
            // Only images are of interest.
            if (!candidate.tagName().equals("img")) {
                continue;
            }
            String absoluteSrc = candidate.attr("abs:src");
            // The map is the only image expected to carry the action query string.
            if (absoluteSrc.contains("app?action=getimg")) {
                return absoluteSrc;
            }
        }
        // Nothing matched.
        return "";
    } catch (IOException e) {
        // A failed download is reported to the caller as null.
        return null;
    }
}
Example 32
Project: japicmp-master  File: ITReportTitle.java View source code
/**
 * Checks the japicmp entries of the generated {@code project-reports.html}: the left
 * navigation link and the overview row with its link text and description.
 */
@Test
public void testReportTitle() throws IOException {
    final String reportLink = "[href=\"japicmp.html\"]";

    Path reportsPage = Paths.get(System.getProperty("user.dir"), "target", "site", "project-reports.html");
    assertThat(Files.exists(reportsPage), is(true));
    Document document = Jsoup.parse(reportsPage.toFile(), "UTF-8");

    // Navigation entry in the left column.
    Elements navEntry = document.select("#leftColumn " + reportLink);
    assertThat(navEntry.attr("title"), is("japicmp"));
    assertThat(navEntry.text(), is("japicmp"));

    // Overview table row that contains the report link.
    Elements row = document.select("#bodyColumn tr:has(" + reportLink + ")");
    assertThat(row.select(reportLink).text(), is("japicmp"));

    Elements descriptionCell = row.select("td:eq(1)");
    String version = System.getProperty("project.version");
    String expectedDescription = "Comparing source compatibility of japicmp-test-v2-" + version + ".jar against japicmp-test-v1-" + version + ".jar";
    assertThat(descriptionCell.text(), is(expectedDescription));
}
Example 33
Project: JAViewer-master  File: TorrentKittyLinkProvider.java View source code
/**
 * Extracts the download links listed in the {@code archiveResult} table of a result page.
 *
 * <p>Rows that lack a name, date or magnet link are skipped.
 *
 * @param htmlContent the HTML markup of the result page
 * @return the links found; empty when the page has no {@code archiveResult} element
 */
@Override
public List<DownloadLink> parseDownloadLinks(String htmlContent) {
    ArrayList<DownloadLink> links = new ArrayList<>();
    Element table = Jsoup.parse(htmlContent).getElementById("archiveResult");
    if (table == null) {
        // getElementById returns null when the id is absent; treat that as "no results".
        return links;
    }
    for (Element tr : table.getElementsByTag("tr")) {
        try {
            String name = tr.getElementsByClass("name").first().text();
            String date = tr.getElementsByClass("date").first().text();
            String magnet = tr.getElementsByAttributeValue("rel", "magnet").first().attr("href");
            links.add(DownloadLink.create(name, "", date, null, magnet));
        } catch (Exception ignored) {
            // Rows without the expected cells make first() return null; they are skipped.
        }
    }
    return links;
}
Example 34
Project: k-9-master  File: UriParserTestHelper.java View source code
/**
 * Asserts that the given markup contains an anchor whose text and {@code href} both equal
 * the expected value. Only the first anchor is checked.
 *
 * @param expected the expected link text and target
 * @param actual the linkified markup to inspect
 */
public static void assertContainsLink(String expected, StringBuffer actual) {
    Element anchor = Jsoup.parseBodyFragment(actual.toString()).select("a").first();
    assertNotNull("No <a> element found", anchor);
    assertEquals(expected, anchor.text());
    assertEquals(expected, anchor.attr("href"));
}
Example 35
Project: KinoCast-master  File: NowVideo.java View source code
/**
 * Resolves the MP4 source of the video identified by the last path segment of {@code url}.
 *
 * @param queryTask the running task, used to report progress
 * @return the {@code src} of the MP4 {@code source} element, or {@code null} when
 *         {@code url} is empty or any exception occurs (its stack trace is printed)
 */
@Override
public String getVideoPath(DetailActivity.QueryPlayTask queryTask) {
    if (TextUtils.isEmpty(url)) {
        return null;
    }
    try {
        // Everything after the last slash is the video id.
        String videoId = url.substring(url.lastIndexOf("/") + 1);
        String progress = queryTask.getContext().getString(R.string.host_progress_getvideoforid, videoId);
        queryTask.updateProgress(progress);
        Document mobilePage = Jsoup.connect("http://www.nowvideo.sx/mobile/video.php?id=" + videoId)
                .userAgent(Utils.USER_AGENT)
                .timeout(3000)
                .get();
        return mobilePage.select("source[type=video/mp4]").attr("src");
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
Example 36
Project: learn_crawler-master  File: HtmlParserTool.java View source code
/**
 * Downloads a page and collects the absolute URLs of its links, frames and iframes that
 * the filter accepts. Every link URL is also printed to standard output.
 *
 * @param url the page to download
 * @param filter decides which URLs are kept
 * @return the accepted URLs; empty when the download fails with an {@link IOException}
 *         (its stack trace is printed)
 */
public static Set<String> extracLinks(String url, LinkFilter filter) {
    Set<String> accepted = new HashSet<String>();
    try {
        Document page = Jsoup.connect(url).timeout(5000).get();
        Elements anchors = page.select("a[href]");
        Elements frames = page.select("frame[src]");
        Elements iframes = page.select("iframe[src]");

        for (Element anchor : anchors) {
            String target = anchor.absUrl("href");
            System.out.println(target);
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
        for (Element frame : frames) {
            String source = frame.absUrl("src");
            if (filter.accept(source)) {
                accepted.add(source);
            }
        }
        for (Element iframe : iframes) {
            String source = iframe.absUrl("src");
            if (filter.accept(source)) {
                accepted.add(source);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return accepted;
}
Example 37
Project: like_googleplus_layout-master  File: PhoneKRNewsContentUtils.java View source code
/**
 * Downloads a news article and returns its content as a list of entries, one per
 * paragraph of the {@code xs-post} element.
 *
 * <p>A paragraph without links contributes its text prefixed with
 * {@code FOUR_BLANK_SPACE}; a paragraph whose first link contains an image contributes
 * that image's {@code src}; other paragraphs contribute nothing.
 *
 * @param newsUrl the article address
 * @return the entries, or {@code null} when the page has no {@code xs-post} element, the
 *         element has no paragraphs, or the download fails
 */
public static LinkedList<String> getPhoneKRNewsDataList(String newsUrl) {
    LinkedList<String> data = null;
    try {
        Document document = Jsoup.connect(newsUrl).get();
        Element post = document.getElementById("xs-post");
        if (post == null) {
            // getElementById returns null when the id is absent; report "no data" as the
            // method already does for a post without paragraphs.
            return null;
        }
        Elements paragraphs = post.getElementsByTag("p");
        if (!paragraphs.isEmpty()) {
            data = new LinkedList<String>();
            for (Element paragraph : paragraphs) {
                String text = null;
                Elements anchors = paragraph.getElementsByTag("a");
                if (anchors.isEmpty()) {
                    text = FOUR_BLANK_SPACE + paragraph.text();
                } else {
                    // Only the first link of the paragraph is checked for an image.
                    Elements images = anchors.get(0).getElementsByTag("img");
                    if (!images.isEmpty()) {
                        text = images.get(0).attr("src");
                    }
                }
                if (!TextUtils.isEmpty(text)) {
                    data.add(text);
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return data;
}
Example 38
Project: mechanize-master  File: JsoupUtilTest.java View source code
/**
 * Checks which tag paths {@code JsoupDataUtil.findFirstByTag} resolves in a small document
 * with two anchors inside the body.
 */
@Test
public void testFindFirstByTagSingleTag() {
    Document document = Jsoup.parse("<html><body><a href=\"A\">A</a><a href=\"B\">B</a></body></html>");

    // Paths expected to yield an element.
    String[] resolvable = {"a", "body", "body/a", "html/body/a", "html/a"};
    for (String path : resolvable) {
        assertNotNull(JsoupDataUtil.findFirstByTag(document, path));
    }

    // Paths expected to yield nothing.
    String[] unresolvable = {"body/html/a", "body/unknown"};
    for (String path : unresolvable) {
        assertNull(JsoupDataUtil.findFirstByTag(document, path));
    }
}
Example 39
Project: mlcomp-master  File: TitleMap.java View source code
/**
 * Emits one output record per input record, carrying the URL (column 0) and the texts of
 * all {@code title} elements found in the HTML (column 1).
 *
 * <p>Every title text is preceded by a comma, so a non-empty result starts with ",".
 *
 * @param recordNum not read by this method
 * @param record the input record
 * @param context used to create and write the output record
 * @throws IOException declared by the signature
 */
@Override
public void map(long recordNum, Record record, TaskContext context) throws IOException {
    String url = (String) record.get(0);
    String html = (String) record.get(1);

    StringBuilder titles = new StringBuilder();
    for (Element titleElement : Jsoup.parse(html).getElementsByTag("title")) {
        titles.append(",").append(titleElement.text());
    }

    Record output = context.createOutputRecord();
    output.set("url", url);
    output.set("title", titles.toString());
    context.write(output);
}
Example 40
Project: mobile-ycjw-master  File: StudentDevelopmentScheduleQuery.java View source code
/**
 * Requests the development schedule page and returns the markup of the
 * {@code #DG_GetGrjh} element.
 *
 * <p>The response is read line by line and the lines are joined without separators before
 * parsing.
 *
 * @param context used to obtain the application object holding the HTTP client and the
 *        selected server address
 * @return the string form of the elements matching {@code #DG_GetGrjh}
 * @throws Exception wrapping any failure that occurs while requesting or reading the page
 */
@Override
public String getDevelopmentScheduleQueryInfo(Context context) throws Exception {
    try {
        YCApplication app = (YCApplication) context.getApplicationContext();
        String url = (String) app.get("selectedIp") + Constant.developScheduleQuery;
        HttpGet request = new HttpGet(url);
        HttpResponse response = app.getClient().execute(request);
        InputStream is = response.getEntity().getContent();
        StringBuilder sb = new StringBuilder();
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(is, Constant.ENCODING));
            String temp = null;
            while ((temp = br.readLine()) != null) {
                sb.append(temp);
            }
        } finally {
            // Always close the response stream, also when reading fails.
            is.close();
        }
        Document doc = Jsoup.parse(sb.toString());
        Elements table = doc.select("#DG_GetGrjh");
        return table.toString();
    } catch (Exception e) {
        throw new Exception(e);
    }
}
Example 41
Project: Muzik-master  File: SearchDownloadsNL.java View source code
/**
 * Searches downloads.nl for the given query and resolves every hit to its redirect target.
 *
 * <p>Each element of class {@code tl} on the result page is requested without following
 * redirects; the {@code Location} header of the response becomes the song URL and the text
 * of the element's {@code span} becomes its name.
 *
 * @param query the search terms
 * @return the songs found so far; an {@link IOException} ends the search early (its stack
 *         trace is printed)
 */
public static ArrayList<SongResult> getSongs(String query) {
    ArrayList<SongResult> temp = new ArrayList<SongResult>();
    //base query url.
    String u = "http://www.downloads.nl/results/mp3/1/" + Uri.parse(query);
    try {
        Document document = Jsoup.connect(u).get();
        Elements searchResults = document.select(".tl");
        for (Element x : searchResults) {
            String url = "http://www.downloads.nl" + x.attr("href");
            //todo add artist string to the name so that result is clearer
            HttpURLConnection ucon = (HttpURLConnection) new URL(url).openConnection();
            String location;
            try {
                ucon.setInstanceFollowRedirects(false);
                location = ucon.getHeaderField("Location");
            } finally {
                // Release the connection once the header has been read.
                ucon.disconnect();
            }
            if (location == null) {
                // No redirect for this hit: skip it rather than aborting the whole search
                // with a MalformedURLException.
                continue;
            }
            URL secondURL = new URL(location);
            String name = x.select("span").text();
            if (HomescreenActivity.debugMode) {
                Log.d("Play", "Downloads.nl Name=" + name + " url=" + secondURL);
            }
            temp.add(new SongResult(name, secondURL.toString()));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return temp;
}
Example 42
Project: opacclient-master  File: WinBiapAccountTest.java View source code
/**
 * Parses the media list resource for the current {@code file} and checks that at least one
 * item is returned and that every item has a title, deadline, media type and cover data.
 * Does nothing when the resource is missing.
 */
@Test
public void testParseMediaList() throws OpacApi.OpacErrorException {
    String html = readResource("/winbiap/medialist/" + file);
    // we may not have all files for all libraries
    if (html != null) {
        List<LentItem> lentItems = WinBiap.parseMediaList(Jsoup.parse(html), new JSONObject());
        assertTrue(lentItems.size() > 0);
        for (LentItem lent : lentItems) {
            assertNotNull(lent.getTitle());
            assertNotNull(lent.getDeadline());
            assertNotNull(lent.getMediaType());
            assertContainsData(lent.getCover());
        }
    }
}
Example 43
Project: orcid-update-java-master  File: DelegatingMetaScraper.java View source code
/**
 * Fetches the metadata for the given identifier or URL.
 *
 * <p>Identifiers starting with {@code uk.bl.ethos} are delegated to an
 * {@link EthosMetaScraper}. Everything else is treated as a URL: its page is downloaded
 * (10 second timeout) unless a builder for it is already cached.
 *
 * @param url an EThOS identifier or a page URL
 * @return the metadata produced by the delegate or by the page's meta builder
 * @throws IOException if the page cannot be downloaded
 */
@Override
public IsOrcidWork fetch(String url) throws IOException {
    //check to see if we have an ethos ID
    if (url.startsWith("uk.bl.ethos")) {
        EthosMetaScraper scrape = new EthosMetaScraper();
        return scrape.fetch(url);
    }
    HTMLMetaBuilder builder = cache.getIfPresent(url);
    if (builder == null) {
        System.out.println("looking up " + url);
        Document doc = Jsoup.connect(url).timeout(10000).get();
        builder = new HTMLMetaBuilder(doc);
        // Remember the builder so the next lookup of this URL is served from the cache.
        // NOTE(review): assumes nothing else populates the cache for this key — confirm.
        cache.put(url, builder);
    }
    return builder.getDublinCoreMeta();
}
Example 44
Project: sample-skeleton-projects-master  File: MainRunner.java View source code
/**
 * Downloads {@code URL}, prints the page title, and prints a favicon path taken from the first
 * {@code hrefLink} match in the document head, falling back to the first {@code imgMeta} match.
 * An empty path is printed when neither selector matches.
 */
public static void main(String[] args) {
    String faviconImagePath = "";
    Connection conn = Jsoup.connect(URL).timeout(LONG_TIMEOUT);
    try {
        Document documentObject = conn.get();
        System.out.println("URL title: " + documentObject.title());
        Element iconLink = documentObject.head().select(hrefLink).first();
        if (iconLink != null) {
            // Previously a match here was found but never read, leaving the path empty.
            // NOTE(review): assumes hrefLink selects elements carrying the icon in "href" - confirm.
            faviconImagePath = iconLink.attr("href");
        } else {
            Element imageMeta = documentObject.head().select(imgMeta).first();
            // Guard against pages that have neither element; this used to throw a NullPointerException.
            if (imageMeta != null) {
                faviconImagePath = imageMeta.attr("content");
            }
        }
        System.out.println("Favicon img: " + faviconImagePath);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Example 45
Project: selfoss-android-master  File: ArticleContentParser.java View source code
/**
 * Collects the {@code src} attribute of every {@code img} tag in the article content.
 *
 * @return the non-empty image sources in document order; empty when there are none
 */
public List<String> getImagesUrls() {
    final List<String> sources = new ArrayList<String>();
    for (Element image : Jsoup.parse(article.getContent()).getElementsByTag("img")) {
        final String source = image.attr("src");
        if (source == null || source.isEmpty()) {
            continue;
        }
        sources.add(source);
    }
    return sources;
}
Example 46
Project: SocialConnect-master  File: JsoupBaseCrwaler.java View source code
/**
 * Downloads {@code url} with a 5 second timeout, retrying on {@link IOException}.
 *
 * @param url the address to fetch
 * @return the parsed document from the first successful attempt
 * @throws IOException after four failed attempts, wrapping the last failure
 */
@Override
public Document crwal(String url) throws IOException {
    if (logger.isDebugEnabled()) {
        logger.debug("Start crawling data from: " + url);
    }
    final int maxTriesToGetRemoteData = 4;
    Exception lastFailure = null;
    for (int attempt = 0; attempt < maxTriesToGetRemoteData; attempt++) {
        try {
            return Jsoup.connect(url).timeout(5000).get();
        } catch (IOException e) {
            lastFailure = e;
            if (logger.isWarnEnabled()) {
                // "attempt" counts the tries that preceded the one that just failed.
                logger.warn("Got a " + e.getMessage() + " Exception, try again to fetch data from remote address. Number of previous tries: " + attempt + ". At request: " + url);
            }
        }
    }
    throw new IOException("After " + maxTriesToGetRemoteData + " runs, gave up on fatching data from remote url: " + url, lastFailure);
}
Example 47
Project: StartupNews-master  File: JsoupConnector.java View source code
/**
 * Creates a jsoup connection to {@code url}, attaching the session user as a {@code user}
 * cookie when one is available.
 *
 * @param url the address to connect to
 * @return the connection, or {@code null} when {@code url} is empty
 */
public Connection newJsoupConnection(String url) {
    if (TextUtils.isEmpty(url)) {
        return null;
    }
    final String sessionUser = mSessionManager.getSessionUser();
    if (!TextUtils.isEmpty(sessionUser)) {
        return Jsoup.connect(url).cookie("user", sessionUser);
    }
    Log.i(LOG_TAG, "user is empty!");
    return Jsoup.connect(url);
}
Example 48
Project: stocks-master  File: YahooSearchProviderTest.java View source code
/**
 * Parses the recorded search response and checks the item count and every field of the first item.
 */
@Test
public void testParsingHtml() throws IOException {
    try (Scanner in = new Scanner(getClass().getResourceAsStream("response_yahoo_search.txt"), "UTF-8")) {
        // "\\A" matches only at the start of input, so next() returns the whole resource.
        final String page = in.useDelimiter("\\A").next();
        final List<ResultItem> results = new YahooSearchProvider().extractFrom(Jsoup.parse(page));
        assertThat(results.size(), equalTo(20));
        final ResultItem first = results.get(0);
        assertThat(first.getSymbol(), equalTo("D979C.LS"));
        assertThat(first.getName(), equalTo("BASF AG/CITI WT 14"));
        assertThat(first.getIsin(), equalTo("DE000CF79JW9"));
        assertThat(first.getLastTrade(), equalTo(Values.Quote.factorize(0.11)));
        assertThat(first.getType(), equalTo("Zertifikate & OS"));
        assertThat(first.getExchange(), equalTo("LIS"));
    }
}
Example 49
Project: TopNews-master  File: NewsDetailsService.java View source code
/**
 * Builds an HTML fragment for a news item: a header with title and date followed by the article
 * element downloaded from {@code url}.
 *
 * @param url        address of the article page
 * @param news_title title rendered in the header
 * @param news_date  date rendered under the title
 * @return the fragment, always terminated by {@code </body>}; when the download fails only the
 *         header is included
 */
public static String getNewsDetails(String url, String news_title, String news_date) {
    Document document = null;
    String data = "<body>" + "<center><h2 style='font-size:16px;'>" + news_title + "</h2></center>";
    data = data + "<p align='left' style='margin-left:10px'>" + "<span style='font-size:10px;'>" + news_date + "</span>" + "</p>";
    data = data + "<hr size='1' />";
    try {
        document = Jsoup.connect(url).timeout(9000).get();
        Element element = null;
        // NOTE(review): this emptiness check runs only after the URL has already been used for
        // the request above, so the first branch looks unreachable in practice - confirm intent.
        if (TextUtils.isEmpty(url)) {
            data = "";
            element = document.getElementById("memberArea");
        } else {
            element = document.getElementById("artibody");
        }
        if (element != null) {
            data = data + element.toString();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Close the body on every path; it used to be skipped when the download failed, which
    // returned unbalanced markup.
    data = data + "</body>";
    return data;
}
Example 50
Project: voj-master  File: HtmlTextFilter.java View source code
/**
 * Strips HTML markup from a string.
 *
 * @param text the string to filter
 * @return the filtered string, or {@code null} when {@code text} is {@code null}
 */
public static String filter(String text) {
    if (text == null) {
        return null;
    }
    final Document parsed = Jsoup.parse(text);
    parsed.outputSettings(new Document.OutputSettings().prettyPrint(false));
    // Mark line breaks with a literal backslash-n placeholder that is swapped for a real
    // newline once the markup has been serialised.
    parsed.select("br").append("\\n");
    parsed.select("p").prepend("\\n\\n");
    final String withNewlines = parsed.html().replaceAll("\\\\n", "\n");
    final Document.OutputSettings unformatted = new Document.OutputSettings().prettyPrint(false);
    return Jsoup.clean(withNewlines, "", Whitelist.none(), unformatted);
}
Example 51
Project: WaveTact-master  File: Quote.java View source code
/**
 * Fetches a random quote page and sends the first quote together with its author to the caller.
 * <p>
 * When the author text contains a dash it is cut at the first {@code (} if one is present,
 * otherwise at the first dash.
 *
 * @throws Exception if the page cannot be fetched or contains no quote or author element
 */
@Override
public void onCommand(String command, User user, PircBotX network, String prefix, Channel channel, boolean isPrivate, int userPermLevel, String... args) throws Exception {
    // The host was misspelled with four w's ("wwww."); corrected to the canonical "www." host.
    Document doc = Jsoup.connect("http://www.quotationspage.com/random.php3").userAgent(Registry.USER_AGENT).get();
    String quote = doc.select(".quote").get(0).text();
    String author = doc.select(".author").get(0).text();
    if (author.contains("-")) {
        if (!author.contains("(")) {
            author = author.split("-")[0];
        } else {
            author = author.split("\\(")[0];
        }
    }
    IRCUtils.sendMessage(user, network, channel, quote + " -" + IRCUtils.noPing(author), prefix);
}
Example 52
Project: ache-master  File: GoogleSearch.java View source code
public List<BackLinkNeighborhood> submitQuery(String query, int page) throws IOException {
    timer.waitMinimumDelayIfNecesary();
    // 21 -> max number allowed by google... decreases after
    String queryUrl = "https://www.google.com/search?q=" + query + "&num=" + docsPerPage + "&start=" + page * docsPerPage;
    System.out.println("URL:" + queryUrl);
    try {
        FetchedResult result = fetcher.get(queryUrl);
        InputStream is = new ByteArrayInputStream(result.getContent());
        Document doc = Jsoup.parse(is, "UTF-8", query);
        is.close();
        Elements searchItems = doc.select("div#search");
        Elements linkHeaders = searchItems.select(".r");
        Elements linksUrl = linkHeaders.select("a[href]");
        List<BackLinkNeighborhood> links = new ArrayList<>();
        for (Element link : linksUrl) {
            String title = link.text();
            String url = link.attr("href");
            links.add(new BackLinkNeighborhood(url, title));
        }
        System.out.println(getClass().getSimpleName() + " hits: " + links.size());
        return links;
    } catch (IOExceptionBaseFetchException |  e) {
        throw new IOException("Failed to download backlinks from Google.", e);
    }
}
Example 53
Project: asoiaf-master  File: FetchUrls.java View source code
/**
 * Downloads {@code url} and reads the thumbnail and original image links from the
 * {@code li.outlink a} anchors labelled "200" and "original".
 *
 * @param url the page to inspect
 * @return an {@code ImageUrl} with whichever links were found; untouched when the fetch fails
 */
public static ImageUrl FetchImageUrl(String url) {
    ImageUrl result = new ImageUrl();
    try {
        Elements outlinks = Jsoup.connect(url).timeout(5000).get().select("li.outlink a");
        for (Element anchor : outlinks) {
            String label = anchor.text();
            if (label.equals("200")) {
                result.setThumbUrl(anchor.select("a[href]").attr("href"));
            }
            if (label.equals("original")) {
                result.setOringinUrl(anchor.select("a[href]").attr("href"));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}
Example 54
Project: asta4d-master  File: ElementNotFoundHandlerOnDocumentTest.java View source code
/**
 * Registers an {@code ElementNotFoundHandler} for {@code div} on a document that has no
 * {@code div}, and checks that its alternative renderer rewrote the {@code span} text to "y".
 */
@Test
public void notFoundOnDocument() throws Exception {
    Document doc = Jsoup.parse("<html><body><span>x</span></body></html>");
    Renderer renderer = Renderer.create();
    ElementNotFoundHandler divFallback = new ElementNotFoundHandler("div") {

        @Override
        public Renderer alternativeRenderer() {
            return Renderer.create("span", "y");
        }
    };
    renderer.add(divFallback);
    RenderUtil.apply(doc, renderer);
    Assert.assertEquals(doc.select("span").text(), "y");
}
Example 55
Project: baleen-master  File: Jsp101HeadingsTest.java View source code
/**
 * Runs the manipulator over four paragraphs and checks that exactly one {@code h1} results,
 * carrying the text of the bold, all-caps paragraph.
 */
@Test
public void testSubjectHeading() {
    final String fragment = "<p><b>THIS IS A SUBJECT HEADING</b></p><p>THIS IS A NOT SUBJECT HEADING</p><p>THIS IS not a SUBJECT HEADING</p><p>THIS IS NOT A SUBJECT HEADING EITHER.</p>";
    final Document document = Jsoup.parseBodyFragment(fragment);
    manipulator.manipulate(document);
    final Elements headings = document.select("h1");
    assertEquals(1, headings.size());
    assertEquals("THIS IS A SUBJECT HEADING", headings.first().text());
}
Example 56
Project: bennu-master  File: Component.java View source code
/**
 * Replaces every element carrying a {@code bennu-component} attribute with the output of the
 * handler registered under that attribute's value in {@code COMPONENTS}.
 *
 * @param origin the HTML to process
 * @return the serialised document; elements with no registered handler are left as they were
 */
public static String process(String origin) {
    final Document doc = Jsoup.parse(origin);
    for (Element placeholder : doc.select("[bennu-component]")) {
        final String componentKey = placeholder.attr("bennu-component");
        Optional.ofNullable(COMPONENTS.get(componentKey))
                .ifPresent(handler -> placeholder.replaceWith(handler.process(placeholder)));
    }
    return doc.toString();
}
Example 57
Project: clicker-master  File: CN88ProxyGetter.java View source code
/**
 * Scrapes proxy host/port pairs from listing pages 2 through 10 of cz88.net.
 * <p>
 * This is best effort: a page that cannot be fetched or parsed, and any row that does not yield
 * a host and a numeric port, is skipped without being reported.
 *
 * @return the proxies collected, possibly empty
 */
@Override
public Set<Proxy> find() {
    final Set<Proxy> proxies = new HashSet<Proxy>();
    for (int page = 2; page <= 10; page++) {
        try {
            final Document doc = Jsoup.parse(new URL("http://www.cz88.net/proxy/http_" + page + ".aspx"), TIMEOUT);
            final Elements rows = doc.getElementsByTag("table").get(0).getElementsByTag("tr");
            // Row 0 is skipped, as in the original loop.
            for (int row = 1; row < rows.size(); row++) {
                try {
                    final Elements cells = rows.get(row).getElementsByTag("td");
                    final String host = cells.get(0).text();
                    final int port = Integer.parseInt(cells.get(1).text());
                    proxies.add(new Proxy(host, port, this.properties));
                } catch (final Exception ignored) {
                    // Malformed row: skip it and keep scanning the table.
                }
            }
        } catch (final Exception ignored) {
            // Page unavailable or unexpected layout: move on to the next page.
        }
    }
    return proxies;
}
Example 58
Project: CN1ML-NetbeansModule-master  File: StringUtilTest.java View source code
/**
 * Checks that a run of two spaces after non-ASCII characters collapses to a single space, both
 * through {@code StringUtil.normaliseWhitespace} and through {@code Jsoup.parse(...).text()}.
 */
@Test
public void normaliseWhiteSpaceHandlesHighSurrogates() {
    final String input = "か゚  1";
    final String expected = "か゚ 1";
    assertEquals(expected, StringUtil.normaliseWhitespace(input));
    final String parsedText = Jsoup.parse(input).text();
    assertEquals(expected, parsedText);
}
Example 59
Project: constellio-master  File: ConnectorHttpUtils.java View source code
/**
 * Downloads {@code url} and packages the parsed document, its title, and the last
 * {@code /}-separated segment of the URL as the file name.
 *
 * @param url the address to fetch
 * @return the populated content holder
 * @throws IOException if the request or the parse fails
 */
public static FetchedDocumentContent fetch(String url) throws IOException {
    FetchedDocumentContent content = new FetchedDocumentContent();
    Response httpResponse = Jsoup.connect(url).execute();
    content.document = httpResponse.parse();
    content.title = content.document.title();
    String[] segments = url.split("/");
    int lastIndex = segments.length - 1;
    content.fileName = segments[lastIndex];
    return content;
}
Example 60
Project: dataverse-master  File: MarkupChecker.java View source code
/**
 * Cleans untrusted HTML with jsoup using the "basic with images" whitelist, extended with
 * headings, a few inline tags and image-map support.
 *   http://jsoup.org/cookbook/cleaning-html/whitelist-sanitizer
 *
 * @param unsafe the markup to sanitize; may be {@code null}
 * @return the cleaned markup, or {@code null} when {@code unsafe} is {@code null}
 */
public static String sanitizeBasicHTML(String unsafe) {
    if (unsafe == null) {
        return null;
    }
    // basic includes: a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li, ol, p, pre, q, small, span, strike, strong, sub, sup, u, ul
    Whitelist allowed = Whitelist.basicWithImages()
            .addTags("h1", "h2", "h3", "kbd", "hr", "s", "del", "map", "area")
            .addAttributes("img", "usemap")
            .addAttributes("map", "name")
            .addAttributes("area", "shape", "coords", "href", "title", "alt")
            // Every anchor that survives cleaning is forced to carry target="_blank".
            .addEnforcedAttribute("a", "target", "_blank");
    return Jsoup.clean(unsafe, allowed);
}
Example 61
Project: en-webmagic-master  File: CssSelector.java View source code
/**
 * Applies {@code selectorText} to the parsed {@code text} and returns the value of every match.
 *
 * @param text the HTML to search
 * @return the non-empty values in match order; empty when nothing matches
 */
@Override
public List<String> selectList(String text) {
    final List<String> values = new ArrayList<String>();
    final Elements matches = Jsoup.parse(text).select(selectorText);
    if (!CollectionUtils.isNotEmpty(matches)) {
        return values;
    }
    for (Element match : matches) {
        final String value = getValue(match);
        if (StringUtils.isEmpty(value)) {
            continue;
        }
        values.add(value);
    }
    return values;
}
Example 62
Project: EventApp-master  File: BazaarEntryLoader.java View source code
/**
 * Converts every {@code table} in the response into a {@code BazaarEntry} and passes the list to
 * the listener.
 * <p>
 * A table with at least three rows supplies name, title and summary from its first three rows,
 * plus a URL when the summary row contains a link. A table with fewer rows still yields an
 * entry, left unpopulated.
 *
 * @param body the HTML response body
 */
@Override
public void onResponse(String body) {
    final List<BazaarEntry> parsed = new ArrayList<BazaarEntry>();
    for (Element table : Jsoup.parse(body).select("table")) {
        final BazaarEntry entry = new BazaarEntry();
        final Elements rows = table.select("tr");
        if (rows.size() >= 3) {
            entry.setName(rows.get(0).text());
            entry.setTitle(rows.get(1).text());
            final Element summaryRow = rows.get(2);
            entry.setSummary(summaryRow.text());
            final Elements anchors = summaryRow.select("a");
            if (!anchors.isEmpty()) {
                entry.setUrl(anchors.attr("href"));
            }
        }
        parsed.add(entry);
    }
    listener.onSuccess(parsed);
}
Example 63
Project: extentreports-java-master  File: SystemAttributeTests.java View source code
/**
 * Flushes the report and asserts that both {@code key} and {@code value} appear as the text of
 * some {@code .environment td} cell in the generated HTML.
 */
private void performAssertForKVPairs(String key, String value) {
    extent.flush();
    final Document report = Jsoup.parse(Reader.readAllText(htmlFilePath));
    boolean sawKey = false;
    boolean sawValue = false;
    for (Element cell : report.select(".environment td")) {
        final String cellText = cell.text();
        sawKey |= cellText.equals(key);
        sawValue |= cellText.equals(value);
    }
    Assert.assertTrue(sawKey);
    Assert.assertTrue(sawValue);
}
Example 64
Project: FakeWeather-master  File: MzituZiPaiFragment.java View source code
/**
 * Downloads {@code url} and builds one {@code Girl} from the first image in each {@code li} of
 * the first {@code div.postlist}.
 *
 * @param url the listing page to scrape
 * @return the entries found; empty when the download fails or the page has no post list
 */
@Override
public List<Girl> call(String url) {
    List<Girl> girls = new ArrayList<>();
    try {
        Document doc = Jsoup.connect(url).timeout(10000).get();
        Element postList = doc.select("div.postlist").first();
        if (postList == null) {
            // Layout changed or an error page was served: return nothing rather than throw.
            return girls;
        }
        for (Element item : postList.select("li")) {
            Element image = item.select("img").first();
            if (image == null) {
                // Items without an image are skipped; they used to cause a NullPointerException.
                continue;
            }
            girls.add(new Girl(image.attr("src")));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return girls;
}
Example 65
Project: FudanBBS-master  File: ListLinks.java View source code
/**
 * Fetches the URL given as the single command-line argument and prints its media elements,
 * {@code link} imports and anchors, with URLs resolved to absolute form.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    final String target = args[0];
    print("Fetching %s...", target);
    final Document page = Jsoup.connect(target).get();
    final Elements anchors = page.select("a[href]");
    final Elements sourced = page.select("[src]");
    final Elements linkTags = page.select("link[href]");

    print("\nMedia: (%d)", sourced.size());
    for (Element node : sourced) {
        if (!node.tagName().equals("img")) {
            print(" * %s: <%s>", node.tagName(), node.attr("abs:src"));
        } else {
            // Images additionally report their dimensions and a shortened alt text.
            print(" * %s: <%s> %sx%s (%s)", node.tagName(), node.attr("abs:src"), node.attr("width"), node.attr("height"), trim(node.attr("alt"), 20));
        }
    }

    print("\nImports: (%d)", linkTags.size());
    for (Element imported : linkTags) {
        print(" * %s <%s> (%s)", imported.tagName(), imported.attr("abs:href"), imported.attr("rel"));
    }

    print("\nLinks: (%d)", anchors.size());
    for (Element anchor : anchors) {
        print(" * a: <%s>  (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
    }
}
Example 66
Project: Gazetti_Newspaper_Reader-master  File: toi.java View source code
/**
 * Downloads the article at {@code mArticleURL} and extracts its title, header image URL and
 * body text, using the selectors supplied by {@code ConfigService}.
 * <p>
 * The extracted values are also stored in {@code titleText}, {@code mImageURL} and
 * {@code bodyText}. Line breaks in the body are carried through text extraction by temporarily
 * replacing each {@code <br />} with {@code $$$}.
 *
 * @return a three-element array {@code {title, imageUrl, body}}, or {@code null} when the
 *         request fails, the status is not 200, or extraction throws
 */
public String[] getToiArticleContent() {
    Document doc;
    String[] result = new String[3];
    String url = mArticleURL;
    try {
        Connection connection = Jsoup.connect(url).userAgent("Mozilla").timeout(10 * 1000);
        Response response = connection.execute();
        if (response == null) {
            Crashlytics.log("Is response null ? " + (null == response));
            return null;
        } else if (response.statusCode() != 200) {
            Crashlytics.log("Received response - " + response.statusCode() + " -- " + response.statusMessage());
            Crashlytics.log("Received response - " + response.body());
            return null;
        }
        // Parse the response already in hand; calling connection.get() here issued a second,
        // redundant HTTP request for the same page.
        doc = response.parse();
        // get Title
        String ToiTitleXPath = ConfigService.getInstance().getTOIHead();
        titleText = doc.select(ToiTitleXPath).text();
        // get HeaderImageUrl
        mImageURL = getImageURL(doc);
        String ToiArticleXPath = ConfigService.getInstance().getTOIBody();
        Element bodyArticleElements = doc.select(ToiArticleXPath).first();
        String temp = bodyArticleElements.html().replace("<br />", "$$$");
        Document bodyNewLine = Jsoup.parse(temp);
        bodyText = bodyNewLine.text().replace("$$$", "\n");
        result[0] = titleText;
        result[1] = mImageURL;
        result[2] = bodyText;
    } catch (IOException e) {
        Crashlytics.logException(e);
        return null;
    } catch (NullPointerException npe) {
        // Reached, for example, when the body selector matches nothing.
        bodyText = null;
        Crashlytics.logException(npe);
        return null;
    } catch (Exception e) {
        Crashlytics.logException(e);
        return null;
    }
    return result;
}
Example 67
Project: gvoa-master  File: ItemHtmlParser.java View source code
/**
 * Downloads the page behind {@code item.getLink()} and fills in the item's MP3 URL and full
 * text, then marks it as parsed.
 * <p>
 * The full text is the inner HTML of the element with id {@code content}, after its first
 * {@code div.contentImage} has been removed when present.
 *
 * @param item the feed item to enrich
 * @throws Exception if downloading or parsing fails
 */
public static void parseItemDetail(RssItem item) throws Exception {
    Document page = Jsoup.parse(NetworkUtil.httpGetContent(item.getLink()));

    Element audioLink = page.select("a[id=mp3]").first();
    if (audioLink == null) {
        Log.i(tag, "can't get mp3");
    } else {
        Log.i(tag, audioLink.attr("href"));
        item.setMp3url(audioLink.attr("href"));
    }

    Element body = page.getElementById("content");
    Element embeddedImage = body.select("div.contentImage").first();
    if (embeddedImage != null) {
        Log.i(tag, "remove image element from content");
        embeddedImage.remove();
    }
    String bodyHtml = body.html();
    Log.i(tag, bodyHtml);
    item.setFullText(bodyHtml);

    // The lyrics link is only logged; nothing is stored on the item for it.
    Element lyricsLink = body.select("a[id=lrc]").first();
    if (lyricsLink != null) {
        Log.i(tag, lyricsLink.attr("href"));
    }
    item.setStatus(RssItem.E_PARSE_TXT_OK);
}
Example 68
Project: HabReader-master  File: PostShowLoader.java View source code
/**
 * Downloads the post at {@code url} and copies its title, hubs, content, date and author into a
 * {@code PostsFullData}.
 *
 * @return the populated data; only the 404 message as content when the page has no
 *         {@code span.post_title}; an untouched instance when the download fails
 */
@Override
public PostsFullData loadInBackground() {
    PostsFullData post = new PostsFullData();
    try {
        Document page = Jsoup.connect(url).get();
        Element titleElement = page.select("span.post_title").first();
        Element hubsElement = page.select("div.hubs").first();
        Element contentElement = page.select("div.content").first();
        Element dateElement = page.select("div.published").first();
        Element authorElement = page.select("div.author > a").first();
        if (titleElement == null) {
            post.setContent(context.getString(R.string.error_404));
        } else {
            post.setUrl(url);
            post.setTitle(titleElement.text());
            post.setHubs(hubsElement.text());
            post.setContent(contentElement.html());
            post.setDate(dateElement.text());
            post.setAuthor(authorElement.text());
        }
    } catch (IOException ignored) {
        // Download failures are not reported; the caller receives the empty instance.
    }
    return post;
}
Example 69
Project: HackerNews-master  File: ConnectionManager.java View source code
/**
 * Creates a connection to {@code BASE_URL + baseUrlExtension} without any user cookie, using
 * the shared timeout and user agent. A gzip {@code Accept-Encoding} header is added when the
 * user preference for compressed data is enabled.
 *
 * @param baseUrlExtension the path appended to the base URL
 * @return the configured connection (no request has been sent yet)
 */
public static Connection anonConnect(String baseUrlExtension) {
    final String target = ConnectionManager.BASE_URL + baseUrlExtension;
    final Connection connection = Jsoup.connect(target)
            .timeout(TIMEOUT_MILLIS)
            .userAgent(ConnectionManager.USER_AGENT);
    final UserPrefs userPrefs = new UserPrefs(MainApplication.getInstance().getApplicationContext());
    final boolean gzipEnabled = userPrefs.getCompressData();
    Crashlytics.setBool("ConnectionManager :: GZip Responses", gzipEnabled);
    if (gzipEnabled) {
        connection.header("Accept-Encoding", "gzip");
    }
    return connection;
}
Example 70
Project: HappyResearch-master  File: MTimeCrawler.java View source code
/**
 * Downloads every URL listed in {@code mtime.txt} and saves each page as
 * {@code <dir>/<name>/<name>.html}, where {@code name} is the filtered text of the page's
 * {@code span[property=v:itemreviewed]}.
 *
 * @throws Exception if reading the list, downloading a page, or writing a file fails
 */
public void crawl_web_pages() throws Exception {
    final List<String> pageUrls = FileIO.readAsList("./src/main/resources/mtime.txt");
    for (String pageUrl : pageUrls) {
        final String pageHtml = URLReader.read(pageUrl);
        final String rawName = Jsoup.parse(pageHtml).select("span[property=v:itemreviewed]").text();
        final String safeName = Strings.filterWebString(rawName, '_');
        final String targetDir = dir + safeName + "/";
        FileIO.makeDirectory(targetDir);
        FileIO.writeString(targetDir + safeName + ".html", pageHtml);
    }
}
Example 71
Project: jabref-master  File: ACS.java View source code
/**
 * Looks up a full-text PDF URL for a BibTeX entry via its DOI.
 * <p>
 * The abstract page built from {@code SOURCE} and the DOI is fetched; when it contains a
 * {@code .pdf-high-res a} link, the PDF URL is derived by replacing {@code /abs/} with
 * {@code /pdf/} in the abstract URL.
 *
 * @param entry the BibTeX entry
 * @return the PDF URL, or an empty Optional when the entry has no parsable DOI or the page
 *         offers no PDF link
 * @throws NullPointerException if {@code entry} is {@code null}
 * @throws java.io.IOException if the page cannot be fetched
 */
@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException {
    Objects.requireNonNull(entry);
    final Optional<DOI> doi = entry.getField(FieldName.DOI).flatMap(DOI::parse);
    if (!doi.isPresent()) {
        return Optional.empty();
    }
    final String abstractUrl = String.format(SOURCE, doi.get().getDOI());
    // HTTP error statuses are tolerated: the response body is parsed regardless.
    final Document abstractPage = Jsoup.connect(abstractUrl).ignoreHttpErrors(true).get();
    if (abstractPage.select(".pdf-high-res a").first() == null) {
        return Optional.empty();
    }
    LOGGER.info("Fulltext PDF found @ ACS.");
    return Optional.of(new URL(abstractUrl.replaceFirst("/abs/", "/pdf/")));
}
Example 72
Project: jacorb-master  File: Client.java View source code
/**
 * Reads the server IOR from the PrintIOR servlet, connects through JacORB, and round-trips a
 * string through the remote {@code GoodDay} object.
 *
 * @param args optional; the first element is the string to record, otherwise a random UUID is used
 * @throws Exception if the servlet or the remote object cannot be reached
 */
public static void main(String args[]) throws Exception {
    final String updateString = args.length >= 1 ? args[0] : UUID.randomUUID().toString();

    // Grab the IOR from the servlet.
    final Document iorPage = Jsoup.connect("http://localhost:8080/jacorb-appserver/PrintIOR").get();
    final String ior = iorPage.select("h1").first().text();
    System.out.println("Retrieved ior " + ior);

    final Properties orbProps = new Properties();
    orbProps.setProperty("org.omg.CORBA.ORBClass", "org.jacorb.orb.ORB");
    orbProps.setProperty("org.omg.CORBA.ORBSingletonClass", "org.jacorb.orb.ORBSingleton");
    orbProps.setProperty("jacorb.interop.null_string_encoding", "true");
    final ORB orb = ORB.init(args, orbProps);
    final GoodDay goodDay = GoodDayHelper.narrow(orb.string_to_object(ior));

    // Invoke remote server
    System.out.println("Retrieved initial string " + goodDay.get_string());
    goodDay.record_string(updateString);
    System.out.println("Retrieved string " + goodDay.get_string());
}
Example 73
Project: java-manga-reader-master  File: MangaUtil.java View source code
/**
 * Retrieves the titles on Anime News Network's licensed list (requested with
 * {@code licensed=1}, sorted by title).
 * <p>
 * Each title is normalised: a leading {@code (The)} becomes {@code The}, and everything from
 * the last {@code (} onward is dropped.
 *
 * @return the normalised titles, one per {@code HOVERLINE} element on the page
 * @throws IOException if the request cannot be completed
 */
public static List<String> getLicensedManga() throws IOException {
    final String listUrl = "http://www.animenewsnetwork.com/encyclopedia/anime-list.php"
            + "?licensed=1"
            + "&sort=title"
            + "&showG=1";
    // maxBodySize(0) lifts jsoup's response size cap.
    final Elements entries = Jsoup.connect(listUrl).maxBodySize(0).get().getElementsByClass("HOVERLINE");
    final List<String> titles = new ArrayList<String>(entries.size());
    for (Element entry : entries) {
        String title = entry.text();
        if (title.startsWith("(The)")) {
            title = title.replace("(The)", "The");
        }
        if (title.contains("(")) {
            final int suffixStart = title.lastIndexOf('(');
            title = title.substring(0, suffixStart).trim();
        }
        titles.add(title);
    }
    return titles;
}
Example 74
Project: JCommons-master  File: DownloaderTest.java View source code
/**
 * Downloads the documents linked from elements containing "[PDF]" or "[arXiv]" on a Stack
 * Exchange meta question into {@code D:/dl}, naming each file after the first {@code strong}
 * element of the link's parent.
 * <p>
 * Elements without an {@code href}, titles containing "Fit or", and links whose stream cannot be
 * opened are skipped.
 *
 * @throws IOException if the index page cannot be fetched or a file cannot be written
 */
public static void main(String[] args) throws IOException {
    Document doc = Jsoup.connect("http://meta.stackexchange.com/questions/134495/academic-papers-using-stack-exchange-data").get();
    Elements eles = doc.getElementsContainingText("[PDF]");
    eles.addAll(doc.getElementsContainingText("[arXiv]"));
    String folderName = "D:/dl";
    // Reused copy buffer: avoids reading the network stream one byte at a time.
    byte[] buffer = new byte[8192];
    for (Element ele : eles) {
        String src = ele.attr("href");
        if (src == null || src.trim().equals(""))
            continue;
        URL url = new URL(src);
        Element parent = ele.parent();
        Elements eles1 = parent.getElementsByTag("strong");
        Element nameEle = eles1.get(0);
        String fileName = nameEle.text().replace(":", " ").replace("\"", "").replace("'", "").replace("?", "");
        if (fileName.contains("Fit or"))
            continue;
        if (!fileName.endsWith("."))
            fileName = fileName.concat(".");
        fileName = fileName.concat("pdf");
        System.out.println(fileName);
        InputStream in;
        try {
            in = url.openStream();
        } catch (Exception e) {
            // Unreachable link: skip this document and continue with the next one.
            continue;
        }
        // try-with-resources closes both streams even when the copy fails part-way.
        try (InputStream source = in;
             OutputStream out = new BufferedOutputStream(new FileOutputStream(folderName + "/" + fileName))) {
            for (int read; (read = source.read(buffer)) != -1; ) {
                out.write(buffer, 0, read);
            }
        }
    }
}
Example 75
Project: jeboorker-master  File: ComicsOrgDownloader.java View source code
/**
 * Parses each downloaded page as UTF-8 HTML and wraps it in a {@code ComicsOrgDownloadEntry}.
 *
 * @param metadataHtmlContent raw page bytes; {@code null} elements are skipped
 * @return one entry per non-null page, in input order
 * @throws IOException if a page cannot be parsed
 */
private List<MetadataDownloadEntry> getMetadataDownloadEntries(List<byte[]> metadataHtmlContent) throws IOException {
    final List<MetadataDownloadEntry> entries = new ArrayList<>(metadataHtmlContent.size());
    for (byte[] pageBytes : metadataHtmlContent) {
        if (pageBytes == null) {
            continue;
        }
        final Document page = Jsoup.parse(new ByteArrayInputStream(pageBytes), StringUtil.UTF_8, MAIN_URL);
        entries.add(new ComicsOrgDownloadEntry(page, MAIN_URL));
    }
    return entries;
}
Example 76
Project: JianShuApp-master  File: DataPool.java View source code
/**
 * Fetches {@code url} through the session and extracts the page's items.
 * <p>
 * The session is re-validated when the result is not a string or when the page is a login page.
 *
 * @param url the page to load
 * @return the items parsed from the page, or {@code null} when the fetch did not return a string
 * @throws LoginRequiredException if the session is in the logged-out state after re-validation
 */
private Object[] load(String url) throws IOException, LoginRequiredException {
    final Object fetched = JianshuSession.getsInstance().getSync(url, true);
    if (!(fetched instanceof String)) {
        JianshuSession.getsInstance().validate();
        if (JianshuSession.getsInstance().getState() instanceof JianshuSession.LogoutState) {
            throw new LoginRequiredException();
        }
        return null;
    }
    final Document page = Jsoup.parse((String) fetched);
    final boolean isLoginPage = page.select("div.login-page").size() > 0;
    if (isLoginPage) {
        JianshuSession.getsInstance().validate();
        if (JianshuSession.getsInstance().getState() instanceof JianshuSession.LogoutState) {
            throw new LoginRequiredException();
        }
    }
    parsePageUserInfo(page);
    return this.getItems(page);
}
Example 77
Project: JKuuza-master  File: ContentAnalyzerTest.java View source code
/**
 * Checks that {@code docHasClass("foo")} is true for a document containing an element with
 * class {@code foo} and false for one that only has class {@code bar}.
 */
@Test
public void testDocHasClass() {
    System.out.println("classExists");
    final String withFoo = "<div><span class=\"foo\"></span></div>";
    assertTrue("expected: true - " + withFoo, new ContentAnalyzer(Jsoup.parse(withFoo)).docHasClass("foo"));
    final String withBar = "<div><span class=\"bar\"></span></div>";
    assertFalse("expected: false - " + withBar, new ContentAnalyzer(Jsoup.parse(withBar)).docHasClass("foo"));
}
Example 78
Project: jmeter-master  File: RenderAsHTMLFormatted.java View source code
/**
 * Shows {@code response} in the results pane as plain text, re-serialised by jsoup.
 *
 * @param response the raw response; {@code null} is shown as an empty string
 */
private void showHTMLFormattedResponse(String response) {
    results.setContentType("text/plain"); // $NON-NLS-1$
    final String formatted;
    if (response == null) {
        formatted = ""; // $NON-NLS-1$
    } else {
        formatted = Jsoup.parse(response).html();
    }
    results.setText(formatted);
    results.setCaretPosition(0);
    resultsScrollPane.setViewportView(results);
    // Bug 55111 - Refresh JEditor pane size depending on the presence or absence of scrollbars
    resultsScrollPane.setPreferredSize(resultsScrollPane.getMinimumSize());
    results.revalidate();
}
Example 79
Project: jsoup-master  File: ListLinks.java View source code
/**
 * Lists the media, imports and links of the page whose URL is given on the command line.
 * URLs are printed in absolute form via jsoup's {@code abs:} attribute prefix.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    final String pageUrl = args[0];
    print("Fetching %s...", pageUrl);

    final Document fetched = Jsoup.connect(pageUrl).get();
    final Elements hyperlinks = fetched.select("a[href]");
    final Elements withSrc = fetched.select("[src]");
    final Elements linkElements = fetched.select("link[href]");

    print("\nMedia: (%d)", withSrc.size());
    for (Element item : withSrc) {
        final boolean isImage = item.tagName().equals("img");
        if (isImage) {
            print(" * %s: <%s> %sx%s (%s)", item.tagName(), item.attr("abs:src"), item.attr("width"), item.attr("height"), trim(item.attr("alt"), 20));
        } else {
            print(" * %s: <%s>", item.tagName(), item.attr("abs:src"));
        }
    }

    print("\nImports: (%d)", linkElements.size());
    for (Element item : linkElements) {
        print(" * %s <%s> (%s)", item.tagName(), item.attr("abs:href"), item.attr("rel"));
    }

    print("\nLinks: (%d)", hyperlinks.size());
    for (Element item : hyperlinks) {
        print(" * a: <%s>  (%s)", item.attr("abs:href"), trim(item.text(), 35));
    }
}
Example 80
Project: ManalithBot-master  File: TranslatorPlugin.java View source code
/**
 * Translates {@code message} into the language {@code to} through the Bing translator endpoint,
 * authenticating with HTTP Basic credentials built from {@code clientSecret}.
 *
 * @param to      target language code
 * @param message text to translate
 * @return the translated text, or a fixed fallback message when the request fails
 */
@BotCommand("번역")
public String translate(@Option(name = "ko|en...", help = "번역할 대� 언어") String to, @Option(name = "메시지", help = "번역할 메시지") String message) {
    final String url = "https://api.datamarket.azure.com/Bing/MicrosoftTranslator/v1/Translate?Text='%s'&To='%s'";
    String login = "USER_ID_IGNORED:" + clientSecret;
    // Explicit charsets keep the header independent of the platform default encoding.
    String base64login = new String(Base64.encodeBase64(login.getBytes(java.nio.charset.StandardCharsets.UTF_8)), java.nio.charset.StandardCharsets.US_ASCII);
    try {
        // Percent-encode both values: raw quotes, ampersands or non-ASCII text in the message
        // would otherwise corrupt the query string. String.valueOf keeps the previous rendering
        // of a null argument ("null").
        String encodedMessage = java.net.URLEncoder.encode(String.valueOf(message), "UTF-8");
        String encodedTarget = java.net.URLEncoder.encode(String.valueOf(to), "UTF-8");
        Document doc = Jsoup.connect(String.format(url, encodedMessage, encodedTarget)).header("Authorization", "Basic " + base64login).ignoreContentType(true).get();
        // The placeholder was missing, so the document was never written to the log.
        logger.debug("response: {}", doc);
        Elements elem = doc.select("d|text[m:type=Edm.String]");
        return elem.text();
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
    }
    return "번역할 내용� 없습니다.";
}
Example 81
Project: mayocat-shop-master  File: DefaultPdfTemplateRenderer.java View source code
@Override
public void generatePDF(OutputStream outputStream, Path template, Path renderingRoot, Map<String, Object> context) throws PdfRenderingException {
    ITextRenderer renderer = new ITextRenderer();
    try {
        String html = templateRenderer.renderAsString(template, context);
        // Ensure we have a valid XHTML document using JSoup
        Document jsoupDoc = Jsoup.parse(html);
        jsoupDoc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
        jsoupDoc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
        jsoupDoc.outputSettings().charset("UTF-8");
        String path = renderingRoot.toAbsolutePath().toUri().toString();
        renderer.setDocumentFromString(jsoupDoc.toString(), path);
        renderer.layout();
        renderer.createPDF(outputStream);
    } catch (DocumentExceptionTemplateRenderingException |  e) {
        throw new PdfRenderingException(e);
    }
}
Example 82
Project: medium-textview-master  File: JsoupUtils.java View source code
/**
 * Collects the {@code abs:src} value of every {@code iframe} element found in
 * the given HTML.
 *
 * @param content HTML markup to scan
 * @return the collected values in document order; empty when no iframe has a {@code src}
 */
public static List<String> findAllVideoLinks(String content) {
    final List<String> iframeSources = new ArrayList<>();
    for (Element candidate : Jsoup.parse(content).select("[src]")) {
        // Only iframes are of interest; other elements with a src are skipped.
        if (!candidate.tagName().equals("iframe")) {
            continue;
        }
        iframeSources.add(candidate.attr("abs:src"));
    }
    return iframeSources;
}
Example 83
Project: mensaapp-master  File: WeeklyMenuTask.java View source code
/**
 * Downloads and parses the weekly menu behind each URL, then merges the
 * results into one menu.
 *
 * @param urls pages to download
 * @return a pair holding the merged menu and {@code null} on success, or
 *         {@code null} and the first exception encountered on failure
 */
@Override
protected Pair<WeeklyMenu, Exception> doInBackground(String... urls) {
    List<WeeklyMenu> fetched = new ArrayList<WeeklyMenu>();
    for (int index = 0; index < urls.length; index++) {
        String address = urls[index];
        try {
            Document page = Jsoup.connect(address).get();
            fetched.add(WeeklyMenuParser.create(context, page, mensa).parse());
        } catch (WeeklyMenuParseException parseFailure) {
            // Parse problems are logged as warnings; processing stops at the first one.
            String message = String.format(context.getString(R.string.error_menu_parse), address);
            Log.w(TAG, message, parseFailure);
            return new Pair<WeeklyMenu, Exception>(null, parseFailure);
        } catch (Exception failure) {
            // Anything else is logged as a download error.
            String message = String.format(context.getString(R.string.error_menu_download), address);
            Log.e(TAG, message, failure);
            return new Pair<WeeklyMenu, Exception>(null, failure);
        }
    }
    WeeklyMenu merged = WeeklyMenu.merge(mensa, Utils.now(), fetched);
    return new Pair<WeeklyMenu, Exception>(merged, null);
}
Example 84
Project: meta-server-master  File: ServerHtmlContentTest.java View source code
/**
 * Fetches the server list page and checks that the first table row shows the
 * name, owner, port and address of {@code firstEntry}.
 *
 * @throws IOException if the page cannot be fetched
 */
@Test
public void testShowHtml() throws IOException {
    Document page = Jsoup.connect(URL_BASE + "/servers/show").get();

    Element serverTable = page.getElementById("server-list");
    Assert.assertTrue(serverTable.nodeName().equals("table"));

    // First row of the table body.
    Element body = serverTable.select("tbody").first();
    Element row = body.select("tr").first();

    String shownName = row.getElementsByClass("server-name").first().text();
    Assert.assertEquals(firstEntry.getName(), shownName);

    String shownOwner = row.getElementsByClass("server-owner").first().text();
    Assert.assertEquals(firstEntry.getOwner(), shownOwner);

    String shownPort = row.getElementsByClass("server-port").first().text();
    Assert.assertEquals("" + firstEntry.getPort(), shownPort);

    String shownAddress = row.getElementsByClass("server-address").first().text();
    Assert.assertEquals(firstEntry.getAddress(), shownAddress);
}
Example 85
Project: myrobotlab-master  File: JSoupExtractor.java View source code
/**
 * Parses every value of {@code htmlField} as HTML, selects the elements
 * matching {@code jSoupSelector} and adds each matched element to
 * {@code outputField} of the same document.
 *
 * @param doc document to read from and enrich in place
 * @return always {@code null}
 */
@Override
public List<Document> processDocument(Document doc) {
    for (Object rawHtml : doc.getField(htmlField)) {
        org.jsoup.nodes.Document parsed = Jsoup.parse(rawHtml.toString());
        for (Element match : parsed.select(jSoupSelector)) {
            // The Element object itself is stored, not its text.
            doc.addToField(outputField, match);
        }
    }
    return null;
}
Example 86
Project: NewsStats-master  File: NewYorkTimesContentHandler.java View source code
/**
 * Extracts an article from a crawled page and appends it to {@code articles}.
 *
 * <p>Nothing is added when the page has no HTML parse data, has no element
 * with id {@code story}, or when {@code filterArticles} rejects the content.
 * A missing heading or byline yields an empty title or author, and a missing
 * or unparsable dateline yields a {@code null} date, so an incomplete page no
 * longer aborts with a {@code NullPointerException}.
 *
 * @param page crawled page
 * @return the {@code articles} list, with the new article appended when one was found
 */
@Override
public List extractArticles(Page page) {
    if (!(page.getParseData() instanceof HtmlParseData)) {
        return articles;
    }
    System.out.println("Current URL: " + page.getWebURL());
    HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();
    Document doc = Jsoup.parseBodyFragment(htmlParseData.getHtml());
    Element articleElement = doc.getElementById("story");
    if (articleElement == null) {
        // if no article can be found
        return articles;
    }

    // A missing element is treated like an element without text or attribute.
    Element heading = articleElement.getElementById("story-heading");
    String title = heading == null ? "" : heading.ownText();

    Element dateline = articleElement.getElementsByClass("dateline").first();
    String dateString = dateline == null ? "" : dateline.attr("datetime");
    Date date = null;
    try {
        date = new SimpleDateFormat("yyyy-MM-dd").parse(dateString);
    } catch (ParseException e) {
        // The article is kept without a creation date.
        e.printStackTrace();
    }

    Element byline = articleElement.getElementsByClass("byline-author").first();
    String author = byline == null ? "" : byline.ownText();

    // StringBuilder avoids re-copying the accumulated text for every paragraph.
    StringBuilder contentBuilder = new StringBuilder();
    for (Element contentElement : articleElement.select("p.story-body-text.story-content")) {
        contentBuilder.append(contentElement.ownText());
    }
    String content = contentBuilder.toString();
    if (!filterArticles(content)) {
        // ignore the article if filter does not approve
        return articles;
    }

    Article article = new NewYorkTimesArticle();
    article.setTitle(title);
    article.setCreatedDate(date);
    article.setAuthor(author);
    article.setContent(content);
    articles.add(article);
    return articles;
}
Example 87
Project: pack-master  File: Ch5Coz4.java View source code
// What happens if the default xmlParser is used? Try it and see.
public static void normalXmlParse() {
    String xml = CrawlerPack.jsonToXml(CrawlerPack.getFromRemote(url));

    // Result of converting the original JSON to XML
    System.out.println("原始XML");
    System.out.println(xml);

    Document parsed = Jsoup.parse(xml, "", Parser.xmlParser());
    parsed.charset(StandardCharsets.UTF_8);

    // What happened?
    System.out.println("轉�後XML");
    System.out.println(parsed.toString());
}
Example 88
Project: qiushi_baike-master  File: KJFMUtils.java View source code
/**
 * Downloads one listing page and converts each {@code xs-entry} element inside
 * the {@code xs-main} element into a {@link KJFMItem}.
 *
 * @param pageNo page number appended to the base URL
 * @return the parsed items, or {@code null} when the page has no
 *         {@code xs-main} element or no entries
 * @throws IOException if the page cannot be downloaded
 */
public static LinkedList<KJFMItem> handleKJFMItems(String pageNo) throws IOException {
    String url = KE_JI_FENG_MANG_URL + pageNo + "/";
    System.out.println("url = " + url);
    Document document = Jsoup.connect(url).timeout(20000).get();

    Element divTag = document.getElementById("xs-main");
    if (divTag == null) {
        return null;
    }
    Elements entryTags = divTag.getElementsByClass("xs-entry");
    if (entryTags == null || entryTags.isEmpty()) {
        return null;
    }

    LinkedList<KJFMItem> items = new LinkedList<KJFMItem>();
    for (Element e : entryTags) {
        KJFMItem item = new KJFMItem();

        Elements aTags = e.getElementsByTag("a");
        if (aTags != null && !aTags.isEmpty()) {
            String url1 = aTags.get(0).attr("href");
            System.out.println("url1 = " + url1);
            if (url1 != null && url1.startsWith("www")) {
                // The scheme belongs in front of the host; it used to be appended.
                url1 = "http://" + url1;
            }
            // NOTE(review): the link is stored in title and overwritten below
            // whenever the entry has an image; a dedicated link field may have
            // been intended.
            item.title = url1;
        }

        Elements imgTags = e.getElementsByTag("img");
        if (imgTags != null && !imgTags.isEmpty()) {
            Element firstImage = imgTags.get(0);
            item.img = firstImage.attr("src");
            item.title = firstImage.attr("alt");
        }

        Elements pTags = e.getElementsByTag("p");
        if (pTags != null && !pTags.isEmpty()) {
            item.content = pTags.get(0).text();
        }

        items.add(item);
    }
    return items;
}
Example 89
Project: quadriga-master  File: HTMLContentValidator.java View source code
/**
 * Validates the title and description of an about text or a project blog
 * entry. Both fields must be non-empty, the description may only contain HTML
 * permitted by the extended whitelist, and the title may only contain HTML
 * permitted by the simple-text whitelist.
 *
 * @param obj object to validate; title and description are read from it when
 *            it is an {@code IAboutText} or a {@code ProjectBlogEntry}
 * @param err collector for validation errors
 */
@Override
public void validate(Object obj, Errors err) {
    String description = null;
    String title = null;
    if (obj instanceof IAboutText) {
        IAboutText abtText = (IAboutText) obj;
        description = abtText.getDescription();
        title = abtText.getTitle();
    } else if (obj instanceof ProjectBlogEntry) {
        ProjectBlogEntry projectBlog = (ProjectBlogEntry) obj;
        description = projectBlog.getDescription();
        title = projectBlog.getTitle();
    }
    Whitelist whitelist = ExtendedWhitelist.extendedWhiteListWithBase64();
    Whitelist titleWhitelist = Whitelist.simpleText();
    // validate all the input parameters
    ValidationUtils.rejectIfEmptyOrWhitespace(err, "title", "about_title.required");
    ValidationUtils.rejectIfEmptyOrWhitespace(err, "description", "about_description.required");
    // Skip the HTML check for missing values: the "required" checks above cover
    // them, and jsoup rejects a null input with an exception.
    if (description != null && !Jsoup.isValid(description, whitelist)) {
        err.rejectValue("description", "about_description.proper");
    }
    if (title != null && !Jsoup.isValid(title, titleWhitelist)) {
        err.rejectValue("title", "about_title.proper");
    }
}
Example 90
Project: serengeti-ws-master  File: HostInstallDetailsParser.java View source code
/**
 * Extracts the status message and progress percentage from an HTML page.
 *
 * <p>The message is the text of {@code message} elements inside {@code wrap}
 * elements. The percentage is taken from the second whitespace-separated token
 * of the {@code style} attribute of {@code bar} elements inside
 * {@code progress} elements. When several elements match, the last one wins.
 *
 * @param pageContent HTML of the page
 * @return the populated parse result
 */
@Override
public ParseResult parse(String pageContent) {
    Document page = Jsoup.parse(pageContent);
    ParseResult parsed = new ParseResult();

    for (Element wrapper : page.getElementsByClass("wrap")) {
        for (Element message : wrapper.getElementsByClass("message")) {
            parsed.setMessage(message.text());
        }
    }

    for (Element progress : page.getElementsByClass("progress")) {
        for (Element bar : progress.getElementsByClass("bar")) {
            if (!bar.hasAttr("style")) {
                continue;
            }
            String[] styleTokens = bar.attr("style").split("\\s+");
            String percentText = styleTokens[1].replace("%", "");
            parsed.setPercent(Integer.valueOf(percentText));
        }
    }
    return parsed;
}
Example 91
Project: slack-capybara-master  File: nomurishHandlers.java View source code
/**
 * Converts the given word to "Nomurish" and returns the result.
 *
 * @param word word to convert
 * @return text of the elements named {@code after1} in the response page
 * @throws URISyntaxException if the service URI cannot be built
 * @throws IOException if the request fails
 */
private static String nomurish(final String word) throws URISyntaxException, IOException {
    final URIBuilder endpoint = new URIBuilder()
            .setScheme(BASE_SCHEME)
            .setHost(BASE_HOST)
            .setPath(BASE_PATH);
    final String url = endpoint.build().toString();

    // Submit the conversion form.
    final Document document = Jsoup.connect(url)
            .data("before", word)
            .data("level", "1")
            .data("option", "nochk")
            .data("transbtn", "翻訳")
            .post();

    final Elements converted = document.select("[name=after1]");
    return converted.text();
}
Example 92
Project: SpiderJackson-master  File: YouDaiLiPage1.java View source code
/**
 * Collects every link inside {@code div.chunlist} elements of the response
 * body and queues it for {@code YouDaiLiPage2}, one priority step above the
 * current URL.
 *
 * @return always {@code true}
 */
@Override
public boolean responseHandle(Proxy ip, ProxyController proxyController, Url url, UrlService urlService, ContextSrc contextSrc, HttpRequestBase request, CloseableHttpResponse response, String content) {
    ArrayList<Url> discovered = new ArrayList<>();
    Elements listBlocks = Jsoup.parse(content).select("div.chunlist");
    for (Element listBlock : listBlocks) {
        for (Element anchor : listBlock.select("a[href]")) {
            Url next = Url.newHttpGetUrl(anchor.attr("href"), YouDaiLiPage2.class);
            next.setPriority(url.getPriority() + 1);
            discovered.add(next);
        }
    }
    urlService.insert(discovered);
    return true;
}
Example 93
Project: spimedb-master  File: HTML.java View source code
/**
 * Runs an HTML fragment through {@code cleaner} and returns the cleaned body
 * markup rendered with {@code outputSettings}.
 *
 * @param html HTML fragment to clean
 * @return the cleaned markup, or the unmodified input when cleaning fails
 */
public static String filterHTML(String html) {
    try {
        Document sanitized = cleaner.clean(Jsoup.parseBodyFragment(html));
        sanitized.outputSettings(outputSettings);
        return sanitized.body().html();
    } catch (Exception e) {
        // Best effort: log the failure and hand back the input untouched.
        logger.error("filterHTML {}: \"{}\"", e, html);
        return html;
    }
}
Example 94
Project: spring-documenter-master  File: Application.java View source code
/**
 * Lists every type under {@code org.springframework} annotated with
 * {@code @Documented}, searches Bing for each class name and writes up to two
 * result links per class to {@code annotations.csv}.
 *
 * <p>The output file is closed even when a lookup fails. The previous read of
 * {@code annotations-bkp.csv} was removed: its result was never used and its
 * reader was never closed.
 *
 * @param args ignored
 * @throws Exception if scanning, fetching or writing fails
 */
public static void main(String[] args) throws Exception {
    Reflections reflections = new Reflections("org.springframework");
    List<Class<?>> clazzes = new ArrayList<Class<?>>(reflections.getTypesAnnotatedWith(Documented.class));
    Collections.sort(clazzes, new Comparator<Class<?>>() {

        @Override
        public int compare(Class<?> o1, Class<?> o2) {
            return o1.getName().compareTo(o2.getName());
        }
    });
    System.out.println("##################################################");
    System.out.println("Total Annotations: " + clazzes.size());
    System.out.println("##################################################");
    try (FileWriter out = new FileWriter(new File("annotations.csv"))) {
        out.write("\"Name\",\"Class\",\"URL\"\n");
        for (Class<?> class1 : clazzes) {
            System.out.println(class1.getName());
            Document doc = Jsoup.connect("http://www.bing.com/search?q=" + URLEncoder.encode(class1.getName(), "UTF-8")).get();
            // Keep at most the first two search hits per class.
            int ctr = 1;
            for (Element elem : doc.select("h2 a")) {
                ctr++;
                System.out.println(elem.attr("href"));
                out.append("\"" + class1.getSimpleName() + "\",\"" + class1.getName() + "\",\"" + elem.attr("href") + "\"\n");
                if (ctr > 2) {
                    break;
                }
            }
        }
    }
    System.out.println("##################################################");
}
Example 95
Project: StatusParser-master  File: vkOld.java View source code
/**
 * Loads one chunk of a VK community wall and stores the text of each post in
 * the {@code statuses} array.
 *
 * @param id        community id; sent negated as {@code owner_id}
 * @param firstPost offset of the first post to load
 * @return the {@code statuses} array; the first eight slots are reset to
 *         {@code null} before loading, so they stay {@code null} when the
 *         download fails
 */
public String[] vkPart(int id, int firstPost) {
    for (int i = 0; i < 8; i++) {
        statuses[i] = null;
    }
    String partURL = "http://vk.com/al_wall.php?act=get_wall&al=1&fixed=&offset=" + firstPost + "&owner_id=-" + id + "&type=all";
    try {
        String partSource = getUrlSource(partURL);
        // NOTE(review): drops the first four characters of the response,
        // presumably a non-HTML prefix; confirm against the endpoint.
        partSource = partSource.substring(4);
        Document partDom = Jsoup.parse(partSource);
        int iter = 0;
        for (Element postText : partDom.select("div.wall_post_text")) {
            if (iter >= statuses.length) {
                // More posts than slots: stop instead of overrunning the array.
                break;
            }
            statuses[iter] = postText.text();
            // Print the slot just filled; printing after the increment showed
            // the next, still empty, slot.
            System.out.println(statuses[iter]);
            iter++;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return statuses;
}
Example 96
Project: storm-crawler-master  File: RefreshTagTest.java View source code
/**
 * Checks that the refresh target is extracted from a meta refresh tag, both
 * with and without a space after the semicolon in its content attribute.
 */
@Test
public void testExtractRefreshURL() throws MalformedURLException, IOException {
    final String withSpace = "<html><head><META http-equiv=\"refresh\" content=\"0; URL=http://www.example.com/\"></head><body>Lorem ipsum.</body></html>";
    final String withoutSpace = "<html><head><META http-equiv=\"refresh\" content=\"0;URL=http://www.example.com/\"></head><body>Lorem ipsum.</body></html>";
    final String expected = "http://www.example.com/";

    String[] samples = { withSpace, withoutSpace };
    for (int index = 0; index < samples.length; index++) {
        Document parsed = Jsoup.parseBodyFragment(samples[index]);
        DocumentFragment fragment = JSoupDOMBuilder.jsoup2HTML(parsed);
        Assert.assertEquals(expected, RefreshTag.extractRefreshURL(fragment));
    }
}
Example 97
Project: SuZhouTong-client-for-android-master  File: HtmlResolving.java View source code
/*
 * Parses news content.
 *
 * Every <span> becomes a text item. Every <p> with text becomes a text item.
 * A <p> without text but with an "align" attribute consumes the next element
 * from the list of elements having a src attribute (starting at index 1) and,
 * when that element is an <img>, becomes an image item holding its src.
 */
public ArrayList<NewsContentVo> getNewsContent(String news_detaiol) {
    ArrayList<NewsContentVo> result = new ArrayList<NewsContentVo>();
    Document page = Jsoup.parse(news_detaiol);

    for (Element span : page.getElementsByTag("span")) {
        NewsContentVo spanItem = new NewsContentVo();
        spanItem.setIsImg(0);
        spanItem.setContentList(span.text());
        result.add(spanItem);
    }

    Elements paragraphs = page.getElementsByTag("p");
    Elements sources = page.select("[src]");
    // Index 0 of the src-bearing elements is never used.
    int sourceIndex = 1;
    for (Element paragraph : paragraphs) {
        if (paragraph.hasText()) {
            NewsContentVo textItem = new NewsContentVo();
            textItem.setIsImg(0);
            textItem.setContentList(paragraph.text());
            result.add(textItem);
            continue;
        }
        boolean sourceAvailable = sources != null && sources.size() > sourceIndex;
        if (!paragraph.hasAttr("align") || !sourceAvailable) {
            continue;
        }
        // The source is consumed even when it turns out not to be an image.
        Element source = sources.get(sourceIndex++);
        if (source.tagName().equals("img")) {
            NewsContentVo imageItem = new NewsContentVo();
            imageItem.setIsImg(1);
            imageItem.setContentList(source.attr("src"));
            result.add(imageItem);
        }
    }
    return result;
}
Example 98
Project: symphony-master  File: XSSTestCase.java View source code
/**
 * Checks that an image tag whose src value breaks out of the attribute to add
 * an onerror handler is rejected by the basic-with-images whitelist, while a
 * plain image URL is accepted.
 */
@Test
public void xss() {
    final String maliciousSrc = "http://error\"  onerror=\"this.src='http://7u2fje.com1.z0.glb.clouddn.com/girl.jpg';this.removeAttribute('onerror');if(!window.a){console.log('Where am I ?');window.a=1}";
    final String maliciousTag = "<img src=\"" + maliciousSrc + "\"/>";
    assertFalse(Jsoup.isValid(maliciousTag, Whitelist.basicWithImages()));

    final String safeSrc = "http://7u2fje.com1.z0.glb.clouddn.com/girl.jpg";
    final String safeTag = "<img src=\"" + safeSrc + "\"/>";
    assertTrue(Jsoup.isValid(safeTag, Whitelist.basicWithImages()));
}
Example 99
Project: Tanaguru-master  File: JsoupFunc.java View source code
/**
 * Downloads and parses the AccessiWeb HTML5/ARIA reference list, with a
 * timeout value of 5000 passed to jsoup.
 *
 * @return the parsed document, or {@code null} when the download fails
 */
public Document getDocument() {
    Document fetched = null;
    try {
        URL source = new URL("http://www.accessiweb.org/index.php/accessiweb-html5aria-liste-deployee.html");
        fetched = Jsoup.parse(source, 5000);
    } catch (IOException ex) {
        Logger.getLogger(ExportDomToCsv.class.getName()).log(Level.SEVERE, null, ex);
    }
    return fetched;
}
Example 100
Project: tradeframework-master  File: HtmlSelectorMsgParser.java View source code
/**
 * Parses an HTML stream, selects the nodes matching {@code selector} and
 * passes a message for each one to the handler.
 *
 * @param input       HTML content
 * @param length      content length; not used by this parser
 * @param contentType Content-Type header value from which the charset is
 *                    taken; may be {@code null}, in which case no charset is
 *                    passed to jsoup
 * @param handler     receiver of the created messages
 * @return {@code false} as soon as the handler declines a message,
 *         {@code true} once all nodes have been processed
 * @throws IOException if the stream cannot be read
 */
public boolean parseContent(InputStream input, long length, String contentType, MsgHandler handler) throws IOException {
    // A response without a Content-Type header must not fail in matcher(null).
    String charsetName = null;
    if (contentType != null) {
        Matcher charset = charsetPattern.matcher(contentType);
        if (charset.find()) {
            charsetName = charset.group(1);
        }
    }
    Document doc = Jsoup.parse(input, charsetName, "");
    for (Element node : doc.select(selector)) {
        Msg msg = createMsg(node);
        if (msg != null && !handler.newMsg(msg)) {
            return false;
        }
    }
    return true;
}
Example 101
Project: validadorAcessibilidade-master  File: ListLinks.java View source code
/**
 * Downloads the page named by the single command-line argument and lists its
 * {@code src}-bearing elements, its {@code link[href]} imports and its
 * {@code a[href]} anchors.
 *
 * @param args command-line arguments; checked to hold exactly one URL
 * @throws IOException if the download fails
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    String url = args[0];
    print("Fetching %s...", url);

    Document fetched = Jsoup.connect(url).get();
    Elements hrefAnchors = fetched.select("a[href]");
    Elements srcElements = fetched.select("[src]");
    Elements hrefImports = fetched.select("link[href]");

    print("\nMedia: (%d)", srcElements.size());
    for (Element mediaElement : srcElements) {
        boolean isImage = mediaElement.tagName().equals("img");
        if (isImage) {
            // Images also show width x height and an alt text cut to 20 characters.
            print(" * %s: <%s> %sx%s (%s)", mediaElement.tagName(), mediaElement.attr("abs:src"), mediaElement.attr("width"), mediaElement.attr("height"), trim(mediaElement.attr("alt"), 20));
        } else {
            print(" * %s: <%s>", mediaElement.tagName(), mediaElement.attr("abs:src"));
        }
    }

    print("\nImports: (%d)", hrefImports.size());
    for (Element imported : hrefImports) {
        print(" * %s <%s> (%s)", imported.tagName(), imported.attr("abs:href"), imported.attr("rel"));
    }

    print("\nLinks: (%d)", hrefAnchors.size());
    for (Element hrefAnchor : hrefAnchors) {
        print(" * a: <%s>  (%s)", hrefAnchor.attr("abs:href"), trim(hrefAnchor.text(), 35));
    }
}