Java Examples for org.jsoup.nodes.Document

The following Java examples demonstrate how to use org.jsoup.nodes.Document. The code samples are taken from various open-source projects.

Example 1
Project: SlideshowFX-master  File: DOMUtils.java View source code
/**
 * Writes the given document to {@code file} as pretty-printed HTML.
 *
 * I/O failures are printed to standard error and are not propagated, so the
 * caller is not told when the file could not be written.
 *
 * @param document the jsoup document to serialize; pretty-printing is switched on
 *                 in its output settings as a side effect
 * @param file     the destination file
 */
public static void saveDocument(Document document, File file) {
    document.outputSettings().prettyPrint(true);
    try (final Writer output = new DefaultCharsetWriter(file)) {
        output.write(document.outerHtml());
    } catch (IOException e) {
        // FileNotFoundException is an IOException, so a single handler covers both.
        e.printStackTrace();
    }
}
Example 2
Project: lavender-master  File: RamdomImgParser.java View source code
/**
 * Extracts the image URL held by the {@code photo-detail-wrapper} div of a page.
 *
 * @param html the HTML markup to parse
 * @return the {@code src} attribute of the first {@code img} found inside a div
 *         whose id is {@code photo-detail-wrapper}, or {@code null} when no such
 *         div contains an image
 */
public static String parserImg(String html) {
    Document document = Jsoup.parse(html);
    for (Element div : document.select("div")) {
        if (!"photo-detail-wrapper".equals(div.attr("id"))) {
            continue;
        }
        // first() returns null when the wrapper holds no img element; guard it
        // instead of dereferencing blindly and failing with a NullPointerException.
        Element img = div.select("img").first();
        if (img != null) {
            return img.attr("src");
        }
    }
    return null;
}
Example 3
Project: sagan-master  File: ReferenceDocumentSearchEntryMapper.java View source code
/**
 * Builds a {@link ReferenceDoc} search entry from a parsed reference page.
 *
 * The entry's summary is the first 500 characters of the page text (or the whole
 * text when shorter). Project and version details come from the enclosing
 * mapper's {@code project} and {@code version} fields.
 *
 * @param document the parsed page
 * @return the populated search entry
 */
@Override
public ReferenceDoc map(Document document) {
    ReferenceDoc doc = new ReferenceDoc();

    // Content taken from the page itself.
    String rawText = document.text();
    int summaryEnd = Math.min(500, rawText.length());
    doc.setRawContent(rawText);
    doc.setSummary(rawText.substring(0, summaryEnd));
    doc.setTitle(document.title());

    // Metadata taken from the project/version this mapper was created for.
    String subTitle = String.format("%s (%s Reference)", project.getName(), version.getVersion());
    doc.setSubTitle(subTitle);
    doc.setPath(document.baseUri());
    doc.setCurrent(version.isCurrent());
    doc.setProjectId(project.getId());
    doc.setVersion(version.getVersion());

    String projectFacet = "Projects/" + project.getName();
    String versionFacet = "Projects/" + project.getName() + "/" + version.getVersion();
    doc.addFacetPaths("Projects", "Projects/Reference", projectFacet, versionFacet);
    return doc;
}
Example 4
Project: Android-Studio-Project-master  File: ContentParser.java View source code
/**
 * Builds a {@link Content} from the second PNG/JPEG image found in the markup.
 *
 * @param html the HTML markup to parse
 * @return a content object whose URL is the image's {@code src} and whose title
 *         is its {@code alt} text
 */
public static Content Parser(String html) {
    // Case-insensitive match on src values containing .png, .jpg or .jpeg.
    Elements images = Jsoup.parse(html).select("img[src~=(?i)\\.(png|jpe?g)]");
    Content result = new Content();
    // Index 1 is the second match, so this call requires at least two images.
    Element second = images.get(1);
    Element image = second.getElementsByTag("img").first();
    result.setUrl(image.attr("src"));
    result.setTitle(image.attr("alt"));
    return result;
}
Example 5
Project: coolreader-master  File: DownloadPageTask.java View source code
/**
 * Downloads and parses the page at the first supplied URL.
 *
 * @param arg0 the URLs passed to the task; only the first one is used
 * @return a result wrapping the parsed document, or wrapping the exception when
 *         anything in the download or parse fails
 */
@Override
protected AsyncTaskResult<Document> doInBackground(URL... arg0) {
    try {
        final String address = arg0[0].toString();
        Log.d("DownloadPageTask", "Downloading: " + address);
        // 7000 is passed straight to jsoup's connection timeout.
        Response response = Jsoup.connect(address).timeout(7000).execute();
        Log.d("DownloadPageTask", "Complete: " + address);
        Document page = response.parse();
        return new AsyncTaskResult<Document>(page);
    } catch (Exception failure) {
        return new AsyncTaskResult<Document>(failure);
    }
}
Example 6
Project: jinjava-master  File: GroupByFilterTest.java View source code
/**
 * Renders the group-by-attribute template with five sample persons and checks
 * that they are grouped into two root items: three males and two females.
 */
@Test
public void testGroupByAttr() throws Exception {
    String template = Resources.toString(Resources.getResource("filter/groupby-attr.jinja"), StandardCharsets.UTF_8);
    String rendered = jinjava.render(template, ImmutableMap.of("persons", (Object) Lists.newArrayList(
            new Person("male", "jared", "stehler"),
            new Person("male", "foo", "bar"),
            new Person("female", "sarah", "jones"),
            new Person("male", "jim", "jones"),
            new Person("female", "barb", "smith"))));
    Document dom = Jsoup.parseBodyFragment(rendered);

    assertThat(dom.select("ul.root > li")).hasSize(2);
    assertThat(dom.select("ul.root > li.male > ul > li")).hasSize(3);
    assertThat(dom.select("ul.root > li.female > ul > li")).hasSize(2);
}
Example 7
Project: jooby-master  File: Issue624d.java View source code
/**
 * Requests the saved URL and checks that the returned page's form posts to the
 * form-client auth endpoint, then checks that the auth request with credentials
 * yields "/saved-url".
 */
@Test
public void shouldForceARedirect() throws Exception {
    request().get("/saved-url").expect(body -> {
        String formAction = Jsoup.parse(body).select("form").attr("action");
        assertEquals("/auth?client_name=FormClient", formAction);
    });
    request().get("/auth?username=test&password=test").expect("/saved-url");
}
Example 8
Project: jsoup-master  File: W3CDom.java View source code
/**
 * Convert a jsoup Document to a W3C Document.
 *
 * @param in jsoup doc; must not be null
 * @return w3c doc
 * @throws IllegalStateException if the document builder cannot be configured
 */
public Document fromJsoup(org.jsoup.nodes.Document in) {
    Validate.notNull(in);
    try {
        // The factory is made namespace-aware before the builder is created from it.
        factory.setNamespaceAware(true);
        DocumentBuilder documentBuilder = factory.newDocumentBuilder();
        Document w3cDoc = documentBuilder.newDocument();
        convert(in, w3cDoc);
        return w3cDoc;
    } catch (ParserConfigurationException e) {
        throw new IllegalStateException(e);
    }
}
Example 9
Project: LNReader-Android-master  File: DownloadPageTask.java View source code
/**
 * Downloads and parses the page at the first supplied URL.
 *
 * @param arg0 the URLs passed to the task; only the first one is used
 * @return a result carrying the parsed document, or a result with a null value
 *         and the caught exception when the download or parse fails
 */
@Override
protected AsyncTaskResult<Document> doInBackground(URL... arg0) {
    try {
        final String address = arg0[0].toString();
        Log.d("DownloadPageTask", "Downloading: " + address);
        // 7000 is passed straight to jsoup's connection timeout.
        Response response = Jsoup.connect(address).timeout(7000).execute();
        Log.d("DownloadPageTask", "Complete: " + address);
        Document page = response.parse();
        return new AsyncTaskResult<Document>(page, Document.class);
    } catch (Exception failure) {
        return new AsyncTaskResult<Document>(null, Document.class, failure);
    }
}
Example 10
Project: moulder-j-master  File: TexterTest.java View source code
/**
 * Checks that a {@code Texter} replaces an element's text with the value it was
 * given, and that the value is read through {@code get()}.
 */
@Test
public void testRegularText() throws Exception {
    Value<String> textValue = mock(Value.class);
    when(textValue.get()).thenReturn("text");
    Texter texter = new Texter(textValue);

    Document parsed = Jsoup.parseBodyFragment("<html><body><outer>test</outer></body></html>");
    Element outer = parsed.getElementsByTag("outer").first();
    List<Node> processed = texter.process(outer);

    // The mocked value must have been read via get().
    InOrder order = inOrder(textValue);
    order.verify(textValue).get();

    StringReader expectedXml = new StringReader("<body><outer>text</outer></body>");
    assertXMLEqual(expectedXml, new StringReader(html(processed)));
}
Example 11
Project: muzima-android-master  File: HTMLConceptParser.java View source code
/**
 * Collects the distinct concept names referenced in an HTML form.
 *
 * @param html the HTML markup to parse
 * @return the concept names, de-duplicated, in no particular order
 */
public List<String> parse(String html) {
    // Every non-div element carrying the attribute named by DATA_CONCEPT_TAG.
    String selector = "*:not(div)[" + DATA_CONCEPT_TAG + "]";
    Set<String> uniqueNames = new HashSet<String>();
    for (Element tagged : Jsoup.parse(html).select(selector)) {
        String rawConcept = tagged.attr(DATA_CONCEPT_TAG);
        uniqueNames.add(getConceptName(rawConcept));
    }
    return new ArrayList<String>(uniqueNames);
}
Example 12
Project: NiceText-master  File: NTImpl.java View source code
/**
 * Fetches the page at {@code url} and runs the document-based extraction on it.
 *
 * @param url the address to download
 * @return the extraction result, or {@code null} when an I/O error occurs (the
 *         error is printed to standard error)
 */
public String extract(String url) {
    try {
        Connection request = Jsoup.connect(url)
                .userAgent(Constants.USER_AGENT)
                .header("Accept", "text/html,application/xhtml+xml,application/xml")
                .header("Accept-Encoding", "gzip,deflate,sdch")
                .followRedirects(true)
                .timeout(Constants.CONN_TIMEOUT);
        Connection.Response reply = request.execute();
        return extract(reply.parse());
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
Example 13
Project: pictorial_android_client-master  File: ParserImageList.java View source code
/**
 * Parses an HTML listing into an {@link ImageListBean}.
 *
 * One {@link ImageBean} is produced for every element of class
 * {@code post-inner} that contains an {@code img} with a {@code src} attribute.
 * Posts without such an image are skipped.
 *
 * @param mRet the HTML markup, may be {@code null}
 * @return the parsed list; empty when {@code mRet} is {@code null} or nothing matches
 */
public static ImageListBean parser(String mRet) {
    ImageListBean imageListBean = new ImageListBean();
    if (mRet == null) {
        return imageListBean;
    }
    Document document = Jsoup.parse(mRet);
    for (Element post : document.getElementsByClass("post-inner")) {
        Element image = post.select("img[src]").first();
        if (image == null) {
            // first() returns null for a post without an image; skip that post
            // instead of aborting the whole parse with a NullPointerException.
            continue;
        }
        ImageBean imageBean = new ImageBean();
        imageBean.setAlt(image.attr("alt"));
        imageBean.setDetailurl(post.select("a[title]").attr("href"));
        imageBean.setHeight(image.attr("height"));
        imageBean.setWidth(image.attr("width"));
        imageBean.setImgurl(image.attr("src"));
        imageListBean.add(imageBean);
    }
    return imageListBean;
}
Example 14
Project: playconf-master  File: IndexViewTest.java View source code
/**
 * Renders the index view for a sample proposal and checks that the title and
 * speaker name appear in the generated HTML.
 */
@Override
public void run() {
    Context.current.set(testHttpContext());

    Proposal proposal = sampleProposal();
    Speaker presenter = sampleSpeaker();
    proposal.speaker = presenter;

    Html rendered = views.html.index.render(proposal);
    Document page = Jsoup.parse(contentAsString(rendered));

    String expectedTitle = "Keynote - " + proposal.title;
    assertThat(page.select("#title").text()).isEqualTo(expectedTitle);
    assertThat(page.select("#speakerName").text()).isEqualTo(presenter.name);
}
Example 15
Project: ulti-master  File: UtilsDemo.java View source code
/**
 * Parses a small fixed HTML snippet and dumps information about every element.
 *
 * @return one line per element holding its tag name, node name, child count,
 *         data and text, separated by three spaces
 */
public static String TestJsoup() {
    String html1 = "<html><head><title>First parse</title></head>" + "<body><p>Parsed HTML into a doc.</p></body></html>";
    StringBuilder dump = new StringBuilder();
    Document doc = Jsoup.parse(html1);
    Logs.d("docs---" + doc.title() + "   " + doc.getAllElements().size());
    Logs.d("docs---" + doc.children().size() + "   " + doc.location());
    for (Element current : doc.getAllElements()) {
        dump.append(current.tagName()).append("   ")
                .append(current.nodeName()).append("   ")
                .append(current.children().size()).append("   ")
                .append(current.data()).append("   ")
                .append(current.text()).append("\n");
        Logs.d(current.text() + "   ");
    }
    return dump.toString();
}
Example 16
Project: UltimateAndroid-master  File: UtilsDemo.java View source code
/**
 * Parses a small fixed HTML snippet and dumps information about every element.
 *
 * @return one line per element holding its tag name, node name, child count,
 *         data and text, separated by three spaces
 */
public static String TestJsoup() {
    String html1 = "<html><head><title>First parse</title></head>" + "<body><p>Parsed HTML into a doc.</p></body></html>";
    StringBuilder dump = new StringBuilder();
    Document doc = Jsoup.parse(html1);
    Logs.d("docs---" + doc.title() + "   " + doc.getAllElements().size());
    Logs.d("docs---" + doc.children().size() + "   " + doc.location());
    for (Element current : doc.getAllElements()) {
        dump.append(current.tagName()).append("   ")
                .append(current.nodeName()).append("   ")
                .append(current.children().size()).append("   ")
                .append(current.data()).append("   ")
                .append(current.text()).append("\n");
        Logs.d(current.text() + "   ");
    }
    return dump.toString();
}
Example 17
Project: v2ex-android-master  File: NotificationListModel.java View source code
/**
 * Fills this list model from a notifications page.
 *
 * Every element whose {@code class} attribute is exactly {@code cell} is handed
 * to a new {@link NotificationModel}; models that report a successful parse are
 * added. The paging fields are then updated from the page body.
 *
 * @param responseBody the HTML of the notifications page
 * @throws Exception if parsing fails
 */
public void parse(String responseBody) throws Exception {
    Element pageBody = Jsoup.parse(responseBody).body();
    for (Element cell : pageBody.getElementsByAttributeValue("class", "cell")) {
        NotificationModel model = new NotificationModel();
        if (model.parse(cell)) {
            add(model);
        }
    }
    // parsePage returns {current page, total pages}.
    int[] pageInfo = ContentUtils.parsePage(pageBody);
    currentPage = pageInfo[0];
    totalPage = pageInfo[1];
}
Example 18
Project: validadorAcessibilidade-master  File: RecomendacaoTarget.java View source code
/**
 * Collects the markup that opens new windows or runs automatically on load.
 *
 * The result lists every anchor whose {@code target} is {@code _blank} or
 * {@code _new}, followed by the body's {@code onload} handler when it is set.
 *
 * @param doc the parsed page to inspect
 * @return the findings, each preceded by a newline; empty when nothing is found
 */
@Override
public String executa(Document doc) {
    StringBuilder found = new StringBuilder();
    // The attribute selectors are closed with ']' here: the unterminated forms
    // "a[target=_blank" / "a[target=_new" are malformed queries.
    for (Element link : doc.select("a[target=_blank]")) {
        found.append("\n").append(link);
    }
    for (Element link : doc.select("a[target=_new]")) {
        found.append("\n").append(link);
    }
    // first() returns null when the document has no body element.
    Element body = doc.select("body").first();
    if (body != null) {
        String onload = body.attr("onload");
        if (!onload.isEmpty()) {
            found.append("\n").append(onload);
        }
    }
    return found.toString();
}
Example 19
Project: alfresco-apache-storm-demo-master  File: JSoupDOMBuilder.java View source code
/**
 * Builds a W3C DOM exposing the same content as the supplied Jsoup document.
 *
 * @param jsoupDocument The Jsoup document to convert.
 * @return A W3C Document.
 * @throws RuntimeException if the XML parser cannot be configured
 */
public static Document jsoup2DOM(org.jsoup.nodes.Document jsoupDocument) {
    try {
        /* Document builder for the configured XML parser. */
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        /* Empty target document that receives the converted content. */
        Document w3cDocument = builder.newDocument();
        createDOM(jsoupDocument, w3cDocument, w3cDocument, new HashMap<String, String>());
        return w3cDocument;
    } catch (ParserConfigurationException pce) {
        throw new RuntimeException(pce);
    }
}
Example 20
Project: Android_RssReader-master  File: DescriptionFormatter.java View source code
/**
 * Returns the blog description when it can be shown as-is, or an empty string.
 *
 * The description is rejected (empty string returned) when it contains any
 * {@code embed} element, an {@code iframe} or link pointing at a video host or
 * SWF file, or an image whose source does not start with {@code prefix}.
 *
 * @param blog the blog entry, may be {@code null}
 * @return the description, or {@code ""} when it is missing or rejected
 */
@Override
protected String LoadFromCache(Blog blog) {
    // A null blog (or null description) is treated like an empty description.
    // The guard must be an OR: with "blog != null && ..." a null blog would slip
    // through and be dereferenced on the next line.
    if (blog == null || blog.Description == null || blog.Description.length() == 0) {
        return "";
    }
    Document doc = Jsoup.parse(blog.Description);
    if (!doc.getElementsByTag("embed").isEmpty()) {
        return "";
    }
    for (Element frame : doc.getElementsByTag("iframe")) {
        if (frame.hasAttr("src") && pointsToVideo(frame.attr("src"))) {
            return "";
        }
    }
    for (Element anchor : doc.getElementsByTag("a")) {
        if (anchor.hasAttr("href") && pointsToVideo(anchor.attr("href"))) {
            return "";
        }
    }
    for (Element img : doc.getElementsByTag("img")) {
        if (img.hasAttr("src") && !img.attr("src").startsWith(prefix)) {
            return "";
        }
    }
    return blog.Description;
}

/** Tells whether the URL mentions an SWF file or one of the known video hosts. */
private boolean pointsToVideo(String url) {
    return url.contains("swf") || url.contains("youku") || url.contains("sohu")
            || url.contains("tudou") || url.contains("youtube") || url.contains("ku6");
}
Example 21
Project: any-video-master  File: PandaCrawler.java View source code
/**
 * Extracts live-stream entries from a Panda listing page and stores them via the
 * Redis source manager.
 *
 * @param document the parsed listing page
 */
private void savePandaLivesToRedis(Document document) {
    List<VideoDTO> lives = new ArrayList<>();
    for (Element item : document.select("li.video-list-item.video-no-tag")) {
        VideoDTO live = new VideoDTO();
        String category = item.select("div.video-info span.video-cate").text();
        String nickname = item.select("div.video-info span.video-nickname").text();
        String cover = item.select("img.video-img").attr("data-original");
        String link = PANDA + item.attr("data-id");
        live.setAvailable(true);
        live.setTitle("[" + category + "] " + nickname);
        live.setImage(cover);
        live.setValue(link);
        lives.add(live);
        // Collection stops as soon as the list holds more than 48 entries.
        if (lives.size() > 48) {
            break;
        }
    }
    redisSourceManager.saveVideos(redisSourceManager.VIDEO_PREFIx_HOME_LIVE_KEY + "_" + TAG, lives);
}
Example 22
Project: cms-ce-master  File: HtmlExtractor.java View source code
/**
 * Extracts the plain text of an HTML stream.
 *
 * @param mimeType    the content type; unsupported types yield {@code null}
 * @param inputStream the HTML content
 * @param encoding    the character set name handed to the parser
 * @return the text of all text nodes, each followed by whitespace where needed,
 *         or {@code null} when the MIME type is not handled
 * @throws IOException if the stream cannot be read
 */
@Override
public String extractText(final String mimeType, final InputStream inputStream, final String encoding) throws IOException {
    if (!canHandle(mimeType)) {
        return null;
    }
    final StringBuilder extracted = new StringBuilder();
    final Document parsed = Jsoup.parse(inputStream, encoding, "");
    for (Element node : parsed.getAllElements()) {
        for (TextNode piece : node.textNodes()) {
            final String content = piece.text();
            extracted.append(content);
            appendWhitespaceAfterTextIfNotThere(extracted, content);
        }
    }
    return extracted.toString();
}
Example 23
Project: deepnighttwo-master  File: FirstTry.java View source code
/**
 * Posts a date-range query to the air-quality news page and prints the own text
 * of every table cell in the result rows, skipping the first (header) row.
 *
 * @param args unused
 * @throws IOException if the request fails
 */
public static void main(String[] args) throws IOException {
    Document doc = Jsoup.connect("http://www.envir.gov.cn/airnews/index.asp").data("Fdate", "2000-6-1").data("Tdate", "2000-6-8").userAgent("I'm jsoup").timeout(3000).post();
    // NOTE(review): jsoup's HTML parser normally wraps rows in an implicit tbody,
    // so this direct-child selector may match nothing - confirm against the page.
    Elements eles = doc.select("table[bordercolor] > tr");
    if (eles.isEmpty()) {
        // Nothing matched; remove(0) on an empty list would throw.
        return;
    }
    // Drop the first row before printing the remaining ones.
    eles.remove(0);
    for (Element ele : eles) {
        for (Element cell : ele.select("td")) {
            System.out.println(cell.ownText());
        }
    }
}
Example 24
Project: dungproxy-master  File: WaitProxyTest.java View source code
/**
 * Starts five worker threads that each query the IP echo page five times, then
 * queries it ten more times on the calling thread.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Enable the proxy IP pool and make callers wait while no proxy is available.
    DungProxyContext dungProxyContext = DungProxyContext.create().setWaitIfNoAvailableProxy(true).setPoolEnabled(true);
    IpPoolHolder.init(dungProxyContext);
    for (int worker = 0; worker < 5; worker++) {
        new Thread() {

            @Override
            public void run() {
                printCurrentIp(5);
            }
        }.start();
    }
    printCurrentIp(10);
}

/** Fetches the IP echo page the given number of times and prints each non-empty result. */
private static void printCurrentIp(int attempts) {
    for (int attempt = 0; attempt < attempts; attempt++) {
        String page = HttpInvoker.get("http://ip.cn/");
        if (StringUtils.isEmpty(page)) {
            continue;
        }
        Document parsed = Jsoup.parse(page);
        System.out.println(parsed.select("#result").text());
    }
}
Example 25
Project: EhViewer-master  File: ProfileParser.java View source code
/**
 * Parses a forum profile page into a {@link Result}.
 *
 * The display name is mandatory. The avatar is optional: any failure while
 * reading it is logged and leaves the avatar unset.
 *
 * @param body the HTML of the profile page
 * @return the parsed profile
 * @throws ParseException if the page cannot be parsed; carries the original body
 */
public static Result parse(String body) throws ParseException {
    try {
        Result parsed = new Result();
        Element nameBlock = Jsoup.parse(body).getElementById("profilename");
        parsed.displayName = nameBlock.child(0).text();
        try {
            // The avatar sits in the first child of the second sibling after the name block.
            Element avatarHolder = nameBlock.nextElementSibling().nextElementSibling();
            parsed.avatar = avatarHolder.child(0).attr("src");
            if (TextUtils.isEmpty(parsed.avatar)) {
                parsed.avatar = null;
            } else if (!parsed.avatar.startsWith("http")) {
                // Relative avatar paths are resolved against the forums base URL.
                parsed.avatar = EhUrl.URL_FORUMS + parsed.avatar;
            }
        } catch (Exception avatarFailure) {
            Log.i(TAG, "No avatar");
        }
        return parsed;
    } catch (Exception failure) {
        throw new ParseException("Parse forums error", body);
    }
}
Example 26
Project: email-master  File: UriParserTestHelper.java View source code
/**
 * Asserts that the linkified text contains an anchor whose text and
 * {@code href} both equal {@code expected}. Only the first anchor is checked.
 *
 * @param expected the expected link text and target
 * @param actual   the linkified HTML fragment
 */
public static void assertContainsLink(String expected, StringBuffer actual) {
    Document fragment = Jsoup.parseBodyFragment(actual.toString());
    Element firstAnchor = fragment.select("a").first();
    assertNotNull("No <a> element found", firstAnchor);
    assertEquals(expected, firstAnchor.text());
    assertEquals(expected, firstAnchor.attr("href"));
}
Example 27
Project: example-webapp-master  File: ExceptionHandlingIntegrationTests.java View source code
/**
 * Requests a failing page, follows the redirect to the error page and checks
 * that the error reference from the redirect URL is displayed on that page.
 */
@Test
public void shouldSeeErrorReferenceDisplayedOnThePage() throws Exception {
    SpringDispatcherServlet servlet = SpringDispatcherServlet.create();

    // First request: expect a redirect to /error/ plus seven characters from A-Z0-9.
    MockHttpServletResponse response = servlet.process(new MockHttpServletRequest("GET", "/bad"));
    String redirectedUrl = response.getRedirectedUrl();
    assertThat(redirectedUrl, matchesPattern(sequence("/error/", exactly(7, anyCharacterIn("A-Z0-9")))));
    String errorRef = StringUtils.substringAfterLast(redirectedUrl, "/");

    // Second request: the error page must show exactly one matching reference.
    response = servlet.process(new MockHttpServletRequest("GET", redirectedUrl));
    Elements refElements = Jsoup.parse(response.getContentAsString()).select("#errorRef");
    assertThat(refElements.size(), equalTo(1));
    assertThat(refElements.first().text(), equalTo(errorRef));
}
Example 28
Project: GoVRE-master  File: ProxyNetworkTrainMapImage.java View source code
//METHODS	
/**
 * Looks up the URL of the train map image on the configured VRE page.
 *
 * @param context used to resolve the page URL from string resources
 * @return the absolute URL of the first image whose source contains
 *         {@code app?action=getimg}; an empty string when the page has no such
 *         image; {@code null} when the page could not be downloaded
 */
private static String fetchTrainImageUrlFromVRE(Context context) {
    try {
        String pageUrl = context.getResources().getString(R.string.urlVREImgMap);
        Document page = Jsoup.connect(pageUrl).get();
        // Walk every element that has a src attribute, keeping only images.
        for (Element candidate : page.select("[src]")) {
            if (!candidate.tagName().equals("img")) {
                continue;
            }
            String absoluteSrc = candidate.attr("abs:src");
            // The map is identified by the action query string in its source.
            if (absoluteSrc.contains("app?action=getimg")) {
                return absoluteSrc;
            }
        }
        return "";
    } catch (IOException ignored) {
        // The failure is not reported; callers only see the null result below.
    }
    return null;
}
Example 29
Project: IU-master  File: ConsumeInfo.java View source code
/**
 * Reads consumption records from the {@code GridView1} table of a page.
 *
 * The first and the last table row are skipped; of the remaining rows only
 * those with exactly three cells are converted.
 *
 * @param list receives the parsed records; when {@code null} a throw-away list
 *             is used, so the records are not visible to the caller
 * @param doc  the parsed page, may be {@code null}
 * @return 0 when {@code doc} is {@code null} or the table has fewer than two
 *         rows; otherwise the number of links inside the table rows plus one
 */
public static int parseHtml(List<ConsumeInfo> list, Document doc) {
    if (doc == null) {
        return 0;
    }
    if (list == null) {
        list = new ArrayList<>();
    }
    Elements rows = doc.select("table#GridView1").select("tr");
    int rowCount = rows.size();
    if (rowCount < 2) {
        return 0;
    }
    for (int index = 1; index < rowCount - 1; index++) {
        Elements cells = rows.get(index).children();
        if (cells.size() == 3) {
            ConsumeInfo record = new ConsumeInfo();
            record.time = cells.get(1).text();
            record.remain = cells.get(2).text();
            list.add(record);
        }
    }
    return rows.select("a").size() + 1;
}
Example 30
Project: japicmp-master  File: ITReportTitle.java View source code
/**
 * Checks that the generated project-reports page links to the japicmp report
 * from the navigation and from the overview table with the expected texts.
 */
@Test
public void testReportTitle() throws IOException {
    Path reportFile = Paths.get(System.getProperty("user.dir"), "target", "site", "project-reports.html");
    assertThat(Files.exists(reportFile), is(true));
    Document page = Jsoup.parse(reportFile.toFile(), "UTF-8");

    // Navigation entry in the left column.
    Elements navLink = page.select("#leftColumn [href=\"japicmp.html\"]");
    assertThat(navLink.attr("title"), is("japicmp"));
    assertThat(navLink.text(), is("japicmp"));

    // Overview table row containing the report link.
    Elements reportRow = page.select("#bodyColumn tr:has([href=\"japicmp.html\"])");
    Elements rowLink = reportRow.select("[href=\"japicmp.html\"]");
    assertThat(rowLink.text(), is("japicmp"));
    Elements descriptionCell = reportRow.select("td:eq(1)");
    String version = System.getProperty("project.version");
    assertThat(descriptionCell.text(), is("Comparing source compatibility of japicmp-test-v2-" + version + ".jar against japicmp-test-v1-" + version + ".jar"));
}
Example 31
Project: JianShuApp-master  File: DataPool.java View source code
/**
 * Loads a page through the session and extracts its items.
 *
 * @param url the page address
 * @return the items of the page, or {@code null} when the session did not return
 *         a string result
 * @throws IOException            if the request fails
 * @throws LoginRequiredException if session validation ends in the logged-out state
 */
private Object[] load(String url) throws IOException, LoginRequiredException {
    Object httpResult = JianshuSession.getsInstance().getSync(url, true);
    if (!(httpResult instanceof String)) {
        // No page content: re-validate the session and report a logout.
        JianshuSession.getsInstance().validate();
        if (JianshuSession.getsInstance().getState() instanceof JianshuSession.LogoutState) {
            throw new LoginRequiredException();
        }
        return null;
    }
    Document page = Jsoup.parse((String) httpResult);
    // A login form in the response triggers the same validation.
    if (!page.select("div.login-page").isEmpty()) {
        JianshuSession.getsInstance().validate();
        if (JianshuSession.getsInstance().getState() instanceof JianshuSession.LogoutState) {
            throw new LoginRequiredException();
        }
    }
    parsePageUserInfo(page);
    return this.getItems(page);
}
Example 32
Project: jphp-master  File: JsoupExtension.java View source code
/**
 * Registers the jsoup wrapper classes and the URL memory operation with the
 * given compile scope.
 *
 * @param scope the scope the classes are registered in
 */
@Override
public void onRegister(CompileScope scope) {
    registerClass(scope, WrapJsoup.class);
    // Each jsoup type is paired with the wrapper class that exposes it.
    registerWrapperClass(scope, Connection.class, WrapConnection.class);
    registerWrapperClass(scope, Connection.Response.class, WrapConnectionResponse.class);
    registerWrapperClass(scope, Connection.Request.class, WrapConnectionRequest.class);
    registerWrapperClass(scope, Document.class, WrapDocument.class);
    registerWrapperClass(scope, Element.class, WrapElement.class);
    registerWrapperClass(scope, Elements.class, WrapElements.class);
    MemoryOperation.register(new UrlMemoryOperation());
    // The binary memory operation is currently not registered.
//MemoryOperation.register(new BinaryMemoryOperation());
}
Example 33
Project: k-9-master  File: UriParserTestHelper.java View source code
/**
 * Asserts that the linkified text contains an anchor whose text and
 * {@code href} both equal {@code expected}. Only the first anchor is checked.
 *
 * @param expected the expected link text and target
 * @param actual   the linkified HTML fragment
 */
public static void assertContainsLink(String expected, StringBuffer actual) {
    Document fragment = Jsoup.parseBodyFragment(actual.toString());
    Element firstAnchor = fragment.select("a").first();
    assertNotNull("No <a> element found", firstAnchor);
    assertEquals(expected, firstAnchor.text());
    assertEquals(expected, firstAnchor.attr("href"));
}
Example 34
Project: KinoCast-master  File: NowVideo.java View source code
/**
 * Resolves the direct MP4 source for this host's video.
 *
 * @param queryTask the running query task, used to report progress
 * @return the {@code src} of the MP4 source element on the mobile video page;
 *         {@code null} when no URL is set or the lookup fails
 */
@Override
public String getVideoPath(DetailActivity.QueryPlayTask queryTask) {
    if (TextUtils.isEmpty(url)) {
        return null;
    }
    try {
        // The video id is whatever follows the last slash of the URL.
        String videoId = url.substring(url.lastIndexOf("/") + 1);
        String progress = queryTask.getContext().getString(R.string.host_progress_getvideoforid, videoId);
        queryTask.updateProgress(progress);
        Document page = Jsoup.connect("http://www.nowvideo.sx/mobile/video.php?id=" + videoId)
                .userAgent(Utils.USER_AGENT)
                .timeout(3000)
                .get();
        return page.select("source[type=video/mp4]").attr("src");
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
Example 35
Project: learn_crawler-master  File: HtmlParserTool.java View source code
/**
 * Collects the absolute link, frame and iframe targets of a page that pass the
 * given filter. Every link target is also printed to standard output.
 *
 * @param url    the page to download
 * @param filter decides which targets are kept
 * @return the accepted targets; empty when the download fails (the error is
 *         printed to standard error)
 */
public static Set<String> extracLinks(String url, LinkFilter filter) {
    Set<String> accepted = new HashSet<String>();
    try {
        Document page = Jsoup.connect(url).timeout(5000).get();
        Elements anchors = page.select("a[href]");
        Elements frames = page.select("frame[src]");
        Elements iframes = page.select("iframe[src]");
        for (Element anchor : anchors) {
            String target = anchor.absUrl("href");
            System.out.println(target);
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
        for (Element frame : frames) {
            String target = frame.absUrl("src");
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
        for (Element iframe : iframes) {
            String target = iframe.absUrl("src");
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return accepted;
}
Example 36
Project: like_googleplus_layout-master  File: PhoneKRNewsContentUtils.java View source code
/**
 * Downloads a news article and returns its content as a list of entries.
 *
 * A paragraph without links contributes its text, prefixed with
 * {@code FOUR_BLANK_SPACE}. A paragraph whose first link wraps an image
 * contributes that image's {@code src}. Empty entries are dropped.
 *
 * @param newsUrl the article address
 * @return the entries in document order; {@code null} when the download fails,
 *         the {@code xs-post} container is missing, or it holds no paragraphs
 */
public static LinkedList<String> getPhoneKRNewsDataList(String newsUrl) {
    LinkedList<String> data = null;
    try {
        Document document = Jsoup.connect(newsUrl).get();
        Element post = document.getElementById("xs-post");
        if (post == null) {
            // getElementById returns null for a page without the article
            // container; report "no data" rather than failing on the dereference.
            return null;
        }
        Elements paragraphs = post.getElementsByTag("p");
        if (!paragraphs.isEmpty()) {
            data = new LinkedList<String>();
            for (Element paragraph : paragraphs) {
                String text = null;
                Elements anchors = paragraph.getElementsByTag("a");
                if (anchors.isEmpty()) {
                    text = FOUR_BLANK_SPACE + paragraph.text();
                } else {
                    // Only the first link is inspected for an image.
                    Elements images = anchors.get(0).getElementsByTag("img");
                    if (!images.isEmpty()) {
                        text = images.get(0).attr("src");
                    }
                }
                if (!TextUtils.isEmpty(text)) {
                    data.add(text);
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return data;
}
Example 37
Project: mechanize-master  File: JsoupUtilTest.java View source code
/**
 * Checks which tag paths {@code findFirstByTag} resolves in a document with two
 * anchors inside the body.
 */
@Test
public void testFindFirstByTagSingleTag() {
    String markup = "<html><body><a href=\"A\">A</a><a href=\"B\">B</a></body></html>";
    Document document = Jsoup.parse(markup);

    // Paths that must resolve to an element.
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "a"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "body"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "body/a"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "html/body/a"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "html/a"));

    // Paths that must not resolve.
    assertNull(JsoupDataUtil.findFirstByTag(document, "body/html/a"));
    assertNull(JsoupDataUtil.findFirstByTag(document, "body/unknown"));
}
Example 38
Project: mlcomp-master  File: TitleMap.java View source code
/**
 * Emits one output record holding the input URL and the titles found in its HTML.
 *
 * Column 0 of the input record is the URL and column 1 the HTML. Every title
 * text is appended with a leading comma, so a non-empty result starts with ",".
 *
 * @param recordNum the record number supplied by the caller (not used here)
 * @param record    the input record
 * @param context   used to create and write the output record
 * @throws IOException if writing the record fails
 */
@Override
public void map(long recordNum, Record record, TaskContext context) throws IOException {
    String url = (String) record.get(0);
    String html = (String) record.get(1);

    StringBuilder titles = new StringBuilder();
    for (Element titleTag : Jsoup.parse(html).getElementsByTag("title")) {
        titles.append(",").append(titleTag.text());
    }

    Record output = context.createOutputRecord();
    output.set("url", url);
    output.set("title", titles.toString());
    context.write(output);
}
Example 39
Project: mobile-ycjw-master  File: StudentDevelopmentScheduleQuery.java View source code
/**
 * Downloads the development-schedule page and returns the markup of its
 * {@code DG_GetGrjh} table.
 *
 * @param context used to reach the application object holding the HTTP client
 *                and the selected server address
 * @return the HTML of the elements with id {@code DG_GetGrjh}
 * @throws Exception wrapping any failure during the request or the parse
 */
@Override
public String getDevelopmentScheduleQueryInfo(Context context) throws Exception {
    try {
        YCApplication app = (YCApplication) context.getApplicationContext();
        String url = (String) app.get("selectedIp") + Constant.developScheduleQuery;
        HttpGet request = new HttpGet(url);
        HttpResponse response = app.getClient().execute(request);
        InputStream is = response.getEntity().getContent();
        StringBuilder sb = new StringBuilder();
        BufferedReader br = new BufferedReader(new InputStreamReader(is, Constant.ENCODING));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                // readLine() strips the line terminator; put one back so that
                // markup split across lines is not glued together.
                sb.append(line).append('\n');
            }
        } finally {
            // Closing the reader also closes the underlying response stream.
            br.close();
        }
        Document doc = Jsoup.parse(sb.toString());
        Elements table = doc.select("#DG_GetGrjh");
        return table.toString();
    } catch (Exception e) {
        throw new Exception(e);
    }
}
Example 40
Project: Muzik-master  File: SearchDownloadsNL.java View source code
/**
 * Searches downloads.nl for songs and resolves each hit to its redirect target.
 *
 * Each hit's link is requested without following redirects and the
 * {@code Location} header is used as the song URL. Hits that do not redirect
 * are skipped.
 *
 * @param query the search text, appended to the search URL
 * @return the resolved songs; the results gathered so far when an I/O error
 *         interrupts the search (the error is printed to standard error)
 */
public static ArrayList<SongResult> getSongs(String query) {
    ArrayList<SongResult> temp = new ArrayList<SongResult>();
    //base query url.
    // NOTE(review): the query is concatenated without URL encoding - confirm
    // whether the site needs spaces and special characters escaped.
    String u = "http://www.downloads.nl/results/mp3/1/" + Uri.parse(query);
    try {
        Document document = Jsoup.connect(u).get();
        for (Element x : document.select(".tl")) {
            String url = "http://www.downloads.nl" + x.attr("href");
            //todo add artist string to the name so that result is clearer
            HttpURLConnection ucon = (HttpURLConnection) new URL(url).openConnection();
            String location;
            try {
                ucon.setInstanceFollowRedirects(false);
                location = ucon.getHeaderField("Location");
            } finally {
                // Release the connection once the header has been read.
                ucon.disconnect();
            }
            if (location == null) {
                // No redirect for this hit. Skip it: new URL(null) would throw
                // and abort every remaining result.
                continue;
            }
            URL secondURL = new URL(location);
            String name = x.select("span").text();
            if (HomescreenActivity.debugMode) {
                Log.d("Play", "Downloads.nl Name=" + name + " url=" + secondURL);
            }
            temp.add(new SongResult(name, secondURL.toString()));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return temp;
}
Example 41
Project: opacclient-master  File: ZonesTest.java View source code
/**
 * Checks next-page detection on two saved account pages: the first page must
 * link to a following page, the second must not.
 */
@Test
public void testAccountPages() {
    Document page1 = Jsoup.parse(readResource("/zones/medialist/koeln_pages_1.html"));
    Document page2 = Jsoup.parse(readResource("/zones/medialist/koeln_pages_2.html"));
    page1.setBaseUri(BASE_URL);
    page2.setBaseUri(BASE_URL);
    String nextPage1 = Zones.findNextPageUrl(page1);
    assertNotNull(nextPage1);
    // JUnit's assertEquals takes the expected value first; with the arguments the
    // other way round a failure message mislabels which value was expected.
    assertEquals("https://katalog.stbib-koeln.de/alswww2" + ".dll/Obj_4051458325195?Style=Portal3&SubStyle=&Lang=GER&ResponseEncoding" + "=utf-8&Method=PageDown&PageSize=10", nextPage1);
    String nextPage2 = Zones.findNextPageUrl(page2);
    assertNull(nextPage2);
}
Example 42
Project: orcid-update-java-master  File: DelegatingMetaScraper.java View source code
/**
 * Resolves metadata for {@code url}.
 *
 * <p>Identifiers starting with {@code uk.bl.ethos} are delegated to an
 * {@link EthosMetaScraper}. Anything else is looked up in {@code cache}; on a miss the
 * page is downloaded (10 second timeout), wrapped in an {@link HTMLMetaBuilder} and
 * stored in the cache so that later calls for the same URL skip the download.
 *
 * @param url an ethos identifier or a page URL
 * @return the Dublin Core metadata built for the page
 * @throws IOException if the page cannot be downloaded
 */
@Override
public IsOrcidWork fetch(String url) throws IOException {
    //check to see if we have an ethos ID
    if (url.startsWith("uk.bl.ethos")) {
        EthosMetaScraper scrape = new EthosMetaScraper();
        return scrape.fetch(url);
    }
    HTMLMetaBuilder builder = cache.getIfPresent(url);
    if (builder == null) {
        System.out.println("looking up " + url);
        Document doc = Jsoup.connect(url).timeout(10000).get();
        builder = new HTMLMetaBuilder(doc);
        // Remember the freshly built entry; without this the cache lookup above
        // could never hit for pages fetched through this method.
        cache.put(url, builder);
    }
    return builder.getDublinCoreMeta();
}
Example 43
Project: pack-master  File: CrawlerPack.java View source code
/**
 * Converts an XML string into a Jsoup Document object.
 *
 * <p>Jsoup 1.9.1+ supports non-ASCII tag names. Earlier, a tag whose first character
 * was not a-zA-Z was parsed as a comment, which had to be worked around by injecting a
 * prefix before parsing and stripping it again on output. That workaround is not
 * applied here; the document is parsed directly with Jsoup's XML parser.
 *
 * @param xml XML format string
 * @return org.jsoup.nodes.Document
 */
public org.jsoup.nodes.Document xmlToJsoupDoc(String xml) {
    // Parse with the XML parser and an empty base URI, then mark the result as UTF-8.
    final Document parsed = Jsoup.parse(xml, "", Parser.xmlParser());
    parsed.charset(StandardCharsets.UTF_8);
    return parsed;
}
Example 44
Project: sample-skeleton-projects-master  File: MainRunner.java View source code
/**
 * Downloads the page at {@code URL}, prints its title and the favicon location.
 *
 * <p>The favicon is looked up in the document head, first through the {@code hrefLink}
 * selector and, if that matches nothing, through the {@code imgMeta} selector. An empty
 * path is printed when neither selector matches.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String faviconImagePath = "";
    Connection conn = Jsoup.connect(URL).timeout(LONG_TIMEOUT);
    try {
        Document documentObject = conn.get();
        System.out.println("URL title: " + documentObject.title());
        Element linkElement = documentObject.head().select(hrefLink).first();
        if (linkElement != null) {
            // NOTE(review): assumes hrefLink selects an element carrying the icon in "href" - confirm.
            faviconImagePath = linkElement.attr("href");
        } else {
            Element metaElement = documentObject.head().select(imgMeta).first();
            // Guard the fallback too: first() returns null when nothing matches.
            if (metaElement != null) {
                faviconImagePath = metaElement.attr("content");
            }
        }
        System.out.println("Favicon img: " + faviconImagePath);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Example 45
Project: seldon-server-master  File: UrlSectionDynamicExtractor.java View source code
/**
 * Returns one "/"-separated segment of {@code url}, after removing every occurrence of
 * {@code "http://"}.
 *
 * <p>The segment index is parsed from the first entry of
 * {@code attributeDetail.extractor_args}. {@code null} is returned when no argument is
 * configured or when the URL has no segment after the requested index.
 *
 * @param attributeDetail configuration holding the segment index as its first argument
 * @param url the URL to split
 * @param articleDoc not used by this extractor
 * @return the requested segment, or {@code null}
 */
@Override
public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception {
    final String[] segments = url.replace("http://", "").split("/");
    if (attributeDetail.extractor_args.isEmpty()) {
        return null;
    }
    final int index = Integer.parseInt(attributeDetail.extractor_args.get(0));
    // At least one more segment must follow the requested one.
    if (segments.length <= (index + 1)) {
        return null;
    }
    return segments[index];
}
Example 46
Project: selfoss-android-master  File: ArticleContentParser.java View source code
/**
 * Collects the non-empty {@code src} attribute of every {@code img} tag in the
 * article content.
 *
 * @return the image sources in document order; empty if there are none
 */
public List<String> getImagesUrls() {
    final List<String> sources = new ArrayList<String>();
    final Document parsed = Jsoup.parse(article.getContent());
    for (Element image : parsed.getElementsByTag("img")) {
        final String source = image.attr("src");
        if (source == null || source.isEmpty()) {
            continue;
        }
        sources.add(source);
    }
    return sources;
}
Example 47
Project: SocialConnect-master  File: JsoupBaseCrwaler.java View source code
/**
 * Downloads {@code url} with a 5 second timeout, retrying on {@link IOException}.
 *
 * @param url the address to fetch
 * @return the parsed document of the first successful attempt
 * @throws IOException after four failed attempts, with the last failure as cause
 */
@Override
public Document crwal(String url) throws IOException {
    if (logger.isDebugEnabled()) {
        logger.debug("Start crawling data from: " + url);
    }
    final int maxTriesToGetRemoteData = 4;
    Exception lastFailure = null;
    for (int attempt = 0; attempt < maxTriesToGetRemoteData; attempt++) {
        try {
            return Jsoup.connect(url).timeout(5000).get();
        } catch (IOException e) {
            lastFailure = e;
            if (logger.isWarnEnabled()) {
                logger.warn("Got a " + e.getMessage() + " Exception, try again to fetch data from remote address. Number of previous tries: " + attempt + ". At request: " + url);
            }
        }
    }
    throw new IOException("After " + maxTriesToGetRemoteData + " runs, gave up on fatching data from remote url: " + url, lastFailure);
}
Example 48
Project: stanbol-master  File: DOMBuilder.java View source code
/**
 * Builds a new, empty W3C document and fills it from the given Jsoup document via
 * {@code createDOM}.
 *
 * @param jsoupDocument The Jsoup document to convert.
 * @return A W3C Document.
 * @throws RuntimeException wrapping a {@link ParserConfigurationException} if no
 *         document builder can be created
 */
public static Document jsoup2DOM(org.jsoup.nodes.Document jsoupDocument) {
    try {
        // Builder for whichever XML parser is configured on this JVM.
        final DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        // The target document doubles as the root node to append to.
        final Document w3cDocument = builder.newDocument();
        createDOM(jsoupDocument, w3cDocument, w3cDocument, new HashMap<String, String>());
        return w3cDocument;
    } catch (ParserConfigurationException pce) {
        throw new RuntimeException(pce);
    }
}
Example 49
Project: stocks-master  File: YahooSearchProviderTest.java View source code
/**
 * Parses the stored Yahoo search response and checks the number of extracted items
 * as well as every field of the first one.
 */
@Test
public void testParsingHtml() throws IOException {
    try (Scanner input = new Scanner(getClass().getResourceAsStream("response_yahoo_search.txt"), "UTF-8")) {
        // "\\A" makes the scanner return the whole resource as a single token.
        final String responseBody = input.useDelimiter("\\A").next();
        final List<ResultItem> results = new YahooSearchProvider().extractFrom(Jsoup.parse(responseBody));
        assertThat(results.size(), equalTo(20));
        final ResultItem first = results.get(0);
        assertThat(first.getSymbol(), equalTo("D979C.LS"));
        assertThat(first.getName(), equalTo("BASF AG/CITI WT 14"));
        assertThat(first.getIsin(), equalTo("DE000CF79JW9"));
        assertThat(first.getLastTrade(), equalTo(Values.Quote.factorize(0.11)));
        assertThat(first.getType(), equalTo("Zertifikate & OS"));
        assertThat(first.getExchange(), equalTo("LIS"));
    }
}
Example 50
Project: TACIT-master  File: SupremCrawlerFilter.java View source code
/**
 * Lists the filter links offered on the page for {@code segment}.
 *
 * <p>The result always starts with {@code "All"}, followed by the trimmed {@code href}
 * of every anchor inside the first element with class {@code exmenu}.
 *
 * @param segment path segment appended to the crawler base URL
 * @return {@code "All"} plus the filter hrefs in page order
 * @throws IOException declared for {@code parseContentFromUrl}
 */
public List<String> filters(String segment) throws IOException {
    final String pageUrl = URI.create(this.crawlerUrl + "/" + segment).toString();
    final Document page = parseContentFromUrl(pageUrl);
    final Elements anchors = page.select(".exmenu").get(0).select("a");
    final List<String> result = new ArrayList<String>();
    result.add("All");
    for (Element anchor : anchors) {
        result.add(anchor.attr("href").trim());
    }
    return result;
}
Example 51
Project: TopNews-master  File: NewsDetailsService.java View source code
/**
 * Builds the HTML shown for a news article: a header with title and date, a rule, and
 * the article element downloaded from {@code url}.
 *
 * <p>If the download fails, only the header is returned. The {@code <body>} element is
 * closed on both paths so that the result is well-formed either way.
 *
 * @param url address of the article page
 * @param news_title title placed in the header
 * @param news_date date placed below the title
 * @return the assembled HTML fragment
 */
public static String getNewsDetails(String url, String news_title, String news_date) {
    String data = "<body>" + "<center><h2 style='font-size:16px;'>" + news_title + "</h2></center>";
    data = data + "<p align='left' style='margin-left:10px'>" + "<span style='font-size:10px;'>" + news_date + "</span>" + "</p>";
    data = data + "<hr size='1' />";
    try {
        Document document = Jsoup.connect(url).timeout(9000).get();
        Element element;
        // NOTE(review): the connection above is opened before this emptiness check, so
        // this branch presumably never runs with an empty URL - confirm the intent.
        if (TextUtils.isEmpty(url)) {
            data = "";
            element = document.getElementById("memberArea");
        } else {
            element = document.getElementById("artibody");
        }
        if (element != null) {
            data = data + element.toString();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Appended outside the try so a failed download no longer leaves <body> unclosed.
    data = data + "</body>";
    return data;
}
Example 52
Project: tori-master  File: DOMBuilder.java View source code
/**
 * Converts a Jsoup document into a W3C DOM document with the same content, using
 * {@code createDOM} to copy the nodes into a newly created document.
 *
 * @param jsoupDocument
 *            The Jsoup document to convert.
 * @return A W3C Document.
 */
public static Document jsoup2DOM(final org.jsoup.nodes.Document jsoupDocument) {
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    try {
        // Empty container document; it is passed both as owner and as parent node.
        final Document target = factory.newDocumentBuilder().newDocument();
        createDOM(jsoupDocument, target, target, new HashMap<String, String>());
        return target;
    } catch (ParserConfigurationException pce) {
        // No usable XML parser configuration.
        throw new RuntimeException(pce);
    }
}
Example 53
Project: voj-master  File: HtmlTextFilter.java View source code
/**
 * Filters a string that contains HTML.
 *
 * <p>A literal {@code \n} marker is appended to every {@code br} and two are prepended
 * to every {@code p}; the markers are then turned into real line breaks before all
 * markup is removed with an empty whitelist.
 *
 * @param text - the string to filter
 * @return the filtered string, or {@code null} if {@code text} is {@code null}
 */
public static String filter(String text) {
    if (text == null) {
        return null;
    }
    final Document parsed = Jsoup.parse(text);
    // Pretty printing would introduce line breaks of its own.
    parsed.outputSettings(new Document.OutputSettings().prettyPrint(false));
    parsed.select("br").append("\\n");
    parsed.select("p").prepend("\\n\\n");
    final String withLineBreaks = parsed.html().replaceAll("\\\\n", "\n");
    final Document.OutputSettings cleanSettings = new Document.OutputSettings().prettyPrint(false);
    return Jsoup.clean(withLineBreaks, "", Whitelist.none(), cleanSettings);
}
Example 54
Project: WaveTact-master  File: Quote.java View source code
/**
 * Fetches a page from quotationspage.com and sends the first quote and its author
 * to the requesting user or channel.
 *
 * <p>If the author text contains a dash, it is cut at the first {@code "("} when one
 * is present and at the first {@code "-"} otherwise.
 */
@Override
public void onCommand(String command, User user, PircBotX network, String prefix, Channel channel, boolean isPrivate, int userPermLevel, String... args) throws Exception {
    // Host was misspelled as "wwww."; use the regular "www." host.
    Document doc = Jsoup.connect("http://www.quotationspage.com/random.php3").userAgent(Registry.USER_AGENT).get();
    String c = doc.select(".quote").get(0).text();
    String d = doc.select(".author").get(0).text();
    if (d.contains("-")) {
        if (!d.contains("(")) {
            d = d.split("-")[0];
        } else {
            d = d.split("\\(")[0];
        }
    }
    IRCUtils.sendMessage(user, network, channel, c + " -" + IRCUtils.noPing(d), prefix);
}
Example 55
Project: storm-crawler-master  File: JSoupDOMBuilder.java View source code
/**
 * Produces a W3C DOM document carrying the content of the supplied Jsoup document.
 *
 * @param jsoupDocument
 *            The Jsoup document to convert.
 * @return A W3C Document.
 */
public static Document jsoup2DOM(org.jsoup.nodes.Document jsoupDocument) {
    try {
        final DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        final DocumentBuilder documentBuilder = builderFactory.newDocumentBuilder();
        // Fresh document that receives the converted nodes.
        final Document converted = documentBuilder.newDocument();
        createDOM(jsoupDocument, converted, converted, new HashMap<String, String>());
        return converted;
    } catch (ParserConfigurationException pce) {
        throw new RuntimeException(pce);
    }
}
Example 56
Project: web-crawler-master  File: JSoupDOMBuilder.java View source code
/**
 * Copies the supplied Jsoup document into a newly created W3C DOM document.
 *
 * @param jsoupDocument
 *            The Jsoup document to convert.
 * @return A W3C Document.
 */
public static Document jsoup2DOM(org.jsoup.nodes.Document jsoupDocument) {
    final DocumentBuilder domBuilder;
    try {
        domBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        final Document dom = domBuilder.newDocument();
        // The new document is both the owner and the node children are attached to.
        createDOM(jsoupDocument, dom, dom, new HashMap<String, String>());
        return dom;
    } catch (ParserConfigurationException pce) {
        throw new RuntimeException(pce);
    }
}
Example 57
Project: SOCIETIES-Platform-master  File: Status.java View source code
/**
 * Builds a {@link Status} from a JSON string whose {@code html} member holds the
 * XML-escaped markup of a tweet.
 *
 * <p>The creation date comes from the last {@code a[class*=tweet-timestamp]} element,
 * the text from the first {@code p[class*=js-tweet-text]} element, and the id and
 * screen name from the {@code url} and {@code author_url} members.
 *
 * @param json the JSON document; must not be {@code null}
 * @return the populated status, or {@code null} if there is no {@code html} member
 */
public static Status fromJson(String json) {
    Preconditions.checkNotNull(json);
    final JsonObject root = (JsonObject) parser.parse(json);
    if (root.get("html") == null) {
        return null;
    }
    final Status result = new Status();
    final String markup = StringEscapeUtils.unescapeXml(root.get("html").getAsString());
    // use some jsoup magic to parse html and fetch require elements
    final org.jsoup.nodes.Document tweet = Jsoup.parse(markup);
    result.setCreatedAt(tweet.select("a[class*=tweet-timestamp]").last().text());
    result.setText(tweet.select("p[class*=js-tweet-text]").first().text());
    final String rawId = parseUrlGetLastElementInPath(root.get("url").getAsString());
    result.setId(Long.parseLong(rawId));
    result.setScreenName(parseUrlGetLastElementInPath(root.get("author_url").getAsString()));
    // TODO: We need to parse out the other fields.
    result.jsonObject = root;
    result.jsonString = json;
    return result;
}
Example 58
Project: AcFun-Area63-master  File: DocumentRequest.java View source code
/**
 * Decodes the response body and parses it into a {@link Document}.
 *
 * <p>The charset is taken from the response headers. If that charset is unsupported,
 * the body is decoded with the platform default instead. Any other failure during
 * the first attempt is reported as a {@link ParseError}.
 */
@Override
protected Response<Document> parseNetworkResponse(NetworkResponse response) {
    try {
        final String charsetName = HttpHeaderParser.parseCharset(response.headers);
        final String decoded = new String(response.data, charsetName);
        return Response.success(parse(decoded), HttpHeaderParser.parseCacheHeaders(response));
    } catch (UnsupportedEncodingException unsupported) {
        // Header named a charset this runtime does not know: use the default one.
        final String fallback = new String(response.data);
        return Response.success(parse(fallback), HttpHeaderParser.parseCacheHeaders(response));
    } catch (Exception failure) {
        return Response.error(new ParseError(failure));
    }
}
Example 59
Project: ache-master  File: GoogleSearch.java View source code
public List<BackLinkNeighborhood> submitQuery(String query, int page) throws IOException {
    timer.waitMinimumDelayIfNecesary();
    // 21 -> max number allowed by google... decreases after
    String queryUrl = "https://www.google.com/search?q=" + query + "&num=" + docsPerPage + "&start=" + page * docsPerPage;
    System.out.println("URL:" + queryUrl);
    try {
        FetchedResult result = fetcher.get(queryUrl);
        InputStream is = new ByteArrayInputStream(result.getContent());
        Document doc = Jsoup.parse(is, "UTF-8", query);
        is.close();
        Elements searchItems = doc.select("div#search");
        Elements linkHeaders = searchItems.select(".r");
        Elements linksUrl = linkHeaders.select("a[href]");
        List<BackLinkNeighborhood> links = new ArrayList<>();
        for (Element link : linksUrl) {
            String title = link.text();
            String url = link.attr("href");
            links.add(new BackLinkNeighborhood(url, title));
        }
        System.out.println(getClass().getSimpleName() + " hits: " + links.size());
        return links;
    } catch (IOExceptionBaseFetchException |  e) {
        throw new IOException("Failed to download backlinks from Google.", e);
    }
}
Example 60
Project: asoiaf-master  File: FetchUrls.java View source code
/**
 * Downloads {@code url} (5 second timeout) and reads the thumbnail and original image
 * links from the {@code li.outlink a} anchors labelled "200" and "original".
 *
 * @param url page to inspect
 * @return an {@link ImageUrl} holding whichever links were found; failures are only
 *         printed, so the object may be left unpopulated
 */
public static ImageUrl FetchImageUrl(String url) {
    final ImageUrl result = new ImageUrl();
    try {
        final Document page = Jsoup.connect(url).timeout(5000).get();
        for (Element anchor : page.select("li.outlink a")) {
            final String label = anchor.text();
            if (label.equals("200")) {
                result.setThumbUrl(anchor.select("a[href]").attr("href"));
            }
            if (label.equals("original")) {
                result.setOringinUrl(anchor.select("a[href]").attr("href"));
            }
        }
    } catch (Exception failure) {
        failure.printStackTrace();
    }
    return result;
}
Example 61
Project: asta4d-master  File: ElementNotFoundHandlerOnDocumentTest.java View source code
/**
 * Registers a not-found handler for {@code div} on a document that contains no
 * {@code div}, and checks that the handler's alternative renderer sets the span
 * text to "y".
 */
@Test
public void notFoundOnDocument() throws Exception {
    final Document page = Jsoup.parse("<html><body><span>x</span></body></html>");
    final Renderer root = Renderer.create();
    final ElementNotFoundHandler fallback = new ElementNotFoundHandler("div") {

        @Override
        public Renderer alternativeRenderer() {
            return Renderer.create("span", "y");
        }
    };
    root.add(fallback);
    RenderUtil.apply(page, root);
    Assert.assertEquals(page.select("span").text(), "y");
}
Example 62
Project: baleen-master  File: Jsp101HeadingsTest.java View source code
/**
 * Of the four paragraphs given to the manipulator, exactly one must end up as an
 * {@code h1}: the one with the text "THIS IS A SUBJECT HEADING".
 */
@Test
public void testSubjectHeading() {
    final String fragment = "<p><b>THIS IS A SUBJECT HEADING</b></p><p>THIS IS A NOT SUBJECT HEADING</p><p>THIS IS not a SUBJECT HEADING</p><p>THIS IS NOT A SUBJECT HEADING EITHER.</p>";
    final Document parsed = Jsoup.parseBodyFragment(fragment);
    manipulator.manipulate(parsed);
    final Elements headings = parsed.select("h1");
    assertEquals(1, headings.size());
    assertEquals("THIS IS A SUBJECT HEADING", headings.first().text());
}
Example 63
Project: bank-importer-master  File: ItauPoupancaImportador.java View source code
/**
 * Loads the savings-account statement rows.
 *
 * <p>After loading the menu options, the balance page is fetched and the link labelled
 * "Últimos 30 dias" is followed. Every {@code div.rowPar} / {@code div.rowImpar} row of
 * the statement container yields one record from its second, third and fourth
 * {@code td}, unless its description is listed in {@code descricoesIgnorar}.
 *
 * @return the statement records in page order
 */
@Override
public List<BancoRegistro> carregarLancamentosExtrato() {
    carregarOpcoesMenu();
    final String paginaSaldo = carregarHtml("https://ww70.itau.com.br/M/SaldoPoupanca.aspx", 200);
    final Document extrato = carregarHtmlDeLink(paginaSaldo, "a[href^=SaldoPoupanca]", "Últimos 30 dias");
    final Element tabela = extrato.getElementById("ctl00_ContentPlaceHolder1_Fieldset2");
    final List<BancoRegistro> registros = new ArrayList<BancoRegistro>();
    for (Element linha : tabela.select("div.rowPar, div.rowImpar")) {
        final Elements colunas = linha.select("td");
        final String data = colunas.get(1).text();
        final String desc = colunas.get(2).text().trim();
        final String val = colunas.get(3).text();
        if (descricoesIgnorar.contains(desc)) {
            continue;
        }
        registros.add(gerarRegistro(data, desc, val));
    }
    return registros;
}
Example 64
Project: bennu-master  File: Component.java View source code
/**
 * Replaces every element carrying a {@code bennu-component} attribute with the output
 * of the component registered under that attribute's value.
 *
 * <p>Elements whose key has no entry in {@code COMPONENTS} are left untouched.
 *
 * @param origin the HTML to process
 * @return the serialized document after all replacements
 */
public static String process(String origin) {
    final Document parsed = Jsoup.parse(origin);
    parsed.select("[bennu-component]").forEach(element -> {
        final String componentKey = element.attr("bennu-component");
        Optional.ofNullable(COMPONENTS.get(componentKey)).ifPresent(handler -> element.replaceWith(handler.process(element)));
    });
    return parsed.toString();
}
Example 65
Project: CarHome-master  File: TMallHomePageDownload.java View source code
/**
 * Downloads a shop home page together with its asynchronously loaded modules.
 *
 * <p>The site id is read from the main page with the pattern
 * {@code site_instance_id=(\d+)}. For every {@code J_TAsyncModule} widget id an extra
 * request is made; the text between the first "{" and the last "}" of each response is
 * kept. A pause via {@code sleep(1, 3)} follows each request.
 *
 * @param url the home page URL; must not be {@code null}
 * @return a page combining the main HTML and the module fragments
 * @throws IOException if {@code url} is malformed or a download fails
 */
@Override
public Page download(String url) throws IOException {
    Preconditions.checkNotNull(url);
    final URL indexUrl = new URL(url);
    final Html mainHtml = Html.create(getDocument(url, "UTF-8").html());
    final String siteId = mainHtml.regex("site_instance_id=(\\d+)", 1).get();
    final List<String> widgetIds = mainHtml.xpath("//div[@class='J_TAsyncModule']/@data-widgetid").all();
    final List<Html> fragments = Lists.newArrayListWithExpectedSize(widgetIds.size());
    for (String widgetId : widgetIds) {
        final String asyncUrl = String.format(ASYNC_URL_TEMPLATE, indexUrl.getHost(), widgetId, indexUrl.getPath(), widgetId, siteId);
        final String raw = getDocument(asyncUrl, "UTF-8").html();
        final int start = raw.indexOf("{");
        final int end = raw.lastIndexOf("}");
        // NOTE(review): the end index is exclusive, so the closing brace is dropped - confirm this is intended.
        final String payload = raw.substring(start, end);
        if (LOGGER.isInfoEnabled()) {
            LOGGER.info("content is :{}", payload);
        }
        fragments.add(Html.create(payload));
        sleep(1, 3);
    }
    return Page.create(url, mainHtml, fragments);
}
Example 66
Project: clicker-master  File: CN88ProxyGetter.java View source code
/**
 * Collects proxies from cz88.net pages {@code http_2.aspx} through {@code http_10.aspx}.
 *
 * <p>On each page the first table is read, its first row is skipped, and every other
 * row contributes a proxy built from its first (host) and second (port) cell. The scan
 * is best effort: a row or page that cannot be read is silently skipped.
 *
 * @return the proxies found; empty if nothing could be read
 */
@Override
public Set<Proxy> find() {
    final Set<Proxy> proxies = new HashSet<Proxy>();
    for (int pageNo = 2; pageNo < 11; pageNo++) {
        try {
            final URL pageUrl = new URL("http://www.cz88.net/proxy/http_" + pageNo + ".aspx");
            final Element firstTable = Jsoup.parse(pageUrl, TIMEOUT).getElementsByTag("table").get(0);
            final Elements rows = firstTable.getElementsByTag("tr");
            for (int rowNo = 1; rowNo < rows.size(); rowNo++) {
                try {
                    final Elements cells = rows.get(rowNo).getElementsByTag("td");
                    final String host = cells.get(0).text();
                    final int port = Integer.parseInt(cells.get(1).text());
                    proxies.add(new Proxy(host, port, this.properties));
                } catch (final Exception ignored) {
                    // Unreadable row: continue with the next one.
                }
            }
        } catch (final Exception ignored) {
            // Unreachable or unexpected page: continue with the next one.
        }
    }
    return proxies;
}
Example 67
Project: CN1ML-NetbeansModule-master  File: TreeBuilder.java View source code
/**
 * Resets the builder state for a new parse run: a fresh document, character reader,
 * tokeniser and element stack are created for the given input.
 *
 * @param input the text to parse; must not be {@code null}
 * @param baseUri base URI for the new document; must not be {@code null}
 * @param errors error list handed to the tokeniser
 */
protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
    Validate.notNull(input, "String input must not be null");
    Validate.notNull(baseUri, "BaseURI must not be null");
    this.baseUri = baseUri;
    this.errors = errors;
    this.doc = new Document(baseUri);
    this.reader = new CharacterReader(input);
    this.tokeniser = new Tokeniser(this.reader, errors);
    this.stack = new DescendableLinkedList<Element>();
}
Example 68
Project: crawler-master  File: DefaultAssetsParser.java View source code
/**
 * Collects the asset URLs referenced by a document: the absolute {@code href} of every
 * {@code link[href]} and the absolute {@code src} of every element with a {@code src}
 * attribute.
 *
 * @param doc the document to scan
 * @param referer referer recorded on each created {@link CrawlerURL}
 * @return the set of asset URLs
 */
@Override
public Set<CrawlerURL> getAssets(Document doc, String referer) {
    final Elements sourced = doc.select("[src]");
    final Elements linked = doc.select("link[href]");
    final int expected = sourced.size() + linked.size();
    final Set<CrawlerURL> assets = new HashSet<CrawlerURL>(expected);
    for (Element element : linked) {
        final String href = element.attr("abs:href");
        assets.add(new CrawlerURL(href, referer));
    }
    for (Element element : sourced) {
        final String src = element.attr("abs:src");
        assets.add(new CrawlerURL(src, referer));
    }
    return assets;
}
Example 69
Project: curiosity-maps-master  File: WebCrawler.java View source code
/**
 * Executes a GET on the given connection, converting failures to unchecked exceptions.
 *
 * <p>When the server answers with HTTP 503, the calling thread sleeps for
 * {@code backoffTime} before the failure is rethrown.
 *
 * @param conn the prepared connection
 * @return the parsed response document
 * @throws RuntimeException wrapping the {@link IOException} of a failed request, or the
 *         {@link InterruptedException} if the back-off sleep is interrupted
 */
protected Document httpGet(Connection conn) {
    try {
        // TODO: execute network request in a separate thread pool
        return conn.get();
    } catch (IOException e) {
        if (e instanceof HttpStatusException) {
            HttpStatusException statusException = (HttpStatusException) e;
            if (statusException.getStatusCode() == 503) {
                try {
                    Thread.sleep(backoffTime);
                } catch (InterruptedException e1) {
                    // Restore the interrupt flag so callers up the stack can still see it.
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e1);
                }
            }
        }
        throw new RuntimeException(e);
    }
}
Example 70
Project: en-webmagic-master  File: CssSelector.java View source code
/**
 * Applies the configured CSS selector to {@code text} and returns the non-empty value
 * of every matching element.
 *
 * @param text the HTML to search
 * @return the extracted values in document order; empty if nothing matches
 */
@Override
public List<String> selectList(String text) {
    final List<String> values = new ArrayList<String>();
    final Elements matches = Jsoup.parse(text).select(selectorText);
    if (CollectionUtils.isNotEmpty(matches)) {
        for (Element match : matches) {
            final String extracted = getValue(match);
            if (StringUtils.isEmpty(extracted)) {
                continue;
            }
            values.add(extracted);
        }
    }
    return values;
}
Example 71
Project: EventApp-master  File: BazaarEntryLoader.java View source code
/**
 * Turns every {@code table} of the response into a {@link BazaarEntry} and passes the
 * list to the listener.
 *
 * <p>For tables with at least three rows, the rows supply name, title and summary, and
 * an anchor inside the summary row supplies the URL. Tables with fewer rows still
 * produce an entry, which is left unpopulated.
 *
 * @param body the HTML response body
 */
@Override
public void onResponse(String body) {
    final List<BazaarEntry> entries = new ArrayList<BazaarEntry>();
    for (Element table : Jsoup.parse(body).select("table")) {
        final BazaarEntry entry = new BazaarEntry();
        final Elements rows = table.select("tr");
        if (rows.size() >= 3) {
            entry.setName(rows.get(0).text());
            entry.setTitle(rows.get(1).text());
            final Element summaryRow = rows.get(2);
            entry.setSummary(summaryRow.text());
            final Elements anchors = summaryRow.select("a");
            if (!anchors.isEmpty()) {
                entry.setUrl(anchors.attr("href"));
            }
        }
        entries.add(entry);
    }
    listener.onSuccess(entries);
}
Example 72
Project: extentreports-java-master  File: SystemAttributeTests.java View source code
/**
 * Flushes the report and asserts that both {@code key} and {@code value} appear as the
 * text of some {@code .environment td} cell in the generated HTML file.
 *
 * @param key expected cell text for the attribute name
 * @param value expected cell text for the attribute value
 */
private void performAssertForKVPairs(String key, String value) {
    boolean keySeen = false;
    boolean valueSeen = false;
    extent.flush();
    final Document report = Jsoup.parse(Reader.readAllText(htmlFilePath));
    for (Element cell : report.select(".environment td")) {
        final String cellText = cell.text();
        keySeen |= cellText.equals(key);
        valueSeen |= cellText.equals(value);
    }
    Assert.assertTrue(keySeen);
    Assert.assertTrue(valueSeen);
}
Example 73
Project: FakeWeather-master  File: MzituZiPaiFragment.java View source code
/**
 * Downloads {@code url} (10 second timeout) and creates a {@link Girl} from the first
 * image of every {@code li} inside the first {@code div.postlist}.
 *
 * <p>A page without {@code div.postlist} yields an empty list, and list items without
 * an image are skipped, so an unexpected page layout no longer raises a
 * {@link NullPointerException}.
 *
 * @param url the list page to scrape
 * @return the entries found; empty on layout mismatch or download failure
 */
@Override
public List<Girl> call(String url) {
    List<Girl> girls = new ArrayList<>();
    try {
        Document doc = Jsoup.connect(url).timeout(10000).get();
        Element total = doc.select("div.postlist").first();
        if (total == null) {
            // first() returns null when the selector matches nothing.
            return girls;
        }
        for (Element element : total.select("li")) {
            Element image = element.select("img").first();
            if (image == null) {
                continue;
            }
            girls.add(new Girl(image.attr("src")));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return girls;
}
Example 74
Project: FudanBBS-master  File: TreeBuilder.java View source code
/**
 * Prepares this builder for parsing {@code input}: validates the arguments, then
 * creates the document, reader, tokeniser and element stack for the run.
 *
 * @param input the text to parse; must not be {@code null}
 * @param baseUri base URI of the document being built; must not be {@code null}
 * @param errors collector passed on to the tokeniser
 */
protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
    Validate.notNull(input, "String input must not be null");
    Validate.notNull(baseUri, "BaseURI must not be null");
    // Inputs first, then the objects derived from them.
    this.errors = errors;
    this.baseUri = baseUri;
    reader = new CharacterReader(input);
    tokeniser = new Tokeniser(reader, errors);
    doc = new Document(baseUri);
    stack = new DescendableLinkedList<Element>();
}
Example 75
Project: Gazetti_Newspaper_Reader-master  File: toi.java View source code
/**
 * Downloads the article at {@code mArticleURL} and extracts title, header image URL
 * and body text.
 *
 * <p>The page is requested once; the document is parsed from that same response
 * instead of issuing a second GET. {@code <br />} tags in the body are converted to
 * line breaks through a temporary {@code $$$} marker.
 *
 * @return a three-element array {title, image URL, body text}, or {@code null} if the
 *         response is not a 200 or anything fails along the way
 */
public String[] getToiArticleContent() {
    Document doc;
    String[] result = new String[3];
    String url = mArticleURL;
    try {
        Connection connection = Jsoup.connect(url).userAgent("Mozilla").timeout(10 * 1000);
        Response response = connection.execute();
        if (response == null) {
            Crashlytics.log("Is response null ? " + (null == response));
            return null;
        } else if (response.statusCode() != 200) {
            Crashlytics.log("Received response - " + response.statusCode() + " -- " + response.statusMessage());
            Crashlytics.log("Received response - " + response.body());
            return null;
        }
        // Reuse the response that was just checked rather than fetching the page again.
        doc = response.parse();
        // get Title
        String ToiTitleXPath = ConfigService.getInstance().getTOIHead();
        titleText = doc.select(ToiTitleXPath).text();
        // get HeaderImageUrl
        mImageURL = getImageURL(doc);
        String ToiArticleXPath = ConfigService.getInstance().getTOIBody();
        Element bodyArticleElements = doc.select(ToiArticleXPath).first();
        String temp = bodyArticleElements.html().replace("<br />", "$$$");
        Document bodyNewLine = Jsoup.parse(temp);
        bodyText = bodyNewLine.text().replace("$$$", "\n");
        result[0] = titleText;
        result[1] = mImageURL;
        result[2] = bodyText;
    } catch (IOException e) {
        Crashlytics.logException(e);
        return null;
    } catch (NullPointerException npe) {
        // Raised when the body selector matches nothing.
        bodyText = null;
        Crashlytics.logException(npe);
        return null;
    } catch (Exception e) {
        Crashlytics.logException(e);
        return null;
    }
    return result;
}
Example 76
Project: gvoa-master  File: ItemHtmlParser.java View source code
/**
 * Downloads the page behind {@code item.getLink()} and fills the item from it.
 *
 * <p>The {@code href} of {@code a[id=mp3]} becomes the MP3 URL when present. The first
 * {@code div.contentImage} is removed from the {@code content} element, whose inner
 * HTML then becomes the full text. Finally the status is set to
 * {@code RssItem.E_PARSE_TXT_OK}.
 *
 * @param item the RSS item to complete
 */
public static void parseItemDetail(RssItem item) throws Exception {
    final Document page = Jsoup.parse(NetworkUtil.httpGetContent(item.getLink()));
    final Element audioLink = page.select("a[id=mp3]").first();
    if (audioLink == null) {
        Log.i(tag, "can't get mp3");
    } else {
        Log.i(tag, audioLink.attr("href"));
        item.setMp3url(audioLink.attr("href"));
    }
    final Element article = page.getElementById("content");
    final Element leadImage = article.select("div.contentImage").first();
    if (leadImage != null) {
        Log.i(tag, "remove image element from content");
        leadImage.remove();
    }
    final String articleHtml = article.html();
    Log.i(tag, articleHtml);
    item.setFullText(articleHtml);
    // The lyrics link is only logged.
    final Element lyricsLink = article.select("a[id=lrc]").first();
    if (lyricsLink != null) {
        Log.i(tag, lyricsLink.attr("href"));
    }
    item.setStatus(RssItem.E_PARSE_TXT_OK);
}
Example 77
Project: HabReader-master  File: PostShowLoader.java View source code
/**
 * Downloads the post at {@code url} and extracts title, hubs, content, date and author.
 *
 * <p>If the page has no {@code span.post_title}, the content is set to the 404 error
 * string instead. A failed download leaves the returned object unpopulated.
 *
 * @return the post data, possibly unpopulated
 */
@Override
public PostsFullData loadInBackground() {
    final PostsFullData post = new PostsFullData();
    try {
        final Document page = Jsoup.connect(url).get();
        final Element title = page.select("span.post_title").first();
        if (title == null) {
            post.setContent(context.getString(R.string.error_404));
        } else {
            post.setUrl(url);
            post.setTitle(title.text());
            post.setHubs(page.select("div.hubs").first().text());
            post.setContent(page.select("div.content").first().html());
            post.setDate(page.select("div.published").first().text());
            post.setAuthor(page.select("div.author > a").first().text());
        }
    } catch (IOException ignored) {
        // Download failed: hand back the unpopulated object.
    }
    return post;
}
Example 78
Project: HackerNews-master  File: UserParser.java View source code
/**
 * Loads the profile page of {@code username} and reads the creation date, karma,
 * average and "about" HTML from the profile table.
 *
 * <p>Each value is taken from the cell following the one whose own text contains the
 * label. An unreadable average is stored as {@code -1.0f}.
 *
 * @param username the account to look up
 * @return the parsed user, or {@code null} if the page cannot be loaded or a required
 *         field is missing or malformed
 */
public static User parseUser(String username) {
    try {
        final User parsed = new User();
        parsed.username = username;
        // don't use user cookie so that "about" text appears correctly
        final Document profile = ConnectionManager.anonConnect("/user?id=" + username).get();
        final Elements rows = profile.select("form > table > tbody > tr");
        parsed.created = rows.select("td:containsOwn(created:) + td").first().text();
        final String karmaText = rows.select("td:containsOwn(karma:) + td").first().text();
        parsed.karma = Integer.parseInt(karmaText);
        try {
            final String avgText = rows.select("td:containsOwn(avg:) + td").first().text();
            parsed.avg = Float.parseFloat(avgText);
        } catch (Exception missingAverage) {
            parsed.avg = -1.0f;
        }
        parsed.aboutHtml = rows.select("td:containsOwn(about:) + td").first().html();
        return parsed;
    } catch (IOException ioFailure) {
        ioFailure.printStackTrace();
        Log.d(TAG, "IOException parsing UserModel for: " + username);
        return null;
    } catch (NumberFormatException badNumber) {
        badNumber.printStackTrace();
        Log.d(TAG, "NumberFormatException parsing UserModel for: " + username);
        return null;
    } catch (NullPointerException missingField) {
        missingField.printStackTrace();
        Log.d(TAG, "NullPointerException parsing UserModel for: " + username);
        return null;
    }
}
Example 79
Project: HappyResearch-master  File: MTimeCrawler.java View source code
/**
 * Reads the URLs listed in {@code ./src/main/resources/mtime.txt} and stores each page
 * in a directory of its own.
 *
 * <p>The name is the text of {@code span[property=v:itemreviewed]}, passed through
 * {@code Strings.filterWebString} with {@code '_'}. It is used both for the directory
 * under {@code dir} and for the {@code .html} file inside it.
 */
public void crawl_web_pages() throws Exception {
    final List<String> pageUrls = FileIO.readAsList("./src/main/resources/mtime.txt");
    for (String pageUrl : pageUrls) {
        final String html = URLReader.read(pageUrl);
        final String rawName = Jsoup.parse(html).select("span[property=v:itemreviewed]").text();
        final String name = Strings.filterWebString(rawName, '_');
        final String targetDir = dir + name + "/";
        FileIO.makeDirectory(targetDir);
        FileIO.writeString(targetDir + name + ".html", html);
    }
}
Example 80
Project: jabref-master  File: ACS.java View source code
/**
 * Looks up a fulltext PDF URL for a BibTeX entry at ACS.
 *
 * <p>Only the DOI field is used. The source page for the DOI is fetched with HTTP
 * errors ignored; if it contains a {@code .pdf-high-res a} link, the PDF URL is the
 * source URL with its first {@code /abs/} replaced by {@code /pdf/}.
 *
 * @param entry The Bibtex entry
 * @return The fulltext PDF URL Optional, if found, or an empty Optional if not found.
 * @throws NullPointerException if no BibTex entry is given
 * @throws java.io.IOException
 */
@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException {
    Objects.requireNonNull(entry);
    final Optional<DOI> doi = entry.getField(FieldName.DOI).flatMap(DOI::parse);
    if (!doi.isPresent()) {
        return Optional.empty();
    }
    final String source = String.format(SOURCE, doi.get().getDOI());
    // Retrieve PDF link
    final Document page = Jsoup.connect(source).ignoreHttpErrors(true).get();
    final Element pdfAnchor = page.select(".pdf-high-res a").first();
    if (pdfAnchor == null) {
        return Optional.empty();
    }
    LOGGER.info("Fulltext PDF found @ ACS.");
    return Optional.of(new URL(source.replaceFirst("/abs/", "/pdf/")));
}
Example 81
Project: jacorb-master  File: Client.java View source code
/**
 * Fetches an IOR string from the local PrintIOR servlet, connects to the referenced GoodDay
 * object through a JacORB-configured ORB, and records a string on it.
 *
 * @param args optional; {@code args[0]} is the string to record, otherwise a random UUID is used
 * @throws Exception propagated unchanged from the HTTP and CORBA calls
 */
public static void main(String args[]) throws Exception {
    String updateString = (args.length >= 1) ? args[0] : UUID.randomUUID().toString();

    // Grab the IOR from the servlet: it is the text of the first <h1> on the page.
    String ior = Jsoup.connect("http://localhost:8080/jacorb-appserver/PrintIOR").get().select("h1").first().text();
    System.out.println("Retrieved ior " + ior);

    Properties orbSettings = new Properties();
    orbSettings.setProperty("org.omg.CORBA.ORBClass", "org.jacorb.orb.ORB");
    orbSettings.setProperty("org.omg.CORBA.ORBSingletonClass", "org.jacorb.orb.ORBSingleton");
    orbSettings.setProperty("jacorb.interop.null_string_encoding", "true");
    ORB orb = ORB.init(args, orbSettings);

    GoodDay remote = GoodDayHelper.narrow(orb.string_to_object(ior));
    // Invoke remote server
    System.out.println("Retrieved initial string " + remote.get_string());
    remote.record_string(updateString);
    System.out.println("Retrieved string " + remote.get_string());
}
Example 82
Project: janglipse-master  File: KeywordDocParser.java View source code
/**
 * Collects one {@code KeywordDocumentation} per {@code table.detailHeader} in the document.
 * The name is the text of the table's first {@code td.detailHeaderName} cell and the
 * description is the outer HTML of the node that follows the table.
 *
 * @param doc parsed documentation page
 * @return the keywords found, in document order; empty when there is no matching table
 */
private List<KeywordDocumentation> parse(Document doc) {
    List<KeywordDocumentation> keywords = new ArrayList<KeywordDocumentation>();
    // Iterating an empty selection does nothing, so no explicit size check is required.
    for (Element headerTable : doc.select("table.detailHeader")) {
        KeywordDocumentation entry = new KeywordDocumentation();
        entry.setName(headerTable.select("td.detailHeaderName").get(0).text());
        entry.setDescription(headerTable.nextSibling().outerHtml());
        keywords.add(entry);
    }
    return keywords;
}
Example 83
Project: java-manga-reader-master  File: MangaUtil.java View source code
/**
 * Retrieves a list of licensed Manga from Anime News Network.
 *
 * Each title is the text of an element with class {@code HOVERLINE}. A leading
 * {@code "(The)"} is rewritten to {@code "The"}, and anything from the last {@code '('}
 * onwards is cut off and the remainder trimmed.
 *
 * @return A list of Manga licensed in English.
 * @throws IOException If it cannot complete the request.
 */
public static List<String> getLicensedManga() throws IOException {
    String requestUrl = "http://www.animenewsnetwork.com/encyclopedia/anime-list.php"
            + "?licensed=1"
            + "&sort=title"
            + "&showG=1";
    Document page = Jsoup.connect(requestUrl).maxBodySize(0).get();
    Elements entries = page.getElementsByClass("HOVERLINE");
    List<String> titles = new ArrayList<String>(entries.size());
    for (Element entry : entries) {
        String title = entry.text();
        if (title.startsWith("(The)")) {
            title = title.replace("(The)", "The");
        }
        // Drop a trailing parenthesised suffix, if any.
        int lastParen = title.lastIndexOf('(');
        if (lastParen != -1) {
            title = title.substring(0, lastParen).trim();
        }
        titles.add(title);
    }
    return titles;
}
Example 84
Project: JAVMovieScraper-master  File: Data18SharedMethods.java View source code
// Used to implement the SecurityPassthrough interface for both data18 scrapers.
// Finds the first link in the document, downloads its href and, when that download yields a
// document, downloads the original search result again. In every other case the document
// that was passed in is returned unchanged.
public static Document runSecurityPassthrough(Document document, SearchResult originalSearchResult) {
    if (document == null) {
        return document;
    }
    Element firstLink = document.select("a").first();
    if (firstLink == null || firstLink.attr("href") == null) {
        return document;
    }
    Document captchaSolved = SiteParsingProfile.downloadDocument(new SearchResult(firstLink.attr("href")));
    return (captchaSolved != null) ? SiteParsingProfile.downloadDocument(originalSearchResult) : document;
}
Example 85
Project: JCommons-master  File: DownloaderTest.java View source code
/**
 * Downloads the files linked from elements containing the text "[PDF]" or "[arXiv]" on a
 * Meta Stack Exchange question page and stores them under {@code D:/dl}.
 *
 * The file name is derived from the first {@code <strong>} element inside the link's parent,
 * with {@code : " ' ?} removed or replaced, and always ends in {@code .pdf}. Entries whose
 * name contains "Fit or" are skipped, as are links whose stream cannot be opened.
 *
 * @param args unused
 * @throws IOException if the page cannot be fetched, a link is not a valid URL, or copying a
 *         file fails after its stream was opened
 */
public static void main(String[] args) throws IOException {
    Document doc = Jsoup.connect("http://meta.stackexchange.com/questions/134495/academic-papers-using-stack-exchange-data").get();
    Elements eles = doc.getElementsContainingText("[PDF]");
    eles.addAll(doc.getElementsContainingText("[arXiv]"));
    String folderName = "D:/dl";
    for (Element ele : eles) {
        String src = ele.attr("href");
        if (src == null || src.trim().equals("")) {
            continue;
        }
        URL url = new URL(src);
        Element parent = ele.parent();
        Elements eles1 = parent.getElementsByTag("strong");
        Element nameEle = eles1.get(0);
        String fileName = nameEle.text().replace(":", " ").replace("\"", "").replace("'", "").replace("?", "");
        if (fileName.contains("Fit or")) {
            continue;
        }
        if (!fileName.endsWith(".")) {
            fileName = fileName.concat(".");
        }
        fileName = fileName.concat("pdf");
        System.out.println(fileName);
        InputStream in;
        try {
            in = url.openStream();
        } catch (Exception ignored) {
            // Best effort: a link that cannot be opened is skipped, the remaining ones are still tried.
            continue;
        }
        // try-with-resources closes both streams even when opening the target file or copying fails.
        try (InputStream source = in;
                OutputStream out = new BufferedOutputStream(new FileOutputStream(folderName + "/" + fileName))) {
            // Copy in blocks rather than one byte per read() call on the unbuffered network stream.
            byte[] buffer = new byte[8192];
            for (int count; (count = source.read(buffer)) != -1; ) {
                out.write(buffer, 0, count);
            }
        }
    }
}
Example 86
Project: jeboorker-master  File: ComicsOrgDownloader.java View source code
/**
 * Runs a metadata search for the given phrase: loads the search result pages, extracts the
 * result links from them, loads the linked pages and converts those into download entries.
 *
 * @param phrase the search phrase
 * @return the entries found, or {@code null} when an {@code IOException} occurred (it is logged)
 */
@Override
public List<MetadataDownloadEntry> search(String phrase) {
    try {
        List<URL> resultPageUrls = MetadataDownloadUtils.getSearchPageUrls(phrase, PAGES_TO_LOAD, QUERY_URL);
        List<byte[]> resultPages = MetadataDownloadUtils.loadPages(resultPageUrls, PAGES_TO_LOAD);
        List<Document> parsedResultPages = MetadataDownloadUtils.getDocuments(resultPages, MAIN_URL);
        List<String> entryLinks = findSearchResultLinks(parsedResultPages);
        List<byte[]> entryPages = MetadataDownloadUtils.loadLinkContent(entryLinks, MAIN_URL);
        return getMetadataDownloadEntries(entryPages);
    } catch (IOException e) {
        LoggerFactory.getLogger(this).log(Level.WARNING, "Failed to fetch metadata for search '" + phrase + "'", e);
        return null;
    }
}
Example 87
Project: JKuuza-master  File: ConditionsResolver.java View source code
/**
 * Applies all conditions to this Document.
 *
 * For every condition the document is handed to its condition object, the configured
 * function is invoked through {@code Reflector.call} with the condition's parameters, and
 * the string form of the result is compared with the expected value. Conditions that do not
 * match are appended to {@code failedConditions}; evaluation continues with the next one.
 *
 * @param doc Jsoup Document with html code of a web page
 * @return true if ALL conditions pass, false if AT LEAST ONE does not pass
 * @throws InstantiationException
 * @throws IllegalAccessException
 * @throws ClassNotFoundException
 * @throws IllegalArgumentException
 * @throws InvocationTargetException
 * @throws NoSuchMethodException
 */
public boolean resolve(Document doc) throws InstantiationException, IllegalAccessException, ClassNotFoundException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException {
    boolean allPassed = true;
    Iterator<Condition> remaining = conditions.iterator();
    while (remaining.hasNext()) {
        Condition current = remaining.next();
        String[] callArgs = new String[current.getParams().size()];
        current.getParams().toArray(callArgs);
        current.getConditionObject().setDocument(doc);
        Object actual = Reflector.call(current.getConditionObject(), current.getFunctionName(), callArgs);
        boolean matches = actual.toString().equals(current.getExpectedValue());
        if (!matches) {
            failedConditions.add(current);
            allPassed = false;
        }
    }
    return allPassed;
}
Example 88
Project: karma-exchange-master  File: SalesforceUtil.java View source code
/**
 * Rewrites the {@code src} of every {@code <img>} whose host ends with {@code IMG_CDN_DOMAIN}
 * so that it points at {@code https://<sourceInfo domain>} with the same path and query.
 *
 * Images whose {@code src} is not a parseable URI, or has no host component (for example a
 * relative path), are left untouched.
 *
 * @param doc document whose image elements are updated in place
 * @param sourceInfo supplies the domain that replaces the CDN host
 */
private static void updateSalesforceCdnImgLinks(Document doc, EventSourceInfo sourceInfo) {
    for (Element img : doc.getElementsByTag("img")) {
        URI uri;
        try {
            uri = new URI(img.attr("src"));
        } catch (URISyntaxException ignored) {
            // Not a valid URI, so it cannot be a CDN link; keep the attribute as it is.
            continue;
        }
        String host = uri.getHost();
        if (host == null) {
            // URI.getHost() is null for relative and opaque URIs; previously this caused an NPE.
            continue;
        }
        // Locale.ROOT keeps the comparison independent of the JVM's default locale.
        if (host.toLowerCase(java.util.Locale.ROOT).endsWith(IMG_CDN_DOMAIN)) {
            StringBuilder rewritten = new StringBuilder("https://").append(sourceInfo.getDomain()).append(uri.getPath());
            // URI.getQuery() is null when there is no query; do not emit a literal "?null".
            String query = uri.getQuery();
            if (query != null) {
                rewritten.append('?').append(query);
            }
            img.attr("src", rewritten.toString());
        }
    }
}
Example 89
Project: kempes-master  File: EvomagProductExtractor.java View source code
/**
 * Fills the product's name and price from the page's document.
 *
 * The name is the text of the first {@code div h1}. The price is the first space-separated
 * token of the own text of the first {@code div.pret_ron}, converted by {@code parsePrice}.
 *
 * @param page source web page
 * @param object product to populate
 * @throws Exception propagated unchanged from the page access and price parsing
 */
@Override
public void map(WebPage page, Product object) throws Exception {
    Document document = page.getDocument();
    // parse for title
    object.setName(document.select("div h1").first().text());
    // parse for price
    String priceText = document.select("div.pret_ron").first().ownText();
    String amount = priceText.split(" ")[0];
    object.setPrice(parsePrice(amount));
}
Example 90
Project: ManalithBot-master  File: TranslatorPlugin.java View source code
/**
 * Sends the message to the Microsoft Translator endpoint and returns the translated text.
 *
 * Both parameters are URL-encoded before being placed into the query string, so characters
 * such as {@code &}, {@code #}, {@code %} or {@code '} in the message no longer corrupt the
 * request. The request authenticates with HTTP Basic using {@code clientSecret}.
 *
 * @param to target language code
 * @param message text to translate
 * @return the text of the elements matching {@code d|text[m:type=Edm.String]} in the response,
 *         or a fixed fallback message when the request fails with an {@code IOException}
 */
@BotCommand("번역")
public String translate(@Option(name = "ko|en...", help = "번역할 대� 언어") String to, @Option(name = "메시지", help = "번역할 메시지") String message) {
    final String url = "https://api.datamarket.azure.com/Bing/MicrosoftTranslator/v1/Translate?Text='%s'&To='%s'";
    String login = "USER_ID_IGNORED:" + clientSecret;
    // Encode with an explicit charset so the header does not depend on the platform default.
    String base64login = new String(Base64.encodeBase64(login.getBytes(java.nio.charset.StandardCharsets.UTF_8)), java.nio.charset.StandardCharsets.US_ASCII);
    try {
        // UnsupportedEncodingException is an IOException and is handled by the catch below.
        String requestUrl = String.format(url, java.net.URLEncoder.encode(message, "UTF-8"), java.net.URLEncoder.encode(to, "UTF-8"));
        Document doc = Jsoup.connect(requestUrl).header("Authorization", "Basic " + base64login).ignoreContentType(true).get();
        // The placeholder is required for the document to actually appear in the log line.
        logger.debug("response: {}", doc);
        Elements elem = doc.select("d|text[m:type=Edm.String]");
        return elem.text();
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
    }
    return "번역할 내용� 없습니다.";
}
Example 91
Project: mayocat-shop-master  File: DefaultPdfTemplateRenderer.java View source code
@Override
public void generatePDF(OutputStream outputStream, Path template, Path renderingRoot, Map<String, Object> context) throws PdfRenderingException {
    ITextRenderer renderer = new ITextRenderer();
    try {
        String html = templateRenderer.renderAsString(template, context);
        // Ensure we have a valid XHTML document using JSoup
        Document jsoupDoc = Jsoup.parse(html);
        jsoupDoc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
        jsoupDoc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
        jsoupDoc.outputSettings().charset("UTF-8");
        String path = renderingRoot.toAbsolutePath().toUri().toString();
        renderer.setDocumentFromString(jsoupDoc.toString(), path);
        renderer.layout();
        renderer.createPDF(outputStream);
    } catch (DocumentExceptionTemplateRenderingException |  e) {
        throw new PdfRenderingException(e);
    }
}
Example 92
Project: medium-textview-master  File: JsoupUtils.java View source code
/**
 * Returns the {@code abs:src} attribute of every {@code <iframe>} that carries a {@code src}
 * attribute in the given HTML.
 *
 * @param content HTML to scan
 * @return the iframe sources in document order; empty when there are none
 */
public static List<String> findAllVideoLinks(String content) {
    final List<String> iframeSources = new ArrayList<>();
    // Every element with a src attribute is selected; only iframes contribute a link.
    for (Element candidate : Jsoup.parse(content).select("[src]")) {
        if ("iframe".equals(candidate.tagName())) {
            iframeSources.add(candidate.attr("abs:src"));
        }
    }
    return iframeSources;
}
Example 93
Project: memorabilia-master  File: PostTag.java View source code
/**
 * Selects the elements matching this tag's {@code xpath} query, followed by every
 * {@code <script>} element whose HTML contains {@code pattern}.
 *
 * NOTE(review): despite its name, {@code xpath} is passed to {@code Document.select}.
 *
 * @param doc document to search
 * @return the combined matches, query results first
 */
public Elements selectFrom(Document doc) {
    Elements matches = new Elements();
    matches.addAll(doc.select(xpath));
    for (Element script : doc.getElementsByTag("script")) {
        if (script.html().contains(pattern)) {
            matches.add(script);
        }
    }
    return matches;
}
Example 94
Project: mensaapp-master  File: WeeklyMenuTask.java View source code
/**
 * Downloads and parses the weekly menu at each URL and merges the results.
 *
 * @param urls pages to download
 * @return a pair of the merged menu and {@code null}, or, as soon as any URL fails, a pair of
 *         {@code null} and the exception that was thrown (remaining URLs are not processed)
 */
@Override
protected Pair<WeeklyMenu, Exception> doInBackground(String... urls) {
    List<WeeklyMenu> parsedMenus = new ArrayList<WeeklyMenu>();
    for (String menuUrl : urls) {
        try {
            WeeklyMenuParser menuParser = WeeklyMenuParser.create(context, Jsoup.connect(menuUrl).get(), mensa);
            parsedMenus.add(menuParser.parse());
        } catch (WeeklyMenuParseException parseFailure) {
            Log.w(TAG, String.format(context.getString(R.string.error_menu_parse), menuUrl), parseFailure);
            return new Pair<WeeklyMenu, Exception>(null, parseFailure);
        } catch (Exception downloadFailure) {
            Log.e(TAG, String.format(context.getString(R.string.error_menu_download), menuUrl), downloadFailure);
            return new Pair<WeeklyMenu, Exception>(null, downloadFailure);
        }
    }
    WeeklyMenu merged = WeeklyMenu.merge(mensa, Utils.now(), parsedMenus);
    return new Pair<WeeklyMenu, Exception>(merged, null);
}
Example 95
Project: meta-server-master  File: ServerHtmlContentTest.java View source code
/**
 * Fetches the server list page and checks that the first row of the {@code server-list}
 * table shows the name, owner, port and address of {@code firstEntry}.
 */
@Test
public void testShowHtml() throws IOException {
    Document page = Jsoup.connect(URL_BASE + "/servers/show").get();
    Element serverTable = page.getElementById("server-list");
    Assert.assertTrue(serverTable.nodeName().equals("table"));

    Element firstRow = serverTable.select("tbody").first().select("tr").first();
    String shownName = firstRow.getElementsByClass("server-name").first().text();
    Assert.assertEquals(firstEntry.getName(), shownName);
    String shownOwner = firstRow.getElementsByClass("server-owner").first().text();
    Assert.assertEquals(firstEntry.getOwner(), shownOwner);
    String shownPort = firstRow.getElementsByClass("server-port").first().text();
    Assert.assertEquals("" + firstEntry.getPort(), shownPort);
    String shownAddress = firstRow.getElementsByClass("server-address").first().text();
    Assert.assertEquals(firstEntry.getAddress(), shownAddress);
}
Example 96
Project: mini-blog-master  File: YouKuVideoHandler.java View source code
/**
 * Gets a Youku video.
 *
 * URLs that do not contain {@code v.youku.com} are passed to the successor handler when one
 * is set. Any exception raised while reading the page is logged and results in {@code null}.
 *
 * @param url
 *            video URL
 * @return the populated video, the successor's result, or {@code null}
 */
public Video getVideo(String url) {
    if (url.indexOf("v.youku.com") == -1) {
        // Not a Youku link: delegate to the next handler in the chain, if there is one.
        if (this.successor != null) {
            return this.successor.getVideo(url);
        }
        return null;
    }
    try {
        Document page = VideoUtil.getURLContent(url);
        // Video title
        String title = page.title();
        // Video thumbnail: everything after "pic=" in the href of element "s_sina"
        String shareHref = VideoUtil.getElementAttrById(page, "s_sina", "href");
        String pic = shareHref.substring(shareHref.indexOf("pic=") + 4);
        // Video address
        String flash = VideoUtil.getElementAttrById(page, "link2", "value");
        // Video duration: fifth '|'-separated field of the href of element "download".
        // The parsed value is not stored; the video's time is set to "" below.
        String time = VideoUtil.getElementAttrById(page, "download", "href");
        if (time != null && !"".equals(time)) {
            time = time.split("\\|")[4];
        }
        Video video = new Video();
        video.setPic(pic);
        video.setFlash(flash);
        video.setTime("");
        video.setTitle(title);
        return video;
    } catch (Exception e) {
        logger.error("---------------->error is " + e.getMessage());
        e.printStackTrace();
    }
    return null;
}
Example 97
Project: mylyn.docs-master  File: DocumentProcessorTest.java View source code
/**
 * Verifies that {@code normalizeTextNodes} merges adjacent text nodes: three text nodes, an
 * element and a fourth text node become text, element, text.
 */
@Test
public void testNormalizeTextNodes() {
    Element root = new Document("").appendElement("root");
    root.appendText("first ").appendText("second,").appendText(" third");
    root.appendElement("break");
    root.appendText("fourth");
    assertEquals(5, root.childNodes().size());

    TestDocumentProcessor.normalizeTextNodes(root);

    assertEquals(3, root.childNodes().size());
    Node leading = root.childNode(0);
    assertTrue(leading instanceof TextNode);
    assertEquals("first second, third", ((TextNode) leading).text());
    Node trailing = root.childNode(2);
    assertTrue(trailing instanceof TextNode);
    assertEquals("fourth", ((TextNode) trailing).text());
}
Example 98
Project: myrobotlab-master  File: JSoupExtractor.java View source code
/**
 * Parses every value of {@code htmlField} as HTML and adds each element matching
 * {@code jSoupSelector} to {@code outputField} of the same document.
 *
 * @param doc document to read from and add to
 * @return always {@code null}
 */
@Override
public List<Document> processDocument(Document doc) {
    for (Object rawHtml : doc.getField(htmlField)) {
        Elements selected = Jsoup.parse(rawHtml.toString()).select(jSoupSelector);
        for (Element match : selected) {
            doc.addToField(outputField, match);
        }
    }
    return null;
}
Example 99
Project: NewsStats-master  File: NewYorkTimesContentHandler.java View source code
/**
 * Extracts the article contained in the element with id {@code story} and appends it to
 * {@code articles}.
 *
 * Nothing is added when the page carries no HTML parse data, has no {@code story} element,
 * or {@code filterArticles} rejects the article text. A date that cannot be parsed as
 * {@code yyyy-MM-dd} leaves the article's created date {@code null}.
 *
 * @param page crawled page
 * @return the {@code articles} list, with the new article appended when one was extracted
 */
@Override
public List extractArticles(Page page) {
    if (!(page.getParseData() instanceof HtmlParseData)) {
        return articles;
    }
    System.out.println("Current URL: " + page.getWebURL());
    String html = ((HtmlParseData) page.getParseData()).getHtml();
    Element story = Jsoup.parseBodyFragment(html).getElementById("story");
    if (story == null) {
        // if no article can be found
        return articles;
    }
    String title = story.getElementById("story-heading").ownText();
    String dateString = story.getElementsByClass("dateline").first().attr("datetime");
    Date date = null;
    try {
        date = new SimpleDateFormat("yyyy-MM-dd").parse(dateString);
    } catch (ParseException e) {
        e.printStackTrace();
    }
    String author = story.getElementsByClass("byline-author").first().ownText();
    // Concatenate the own text of all body paragraphs.
    StringBuilder body = new StringBuilder();
    for (Element paragraph : story.select("p.story-body-text.story-content")) {
        body.append(paragraph.ownText());
    }
    String content = body.toString();
    if (!filterArticles(content)) {
        // ignore the article if filter does not approve
        return articles;
    }
    Article article = new NewYorkTimesArticle();
    article.setTitle(title);
    article.setCreatedDate(date);
    article.setAuthor(author);
    article.setContent(content);
    articles.add(article);
    return articles;
}
Example 100
Project: nocket-master  File: AbstractHtmlVisitor.java View source code
/**
 * Appends a {@code <ul wicket:id="groupTabbedPanel">} to the first {@code <form>} of the
 * document when no element with that wicket id exists yet.
 *
 * Applies only when the context's naming strategy is a
 * {@code GroupNameFileAndClassNameStrategy} for a domain object with group annotations and
 * for the main object.
 *
 * @param document document to extend in place
 */
protected void checkAndAddGroupTabbedPanel(Document document) {
    if (!(getContext().getFileAndClassNameStrategy() instanceof GroupNameFileAndClassNameStrategy)) {
        return;
    }
    GroupNameFileAndClassNameStrategy groupStrategy = (GroupNameFileAndClassNameStrategy) getContext().getFileAndClassNameStrategy();
    boolean applicable = groupStrategy.isDomainObjectWithGroupAnnotations() && groupStrategy.isStrategyForMainObject();
    if (!applicable) {
        return;
    }
    if (!document.getElementsByAttributeValue("wicket:id", "groupTabbedPanel").isEmpty()) {
        // The panel placeholder is already present.
        return;
    }
    Elements forms = document.getElementsByTag("form");
    if (forms.isEmpty()) {
        return;
    }
    forms.first().appendElement("ul").attr("wicket:id", "groupTabbedPanel");
}
Example 101
Project: org.eclipse.mylyn.docs-master  File: RemoveEmptySpansProcessor.java View source code
/**
 * Cleans up span elements below the document body.
 *
 * Each pass over the body's elements removes spans without children, replaces spans whose
 * only child is a whitespace-only text node by that text node, and moves a {@code br} that is
 * the first or last child of a span to just before or after that span. Passes are repeated
 * until one completes without making any of these modifications.
 *
 * @param document the document to modify in place
 */
@Override
public void process(Document document) {
    Element body = document.body();
    boolean modifiedOne = false;
    do {
        // reset per pass; the loop ends once a whole pass changes nothing
        modifiedOne = false;
        // remove empty spans, and eliminate tags that only contain whitespace
        for (Element element : body.getAllElements()) {
            if (Html.isSpanElement(element)) {
                // remove span with no children
                List<Node> childNodes = element.childNodes();
                if (childNodes.isEmpty()) {
                    element.remove();
                    modifiedOne = true;
                } else {
                    // a span with a single text child that is only whitespace is removed (text is retained)
                    if (childNodes.size() == 1) {
                        Node node = childNodes.get(0);
                        if (node instanceof TextNode) {
                            TextNode textNode = (TextNode) node;
                            String text = textNode.text();
                            if (text.trim().length() == 0) {
                                // detach the text, re-insert it in front of the span, then drop the span
                                textNode.remove();
                                element.before(textNode);
                                element.remove();
                                modifiedOne = true;
                            }
                            // runs for every single-text-child span, whether or not the text was moved
                            normalizeTextNodes((Element) textNode.parent());
                        }
                    }
                }
            }
            // a br within a span that is a first or last child is moved out
            // NOTE(review): element may have been removed above, in which case parent() is
            // presumably null here - confirm Html.isSpanElement tolerates that
            Element parent = element.parent();
            if (//$NON-NLS-1$
            element.tagName().equalsIgnoreCase("br") && Html.isSpanElement(parent)) {
                List<Node> childNodes = parent.childNodes();
                if (childNodes.get(0) == element) {
                    // leading br: place it before the span
                    element.remove();
                    parent.before(element);
                    modifiedOne = true;
                } else if (childNodes.get(childNodes.size() - 1) == element) {
                    // trailing br: place it after the span
                    element.remove();
                    parent.after(element);
                    modifiedOne = true;
                }
            }
        }
    } while (modifiedOne);
}