Java Examples for org.jsoup.Jsoup
The following Java examples will help you understand the usage of org.jsoup.Jsoup. These source code samples are taken from different open-source projects.
Example 1
| Project: lavender-master File: RamdomImgParser.java View source code |
/**
 * Extracts the image URL from the first {@code <div>} whose {@code id} is
 * {@code photo-detail-wrapper}.
 *
 * @param html the HTML markup to parse
 * @return the {@code src} attribute of the first {@code <img>} inside that div, or
 *         {@code null} when there is no such div or it contains no image
 */
public static String parserImg(String html) {
    Document document = Jsoup.parse(html);
    for (Element div : document.select("div")) {
        if (!"photo-detail-wrapper".equals(div.attr("id"))) {
            continue;
        }
        // first() yields null when the wrapper holds no <img>; dereferencing it used to
        // throw a NullPointerException instead of returning null like the no-wrapper case.
        Element image = div.select("img").first();
        return image == null ? null : image.attr("src");
    }
    return null;
}Example 2
| Project: sagan-master File: QuestionsIndexTests.java View source code |
/**
 * Requests {@code /questions}, expects an HTML-compatible 200 response, and checks that the
 * text of the page body contains the header title plus the expected project names and tags.
 */
@Ignore
@Test
public void showsQuestionsIndex() throws Exception {
    MvcResult mvcResult = mockMvc.perform(get("/questions"))
            .andExpect(status().isOk())
            .andExpect(content().contentTypeCompatibleWith("text/html"))
            .andReturn();
    String responseHtml = mvcResult.getResponse().getContentAsString();
    String body = Jsoup.parse(responseHtml).select("body").text();

    String[] expectedFragments = {
        // header title
        "Spring at StackOverflow",
        // latest spring-* questions pulled into the left 2/3 of the page
        // tags on the right 1/3 of the page. see seed data in
        // sagan-common/src/main/resources/database/V4__stackoverflow_tags.sql
        "Spring Framework",
        "[spring-framework]",
        "[spring-core]",
        "[dependency-injection]",
        "Spring Data",
        "[spring-data]",
        "[spring-data-mongodb]",
        "[spring-data-neo4j]",
    };
    // Checked in the listed order, so the first missing fragment is the one reported.
    for (String fragment : expectedFragments) {
        assertThat(body, containsString(fragment));
    }
}Example 3
| Project: android-opensource-library-56-master File: SanitizeActivity.java View source code |
/**
 * Builds the sanitize demo screen: pre-fills the input field with a sample link carrying an
 * {@code onclick} handler and makes the button write the {@code Jsoup.clean} result
 * (using {@code Whitelist.basic()}) into the output field.
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_sanitize);

    final EditText rawField = (EditText) findViewById(R.id.input_text);
    rawField.setText("<p><a href='http://example.com/' onclick='doAttack()'>Link</a></p>");
    final EditText cleanField = (EditText) findViewById(R.id.sanitized_text);

    OnClickListener sanitizeOnClick = new OnClickListener() {
        @Override
        public void onClick(View v) {
            String rawHtml = rawField.getText().toString();
            cleanField.setText(Jsoup.clean(rawHtml, Whitelist.basic()));
        }
    };
    findViewById(R.id.sanitize_button).setOnClickListener(sanitizeOnClick);
}Example 4
| Project: Android-Studio-Project-master File: ContentParser.java View source code |
/**
 * Builds a {@code Content} from one image of the given HTML: its {@code src} becomes the URL
 * and its {@code alt} becomes the title.
 *
 * @param html the HTML markup to parse
 * @return a new {@code Content} describing the selected image
 */
public static Content Parser(String html) {
    // Images whose src contains .png, .jpg or .jpeg (case-insensitive).
    Elements images = Jsoup.parse(html).select("img[src~=(?i)\\.(png|jpe?g)]");
    Content result = new Content();
    // NOTE(review): index 1 picks the SECOND match and fails when fewer than two images
    // match — confirm that skipping the first image is intended.
    Element second = images.get(1);
    Element image = second.getElementsByTag("img").first();
    result.setUrl(image.attr("src"));
    result.setTitle(image.attr("alt"));
    return result;
}Example 5
| Project: coolreader-master File: DownloadPageTask.java View source code |
/**
 * Downloads and parses the page at the first URL argument.
 *
 * @param arg0 the URLs passed to the task; only the first one is used
 * @return a result wrapping the parsed document, or wrapping the exception when anything
 *         inside the download fails
 */
@Override
protected AsyncTaskResult<Document> doInBackground(URL... arg0) {
    try {
        final String address = arg0[0].toString();
        Log.d("DownloadPageTask", "Downloading: " + address);
        Response page = Jsoup.connect(address).timeout(7000).execute();
        Log.d("DownloadPageTask", "Complete: " + address);
        return new AsyncTaskResult<Document>(page.parse());
    } catch (Exception failure) {
        return new AsyncTaskResult<Document>(failure);
    }
}Example 6
| Project: jinjava-master File: GroupByFilterTest.java View source code |
/**
 * Renders {@code filter/groupby-attr.jinja} with five sample persons and checks the grouped
 * list structure: two groups, three entries under {@code male}, two under {@code female}.
 */
@Test
public void testGroupByAttr() throws Exception {
    String template = Resources.toString(
            Resources.getResource("filter/groupby-attr.jinja"), StandardCharsets.UTF_8);
    String rendered = jinjava.render(
            template,
            ImmutableMap.of(
                    "persons",
                    (Object) Lists.newArrayList(
                            new Person("male", "jared", "stehler"),
                            new Person("male", "foo", "bar"),
                            new Person("female", "sarah", "jones"),
                            new Person("male", "jim", "jones"),
                            new Person("female", "barb", "smith"))));
    Document dom = Jsoup.parseBodyFragment(rendered);

    assertThat(dom.select("ul.root > li")).hasSize(2);
    assertThat(dom.select("ul.root > li.male > ul > li")).hasSize(3);
    assertThat(dom.select("ul.root > li.female > ul > li")).hasSize(2);
}Example 7
| Project: jooby-master File: Issue624d.java View source code |
/**
 * Checks that {@code /saved-url} answers with a form posting to the FormClient auth
 * endpoint, and that authenticating afterwards yields {@code /saved-url}.
 */
@Test
public void shouldForceARedirect() throws Exception {
    request().get("/saved-url").expect(rsp -> {
        String formAction = Jsoup.parse(rsp).select("form").attr("action");
        assertEquals("/auth?client_name=FormClient", formAction);
    });
    request().get("/auth?username=test&password=test").expect("/saved-url");
}Example 8
| Project: LNReader-Android-master File: DownloadPageTask.java View source code |
/**
 * Downloads and parses the page at the first URL argument.
 *
 * @param arg0 the URLs passed to the task; only the first one is used
 * @return a result carrying the parsed document, or a result with a {@code null} document
 *         and the caught exception when anything inside the download fails
 */
@Override
protected AsyncTaskResult<Document> doInBackground(URL... arg0) {
    try {
        final String address = arg0[0].toString();
        Log.d("DownloadPageTask", "Downloading: " + address);
        Response page = Jsoup.connect(address).timeout(7000).execute();
        Log.d("DownloadPageTask", "Complete: " + address);
        return new AsyncTaskResult<Document>(page.parse(), Document.class);
    } catch (Exception failure) {
        return new AsyncTaskResult<Document>(null, Document.class, failure);
    }
}Example 9
| Project: moulder-j-master File: TexterTest.java View source code |
/**
 * Runs a {@code Texter} backed by a mocked value over an {@code <outer>} element and checks
 * that the value was read and that the processed markup carries the mocked text.
 */
@Test
public void testRegularText() throws Exception {
    Value<String> replacement = mock(Value.class);
    when(replacement.get()).thenReturn("text");
    Texter texter = new Texter(replacement);

    Document document = Jsoup.parseBodyFragment("<html><body><outer>test</outer></body></html>");
    Element outer = document.getElementsByTag("outer").first();
    List<Node> processed = texter.process(outer);

    // verify that get was called on the mocked value
    InOrder order = inOrder(replacement);
    order.verify(replacement).get();
    assertXMLEqual(
            new StringReader("<body><outer>text</outer></body>"),
            new StringReader(html(processed)));
}Example 10
| Project: muzima-android-master File: HTMLConceptParser.java View source code |
/**
 * Collects the distinct concept names referenced by the given HTML.
 *
 * @param html the HTML markup to scan
 * @return the unique results of {@code getConceptName} for every non-{@code div} element
 *         carrying the {@code DATA_CONCEPT_TAG} attribute, in no particular order
 */
public List<String> parse(String html) {
    // Any element except <div> that has the DATA_CONCEPT_TAG attribute.
    String selector = "*:not(div)[" + DATA_CONCEPT_TAG + "]";
    Set<String> uniqueNames = new HashSet<String>();
    for (Element tagged : Jsoup.parse(html).select(selector)) {
        String rawConcept = tagged.attr(DATA_CONCEPT_TAG);
        uniqueNames.add(getConceptName(rawConcept));
    }
    return new ArrayList<String>(uniqueNames);
}Example 11
| Project: NiceText-master File: NTImpl.java View source code |
/**
 * Fetches the page at {@code url} and runs the document-based {@code extract} on it.
 *
 * @param url the address to download
 * @return the extraction result, or {@code null} when the download or parse raises an
 *         {@code IOException} (its stack trace is printed)
 */
public String extract(String url) {
    try {
        Connection.Response response = Jsoup.connect(url)
                .userAgent(Constants.USER_AGENT)
                .header("Accept", "text/html,application/xhtml+xml,application/xml")
                .header("Accept-Encoding", "gzip,deflate,sdch")
                .followRedirects(true)
                .timeout(Constants.CONN_TIMEOUT)
                .execute();
        return extract(response.parse());
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}Example 12
| Project: pictorial_android_client-master File: ParserImageList.java View source code |
/**
 * Parses every {@code post-inner} element of the given HTML into an {@code ImageBean}.
 *
 * @param mRet the HTML markup to parse; may be {@code null}
 * @return a list bean with one entry per post that contains an {@code <img src>}; empty
 *         when {@code mRet} is {@code null} or no such post exists
 */
public static ImageListBean parser(String mRet) {
    ImageListBean imageListBean = new ImageListBean();
    if (mRet == null) {
        return imageListBean;
    }
    for (Element post : Jsoup.parse(mRet).getElementsByClass("post-inner")) {
        Element image = post.select("img[src]").first();
        if (image == null) {
            // A post without an image used to abort the whole parse with a
            // NullPointerException; skip it and keep the remaining posts.
            continue;
        }
        ImageBean imageBean = new ImageBean();
        imageBean.setAlt(image.attr("alt"));
        // The detail page is the href of the post's titled anchor.
        imageBean.setDetailurl(post.select("a[title]").attr("href"));
        imageBean.setHeight(image.attr("height"));
        imageBean.setWidth(image.attr("width"));
        imageBean.setImgurl(image.attr("src"));
        imageListBean.add(imageBean);
    }
    return imageListBean;
}Example 13
| Project: playconf-master File: IndexViewTest.java View source code |
/**
 * Renders the index view for a sample proposal and checks the title and speaker name
 * shown in the resulting HTML.
 */
@Override
public void run() {
    Context.current.set(testHttpContext());
    Proposal proposal = sampleProposal();
    Speaker presenter = sampleSpeaker();
    proposal.speaker = presenter;

    Html rendered = views.html.index.render(proposal);
    Document page = Jsoup.parse(contentAsString(rendered));

    String expectedTitle = "Keynote - " + proposal.title;
    assertThat(page.select("#title").text()).isEqualTo(expectedTitle);
    assertThat(page.select("#speakerName").text()).isEqualTo(presenter.name);
}Example 14
| Project: SimpleFunctionalTest-master File: HtmlBreadcrumb.java View source code |
/**
 * Produces the HTML report for a use case and appends a breadcrumb list to every
 * {@code .page-header .text-center} element.
 *
 * @param useCaseResult the use case result to report on
 * @param parameters not read by this method
 * @return the report markup including the breadcrumb
 */
@Override
public String applyOnUseCase(UseCaseResult useCaseResult, String... parameters) {
    final Document report = Jsoup.parse(getHtmlReport().applyOnUseCase(useCaseResult));
    String breadcrumb = "<ol class=\"breadcrumb\">"
            + printFirstUseCase(useCaseResult.useCase, useCaseResult.useCase)
            + "</ol>";
    report.select(".page-header .text-center").append(breadcrumb);
    return report.toString();
}Example 15
| Project: TPPIBot-master File: HTML.java View source code |
/**
 * Fetches the page named by the first argument, pastes its HTML via {@code poster}, and
 * reports the paste back through {@code lines}.
 *
 * @param lines output lines; receives either the result, a usage message when no argument
 *              was given, or the exception class name when the fetch fails
 * @param args command arguments; {@code args[0]} is the URL to fetch
 */
@Override
public void onCommand(PircBotX bot, User user, Channel channel, List<String> lines, String... args) {
    if (args.length < 1) {
        lines.add("This command requires 1 arg.");
        return;
    }
    final String target = args[0];
    String source;
    try {
        source = Jsoup.connect(target).get().html();
    } catch (IOException e) {
        // Only the exception type is reported to the channel.
        lines.add(e.getClass().getName());
        return;
    }
    String pasteResult = poster.pasteData(source);
    lines.add("HTML Source of " + target + " : " + pasteResult);
}Example 16
| Project: ulti-master File: UtilsDemo.java View source code |
/**
 * Parses a small fixed HTML document, logs a few document statistics, and returns one line
 * per element listing its tag name, node name, child count, data and text.
 *
 * @return the per-element summary, one element per line
 */
public static String TestJsoup() {
    String html1 = "<html><head><title>First parse</title></head>" + "<body><p>Parsed HTML into a doc.</p></body></html>";
    // StringBuilder instead of StringBuffer: the buffer never leaves this method, so
    // synchronized appends buy nothing.
    StringBuilder sb = new StringBuilder();
    Document doc = Jsoup.parse(html1);
    Logs.d("docs---" + doc.title() + " " + doc.getAllElements().size());
    Logs.d("docs---" + doc.children().size() + " " + doc.location());
    for (Element element : doc.getAllElements()) {
        // Chained appends avoid building a throw-away string for every element.
        sb.append(element.tagName()).append(' ')
                .append(element.nodeName()).append(' ')
                .append(element.children().size()).append(' ')
                .append(element.data()).append(' ')
                .append(element.text()).append('\n');
        Logs.d(element.text() + " ");
    }
    return sb.toString();
}Example 17
| Project: UltimateAndroid-master File: UtilsDemo.java View source code |
/**
 * Parses a small fixed HTML document, logs a few document statistics, and returns one line
 * per element listing its tag name, node name, child count, data and text.
 *
 * @return the per-element summary, one element per line
 */
public static String TestJsoup() {
    String html1 = "<html><head><title>First parse</title></head>" + "<body><p>Parsed HTML into a doc.</p></body></html>";
    // StringBuilder instead of StringBuffer: the buffer never leaves this method, so
    // synchronized appends buy nothing.
    StringBuilder sb = new StringBuilder();
    Document doc = Jsoup.parse(html1);
    Logs.d("docs---" + doc.title() + " " + doc.getAllElements().size());
    Logs.d("docs---" + doc.children().size() + " " + doc.location());
    for (Element element : doc.getAllElements()) {
        // Chained appends avoid building a throw-away string for every element.
        sb.append(element.tagName()).append(' ')
                .append(element.nodeName()).append(' ')
                .append(element.children().size()).append(' ')
                .append(element.data()).append(' ')
                .append(element.text()).append('\n');
        Logs.d(element.text() + " ");
    }
    return sb.toString();
}Example 18
| Project: v2ex-android-master File: NotificationListModel.java View source code |
/**
 * Fills this list from a notifications page: adds one {@code NotificationModel} per
 * {@code class="cell"} element that parses successfully, then stores the paging info.
 *
 * @param responseBody the HTML of the notifications page
 * @throws Exception propagated from the notification or paging parsers
 */
public void parse(String responseBody) throws Exception {
    Element pageBody = Jsoup.parse(responseBody).body();
    for (Element cell : pageBody.getElementsByAttributeValue("class", "cell")) {
        NotificationModel candidate = new NotificationModel();
        // Cells the model cannot parse are dropped.
        if (candidate.parse(cell)) {
            add(candidate);
        }
    }
    // parsePage yields {current page, total pages}.
    int[] pageInfo = ContentUtils.parsePage(pageBody);
    currentPage = pageInfo[0];
    totalPage = pageInfo[1];
}Example 19
| Project: webmagic-master File: LinksSelectorTest.java View source code |
/**
 * Prints the links selected from the current {@code html} fixture, then from a three-anchor
 * snippet parsed with the base URI {@code http://whatever.com/}.
 */
@Test
public void testLinks() throws Exception {
    LinksSelector selector = new LinksSelector();
    List<String> found = selector.selectList(html);
    System.out.println(found);

    html = "<div><a href='aaa'></a></div><div><a href='http://whatever.com/bbb'></a></div><div><a href='http://other.com/bbb'></a></div>";
    found = selector.selectList(Jsoup.parse(html, "http://whatever.com/"));
    System.out.println(found);
}Example 20
| Project: Android_RssReader-master File: DescriptionFormatter.java View source code |
/**
 * Returns the blog description only when it is safe to show as-is: it must not embed or
 * link to video content and every image must come from {@code prefix}.
 *
 * @param blog the blog entry; may be {@code null}
 * @return {@code blog.Description}, or an empty string when the blog or its description is
 *         missing/empty, when it contains an {@code <embed>}, an {@code <iframe>} or
 *         {@code <a>} pointing at a known video host, or an image outside {@code prefix}
 */
@Override
protected String LoadFromCache(Blog blog) {
    // The old guard was "blog != null && ...", so a null blog skipped the early return and
    // crashed on blog.Description right below.
    if (blog == null || blog.Description == null || blog.Description.length() == 0) {
        return "";
    }
    Document doc = Jsoup.parse(blog.Description);
    if (!doc.getElementsByTag("embed").isEmpty()) {
        return "";
    }
    for (Element frame : doc.getElementsByTag("iframe")) {
        if (frame.hasAttr("src") && pointsToVideo(frame.attr("src"))) {
            return "";
        }
    }
    for (Element anchor : doc.getElementsByTag("a")) {
        if (anchor.hasAttr("href") && pointsToVideo(anchor.attr("href"))) {
            return "";
        }
    }
    for (Element img : doc.getElementsByTag("img")) {
        if (img.hasAttr("src") && !img.attr("src").startsWith(prefix)) {
            return "";
        }
    }
    return blog.Description;
}

/** Tells whether the URL mentions flash content or one of the known video hosts. */
private static boolean pointsToVideo(String url) {
    String[] markers = {"swf", "youku", "sohu", "tudou", "youtube", "ku6"};
    for (String marker : markers) {
        if (url.contains(marker)) {
            return true;
        }
    }
    return false;
}Example 21
| Project: bashoid-master File: Parser.java View source code |
/**
 * Parses the quotes listed on a page. Headers ({@code class="quote"}) and bodies
 * ({@code class="qt"}) are read from the first element that has a {@code valign} attribute
 * and paired by position.
 *
 * @param page the downloaded page
 * @return one {@code Quote} per header, in page order
 */
static ArrayList<Quote> getQuotes(WebPage page) {
    ArrayList<Quote> parsed = new ArrayList<>();
    Element container = Jsoup.parse(page.getContent()).getElementsByAttribute("valign").first();
    Elements headers = container.getElementsByClass("quote");
    Elements bodies = container.getElementsByClass("qt");
    for (int index = 0, total = headers.size(); index < total; ++index) {
        String[] lines = bodies.get(index).html().split("<br />");
        Element header = headers.get(index);
        // The id sits in the header's <b> element behind a one-character prefix.
        String idText = header.getElementsByTag("b").first().text();
        int id = Integer.parseInt(idText.substring(1));
        // The score is the header's own text minus its first and last character.
        String scoreText = header.ownText();
        int score = Integer.parseInt(scoreText.substring(1, scoreText.length() - 1));
        parsed.add(new Quote(lines, score, id));
    }
    return parsed;
}Example 22
| Project: cms-ce-master File: HtmlExtractor.java View source code |
/**
 * Extracts the text of an HTML stream by concatenating the direct text nodes of every
 * element, inserting whitespace between them where needed.
 *
 * @param mimeType the MIME type of the stream
 * @param inputStream the HTML content
 * @param encoding the character encoding handed to the parser
 * @return the extracted text, or {@code null} when {@code canHandle} rejects the MIME type
 * @throws IOException if the stream cannot be read
 */
@Override
public String extractText(final String mimeType, final InputStream inputStream, final String encoding) throws IOException {
    if (!canHandle(mimeType)) {
        return null;
    }
    final StringBuilder collected = new StringBuilder();
    final Document parsed = Jsoup.parse(inputStream, encoding, "");
    for (Element node : parsed.getAllElements()) {
        for (TextNode piece : node.textNodes()) {
            final String content = piece.text();
            collected.append(content);
            appendWhitespaceAfterTextIfNotThere(collected, content);
        }
    }
    return collected.toString();
}Example 23
| Project: CrashMonkey4Android_tradefederation-master File: JsonHelper.java View source code |
/**
 * Downloads the raw response body from {@code url}, accepting any content type.
 *
 * @param url the address to request
 * @return the response body, or {@code null} when the request raises an
 *         {@code IOException} (the failure is logged)
 */
public static String getJsonString(String url) {
    Connection request = Jsoup.connect(url);
    // Without this the connection rejects non-HTML content types.
    request.ignoreContentType(true);
    try {
        return request.execute().body();
    } catch (IOException e) {
        CLog.i("failed to get json result for %s, %s", url, e.getMessage());
        return null;
    }
}Example 24
| Project: deepnighttwo-master File: FirstTry.java View source code |
/**
 * Posts a date range to the air-news index page and prints the own text of every table
 * cell in the result, one per line.
 *
 * @param args unused
 * @throws IOException if the request fails
 */
public static void main(String[] args) throws IOException {
    Document page = Jsoup.connect("http://www.envir.gov.cn/airnews/index.asp")
            .data("Fdate", "2000-6-1")
            .data("Tdate", "2000-6-8")
            .userAgent("I'm jsoup")
            .timeout(3000)
            .post();
    Elements tableRows = page.select("table[bordercolor] > tr");
    // Drop the first row (presumably the table header — confirm).
    tableRows.remove(0);
    for (Element tableRow : tableRows) {
        for (Element cell : tableRow.select("td")) {
            System.out.println(cell.ownText());
        }
    }
}Example 25
| Project: downlords-faf-client-master File: LastNewsController.java View source code |
/**
 * Shows the first fetched news item (author line, title and plain-text content) and sets
 * the default news image.
 */
@PostConstruct
void postConstruct() {
    List<NewsItem> fetched = newsService.fetchNews();
    if (!fetched.isEmpty()) {
        NewsItem first = fetched.get(0);
        String authored = i18n.get("news.authoredFormat", first.getAuthor(), first.getDate());
        authoredLabel.setText(authored);
        titleLabel.setText(first.getTitle());
        // The content is HTML; only its text is displayed.
        textLabel.setText(Jsoup.parse(first.getContent()).text());
    }
    // TODO only use this if there's no thumbnail. However, there's never a thumbnail ATM.
    imageView.setImage(themeService.getThemeImage(ThemeService.DEFAULT_NEWS_IMAGE));
}Example 26
| Project: dungproxy-master File: WaitProxyTest.java View source code |
/**
 * Demo: initialises the proxy pool, then fetches {@code http://ip.cn/} five times from each
 * of five threads and ten times from the main thread, printing the text of the
 * {@code #result} element of every non-empty response.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Enable the proxy IP pool and make callers block and wait while the pool is empty.
    DungProxyContext proxyContext = DungProxyContext.create().setWaitIfNoAvailableProxy(true).setPoolEnabled(true);
    IpPoolHolder.init(proxyContext);
    for (int worker = 0; worker < 5; worker++) {
        new Thread() {
            @Override
            public void run() {
                for (int attempt = 0; attempt < 5; attempt++) {
                    String page = HttpInvoker.get("http://ip.cn/");
                    if (!StringUtils.isEmpty(page)) {
                        System.out.println(Jsoup.parse(page).select("#result").text());
                    }
                }
            }
        }.start();
    }
    for (int attempt = 0; attempt < 10; attempt++) {
        String page = HttpInvoker.get("http://ip.cn/");
        if (!StringUtils.isEmpty(page)) {
            System.out.println(Jsoup.parse(page).select("#result").text());
        }
    }
}Example 27
| Project: EhViewer-master File: ProfileParser.java View source code |
/**
 * Parses a forum profile page into a {@code Result}.
 *
 * @param body the HTML of the profile page
 * @return the display name and, when it can be located, the avatar URL
 * @throws ParseException when the display name cannot be read; a missing avatar is only
 *         logged
 */
public static Result parse(String body) throws ParseException {
    try {
        Result parsed = new Result();
        Element nameBlock = Jsoup.parse(body).getElementById("profilename");
        parsed.displayName = nameBlock.child(0).text();
        try {
            // The avatar image is the first child of the second sibling after the name.
            String src = nameBlock.nextElementSibling().nextElementSibling().child(0).attr("src");
            if (TextUtils.isEmpty(src)) {
                parsed.avatar = null;
            } else {
                // Relative avatar paths are resolved against the forums URL.
                parsed.avatar = src.startsWith("http") ? src : EhUrl.URL_FORUMS + src;
            }
        } catch (Exception e) {
            Log.i(TAG, "No avatar");
        }
        return parsed;
    } catch (Exception e) {
        throw new ParseException("Parse forums error", body);
    }
}Example 28
| Project: email-master File: UriParserTestHelper.java View source code |
/**
 * Asserts that the linkified text contains an anchor whose text and {@code href} both
 * equal the expected URI. Only the first anchor is checked.
 *
 * @param expected the URI expected as link text and target
 * @param actual the linkified HTML fragment
 */
public static void assertContainsLink(String expected, StringBuffer actual) {
    Document fragment = Jsoup.parseBodyFragment(actual.toString());
    Element firstAnchor = fragment.select("a").first();
    assertNotNull("No <a> element found", firstAnchor);
    assertEquals(expected, firstAnchor.text());
    assertEquals(expected, firstAnchor.attr("href"));
}Example 29
| Project: example-webapp-master File: ExceptionHandlingIntegrationTests.java View source code |
/**
 * Requests a failing URL, follows the redirect to {@code /error/<7-character reference>},
 * and checks that the error page shows exactly that reference in {@code #errorRef}.
 */
@Test
public void shouldSeeErrorReferenceDisplayedOnThePage() throws Exception {
    SpringDispatcherServlet dispatcher = SpringDispatcherServlet.create();
    MockHttpServletResponse response = dispatcher.process(new MockHttpServletRequest("GET", "/bad"));
    String errorPageUrl = response.getRedirectedUrl();
    assertThat(errorPageUrl, matchesPattern(sequence("/error/", exactly(7, anyCharacterIn("A-Z0-9")))));
    String expectedRef = StringUtils.substringAfterLast(errorPageUrl, "/");

    response = dispatcher.process(new MockHttpServletRequest("GET", errorPageUrl));
    Document errorPage = Jsoup.parse(response.getContentAsString());
    Elements refElements = errorPage.select("#errorRef");
    assertThat(refElements.size(), equalTo(1));
    assertThat(refElements.first().text(), equalTo(expectedRef));
}Example 30
| Project: GameRaven-master File: DocumentParser.java View source code |
/**
 * Reads the whole emitter into memory and completes with a {@code FinalDoc} holding both
 * the raw bytes and the document parsed from them (decoded with {@code CHARSET}).
 *
 * @param emitter the data source
 * @return a future for the parsed result
 */
@Override
public Future<FinalDoc> parse(DataEmitter emitter) {
    return new ByteBufferListParser().parse(emitter).then(new TransformFuture<FinalDoc, ByteBufferList>() {
        @Override
        protected void transform(ByteBufferList result) throws Exception {
            byte[] raw = result.getAllByteArray();
            String markup = new String(raw, CHARSET);
            setComplete(new FinalDoc(raw, Jsoup.parse(markup)));
        }
    });
}Example 31
| Project: GoVRE-master File: ProxyNetworkTrainMapImage.java View source code |
//METHODS
/**
 * Looks up the absolute URL of the train map image on the page configured in
 * {@code R.string.urlVREImgMap}.
 *
 * @param context used to resolve the page URL from resources
 * @return the absolute {@code src} of the first {@code <img>} whose URL contains
 *         {@code app?action=getimg}; an empty string when no image matches; {@code null}
 *         when the page download raises an {@code IOException}
 */
private static String fetchTrainImageUrlFromVRE(Context context) {
    try {
        String pageUrl = context.getResources().getString(R.string.urlVREImgMap);
        Document page = Jsoup.connect(pageUrl).get();
        // Walk every element that has a src attribute, keeping images only.
        for (Element candidate : page.select("[src]")) {
            if (!candidate.tagName().equals("img")) {
                continue;
            }
            String absoluteSrc = candidate.attr("abs:src");
            // The map is the only image whose link carries the action query string.
            if (absoluteSrc.contains("app?action=getimg")) {
                return absoluteSrc;
            }
        }
        // No matching image on the page.
        return "";
    } catch (IOException ignored) {
        // Download failures are swallowed; the caller sees null.
    }
    return null;
}Example 32
| Project: japicmp-master File: ITReportTitle.java View source code |
/**
 * Checks the generated {@code target/site/project-reports.html}: the japicmp link must
 * appear in the left navigation and in the overview table with the expected description.
 */
@Test
public void testReportTitle() throws IOException {
    Path reportPage = Paths.get(System.getProperty("user.dir"), "target", "site", "project-reports.html");
    assertThat(Files.exists(reportPage), is(true));
    Document document = Jsoup.parse(reportPage.toFile(), "UTF-8");
    String japicmpLink = "[href=\"japicmp.html\"]";

    Elements navEntry = document.select("#leftColumn " + japicmpLink);
    assertThat(navEntry.attr("title"), is("japicmp"));
    assertThat(navEntry.text(), is("japicmp"));

    Elements overviewRow = document.select("#bodyColumn tr:has(" + japicmpLink + ")");
    assertThat(overviewRow.select(japicmpLink).text(), is("japicmp"));

    Elements description = overviewRow.select("td:eq(1)");
    String version = System.getProperty("project.version");
    String expectedDescription = "Comparing source compatibility of japicmp-test-v2-" + version + ".jar against japicmp-test-v1-" + version + ".jar";
    assertThat(description.text(), is(expectedDescription));
}Example 33
| Project: JAViewer-master File: TorrentKittyLinkProvider.java View source code |
/**
 * Extracts download links from the rows of the {@code archiveResult} table.
 *
 * @param htmlContent the HTML of the result page
 * @return one link per row that provides a name, a date and a magnet anchor; empty when
 *         the page has no {@code archiveResult} element
 */
@Override
public List<DownloadLink> parseDownloadLinks(String htmlContent) {
    ArrayList<DownloadLink> links = new ArrayList<>();
    Element table = Jsoup.parse(htmlContent).getElementById("archiveResult");
    if (table == null) {
        // A page without the result table used to fail with a NullPointerException.
        return links;
    }
    for (Element tr : table.getElementsByTag("tr")) {
        try {
            links.add(DownloadLink.create(
                    tr.getElementsByClass("name").first().text(),
                    "",
                    tr.getElementsByClass("date").first().text(),
                    null,
                    tr.getElementsByAttributeValue("rel", "magnet").first().attr("href")));
        } catch (Exception ignored) {
            // Rows lacking one of the expected cells are skipped on purpose.
        }
    }
    return links;
}Example 34
| Project: k-9-master File: UriParserTestHelper.java View source code |
/**
 * Asserts that the linkified text contains an anchor whose text and {@code href} both
 * equal the expected URI. Only the first anchor is checked.
 *
 * @param expected the URI expected as link text and target
 * @param actual the linkified HTML fragment
 */
public static void assertContainsLink(String expected, StringBuffer actual) {
    Document fragment = Jsoup.parseBodyFragment(actual.toString());
    Element firstAnchor = fragment.select("a").first();
    assertNotNull("No <a> element found", firstAnchor);
    assertEquals(expected, firstAnchor.text());
    assertEquals(expected, firstAnchor.attr("href"));
}Example 35
| Project: KinoCast-master File: NowVideo.java View source code |
/**
 * Resolves the MP4 source for this host's video. The id is the part of {@code url} after
 * the last slash; it is looked up on the NowVideo mobile page.
 *
 * @param queryTask receives a progress message before the lookup
 * @return the {@code src} of the {@code video/mp4} source element, or {@code null} when
 *         {@code url} is empty or any exception occurs (its stack trace is printed)
 */
@Override
public String getVideoPath(DetailActivity.QueryPlayTask queryTask) {
    if (TextUtils.isEmpty(url)) {
        return null;
    }
    try {
        String videoId = url.substring(url.lastIndexOf("/") + 1);
        queryTask.updateProgress(queryTask.getContext().getString(R.string.host_progress_getvideoforid, videoId));
        Document page = Jsoup.connect("http://www.nowvideo.sx/mobile/video.php?id=" + videoId)
                .userAgent(Utils.USER_AGENT)
                .timeout(3000)
                .get();
        return page.select("source[type=video/mp4]").attr("src");
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}Example 36
| Project: learn_crawler-master File: HtmlParserTool.java View source code |
/**
 * Downloads a page and collects the absolute URLs of its anchors, frames and iframes that
 * the filter accepts. Every anchor URL is also printed to standard output.
 *
 * @param url the page to download
 * @param filter decides which URLs are kept
 * @return the accepted URLs; empty when the download raises an {@code IOException}
 */
public static Set<String> extracLinks(String url, LinkFilter filter) {
    Set<String> accepted = new HashSet<String>();
    try {
        Document page = Jsoup.connect(url).timeout(5000).get();
        Elements anchors = page.select("a[href]");
        Elements frames = page.select("frame[src]");
        Elements iframes = page.select("iframe[src]");
        for (Element anchor : anchors) {
            String target = anchor.absUrl("href");
            System.out.println(target);
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
        for (Element frame : frames) {
            String target = frame.absUrl("src");
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
        for (Element iframe : iframes) {
            String target = iframe.absUrl("src");
            if (filter.accept(target)) {
                accepted.add(target);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return accepted;
}Example 37
| Project: like_googleplus_layout-master File: PhoneKRNewsContentUtils.java View source code |
/**
 * Downloads a news article and flattens the paragraphs of its {@code xs-post} element into
 * a list: a paragraph without a link contributes its indented text, a paragraph whose first
 * link wraps an image contributes that image's {@code src}.
 *
 * @param newsUrl the article URL
 * @return the collected entries, or {@code null} when the download fails, the page has no
 *         {@code xs-post} element, or that element has no paragraphs
 */
public static LinkedList<String> getPhoneKRNewsDataList(String newsUrl) {
    LinkedList<String> data = null;
    try {
        Document document = Jsoup.connect(newsUrl).get();
        Element post = document.getElementById("xs-post");
        if (post == null) {
            // Missing container: previously an uncaught NullPointerException. Report it
            // the same way as a post without paragraphs.
            return null;
        }
        Elements paragraphs = post.getElementsByTag("p");
        if (!paragraphs.isEmpty()) {
            data = new LinkedList<String>();
            for (Element paragraph : paragraphs) {
                String text = null;
                Elements anchors = paragraph.getElementsByTag("a");
                if (anchors.isEmpty()) {
                    text = FOUR_BLANK_SPACE + paragraph.text();
                } else {
                    // Only the first link is inspected; it counts when it wraps an image.
                    Elements images = anchors.get(0).getElementsByTag("img");
                    if (!images.isEmpty()) {
                        text = images.get(0).attr("src");
                    }
                }
                if (!TextUtils.isEmpty(text)) {
                    data.add(text);
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return data;
}Example 38
| Project: mechanize-master File: JsoupUtilTest.java View source code |
/**
 * Checks which tag paths {@code JsoupDataUtil.findFirstByTag} resolves in a document with
 * two anchors directly inside the body.
 */
@Test
public void testFindFirstByTagSingleTag() {
    Document document = Jsoup.parse("<html><body><a href=\"A\">A</a><a href=\"B\">B</a></body></html>");

    String[] resolvable = {"a", "body", "body/a", "html/body/a", "html/a"};
    for (String path : resolvable) {
        assertNotNull(JsoupDataUtil.findFirstByTag(document, path));
    }

    String[] unresolvable = {"body/html/a", "body/unknown"};
    for (String path : unresolvable) {
        assertNull(JsoupDataUtil.findFirstByTag(document, path));
    }
}Example 39
| Project: mlcomp-master File: TitleMap.java View source code |
/**
 * Maps a (url, html) record to a (url, title) record. The title is the text of every
 * {@code <title>} element, each preceded by a comma (so a non-empty result starts with
 * one).
 *
 * @param recordNum not read by this method
 * @param record input record: column 0 is the URL, column 1 the HTML
 * @param context used to create and write the output record
 * @throws IOException if writing the output record fails
 */
@Override
public void map(long recordNum, Record record, TaskContext context) throws IOException {
    String url = (String) record.get(0);
    String html = (String) record.get(1);

    StringBuilder joinedTitles = new StringBuilder();
    for (Element titleElement : Jsoup.parse(html).getElementsByTag("title")) {
        joinedTitles.append(",").append(titleElement.text());
    }

    Record output = context.createOutputRecord();
    output.set("url", url);
    output.set("title", joinedTitles.toString());
    context.write(output);
}Example 40
| Project: mobile-ycjw-master File: StudentDevelopmentScheduleQuery.java View source code |
/**
 * Requests the development-schedule page from the selected server and returns the markup
 * of its {@code #DG_GetGrjh} element.
 *
 * @param context used to reach the application object holding the HTTP client and the
 *                selected server address
 * @return the outer HTML of the matched element(s); empty when nothing matches
 * @throws Exception wrapping any failure of the request or the parse
 */
@Override
public String getDevelopmentScheduleQueryInfo(Context context) throws Exception {
    try {
        YCApplication app = (YCApplication) context.getApplicationContext();
        String url = (String) app.get("selectedIp") + Constant.developScheduleQuery;
        HttpGet request = new HttpGet(url);
        HttpResponse response = app.getClient().execute(request);
        InputStream is = response.getEntity().getContent();
        StringBuilder sb = new StringBuilder();
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(is, Constant.ENCODING));
            String line;
            // Lines are joined without separators, exactly as before.
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        } finally {
            // The stream was never closed before; release it even when reading fails.
            is.close();
        }
        Document doc = Jsoup.parse(sb.toString());
        Elements table = doc.select("#DG_GetGrjh");
        return table.toString();
    } catch (Exception e) {
        throw new Exception(e);
    }
}Example 41
| Project: Muzik-master File: SearchDownloadsNL.java View source code |
/**
 * Searches downloads.nl for MP3 results and resolves each hit to the URL it redirects to.
 *
 * @param query the search terms
 * @return one {@code SongResult} (name, redirect target) per hit that answers with a
 *         {@code Location} header; results gathered so far when an {@code IOException}
 *         interrupts the search
 */
public static ArrayList<SongResult> getSongs(String query) {
    ArrayList<SongResult> songs = new ArrayList<SongResult>();
    //base query url.
    // NOTE(review): Uri.parse does not percent-encode the query — confirm whether
    // Uri.encode(query) was intended.
    String searchUrl = "http://www.downloads.nl/results/mp3/1/" + Uri.parse(query);
    try {
        Document document = Jsoup.connect(searchUrl).get();
        for (Element hit : document.select(".tl")) {
            String detailUrl = "http://www.downloads.nl" + hit.attr("href");
            //todo add artist string to the name so that result is clearer
            HttpURLConnection connection = (HttpURLConnection) new URL(detailUrl).openConnection();
            String location;
            try {
                // The redirect target is the download link, so do not follow it.
                connection.setInstanceFollowRedirects(false);
                location = connection.getHeaderField("Location");
            } finally {
                // Connections were never released before.
                connection.disconnect();
            }
            if (location == null) {
                // No redirect: new URL(null) used to throw and drop all remaining hits.
                continue;
            }
            URL secondURL = new URL(location);
            String name = hit.select("span").text();
            if (HomescreenActivity.debugMode) {
                Log.d("Play", "Downloads.nl Name=" + name + " url=" + secondURL);
            }
            songs.add(new SongResult(name, secondURL.toString()));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return songs;
}Example 42
| Project: opacclient-master File: WinBiapAccountTest.java View source code |
/**
 * Parses the media-list fixture for the current {@code file} and checks that every lent
 * item has a title, deadline, media type and cover data. Does nothing when the fixture is
 * missing.
 */
@Test
public void testParseMediaList() throws OpacApi.OpacErrorException {
    String html = readResource("/winbiap/medialist/" + file);
    if (html == null) {
        // we may not have all files for all libraries
        return;
    }
    List<LentItem> lentItems = WinBiap.parseMediaList(Jsoup.parse(html), new JSONObject());
    assertTrue(lentItems.size() > 0);
    for (LentItem lent : lentItems) {
        assertNotNull(lent.getTitle());
        assertNotNull(lent.getDeadline());
        assertNotNull(lent.getMediaType());
        assertContainsData(lent.getCover());
    }
}Example 43
| Project: orcid-update-java-master File: DelegatingMetaScraper.java View source code |
/**
 * Fetches work metadata for a URL or EThOS identifier.
 *
 * @param url a web address, or an identifier starting with {@code uk.bl.ethos}
 * @return the metadata scraped from the target
 * @throws IOException if the page cannot be downloaded
 */
@Override
public IsOrcidWork fetch(String url) throws IOException {
    //check to see if we have an ethos ID
    if (url.startsWith("uk.bl.ethos")) {
        EthosMetaScraper scrape = new EthosMetaScraper();
        return scrape.fetch(url);
    }
    HTMLMetaBuilder builder = cache.getIfPresent(url);
    if (builder == null) {
        System.out.println("looking up " + url);
        Document doc = Jsoup.connect(url).timeout(10000).get();
        builder = new HTMLMetaBuilder(doc);
        // Store the result: the lookup above could never hit because nothing was ever
        // put into the cache here.
        // NOTE(review): assumes `cache` offers put(key, value) as Guava's Cache does —
        // confirm against the field declaration.
        cache.put(url, builder);
    }
    return builder.getDublinCoreMeta();
}Example 44
| Project: sample-skeleton-projects-master File: MainRunner.java View source code |
/**
 * Prints the title and favicon path of the page at {@code URL}. The favicon is taken from
 * the first head element matching {@code hrefLink}, falling back to the {@code content} of
 * the first element matching {@code imgMeta}.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String faviconImagePath = "";
    Connection conn = Jsoup.connect(URL).timeout(LONG_TIMEOUT);
    try {
        Document documentObject = conn.get();
        System.out.println("URL title: " + documentObject.title());
        Element iconLink = documentObject.head().select(hrefLink).first();
        if (iconLink != null) {
            // Previously a successful match was never read, leaving the path empty.
            // NOTE(review): presumably hrefLink selects <link> elements, whose icon
            // location is in "href" — confirm against the selector constant.
            faviconImagePath = iconLink.attr("href");
        } else {
            Element iconMeta = documentObject.head().select(imgMeta).first();
            // Neither selector matching used to end in a NullPointerException.
            if (iconMeta != null) {
                faviconImagePath = iconMeta.attr("content");
            }
        }
        System.out.println("Favicon img: " + faviconImagePath);
    } catch (IOException e) {
        e.printStackTrace();
    }
}Example 45
| Project: selfoss-android-master File: ArticleContentParser.java View source code |
/**
 * Lists the image sources used in the article content.
 *
 * @return the non-empty {@code src} values of all {@code <img>} elements, in document order
 */
public List<String> getImagesUrls() {
    List<String> sources = new ArrayList<String>();
    for (Element image : Jsoup.parse(article.getContent()).getElementsByTag("img")) {
        String src = image.attr("src");
        if (src == null || src.isEmpty()) {
            continue;
        }
        sources.add(src);
    }
    return sources;
}Example 46
| Project: SocialConnect-master File: JsoupBaseCrwaler.java View source code |
/**
 * Downloads the document at {@code url}, trying up to four times.
 *
 * @param url the address to fetch
 * @return the parsed document of the first successful attempt
 * @throws IOException when all attempts fail; the last failure is attached as the cause
 */
@Override
public Document crwal(String url) throws IOException {
    if (logger.isDebugEnabled()) {
        logger.debug("Start crawling data from: " + url);
    }
    final int maxTriesToGetRemoteData = 4;
    Exception lastFailure = null;
    for (int tries = 0; tries < maxTriesToGetRemoteData; tries++) {
        try {
            return Jsoup.connect(url).timeout(5000).get();
        } catch (IOException e) {
            lastFailure = e;
            if (logger.isWarnEnabled()) {
                logger.warn("Got a " + e.getMessage() + " Exception, try again to fetch data from remote address. Number of previous tries: " + tries + ". At request: " + url);
            }
        }
    }
    throw new IOException("After " + maxTriesToGetRemoteData + " runs, gave up on fatching data from remote url: " + url, lastFailure);
}Example 47
| Project: StartupNews-master File: JsoupConnector.java View source code |
/**
 * Creates a connection to {@code url}, attaching the session user as a {@code user} cookie
 * when one is available.
 *
 * @param url the address to connect to
 * @return the connection, or {@code null} when {@code url} is empty
 */
public Connection newJsoupConnection(String url) {
    if (TextUtils.isEmpty(url)) {
        return null;
    }
    String sessionUser = mSessionManager.getSessionUser();
    if (!TextUtils.isEmpty(sessionUser)) {
        return Jsoup.connect(url).cookie("user", sessionUser);
    }
    Log.i(LOG_TAG, "user is empty!");
    return Jsoup.connect(url);
}Example 48
| Project: stocks-master File: YahooSearchProviderTest.java View source code |
/**
 * Parses the recorded Yahoo search response and checks the number of results plus every
 * field of the first one.
 */
@Test
public void testParsingHtml() throws IOException {
    try (Scanner reader = new Scanner(getClass().getResourceAsStream("response_yahoo_search.txt"), "UTF-8")) {
        // "\\A" makes the scanner return the whole resource as a single token.
        String html = reader.useDelimiter("\\A").next();
        Document parsed = Jsoup.parse(html);
        List<ResultItem> results = new YahooSearchProvider().extractFrom(parsed);
        assertThat(results.size(), equalTo(20));

        ResultItem first = results.get(0);
        assertThat(first.getSymbol(), equalTo("D979C.LS"));
        assertThat(first.getName(), equalTo("BASF AG/CITI WT 14"));
        assertThat(first.getIsin(), equalTo("DE000CF79JW9"));
        assertThat(first.getLastTrade(), equalTo(Values.Quote.factorize(0.11)));
        assertThat(first.getType(), equalTo("Zertifikate & OS"));
        assertThat(first.getExchange(), equalTo("LIS"));
    }
}Example 49
| Project: TopNews-master File: NewsDetailsService.java View source code |
/**
 * Builds an HTML fragment consisting of a title/date header followed by the article element
 * fetched from {@code url}.
 *
 * @param url        page to download
 * @param news_title title placed in the header (inserted as-is)
 * @param news_date  date placed under the title (inserted as-is)
 * @return the assembled markup; when the download fails with an {@link IOException} only the
 *         header is returned and the closing body tag is not appended
 */
public static String getNewsDetails(String url, String news_title, String news_date) {
    String data = "<body>" + "<center><h2 style='font-size:16px;'>" + news_title + "</h2></center>"
            + "<p align='left' style='margin-left:10px'>" + "<span style='font-size:10px;'>" + news_date + "</span>" + "</p>"
            + "<hr size='1' />";
    try {
        Document document = Jsoup.connect(url).timeout(9000).get();
        // NOTE(review): the empty-url branch is evaluated only after connect(url) has already
        // been called with that url - confirm whether it can ever be taken.
        String elementId;
        if (TextUtils.isEmpty(url)) {
            data = "";
            elementId = "memberArea";
        } else {
            elementId = "artibody";
        }
        Element element = document.getElementById(elementId);
        if (element != null) {
            data += element.toString();
        }
        data += "</body>";
    } catch (IOException e) {
        e.printStackTrace();
    }
    return data;
}Example 50
| Project: voj-master File: HtmlTextFilter.java View source code |
/**
 * Filters the HTML markup out of a string.
 *
 * @param text the string to filter
 * @return the filtered string, or {@code null} when {@code text} is {@code null}
 */
public static String filter(String text) {
    if (text == null) {
        return text;
    }
    Document parsed = Jsoup.parse(text);
    parsed.outputSettings(new Document.OutputSettings().prettyPrint(false));
    // Mark line-break positions with a literal backslash-n sequence next to <br> and <p> ...
    parsed.select("br").append("\\n");
    parsed.select("p").prepend("\\n\\n");
    // ... then turn those markers into real newline characters before the tags are removed.
    String withNewlines = parsed.html().replaceAll("\\\\n", "\n");
    Document.OutputSettings compactOutput = new Document.OutputSettings().prettyPrint(false);
    return Jsoup.clean(withNewlines, "", Whitelist.none(), compactOutput);
}Example 51
| Project: WaveTact-master File: Quote.java View source code |
/**
 * Fetches a random quotation page and sends the first quote together with its author.
 * <p>
 * When the author text contains a dash it is cut at the first "(" if there is one, otherwise at
 * the first "-".
 */
@Override
public void onCommand(String command, User user, PircBotX network, String prefix, Channel channel, boolean isPrivate, int userPermLevel, String... args) throws Exception {
    Document page = Jsoup.connect("http://wwww.quotationspage.com/random.php3").userAgent(Registry.USER_AGENT).get();
    String quote = page.select(".quote").get(0).text();
    String author = page.select(".author").get(0).text();
    if (author.contains("-")) {
        String separatorRegex = author.contains("(") ? "\\(" : "-";
        author = author.split(separatorRegex)[0];
    }
    IRCUtils.sendMessage(user, network, channel, quote + " -" + IRCUtils.noPing(author), prefix);
}Example 52
| Project: ache-master File: GoogleSearch.java View source code |
/**
 * Submits a search query to Google and collects the result links of one result page.
 *
 * @param query search terms; inserted into the request URL as given
 * @param page  result page index; the start offset sent is {@code page * docsPerPage}
 * @return one {@code BackLinkNeighborhood} (href, link text) for every link found under the
 *         ".r" headers of the "div#search" element
 * @throws IOException if the page cannot be downloaded or parsed
 */
public List<BackLinkNeighborhood> submitQuery(String query, int page) throws IOException {
    timer.waitMinimumDelayIfNecesary();
    // 21 -> max number allowed by google... decreases after
    String queryUrl = "https://www.google.com/search?q=" + query + "&num=" + docsPerPage + "&start=" + page * docsPerPage;
    System.out.println("URL:" + queryUrl);
    try {
        FetchedResult result = fetcher.get(queryUrl);
        Document doc;
        // try-with-resources closes the stream even when parsing throws.
        try (InputStream is = new ByteArrayInputStream(result.getContent())) {
            doc = Jsoup.parse(is, "UTF-8", query);
        }
        Elements searchItems = doc.select("div#search");
        Elements linkHeaders = searchItems.select(".r");
        Elements linksUrl = linkHeaders.select("a[href]");
        List<BackLinkNeighborhood> links = new ArrayList<>();
        for (Element link : linksUrl) {
            String title = link.text();
            String url = link.attr("href");
            links.add(new BackLinkNeighborhood(url, title));
        }
        System.out.println(getClass().getSimpleName() + " hits: " + links.size());
        return links;
    } catch (IOException | BaseFetchException e) {
        // Both failure kinds are reported to callers as one IOException carrying the cause.
        throw new IOException("Failed to download backlinks from Google.", e);
    }
}Example 53
| Project: asoiaf-master File: FetchUrls.java View source code |
/**
 * Downloads {@code url} and reads the thumbnail and original image links from its
 * "li.outlink a" anchors: the anchor labelled "200" supplies the thumbnail URL, the one
 * labelled "original" supplies the original URL.
 *
 * @param url page to inspect
 * @return an {@code ImageUrl} holding whatever links were found; any exception is printed and
 *         the object is returned as filled so far
 */
public static ImageUrl FetchImageUrl(String url) {
    ImageUrl iu = new ImageUrl();
    try {
        Elements outlinks = Jsoup.connect(url).timeout(5000).get().select("li.outlink a");
        for (Element outlink : outlinks) {
            String label = outlink.text();
            if (label.equals("200")) {
                iu.setThumbUrl(outlink.select("a[href]").attr("href"));
            }
            if (label.equals("original")) {
                iu.setOringinUrl(outlink.select("a[href]").attr("href"));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return iu;
}Example 54
| Project: asta4d-master File: ElementNotFoundHandlerOnDocumentTest.java View source code |
/**
 * Registers a not-found handler for "div" on a document that contains only a span, applies the
 * renderer, and checks that the span text became "y".
 */
@Test
public void notFoundOnDocument() throws Exception {
    Document doc = Jsoup.parse("<html><body><span>x</span></body></html>");
    Renderer renderer = Renderer.create();
    ElementNotFoundHandler divFallback = new ElementNotFoundHandler("div") {
        @Override
        public Renderer alternativeRenderer() {
            return Renderer.create("span", "y");
        }
    };
    renderer.add(divFallback);
    RenderUtil.apply(doc, renderer);
    String spanText = doc.select("span").text();
    Assert.assertEquals(spanText, "y");
}Example 55
| Project: baleen-master File: Jsp101HeadingsTest.java View source code |
/**
 * Runs the manipulator over four paragraphs and expects exactly one h1 to result, carrying the
 * text of the bold all-caps paragraph.
 */
@Test
public void testSubjectHeading() {
    String fragment = "<p><b>THIS IS A SUBJECT HEADING</b></p><p>THIS IS A NOT SUBJECT HEADING</p><p>THIS IS not a SUBJECT HEADING</p><p>THIS IS NOT A SUBJECT HEADING EITHER.</p>";
    Document document = Jsoup.parseBodyFragment(fragment);
    manipulator.manipulate(document);
    Elements headings = document.select("h1");
    assertEquals(1, headings.size());
    assertEquals("THIS IS A SUBJECT HEADING", headings.first().text());
}Example 56
| Project: bennu-master File: Component.java View source code |
/**
 * Replaces every element carrying a {@code bennu-component} attribute with the output of the
 * component registered in {@code COMPONENTS} under that attribute's value. Elements whose key
 * has no registered component are left in place.
 *
 * @param origin HTML to process
 * @return the serialized document after the replacements
 */
public static String process(String origin) {
    Document doc = Jsoup.parse(origin);
    doc.select("[bennu-component]").forEach(component ->
            Optional.ofNullable(COMPONENTS.get(component.attr("bennu-component")))
                    .ifPresent(handler -> component.replaceWith(handler.process(component))));
    return doc.toString();
}Example 57
| Project: clicker-master File: CN88ProxyGetter.java View source code |
/**
 * Collects proxies from listing pages 2 through 10 of the cz88 proxy site. For each page the
 * rows of the first table (except row 0) are read as host / port pairs.
 *
 * @return the proxies found; pages or rows that raise any exception are skipped silently
 */
@Override
public Set<Proxy> find() {
    final Set<Proxy> proxies = new HashSet<Proxy>();
    for (int page = 2; page < 11; page++) {
        try {
            final Document doc = Jsoup.parse(new URL("http://www.cz88.net/proxy/http_" + page + ".aspx"), TIMEOUT);
            final Elements rows = doc.getElementsByTag("table").get(0).getElementsByTag("tr");
            for (int rowIndex = 1; rowIndex < rows.size(); rowIndex++) {
                try {
                    final Elements cells = rows.get(rowIndex).getElementsByTag("td");
                    final String host = cells.get(0).text();
                    final int port = Integer.parseInt(cells.get(1).text());
                    proxies.add(new Proxy(host, port, this.properties));
                } catch (final Exception ignored) {
                    // Row without a usable host/port pair: skip it.
                }
            }
        } catch (final Exception ignored) {
            // Page could not be fetched or has no table: move on to the next page.
        }
    }
    return proxies;
}Example 58
| Project: CN1ML-NetbeansModule-master File: StringUtilTest.java View source code |
/**
 * Checks that {@code StringUtil.normaliseWhitespace} and the text extracted from a parsed
 * document both equal the expected single-whitespace string.
 */
@Test
public void normaliseWhiteSpaceHandlesHighSurrogates() {
// NOTE(review): as shown here the input and expected literals look identical; presumably the
// input originally held a surrogate pair and a run of whitespace that were lost - confirm
// against the upstream test before relying on it.
String test71540chars = "か゚ 1";
String test71540charsExpectedSingleWhitespace = "か゚ 1";
// Direct call to the normaliser.
assertEquals(test71540charsExpectedSingleWhitespace, StringUtil.normaliseWhitespace(test71540chars));
// Same expectation through the parse-then-text() path.
String extractedText = Jsoup.parse(test71540chars).text();
assertEquals(test71540charsExpectedSingleWhitespace, extractedText);
}Example 59
| Project: constellio-master File: ConnectorHttpUtils.java View source code |
/**
 * Downloads {@code url} and returns the parsed document together with its title and a file
 * name taken from the last "/"-separated segment of the URL.
 *
 * @param url address to fetch
 * @return the populated content holder
 * @throws IOException if the request or the parsing fails
 */
public static FetchedDocumentContent fetch(String url) throws IOException {
    FetchedDocumentContent fetched = new FetchedDocumentContent();
    fetched.document = Jsoup.connect(url).execute().parse();
    fetched.title = fetched.document.title();
    String[] segments = url.split("/");
    fetched.fileName = segments[segments.length - 1];
    return fetched;
}Example 60
| Project: dataverse-master File: MarkupChecker.java View source code |
/**
 * Cleans untrusted HTML with jsoup, using the basic-with-images whitelist extended by a few
 * heading, text and image-map tags. Links are forced to open in a new window.
 * See http://jsoup.org/cookbook/cleaning-html/whitelist-sanitizer
 *
 * @param unsafe markup to clean; may be {@code null}
 * @return the cleaned markup, or {@code null} when {@code unsafe} is {@code null}
 */
public static String sanitizeBasicHTML(String unsafe) {
    if (unsafe == null) {
        return null;
    }
    Whitelist allowed = Whitelist.basicWithImages()
            .addTags("h1", "h2", "h3", "kbd", "hr", "s", "del", "map", "area")
            .addAttributes("img", "usemap")
            .addAttributes("map", "name")
            .addAttributes("area", "shape", "coords", "href", "title", "alt")
            .addEnforcedAttribute("a", "target", "_blank");
    return Jsoup.clean(unsafe, allowed);
}Example 61
| Project: en-webmagic-master File: CssSelector.java View source code |
/**
 * Parses {@code text} as HTML, applies the configured selector and returns the non-empty
 * values obtained from the matching elements.
 *
 * @param text HTML to search
 * @return the extracted values in match order; empty when nothing matches
 */
@Override
public List<String> selectList(String text) {
    List<String> values = new ArrayList<String>();
    Elements matches = Jsoup.parse(text).select(selectorText);
    if (!CollectionUtils.isNotEmpty(matches)) {
        return values;
    }
    for (Element match : matches) {
        String value = getValue(match);
        if (StringUtils.isEmpty(value)) {
            continue;
        }
        values.add(value);
    }
    return values;
}Example 62
| Project: EventApp-master File: BazaarEntryLoader.java View source code |
/**
 * Turns every table of the response into a {@code BazaarEntry} and hands the list to the
 * listener. For tables with at least three rows, rows 0, 1 and 2 supply the name, title and
 * summary, and the summary row's link (if any) supplies the URL. Tables with fewer rows yield
 * an entry with nothing set.
 *
 * @param body HTML response body
 */
@Override
public void onResponse(String body) {
    List<BazaarEntry> entries = new ArrayList<BazaarEntry>();
    for (Element table : Jsoup.parse(body).select("table")) {
        BazaarEntry entry = new BazaarEntry();
        Elements rows = table.select("tr");
        if (rows.size() >= 3) {
            entry.setName(rows.get(0).text());
            entry.setTitle(rows.get(1).text());
            Element summaryRow = rows.get(2);
            entry.setSummary(summaryRow.text());
            Elements anchors = summaryRow.select("a");
            if (!anchors.isEmpty()) {
                entry.setUrl(anchors.attr("href"));
            }
        }
        entries.add(entry);
    }
    listener.onSuccess(entries);
}Example 63
| Project: extentreports-java-master File: SystemAttributeTests.java View source code |
/**
 * Flushes the report, re-reads the generated HTML and asserts that both {@code key} and
 * {@code value} appear as the text of some cell under ".environment".
 */
private void performAssertForKVPairs(String key, String value) {
    extent.flush();
    Document doc = Jsoup.parse(Reader.readAllText(htmlFilePath));
    boolean keyFound = false;
    boolean valueFound = false;
    for (Element cell : doc.select(".environment td")) {
        String cellText = cell.text();
        keyFound |= cellText.equals(key);
        valueFound |= cellText.equals(value);
    }
    Assert.assertTrue(keyFound);
    Assert.assertTrue(valueFound);
}Example 64
| Project: FakeWeather-master File: MzituZiPaiFragment.java View source code |
/**
 * Downloads {@code url} and builds one {@code Girl} per list item of the first
 * "div.postlist" element, using the {@code src} of the item's first image.
 *
 * @param url page to scrape
 * @return the items found; empty when the page has no post list or the download fails with an
 *         {@link IOException} (which is printed)
 */
@Override
public List<Girl> call(String url) {
    List<Girl> girls = new ArrayList<>();
    try {
        Document doc = Jsoup.connect(url).timeout(10000).get();
        Element postList = doc.select("div.postlist").first();
        if (postList == null) {
            // first() yields null when nothing matched; previously this caused an uncaught NPE.
            return girls;
        }
        for (Element item : postList.select("li")) {
            Element image = item.select("img").first();
            if (image != null) {
                girls.add(new Girl(image.attr("src")));
            }
            // List items without an image are skipped rather than failing the whole page.
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return girls;
}Example 65
| Project: FudanBBS-master File: ListLinks.java View source code |
/**
 * Fetches the URL given as the single argument and prints its media sources, link imports
 * and anchors, in that order.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    String url = args[0];
    print("Fetching %s...", url);
    Document doc = Jsoup.connect(url).get();

    Elements media = doc.select("[src]");
    print("\nMedia: (%d)", media.size());
    for (Element src : media) {
        String tag = src.tagName();
        String absoluteSrc = src.attr("abs:src");
        if (tag.equals("img")) {
            // Images additionally report their dimensions and a shortened alt text.
            print(" * %s: <%s> %sx%s (%s)", tag, absoluteSrc, src.attr("width"), src.attr("height"), trim(src.attr("alt"), 20));
        } else {
            print(" * %s: <%s>", tag, absoluteSrc);
        }
    }

    Elements imports = doc.select("link[href]");
    print("\nImports: (%d)", imports.size());
    for (Element link : imports) {
        print(" * %s <%s> (%s)", link.tagName(), link.attr("abs:href"), link.attr("rel"));
    }

    Elements links = doc.select("a[href]");
    print("\nLinks: (%d)", links.size());
    for (Element link : links) {
        print(" * a: <%s> (%s)", link.attr("abs:href"), trim(link.text(), 35));
    }
}Example 66
| Project: Gazetti_Newspaper_Reader-master File: toi.java View source code |
/**
 * Downloads the article at {@code mArticleURL} and extracts its title, header image URL and
 * body text. The extracted values are also stored in {@code titleText}, {@code mImageURL} and
 * {@code bodyText}.
 *
 * @return a three-element array {title, image URL, body text}, or {@code null} when the
 *         response is missing, its status is not 200, or any exception occurs (the exception
 *         is reported to Crashlytics)
 */
public String[] getToiArticleContent() {
    String[] result = new String[3];
    String url = mArticleURL;
    try {
        Response response = Jsoup.connect(url).userAgent("Mozilla").timeout(10 * 1000).execute();
        if (response == null) {
            Crashlytics.log("Is response null ? " + (null == response));
            return null;
        } else if (response.statusCode() != 200) {
            Crashlytics.log("Received response - " + response.statusCode() + " -- " + response.statusMessage());
            Crashlytics.log("Received response - " + response.body());
            return null;
        }
        // Parse the response that was just received instead of issuing a second request.
        Document doc = response.parse();
        // get Title
        String ToiTitleXPath = ConfigService.getInstance().getTOIHead();
        titleText = doc.select(ToiTitleXPath).text();
        // get HeaderImageUrl
        mImageURL = getImageURL(doc);
        String ToiArticleXPath = ConfigService.getInstance().getTOIBody();
        Element bodyArticleElements = doc.select(ToiArticleXPath).first();
        // Swap <br /> for a placeholder so the breaks survive text() and can become newlines.
        String temp = bodyArticleElements.html().replace("<br />", "$$$");
        Document bodyNewLine = Jsoup.parse(temp);
        bodyText = bodyNewLine.text().replace("$$$", "\n");
        result[0] = titleText;
        result[1] = mImageURL;
        result[2] = bodyText;
    } catch (IOException e) {
        Crashlytics.logException(e);
        return null;
    } catch (NullPointerException npe) {
        // Raised when the body selector matches nothing (first() returned null).
        bodyText = null;
        Crashlytics.logException(npe);
        return null;
    } catch (Exception e) {
        Crashlytics.logException(e);
        return null;
    }
    return result;
}Example 67
| Project: gvoa-master File: ItemHtmlParser.java View source code |
/**
 * Downloads the page behind {@code item}'s link and fills in the item's mp3 URL (when an
 * anchor with id "mp3" exists) and full text (the HTML of the "content" element with any
 * "div.contentImage" removed), then marks the item as parsed.
 *
 * @param item feed item to complete
 * @throws Exception on any download or parsing failure
 */
public static void parseItemDetail(RssItem item) throws Exception {
    Document doc = Jsoup.parse(NetworkUtil.httpGetContent(item.getLink()));

    Element mp3Anchor = doc.select("a[id=mp3]").first();
    if (mp3Anchor == null) {
        Log.i(tag, "can't get mp3");
    } else {
        Log.i(tag, mp3Anchor.attr("href"));
        item.setMp3url(mp3Anchor.attr("href"));
    }

    Element content = doc.getElementById("content");
    Element image = content.select("div.contentImage").first();
    if (image != null) {
        Log.i(tag, "remove image element from content");
        image.remove();
    }
    String fullText = content.html();
    Log.i(tag, fullText);
    item.setFullText(fullText);

    // The lyrics link is only logged, not stored.
    Element lyricsAnchor = content.select("a[id=lrc]").first();
    if (lyricsAnchor != null) {
        Log.i(tag, lyricsAnchor.attr("href"));
    }
    item.setStatus(RssItem.E_PARSE_TXT_OK);
}Example 68
| Project: HabReader-master File: PostShowLoader.java View source code |
/**
 * Downloads the post at {@code url} and copies its title, hubs, content, date and author into
 * a {@code PostsFullData}. When the page has no "span.post_title" element the content is set
 * to the error_404 string instead.
 *
 * @return the populated data object; left as constructed when the download fails with an
 *         {@link IOException}
 */
@Override
public PostsFullData loadInBackground() {
    PostsFullData data = new PostsFullData();
    try {
        Document page = Jsoup.connect(url).get();
        Element title = page.select("span.post_title").first();
        Element hubs = page.select("div.hubs").first();
        Element content = page.select("div.content").first();
        Element date = page.select("div.published").first();
        Element author = page.select("div.author > a").first();
        if (title == null) {
            data.setContent(context.getString(R.string.error_404));
            return data;
        }
        data.setUrl(url);
        data.setTitle(title.text());
        data.setHubs(hubs.text());
        data.setContent(content.html());
        data.setDate(date.text());
        data.setAuthor(author.text());
    } catch (IOException ignored) {
        // Download failures are not reported; the caller receives the empty data object.
    }
    return data;
}Example 69
| Project: HackerNews-master File: ConnectionManager.java View source code |
/**
 * Creates a connection to {@code BASE_URL + baseUrlExtension} without any user cookie, using
 * the shared timeout and user agent. When the user preference for compressed data is on, gzip
 * is requested via the Accept-Encoding header; the preference value is also recorded in
 * Crashlytics.
 *
 * @param baseUrlExtension path appended to the base URL
 * @return the configured connection
 */
public static Connection anonConnect(String baseUrlExtension) {
    Connection conn = Jsoup.connect(ConnectionManager.BASE_URL + baseUrlExtension)
            .timeout(TIMEOUT_MILLIS)
            .userAgent(ConnectionManager.USER_AGENT);
    boolean compress = new UserPrefs(MainApplication.getInstance().getApplicationContext()).getCompressData();
    Crashlytics.setBool("ConnectionManager :: GZip Responses", compress);
    if (compress) {
        conn.header("Accept-Encoding", "gzip");
    }
    return conn;
}Example 70
| Project: HappyResearch-master File: MTimeCrawler.java View source code |
/**
 * Reads a list of URLs from mtime.txt, downloads each page and stores it as
 * {@code <dir>/<name>/<name>.html}, where the name comes from the page's
 * "v:itemreviewed" span after filtering.
 *
 * @throws Exception on any read, download or write failure
 */
public void crawl_web_pages() throws Exception {
    List<String> urls = FileIO.readAsList("./src/main/resources/mtime.txt");
    for (String url : urls) {
        String html = URLReader.read(url);
        String reviewedName = Jsoup.parse(html).select("span[property=v:itemreviewed]").text();
        String name = Strings.filterWebString(reviewedName, '_');
        String dirPath = dir + name + "/";
        FileIO.makeDirectory(dirPath);
        FileIO.writeString(dirPath + name + ".html", html);
    }
}Example 71
| Project: jabref-master File: ACS.java View source code |
/**
 * Looks up a full-text PDF URL for a BibTeX entry via its DOI.
 * <p>
 * The source page for the DOI is fetched (HTTP errors are ignored); when it contains a
 * ".pdf-high-res a" link, the source URL with "/abs/" replaced by "/pdf/" is returned.
 *
 * @param entry the BibTeX entry
 * @return the PDF URL, or an empty Optional when the entry has no parsable DOI or the page has
 *         no such link
 * @throws NullPointerException if {@code entry} is null
 * @throws java.io.IOException if the page cannot be fetched
 */
@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException {
    Objects.requireNonNull(entry);
    Optional<DOI> doi = entry.getField(FieldName.DOI).flatMap(DOI::parse);
    if (!doi.isPresent()) {
        return Optional.empty();
    }
    String source = String.format(SOURCE, doi.get().getDOI());
    Element link = Jsoup.connect(source).ignoreHttpErrors(true).get().select(".pdf-high-res a").first();
    if (link == null) {
        return Optional.empty();
    }
    LOGGER.info("Fulltext PDF found @ ACS.");
    return Optional.of(new URL(source.replaceFirst("/abs/", "/pdf/")));
}Example 72
| Project: jacorb-master File: Client.java View source code |
/**
 * Reads the server IOR from the PrintIOR servlet page, connects to the remote GoodDay object
 * and records a string on it, printing the stored string before and after.
 *
 * @param args optional first element: the string to record; a random UUID is used otherwise
 * @throws Exception on any network or ORB failure
 */
public static void main(String args[]) throws Exception {
    String updateString = (args.length >= 1) ? args[0] : UUID.randomUUID().toString();

    // Grab the IOR from the servlet.
    Document doc = Jsoup.connect("http://localhost:8080/jacorb-appserver/PrintIOR").get();
    String ior = doc.select("h1").first().text();
    System.out.println("Retrieved ior " + ior);

    Properties orbProps = new Properties();
    orbProps.setProperty("org.omg.CORBA.ORBClass", "org.jacorb.orb.ORB");
    orbProps.setProperty("org.omg.CORBA.ORBSingletonClass", "org.jacorb.orb.ORBSingleton");
    orbProps.setProperty("jacorb.interop.null_string_encoding", "true");
    ORB orb = ORB.init(args, orbProps);
    GoodDay goodDay = GoodDayHelper.narrow(orb.string_to_object(ior));

    // Invoke remote server
    System.out.println("Retrieved initial string " + goodDay.get_string());
    goodDay.record_string(updateString);
    System.out.println("Retrieved string " + goodDay.get_string());
}Example 73
| Project: java-manga-reader-master File: MangaUtil.java View source code |
/**
 * Retrieves the titles listed as licensed on Anime News Network.
 * <p>
 * Each title has a leading "(The)" rewritten to "The" and anything from its last "(" onwards
 * removed.
 *
 * @return the cleaned titles, in page order
 * @throws IOException if the request cannot be completed
 */
public static List<String> getLicensedManga() throws IOException {
    String listUrl = "http://www.animenewsnetwork.com/encyclopedia/anime-list.php"
            + "?licensed=1"
            + "&sort=title"
            + "&showG=1";
    // maxBodySize(0) lifts jsoup's response size limit.
    Elements entries = Jsoup.connect(listUrl).maxBodySize(0).get().getElementsByClass("HOVERLINE");
    List<String> blackList = new ArrayList<String>(entries.size());
    for (Element entry : entries) {
        String title = entry.text();
        if (title.startsWith("(The)")) {
            title = title.replace("(The)", "The");
        }
        if (title.contains("(")) {
            title = title.substring(0, title.lastIndexOf('(')).trim();
        }
        blackList.add(title);
    }
    return blackList;
}Example 74
| Project: JCommons-master File: DownloaderTest.java View source code |
/**
 * Downloads the documents linked from elements containing "[PDF]" or "[arXiv]" on a Stack
 * Exchange meta question into the folder D:/dl. The file name is taken from the first
 * {@code <strong>} element of the link's parent.
 * <p>
 * Links whose stream cannot be opened are skipped. Both streams are closed even when the copy
 * fails, and data is copied in blocks rather than one byte at a time.
 *
 * @param args unused
 * @throws IOException if the page cannot be fetched or a file cannot be written
 */
public static void main(String[] args) throws IOException {
    Document doc = Jsoup.connect("http://meta.stackexchange.com/questions/134495/academic-papers-using-stack-exchange-data").get();
    Elements eles = doc.getElementsContainingText("[PDF]");
    eles.addAll(doc.getElementsContainingText("[arXiv]"));
    String folderName = "D:/dl";
    for (Element ele : eles) {
        String src = ele.attr("href");
        if (src == null || src.trim().equals("")) {
            continue;
        }
        URL url = new URL(src);
        Element nameEle = ele.parent().getElementsByTag("strong").get(0);
        // Remove characters that are awkward in file names.
        String fileName = nameEle.text().replace(":", " ").replace("\"", "").replace("'", "").replace("?", "");
        if (fileName.contains("Fit or")) {
            continue;
        }
        if (!fileName.endsWith(".")) {
            fileName = fileName.concat(".");
        }
        fileName = fileName.concat("pdf");
        System.out.println(fileName);

        final InputStream opened;
        try {
            opened = url.openStream();
        } catch (Exception e) {
            // Unreachable link: skip this document.
            continue;
        }
        try (InputStream in = opened;
                OutputStream out = new BufferedOutputStream(new FileOutputStream(folderName + "/" + fileName))) {
            byte[] buffer = new byte[8192];
            for (int read; (read = in.read(buffer)) != -1; ) {
                out.write(buffer, 0, read);
            }
        }
    }
}Example 75
| Project: jeboorker-master File: ComicsOrgDownloader.java View source code |
/**
 * Parses each non-null HTML payload as UTF-8 and wraps the resulting document in a
 * {@code ComicsOrgDownloadEntry}.
 *
 * @param metadataHtmlContent raw HTML pages; {@code null} elements are skipped
 * @return one entry per non-null payload, in input order
 * @throws IOException if a payload cannot be parsed
 */
private List<MetadataDownloadEntry> getMetadataDownloadEntries(List<byte[]> metadataHtmlContent) throws IOException {
    List<MetadataDownloadEntry> entries = new ArrayList<>(metadataHtmlContent.size());
    for (byte[] rawHtml : metadataHtmlContent) {
        if (rawHtml == null) {
            continue;
        }
        Document parsed = Jsoup.parse(new ByteArrayInputStream(rawHtml), StringUtil.UTF_8, MAIN_URL);
        entries.add(new ComicsOrgDownloadEntry(parsed, MAIN_URL));
    }
    return entries;
}Example 76
| Project: JianShuApp-master File: DataPool.java View source code |
/**
 * Fetches {@code url} through the session and extracts the page items.
 * <p>
 * The session is re-validated when the result is not a String or when the page contains a
 * "div.login-page" element.
 *
 * @param url page to load
 * @return the items of the page, or {@code null} when the result is not a String
 * @throws IOException if the request fails
 * @throws LoginRequiredException if the session is in the logout state after re-validation
 */
private Object[] load(String url) throws IOException, LoginRequiredException {
    Object httpResult = JianshuSession.getsInstance().getSync(url, true);
    boolean isHtml = httpResult instanceof String;
    Document doc = isHtml ? Jsoup.parse((String) httpResult) : null;

    if (!isHtml || doc.select("div.login-page").size() > 0) {
        JianshuSession.getsInstance().validate();
        if (JianshuSession.getsInstance().getState() instanceof JianshuSession.LogoutState) {
            throw new LoginRequiredException();
        }
    }
    if (!isHtml) {
        return null;
    }
    parsePageUserInfo(doc);
    return this.getItems(doc);
}Example 77
| Project: JKuuza-master File: ContentAnalyzerTest.java View source code |
/**
 * Checks {@code ContentAnalyzer.docHasClass}: true for a document containing the class "foo",
 * false for one that only contains "bar".
 */
@Test
public void testDocHasClass() {
    System.out.println("classExists");

    String withFoo = "<div><span class=\"foo\"></span></div>";
    assertTrue("expected: true - " + withFoo, new ContentAnalyzer(Jsoup.parse(withFoo)).docHasClass("foo"));

    String withBar = "<div><span class=\"bar\"></span></div>";
    assertFalse("expected: false - " + withBar, new ContentAnalyzer(Jsoup.parse(withBar)).docHasClass("foo"));
}Example 78
| Project: jmeter-master File: RenderAsHTMLFormatted.java View source code |
/**
 * Shows the response as plain text after passing it through jsoup's parser and serializer.
 *
 * @param response raw response text; {@code null} is shown as an empty string
 */
private void showHTMLFormattedResponse(String response) {
    results.setContentType("text/plain"); // $NON-NLS-1$
    String formatted;
    if (response == null) {
        formatted = ""; // $NON-NLS-1$
    } else {
        formatted = Jsoup.parse(response).html();
    }
    results.setText(formatted);
    results.setCaretPosition(0);
    resultsScrollPane.setViewportView(results);
    // Bug 55111 - Refresh JEditor pane size depending on the presence or absence of scrollbars
    resultsScrollPane.setPreferredSize(resultsScrollPane.getMinimumSize());
    results.revalidate();
}Example 79
| Project: jsoup-master File: ListLinks.java View source code |
/**
 * Fetches the URL given as the single argument and prints its media sources, link imports
 * and anchors, in that order.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    String url = args[0];
    print("Fetching %s...", url);
    Document doc = Jsoup.connect(url).get();

    Elements media = doc.select("[src]");
    print("\nMedia: (%d)", media.size());
    for (Element src : media) {
        String tag = src.tagName();
        String absoluteSrc = src.attr("abs:src");
        if (tag.equals("img")) {
            // Images additionally report their dimensions and a shortened alt text.
            print(" * %s: <%s> %sx%s (%s)", tag, absoluteSrc, src.attr("width"), src.attr("height"), trim(src.attr("alt"), 20));
        } else {
            print(" * %s: <%s>", tag, absoluteSrc);
        }
    }

    Elements imports = doc.select("link[href]");
    print("\nImports: (%d)", imports.size());
    for (Element link : imports) {
        print(" * %s <%s> (%s)", link.tagName(), link.attr("abs:href"), link.attr("rel"));
    }

    Elements links = doc.select("a[href]");
    print("\nLinks: (%d)", links.size());
    for (Element link : links) {
        print(" * a: <%s> (%s)", link.attr("abs:href"), trim(link.text(), 35));
    }
}Example 80
| Project: ManalithBot-master File: TranslatorPlugin.java View source code |
/**
 * Bot command that translates a message through the Bing translator endpoint.
 * <p>
 * The Korean command name, option help texts and fallback message were stored as mis-decoded
 * UTF-8 and are restored here. The debug log call now has a placeholder so the response is
 * actually written.
 *
 * @param to      target language code
 * @param message text to translate
 * @return the text of the translated element, or a fixed fallback message when the request
 *         fails with an {@link IOException}
 */
@BotCommand("번역")
public String translate(@Option(name = "ko|en...", help = "번역할 대상 언어") String to, @Option(name = "메시지", help = "번역할 메시지") String message) {
    final String url = "https://api.datamarket.azure.com/Bing/MicrosoftTranslator/v1/Translate?Text='%s'&To='%s'";
    String login = "USER_ID_IGNORED:" + clientSecret;
    String base64login = new String(Base64.encodeBase64(login.getBytes()));
    try {
        Document doc = Jsoup.connect(String.format(url, message, to)).header("Authorization", "Basic " + base64login).ignoreContentType(true).get();
        logger.debug("response: {}", doc);
        Elements elem = doc.select("d|text[m:type=Edm.String]");
        return elem.text();
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
    }
    return "번역할 내용이 없습니다.";
}Example 81
| Project: mayocat-shop-master File: DefaultPdfTemplateRenderer.java View source code |
/**
 * Renders a template to HTML, normalises it to XHTML with jsoup and writes it out as a PDF.
 *
 * @param outputStream  destination of the PDF bytes
 * @param template      template to render
 * @param renderingRoot directory whose absolute URI is passed to the renderer with the document
 * @param context       values made available to the template
 * @throws PdfRenderingException if template rendering or PDF creation fails
 */
@Override
public void generatePDF(OutputStream outputStream, Path template, Path renderingRoot, Map<String, Object> context) throws PdfRenderingException {
    ITextRenderer renderer = new ITextRenderer();
    try {
        String html = templateRenderer.renderAsString(template, context);
        // Ensure we have a valid XHTML document using JSoup
        Document jsoupDoc = Jsoup.parse(html);
        jsoupDoc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
        jsoupDoc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
        jsoupDoc.outputSettings().charset("UTF-8");
        String path = renderingRoot.toAbsolutePath().toUri().toString();
        renderer.setDocumentFromString(jsoupDoc.toString(), path);
        renderer.layout();
        renderer.createPDF(outputStream);
    } catch (DocumentException | TemplateRenderingException e) {
        // Both failure kinds are surfaced to callers as one PdfRenderingException.
        throw new PdfRenderingException(e);
    }
}Example 82
| Project: medium-textview-master File: JsoupUtils.java View source code |
/**
 * Collects the absolute {@code src} of every iframe in the given HTML.
 *
 * @param content HTML to scan
 * @return the iframe sources in document order; empty when there are none
 */
public static List<String> findAllVideoLinks(String content) {
    final List<String> links = new ArrayList<>();
    for (Element withSrc : Jsoup.parse(content).select("[src]")) {
        boolean isIframe = withSrc.tagName().equals("iframe");
        if (isIframe) {
            links.add(withSrc.attr("abs:src"));
        }
    }
    return links;
}Example 83
| Project: mensaapp-master File: WeeklyMenuTask.java View source code |
/**
 * Downloads and parses the weekly menu at each URL and merges the results.
 *
 * @param urls menu pages to load
 * @return a pair of (merged menu, null) on success, or (null, exception) as soon as one page
 *         fails to download or parse
 */
@Override
protected Pair<WeeklyMenu, Exception> doInBackground(String... urls) {
    List<WeeklyMenu> parsedMenus = new ArrayList<WeeklyMenu>();
    for (String menuUrl : urls) {
        try {
            Document page = Jsoup.connect(menuUrl).get();
            parsedMenus.add(WeeklyMenuParser.create(context, page, mensa).parse());
        } catch (WeeklyMenuParseException parseFailure) {
            String message = String.format(context.getString(R.string.error_menu_parse), menuUrl);
            Log.w(TAG, message, parseFailure);
            return new Pair<WeeklyMenu, Exception>(null, parseFailure);
        } catch (Exception failure) {
            String message = String.format(context.getString(R.string.error_menu_download), menuUrl);
            Log.e(TAG, message, failure);
            return new Pair<WeeklyMenu, Exception>(null, failure);
        }
    }
    WeeklyMenu merged = WeeklyMenu.merge(mensa, Utils.now(), parsedMenus);
    return new Pair<WeeklyMenu, Exception>(merged, null);
}Example 84
| Project: meta-server-master File: ServerHtmlContentTest.java View source code |
/**
 * Fetches the server list page and checks that "server-list" is a table whose first body row
 * shows the name, owner, port and address of {@code firstEntry}.
 */
@Test
public void testShowHtml() throws IOException {
    Document page = Jsoup.connect(URL_BASE + "/servers/show").get();
    Element serverTable = page.getElementById("server-list");
    Assert.assertTrue(serverTable.nodeName().equals("table"));

    Element firstRow = serverTable.select("tbody").first().select("tr").first();
    String shownName = firstRow.getElementsByClass("server-name").first().text();
    Assert.assertEquals(firstEntry.getName(), shownName);
    String shownOwner = firstRow.getElementsByClass("server-owner").first().text();
    Assert.assertEquals(firstEntry.getOwner(), shownOwner);
    String shownPort = firstRow.getElementsByClass("server-port").first().text();
    Assert.assertEquals("" + firstEntry.getPort(), shownPort);
    String shownAddress = firstRow.getElementsByClass("server-address").first().text();
    Assert.assertEquals(firstEntry.getAddress(), shownAddress);
}Example 85
| Project: myrobotlab-master File: JSoupExtractor.java View source code |
/**
 * Parses every value of the configured HTML field with jsoup and adds each element matching
 * the configured selector to the output field of the same document.
 *
 * @param doc document to enrich in place
 * @return always {@code null}
 */
@Override
public List<Document> processDocument(Document doc) {
    for (Object htmlValue : doc.getField(htmlField)) {
        org.jsoup.nodes.Document parsed = Jsoup.parse(htmlValue.toString());
        for (Element match : parsed.select(jSoupSelector)) {
            doc.addToField(outputField, match);
        }
    }
    return null;
}Example 86
| Project: NewsStats-master File: NewYorkTimesContentHandler.java View source code |
/**
 * Extracts the article from a crawled page and appends it to {@code articles}.
 * <p>
 * Nothing is added when the page carries no HTML parse data, has no element with id "story",
 * or its content is rejected by {@code filterArticles}.
 *
 * @param page crawled page
 * @return the {@code articles} list, with the new article appended when one was accepted
 */
@Override
public List extractArticles(Page page) {
    if (!(page.getParseData() instanceof HtmlParseData)) {
        return articles;
    }
    System.out.println("Current URL: " + page.getWebURL());
    String html = ((HtmlParseData) page.getParseData()).getHtml();
    Element story = Jsoup.parseBodyFragment(html).getElementById("story");
    if (story == null) {
        // if no article can be found
        return articles;
    }

    String title = story.getElementById("story-heading").ownText();
    String dateString = story.getElementsByClass("dateline").first().attr("datetime");
    Date date = null;
    try {
        date = new SimpleDateFormat("yyyy-MM-dd").parse(dateString);
    } catch (ParseException e) {
        // An unparsable date leaves the created date null.
        e.printStackTrace();
    }
    String author = story.getElementsByClass("byline-author").first().ownText();

    StringBuilder body = new StringBuilder();
    for (Element paragraph : story.select("p.story-body-text.story-content")) {
        body.append(paragraph.ownText());
    }
    String content = body.toString();
    if (!filterArticles(content)) {
        // ignore the article if filter does not approve
        return articles;
    }

    Article article = new NewYorkTimesArticle();
    article.setTitle(title);
    article.setCreatedDate(date);
    article.setAuthor(author);
    article.setContent(content);
    articles.add(article);
    return articles;
}Example 87
| Project: pack-master File: Ch5Coz4.java View source code |
// What happens if the default xmlParser is used? Let's try it.
public static void normalXmlParse() {
    String remoteJson = CrawlerPack.getFromRemote(url);
    String convertedXml = CrawlerPack.jsonToXml(remoteJson);
    // Result of converting the original json to xml
    System.out.println("原始XML");
    System.out.println(convertedXml);
    Document parsed = Jsoup.parse(convertedXml, "", Parser.xmlParser());
    parsed.charset(StandardCharsets.UTF_8);
    // What happened?
    System.out.println("轉�後XML");
    System.out.println(parsed.toString());
}Example 88
| Project: qiushi_baike-master File: KJFMUtils.java View source code |
/**
 * Downloads one listing page and converts every "xs-entry" element under "xs-main" into a
 * {@code KJFMItem}.
 *
 * @param pageNo page identifier appended to {@code KE_JI_FENG_MANG_URL}
 * @return the parsed items, or {@code null} when the page has no "xs-main" element or that
 *         element contains no "xs-entry" elements
 * @throws IOException if the page cannot be fetched
 */
public static LinkedList<KJFMItem> handleKJFMItems(String pageNo) throws IOException {
    String url = KE_JI_FENG_MANG_URL + pageNo + "/";
    System.out.println("url = " + url);
    Document document = Jsoup.connect(url).timeout(20000).get();
    Element divTag = document.getElementById("xs-main");
    if (divTag == null) {
        return null;
    }
    Elements entryTags = divTag.getElementsByClass("xs-entry");
    if (entryTags == null || entryTags.isEmpty()) {
        return null;
    }
    LinkedList<KJFMItem> items = new LinkedList<KJFMItem>();
    for (Element e : entryTags) {
        KJFMItem item = new KJFMItem();
        Elements aTags = e.getElementsByTag("a");
        if (aTags != null && !aTags.isEmpty()) {
            String url1 = aTags.get(0).attr("href");
            System.out.println("url1 = " + url1);
            if (url1 != null && url1.startsWith("www")) {
                // The scheme belongs in front of the host; it used to be appended to the end.
                url1 = "http://" + url1;
            }
            // NOTE(review): the link is stored in 'title' and is overwritten below whenever
            // the entry has an image - presumably a dedicated url field was intended; confirm.
            item.title = url1;
        }
        Elements imgTags = e.getElementsByTag("img");
        if (imgTags != null && !imgTags.isEmpty()) {
            Element firstImage = imgTags.get(0);
            item.img = firstImage.attr("src");
            item.title = firstImage.attr("alt");
        }
        Elements pTags = e.getElementsByTag("p");
        if (pTags != null && !pTags.isEmpty()) {
            item.content = pTags.get(0).text();
        }
        items.add(item);
    }
    return items;
}Example 89
| Project: quadriga-master File: HTMLContentValidator.java View source code |
/**
 * Validates the title and description of an about text or a project blog entry.
 * <p>
 * Both fields are required. A present description must pass the extended whitelist and a
 * present title must pass the simple-text whitelist. A missing value is reported only through
 * the "required" error: the whitelist check is skipped for {@code null} so that it is never
 * handed to {@code Jsoup.isValid}.
 *
 * @param obj object under validation; title and description are read when it is an
 *            {@code IAboutText} or a {@code ProjectBlogEntry}
 * @param err collector for validation errors
 */
@Override
public void validate(Object obj, Errors err) {
    String description = null;
    String title = null;
    if (obj instanceof IAboutText) {
        IAboutText abtText = (IAboutText) obj;
        description = abtText.getDescription();
        title = abtText.getTitle();
    } else if (obj instanceof ProjectBlogEntry) {
        ProjectBlogEntry projectBlog = (ProjectBlogEntry) obj;
        description = projectBlog.getDescription();
        title = projectBlog.getTitle();
    }
    Whitelist whitelist = ExtendedWhitelist.extendedWhiteListWithBase64();
    Whitelist titleWhitelist = Whitelist.simpleText();
    // validate all the input parameters
    ValidationUtils.rejectIfEmptyOrWhitespace(err, "title", "about_title.required");
    ValidationUtils.rejectIfEmptyOrWhitespace(err, "description", "about_description.required");
    if (description != null && !Jsoup.isValid(description, whitelist)) {
        err.rejectValue("description", "about_description.proper");
    }
    if (title != null && !Jsoup.isValid(title, titleWhitelist)) {
        err.rejectValue("title", "about_title.proper");
    }
}Example 90
| Project: serengeti-ws-master File: HostInstallDetailsParser.java View source code |
/**
 * Extracts the status message and the progress percentage from an HTML page.
 * <p>
 * The message is the text of the elements with class "message" nested in
 * elements with class "wrap" (the last one wins). The percentage is read from
 * the style attribute of elements with class "bar" nested in elements with
 * class "progress": the second whitespace-separated token, with "%" removed.
 *
 * @param pageContent HTML of the page to inspect
 * @return the populated parse result
 */
@Override
public ParseResult parse(String pageContent) {
    Document page = Jsoup.parse(pageContent);
    ParseResult parsed = new ParseResult();
    for (Element wrapper : page.getElementsByClass("wrap")) {
        for (Element message : wrapper.getElementsByClass("message")) {
            parsed.setMessage(message.text());
        }
    }
    for (Element progress : page.getElementsByClass("progress")) {
        for (Element bar : progress.getElementsByClass("bar")) {
            if (!bar.hasAttr("style")) {
                continue;
            }
            String[] styleTokens = bar.attr("style").split("\\s+");
            String digits = styleTokens[1].replace("%", "");
            parsed.setPercent(Integer.valueOf(digits));
        }
    }
    return parsed;
}Example 91
| Project: slack-capybara-master File: nomurishHandlers.java View source code |
/**
 * Converts the given word to "Nomurish" and returns the result.
 * <p>
 * The word is posted as form data to the URL assembled from BASE_SCHEME,
 * BASE_HOST and BASE_PATH; the text of the elements whose name attribute is
 * "after1" is returned.
 *
 * @param word word
 * @return nomurished word
 * @throws URISyntaxException if the target URL cannot be built
 * @throws IOException if the request fails
 */
private static String nomurish(final String word) throws URISyntaxException, IOException {
    final URIBuilder endpoint = new URIBuilder()
            .setScheme(BASE_SCHEME)
            .setHost(BASE_HOST)
            .setPath(BASE_PATH);
    final String target = endpoint.build().toString();
    final Document response = Jsoup.connect(target)
            .data("before", word)
            .data("level", "1")
            .data("option", "nochk")
            .data("transbtn", "翻訳")
            .post();
    return response.select("[name=after1]").text();
}Example 92
| Project: SpiderJackson-master File: YouDaiLiPage1.java View source code |
/**
 * Collects every link found inside "div.chunlist" elements of the response
 * body and hands them to the URL service as HTTP GET URLs for
 * {@code YouDaiLiPage2}, each with the current URL's priority plus one.
 *
 * @return always {@code true}
 */
@Override
public boolean responseHandle(Proxy ip, ProxyController proxyController, Url url, UrlService urlService, ContextSrc contextSrc, HttpRequestBase request, CloseableHttpResponse response, String content) {
    ArrayList<Url> discovered = new ArrayList<>();
    Document page = Jsoup.parse(content);
    for (Element listBlock : page.select("div.chunlist")) {
        for (Element anchor : listBlock.select("a[href]")) {
            String href = anchor.attr("href");
            Url next = Url.newHttpGetUrl(href, YouDaiLiPage2.class);
            next.setPriority(url.getPriority() + 1);
            discovered.add(next);
        }
    }
    urlService.insert(discovered);
    return true;
}Example 93
| Project: spimedb-master File: HTML.java View source code |
/**
 * Runs an HTML fragment through the shared cleaner and returns the cleaned
 * body markup, rendered with the shared output settings.
 *
 * @param html HTML fragment to filter
 * @return the cleaned body HTML, or the unmodified input when anything goes
 *         wrong (the failure is logged)
 */
public static String filterHTML(String html) {
    try {
        Document sanitized = cleaner.clean(Jsoup.parseBodyFragment(html));
        sanitized.outputSettings(outputSettings);
        String markup = sanitized.body().html();
        return markup;
    } catch (Exception e) {
        logger.error("filterHTML {}: \"{}\"", e, html);
        return html;
    }
}Example 94
| Project: spring-documenter-master File: Application.java View source code |
/**
 * Searches Bing for every type under "org.springframework" that is annotated
 * with {@code Documented} and writes up to two result links per type to
 * "annotations.csv" (columns: Name, Class, URL).
 *
 * @param args unused
 * @throws Exception if scanning, fetching a result page or writing the CSV
 *                   fails
 */
public static void main(String[] args) throws Exception {
    Reflections reflections = new Reflections("org.springframework");
    List<Class<?>> clazzes = new ArrayList<Class<?>>(reflections.getTypesAnnotatedWith(Documented.class));
    Collections.sort(clazzes, new Comparator<Class<?>>() {
        @Override
        public int compare(Class<?> o1, Class<?> o2) {
            return o1.getName().compareTo(o2.getName());
        }
    });
    System.out.println("##################################################");
    System.out.println("Total Annotations: " + clazzes.size());
    System.out.println("##################################################");
    FileWriter out = new FileWriter(new File("annotations.csv"));
    try {
        out.write("\"Name\",\"Class\",\"URL\"\n");
        for (Class<?> class1 : clazzes) {
            System.out.println(class1.getName());
            Document doc = Jsoup.connect("http://www.bing.com/search?q=" + URLEncoder.encode(class1.getName(), "UTF-8")).get();
            int written = 0;
            for (Element elem : doc.select("h2 a")) {
                System.out.println(elem.attr("href"));
                out.append("\"" + class1.getSimpleName() + "\",\"" + class1.getName() + "\",\"" + elem.attr("href") + "\"\n");
                written++;
                // keep only the first two hits per type
                if (written >= 2) {
                    break;
                }
            }
        }
    } finally {
        // release the file handle even when a fetch or write fails midway
        out.close();
    }
    System.out.println("##################################################");
}Example 95
| Project: StatusParser-master File: vkOld.java View source code |
/**
 * Fetches one chunk of a VK wall and stores the text of its posts in the
 * shared {@code statuses} array.
 * <p>
 * The first eight slots of {@code statuses} are cleared before the request.
 * Post texts are taken from "div.wall_post_text" elements in document order;
 * posts that do not fit into {@code statuses} are ignored.
 *
 * @param id        owner id; sent to the server with a leading minus sign
 * @param firstPost offset of the first post to request
 * @return the shared {@code statuses} array
 */
public String[] vkPart(int id, int firstPost) {
    for (int i = 0; i < 8; i++) {
        statuses[i] = null;
    }
    int iter = 0;
    String partURL = "http://vk.com/al_wall.php?act=get_wall&al=1&fixed=&offset=" + firstPost + "&owner_id=-" + id + "&type=all";
    try {
        String partSource = getUrlSource(partURL);
        // drop the first four characters of the response before parsing
        // NOTE(review): presumably a non-HTML prefix sent by the server - confirm
        partSource = partSource.substring(4);
        Document partDom = Jsoup.parse(partSource);
        try {
            Elements postTexts = partDom.select("div.wall_post_text");
            for (Element postText : postTexts) {
                // never write past the end of the shared array
                if (iter >= statuses.length) {
                    break;
                }
                statuses[iter] = postText.text();
                // log the slot that was just filled, then advance
                System.out.println(statuses[iter]);
                iter++;
            }
        } catch (NullPointerException eText) {
            eText.printStackTrace();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return statuses;
}Example 96
| Project: storm-crawler-master File: RefreshTagTest.java View source code |
/**
 * Verifies that the refresh target is extracted from a META refresh tag,
 * both with and without a space after the semicolon in its content value.
 */
@Test
public void testExtractRefreshURL() throws MalformedURLException, IOException {
    final String withSpace = "<html><head><META http-equiv=\"refresh\" content=\"0; URL=http://www.example.com/\"></head><body>Lorem ipsum.</body></html>";
    final String withoutSpace = "<html><head><META http-equiv=\"refresh\" content=\"0;URL=http://www.example.com/\"></head><body>Lorem ipsum.</body></html>";
    final String expected = "http://www.example.com/";
    String[] pages = { withSpace, withoutSpace };
    for (int idx = 0; idx < pages.length; idx++) {
        Document parsed = Jsoup.parseBodyFragment(pages[idx]);
        DocumentFragment dom = JSoupDOMBuilder.jsoup2HTML(parsed);
        Assert.assertEquals(expected, RefreshTag.extractRefreshURL(dom));
    }
}Example 97
| Project: SuZhouTong-client-for-android-master File: HtmlResolving.java View source code |
/*
 * Parses the news content.
 *
 * Every <span> yields a text entry. Every <p> with text yields a text entry;
 * a <p> without text but with an "align" attribute consumes the next element
 * of the "[src]" selection (starting at index 1) and yields an image entry
 * when that element is an <img>.
 */
public ArrayList<NewsContentVo> getNewsContent(String news_detaiol) {
    ArrayList<NewsContentVo> result = new ArrayList<NewsContentVo>();
    Document parsed = Jsoup.parse(news_detaiol);
    for (Element span : parsed.getElementsByTag("span")) {
        NewsContentVo textEntry = new NewsContentVo();
        textEntry.setIsImg(0);
        textEntry.setContentList(span.text());
        result.add(textEntry);
    }
    Elements paragraphs = parsed.getElementsByTag("p");
    Elements sources = parsed.select("[src]");
    // index of the next "[src]" element to consume; starts at the second one
    int cursor = 1;
    for (Element paragraph : paragraphs) {
        if (paragraph.hasText()) {
            NewsContentVo textEntry = new NewsContentVo();
            textEntry.setIsImg(0);
            textEntry.setContentList(paragraph.text());
            result.add(textEntry);
            continue;
        }
        boolean canConsume = paragraph.hasAttr("align")
                && sources != null
                && sources.size() > 0
                && sources.size() > cursor;
        if (!canConsume) {
            continue;
        }
        Element candidate = sources.get(cursor);
        cursor++;
        if (candidate.tagName().equals("img")) {
            NewsContentVo imageEntry = new NewsContentVo();
            imageEntry.setIsImg(1);
            imageEntry.setContentList(candidate.attr("src"));
            result.add(imageEntry);
        }
    }
    return result;
}Example 98
| Project: symphony-master File: XSSTestCase.java View source code |
/**
 * Checks that an image whose src value smuggles in an onerror handler fails
 * validation against the basic-with-images whitelist, while an image with a
 * plain URL passes.
 */
@Test
public void xss() {
    final String malicious = "http://error\" onerror=\"this.src='http://7u2fje.com1.z0.glb.clouddn.com/girl.jpg';this.removeAttribute('onerror');if(!window.a){console.log('Where am I ?');window.a=1}";
    final String maliciousTag = "<img src=\"" + malicious + "\"/>";
    assertFalse(Jsoup.isValid(maliciousTag, Whitelist.basicWithImages()));

    final String harmless = "http://7u2fje.com1.z0.glb.clouddn.com/girl.jpg";
    final String harmlessTag = "<img src=\"" + harmless + "\"/>";
    assertTrue(Jsoup.isValid(harmlessTag, Whitelist.basicWithImages()));
}Example 99
| Project: Tanaguru-master File: JsoupFunc.java View source code |
/**
 * Downloads and parses the AccessiWeb HTML5/ARIA reference list, using a
 * timeout of 5000 ms.
 *
 * @return the parsed document, or {@code null} when the page could not be
 *         fetched (the failure is logged at SEVERE level)
 */
public Document getDocument() {
    Document fetched = null;
    try {
        URL target = new URL("http://www.accessiweb.org/index.php/accessiweb-html5aria-liste-deployee.html");
        fetched = Jsoup.parse(target, 5000);
    } catch (IOException ex) {
        Logger.getLogger(ExportDomToCsv.class.getName()).log(Level.SEVERE, null, ex);
    }
    return fetched;
}Example 100
| Project: tradeframework-master File: HtmlSelectorMsgParser.java View source code |
/**
 * Parses an HTML stream, creates a message for every element matched by the
 * configured selector and passes each non-null message to the handler.
 *
 * @param input       stream containing the HTML document
 * @param length      content length (not used by this implementation)
 * @param contentType content type of the stream; when it is {@code null} or
 *                    carries no charset, a null charset is passed to jsoup
 *                    so that jsoup determines the encoding itself
 * @param handler     receives the created messages
 * @return {@code false} as soon as the handler returns false for a message,
 *         {@code true} otherwise
 * @throws IOException if the stream cannot be read
 */
public boolean parseContent(InputStream input, long length, String contentType, MsgHandler handler) throws IOException {
    String charsetName = null;
    // a response without a content type must not break parsing
    if (contentType != null) {
        Matcher charset = charsetPattern.matcher(contentType);
        if (charset.find()) {
            charsetName = charset.group(1);
        }
    }
    Document doc = Jsoup.parse(input, charsetName, "");
    Elements nodes = doc.select(selector);
    for (Element node : nodes) {
        Msg msg = createMsg(node);
        if (msg != null && !handler.newMsg(msg)) {
            return false;
        }
    }
    return true;
}Example 101
| Project: validadorAcessibilidade-master File: ListLinks.java View source code |
/**
 * Fetches the page at the URL given as the single argument and prints its
 * media elements, link imports and anchors.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    String target = args[0];
    print("Fetching %s...", target);

    Document page = Jsoup.connect(target).get();
    Elements anchors = page.select("a[href]");
    Elements sources = page.select("[src]");
    Elements linkTags = page.select("link[href]");

    print("\nMedia: (%d)", sources.size());
    for (Element item : sources) {
        boolean isImage = item.tagName().equals("img");
        if (!isImage) {
            print(" * %s: <%s>", item.tagName(), item.attr("abs:src"));
        } else {
            print(" * %s: <%s> %sx%s (%s)", item.tagName(), item.attr("abs:src"), item.attr("width"), item.attr("height"), trim(item.attr("alt"), 20));
        }
    }

    print("\nImports: (%d)", linkTags.size());
    for (Element imported : linkTags) {
        print(" * %s <%s> (%s)", imported.tagName(), imported.attr("abs:href"), imported.attr("rel"));
    }

    print("\nLinks: (%d)", anchors.size());
    for (Element anchor : anchors) {
        print(" * a: <%s> (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
    }
}