/** * Copyright (C) 2012-2014 Gist Labs, LLC. (http://gistlabs.com) * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ package com.gistlabs.mechanize.document.html; import java.util.Arrays; import java.util.Collection; import java.util.List; import org.apache.http.HttpResponse; import org.apache.http.client.methods.HttpRequestBase; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import com.gistlabs.mechanize.Mechanize; import com.gistlabs.mechanize.document.AbstractDocument; import com.gistlabs.mechanize.document.html.form.Forms; import com.gistlabs.mechanize.document.html.image.Images; import com.gistlabs.mechanize.document.link.Links; import com.gistlabs.mechanize.document.node.Node; import com.gistlabs.mechanize.exceptions.MechanizeExceptionFactory; import com.gistlabs.mechanize.util.apache.ContentType; /** * @author Martin Kersten <Martin.Kersten.mk@gmail.com> */ public class HtmlDocument extends AbstractDocument { public static Collection<String> CONTENT_MATCHERS = Arrays.asList( ContentType.TEXT_HTML.getMimeType(), ContentType.APPLICATION_ATOM_XML.getMimeType(), ContentType.APPLICATION_XHTML_XML.getMimeType(), ContentType.APPLICATION_XML.getMimeType()); private HtmlElements htmlElements; private String baseUri; public HtmlDocument(final Mechanize agent, final HttpRequestBase request, final HttpResponse response) { super(agent, request, response); } @Override public HtmlElement getRoot() { return htmlElements().getRoot(); } @Override public HtmlElement find(String csss) { return (HtmlElement)super.find(csss); } @SuppressWarnings("unchecked") @Override public List<? extends HtmlElement> findAll(String csss) { return (List<? extends HtmlElement>) super.findAll(csss); } @Override protected void loadPage() throws Exception { Document jsoup = Jsoup.parse(getInputStream(), getContentEncoding(response), getUri()); setBaseUri(jsoup.head().baseUri()); this.htmlElements = new HtmlElements(this, jsoup); } private void setBaseUri(final String baseUri) { if (! this.getUri().equals(baseUri)) this.baseUri = baseUri; } @Override public String getUri() { return this.baseUri==null ? super.getUri() : this.baseUri; } @Override protected Links loadLinks() { List<? extends Node> links = htmlElements().findAll("a"); return new Links(this, links); } @Override protected Forms loadForms() { List<? extends Node> forms = htmlElements().findAll("form"); return new Forms(this, forms); } @Override protected Images loadImages() { List<HtmlElement> images = htmlElements().findAll("img"); return new Images(this, images); } public HtmlElements htmlElements() { if(htmlElements == null) try { loadPage(); } catch (Exception e) { throw MechanizeExceptionFactory.newException(e); } return htmlElements; } /** * Returns the title of the page or null. */ @Override public String getTitle() { HtmlElement title = htmlElements().find("title"); return title != null ? title.getText() : null; } /** * Serialize the contents of this page into a string * * @return */ @Override public String asString() { return htmlElements.toString(); } }