package com.brucezee.jspider.selenium;

import com.brucezee.jspider.Page;
import com.brucezee.jspider.Request;
import com.brucezee.jspider.SiteConfig;
import com.brucezee.jspider.downloader.Downloader;
import org.apache.http.Header;
import org.apache.http.message.BasicHeader;
import org.openqa.selenium.Cookie;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

/**
 * Selenium WebDriver-based downloader.
 *
 * <p>Each {@link #download(SiteConfig, Request)} call borrows a driver from the
 * {@link WebDriverPool}, loads the request URL, lets the {@link RequestWaiter}
 * wait for the response, and wraps the resulting page source in a {@link Page}.
 * The driver is handed back to the pool when the call finishes.
 *
 * Created by brucezee on 2017/1/11.
 */
public class WebDriverDownloader implements Downloader, Closeable {
    private static final Logger logger = LoggerFactory.getLogger(WebDriverDownloader.class);

    /** WebDriver pool that drivers are borrowed from and handed back to. */
    private final WebDriverPool webDriverPool;
    /** Request waiter invoked after the URL has been loaded. */
    private final RequestWaiter requestWaiter;
    /** Driver configuration passed to the pool; also supplies the expiry used on return. */
    private final DriverConfig driverConfig;

    /**
     * Creates a downloader backed by a new {@link WebDriverPool} built on a new
     * {@link WebDriverFactory}.
     */
    public WebDriverDownloader() {
        this(new WebDriverPool(new WebDriverFactory()));
    }

    /**
     * Creates a downloader using the given pool and the configuration returned by
     * {@link DriverConfig#create()}.
     *
     * @param webDriverPool pool to borrow drivers from
     */
    public WebDriverDownloader(WebDriverPool webDriverPool) {
        this(webDriverPool, DriverConfig.create());
    }

    /**
     * Creates a downloader using the given pool and configuration together with a
     * {@link DefaultRequestWaiter}.
     *
     * @param webDriverPool pool to borrow drivers from
     * @param driverConfig  driver configuration passed to the pool
     */
    public WebDriverDownloader(WebDriverPool webDriverPool, DriverConfig driverConfig) {
        this(webDriverPool, new DefaultRequestWaiter(), driverConfig);
    }

    /**
     * Creates a downloader with every collaborator supplied explicitly.
     *
     * @param webDriverPool pool to borrow drivers from
     * @param requestWaiter waiter invoked after the URL has been loaded
     * @param driverConfig  driver configuration passed to the pool
     */
    public WebDriverDownloader(WebDriverPool webDriverPool, RequestWaiter requestWaiter, DriverConfig driverConfig) {
        this.webDriverPool = webDriverPool;
        this.requestWaiter = requestWaiter;
        this.driverConfig = driverConfig;
    }

    /**
     * Downloads the request URL through a pooled web driver.
     *
     * <p>If no driver can be obtained, or reading the page from the driver fails,
     * the page built by {@link #processFailedPage(Request)} is returned. A failure
     * while loading the URL or waiting for the response is only logged; the page
     * is still read from the driver afterwards.
     *
     * @param siteConfig site configuration forwarded to the pool and the waiter
     * @param request    request whose URL is loaded
     * @return the downloaded page, or the failed-page placeholder
     */
    @Override
    public Page download(SiteConfig siteConfig, Request request) {
        WebDriverEx webDriver = acquireWebDriver(siteConfig, request);
        if (webDriver == null) {
            return processFailedPage(request);
        }

        // A single finally guards both the load and the read phases so the driver
        // goes back to the pool even when loading fails with a non-Exception Throwable.
        try {
            loadPage(siteConfig, request, webDriver);
            try {
                return processPage(request, webDriver);
            } catch (Exception e) {
                logger.error("Failed to process page by web driver, url : {}", request.getUrl(), e);
                return processFailedPage(request);
            }
        } finally {
            webDriverPool.shutdownOrReturn(webDriver, request, driverConfig.getExpiresMillis());
        }
    }

    /**
     * Borrows a driver from the pool, logging instead of propagating any exception.
     *
     * @return the driver, or {@code null} when the pool returned none or threw
     */
    private WebDriverEx acquireWebDriver(SiteConfig siteConfig, Request request) {
        try {
            return webDriverPool.getWebDriver(siteConfig, driverConfig, request);
        } catch (Exception e) {
            // The Throwable is the trailing argument without its own placeholder,
            // which is how SLF4J recognises it and logs the stack trace.
            logger.error("Failed to get web driver from pool, url : {}", request.getUrl(), e);
            return null;
        }
    }

    /**
     * Loads the request URL and waits for the response. Exceptions are logged and
     * swallowed so that the caller can still read whatever the driver holds.
     */
    private void loadPage(SiteConfig siteConfig, Request request, WebDriverEx webDriver) {
        try {
            webDriver.get(request.getUrl());
            requestWaiter.waitResponse(siteConfig, request, webDriver);
        } catch (Exception e) {
            logger.error("Failed to request by web driver, url : {}", request.getUrl(), e);
        }
    }

    /**
     * Builds the placeholder page used when a download fails: the request URL
     * with {@code 0} as the second constructor argument and no headers or content.
     */
    private Page processFailedPage(Request request) {
        return new Page(request.getUrl(), 0, null, null);
    }

    /**
     * Builds a page from the driver's current state: cookies converted to
     * headers, the page source as content, and {@code 200} as the second
     * constructor argument.
     */
    private Page processPage(Request request, WebDriverEx webDriver) {
        Header[] headers = getHeaderFromCookieSet(webDriver.manage().getCookies());
        String resource = webDriver.getPageSource();
        return new Page(request.getUrl(), 200, headers, resource);
    }

    /**
     * Converts each cookie into a {@code Set-Cookie} header whose value is the
     * cookie's {@code toString()}.
     *
     * @param cookieSet cookies to convert; may be {@code null}
     * @return one header per cookie, or {@code null} when the set is {@code null} or empty
     */
    private Header[] getHeaderFromCookieSet(Set<Cookie> cookieSet) {
        if (cookieSet == null || cookieSet.isEmpty()) {
            return null;
        }
        List<Header> headers = new ArrayList<Header>(cookieSet.size());
        for (Cookie cookie : cookieSet) {
            headers.add(new BasicHeader("Set-Cookie", cookie.toString()));
        }
        return headers.toArray(new Header[headers.size()]);
    }

    /**
     * Shuts down the underlying web driver pool.
     *
     * @throws IOException declared by {@link Closeable}
     */
    @Override
    public void close() throws IOException {
        webDriverPool.shutdown();
    }
}