package us.codecraft.webmagic.downloader; import com.github.dreamhead.moco.HttpServer; import com.github.dreamhead.moco.Runnable; import com.github.dreamhead.moco.Runner; import org.apache.commons.collections.map.HashedMap; import org.apache.commons.io.IOUtils; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.junit.Ignore; import org.junit.Test; import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Task; import us.codecraft.webmagic.model.HttpRequestBody; import us.codecraft.webmagic.proxy.Proxy; import us.codecraft.webmagic.proxy.SimpleProxyProvider; import us.codecraft.webmagic.selector.Html; import us.codecraft.webmagic.utils.CharsetUtils; import us.codecraft.webmagic.utils.HttpConstant; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.Map; import static com.github.dreamhead.moco.Moco.*; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; /** * @author code4crafer@gmail.com */ public class HttpClientDownloaderTest { public static final String PAGE_ALWAYS_NOT_EXISTS = "http://localhost:13423/404"; @Test public void testDownloader() { HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); Html html = httpClientDownloader.download("https://www.baidu.com/"); assertTrue(!html.getFirstSourceText().isEmpty()); } @Test(expected = IllegalArgumentException.class) public void testDownloaderInIllegalUrl() throws UnsupportedEncodingException { HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); httpClientDownloader.download("http://www.oschina.net/>"); } @Test public void test_download_fail() { HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); Task task = Site.me().setDomain("localhost").setCycleRetryTimes(5).toTask(); Request request = new Request(PAGE_ALWAYS_NOT_EXISTS); Page page = httpClientDownloader.download(request, task); assertThat(page.isDownloadSuccess()).isFalse(); } @Test public void testGetHtmlCharset() throws Exception { HttpServer server = httpserver(13423); server.get(by(uri("/header"))).response(header("Content-Type", "text/html; charset=gbk")); server.get(by(uri("/meta4"))).response(with(text("<html>\n" + " <head>\n" + " <meta charset='gbk'/>\n" + " </head>\n" + " <body></body>\n" + "</html>")),header("Content-Type","")); server.get(by(uri("/meta5"))).response(with(text("<html>\n" + " <head>\n" + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=gbk\" />\n" + " </head>\n" + " <body></body>\n" + "</html>")),header("Content-Type","")); Runner.running(server, new Runnable() { @Override public void run() { String charset = getCharsetByUrl("http://127.0.0.1:13423/header"); assertEquals(charset, "gbk"); charset = getCharsetByUrl("http://127.0.0.1:13423/meta4"); assertEquals(charset, "gbk"); charset = getCharsetByUrl("http://127.0.0.1:13423/meta5"); assertEquals(charset, "gbk"); } private String getCharsetByUrl(String url) { HttpClientDownloader downloader = new HttpClientDownloader(); Site site = Site.me(); CloseableHttpClient httpClient = new HttpClientGenerator().getClient(site); // encoding in http header Content-Type Request requestGBK = new Request(url); CloseableHttpResponse httpResponse = null; try { httpResponse = httpClient.execute(new HttpUriRequestConverter().convert(requestGBK, site, null).getHttpUriRequest()); } catch (IOException e) { e.printStackTrace(); } String charset = null; try { byte[] contentBytes = IOUtils.toByteArray(httpResponse.getEntity().getContent()); charset = CharsetUtils.detectCharset(httpResponse.getEntity().getContentType().getValue(), contentBytes); } catch (IOException e) { e.printStackTrace(); } return charset; } }); } @Test public void test_selectRequestMethod() throws Exception { HttpServer server = httpserver(13423); server.get(eq(query("q"), "webmagic")).response("get"); server.post(eq(form("q"), "webmagic")).response("post"); server.put(eq(form("q"), "webmagic")).response("put"); server.delete(eq(query("q"), "webmagic")).response("delete"); server.request(and(by(method("HEAD")),eq(query("q"), "webmagic"))).response(header("method","head")); server.request(and(by(method("TRACE")),eq(query("q"), "webmagic"))).response("trace"); final HttpUriRequestConverter httpUriRequestConverter = new HttpUriRequestConverter(); final Site site = Site.me(); Runner.running(server, new Runnable() { @Override public void run() throws Exception { Request request = new Request(); request.setUrl("http://127.0.0.1:13423/search?q=webmagic"); request.setMethod(HttpConstant.Method.GET); Map<String,Object> params = new HashedMap(); params.put("q","webmagic"); HttpUriRequest httpUriRequest = httpUriRequestConverter.convert(request,site,null).getHttpUriRequest(); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("get"); request.setMethod(HttpConstant.Method.DELETE); httpUriRequest = httpUriRequestConverter.convert(request, site, null).getHttpUriRequest(); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("delete"); request.setMethod(HttpConstant.Method.HEAD); httpUriRequest = httpUriRequestConverter.convert(request, site, null).getHttpUriRequest(); assertThat(HttpClients.custom().build().execute(httpUriRequest).getFirstHeader("method").getValue()).isEqualTo("head"); request.setMethod(HttpConstant.Method.TRACE); httpUriRequest = httpUriRequestConverter.convert(request, site, null).getHttpUriRequest(); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("trace"); request.setUrl("http://127.0.0.1:13423/search"); request.setMethod(HttpConstant.Method.POST); request.setRequestBody(HttpRequestBody.form(params, "utf-8")); httpUriRequest = httpUriRequestConverter.convert(request, site, null).getHttpUriRequest(); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("post"); request.setMethod(HttpConstant.Method.PUT); httpUriRequest = httpUriRequestConverter.convert(request, site, null).getHttpUriRequest(); assertThat(EntityUtils.toString(HttpClients.custom().build().execute(httpUriRequest).getEntity())).isEqualTo("put"); } }); } @Test public void test_set_request_cookie() throws Exception { HttpServer server = httpserver(13423); server.get(eq(cookie("cookie"), "cookie-webmagic")).response("ok"); Runner.running(server, new Runnable() { @Override public void run() throws Exception { HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); Request request = new Request(); request.setUrl("http://127.0.0.1:13423"); request.addCookie("cookie","cookie-webmagic"); Page page = httpClientDownloader.download(request, Site.me().toTask()); assertThat(page.getRawText()).isEqualTo("ok"); } }); } @Test public void test_set_request_header() throws Exception { HttpServer server = httpserver(13423); server.get(eq(header("header"), "header-webmagic")).response("ok"); Runner.running(server, new Runnable() { @Override public void run() throws Exception { HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); Request request = new Request(); request.setUrl("http://127.0.0.1:13423"); request.addHeader("header","header-webmagic"); Page page = httpClientDownloader.download(request, Site.me().toTask()); assertThat(page.getRawText()).isEqualTo("ok"); } }); } @Test public void test_set_site_cookie() throws Exception { HttpServer server = httpserver(13423); server.get(eq(cookie("cookie"), "cookie-webmagic")).response("ok"); Runner.running(server, new Runnable() { @Override public void run() throws Exception { HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); Request request = new Request(); request.setUrl("http://127.0.0.1:13423"); Site site = Site.me().addCookie("cookie", "cookie-webmagic").setDomain("127.0.0.1"); Page page = httpClientDownloader.download(request, site.toTask()); assertThat(page.getRawText()).isEqualTo("ok"); } }); } @Test public void test_download_when_task_is_null() throws Exception { HttpServer server = httpserver(13423); server.response("foo"); Runner.running(server, new Runnable() { @Override public void run() throws Exception { final HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); Request request = new Request(); request.setUrl("http://127.0.0.1:13423/"); Page page = httpClientDownloader.download(request, Site.me().toTask()); assertThat(page.getRawText()).isEqualTo("foo"); } }); } @Ignore("need proxy server") @Test public void test_download_by_SimpleProxyProvider(){ HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); httpClientDownloader.setProxyProvider(SimpleProxyProvider.from(new Proxy("127.0.0.1", 1087))); Request request = new Request(); request.setUrl("https://www.baidu.com"); Page page = httpClientDownloader.download(request, Site.me().toTask()); assertThat(page.isDownloadSuccess()); } }