package com.geccocrawler.gecco.downloader;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpHost;
import com.geccocrawler.gecco.request.HttpGetRequest;
import com.geccocrawler.gecco.request.HttpPostRequest;
import com.geccocrawler.gecco.request.HttpRequest;
import com.geccocrawler.gecco.response.HttpResponse;
import com.mashape.unirest.http.Unirest;
import com.mashape.unirest.http.exceptions.UnirestException;
import com.mashape.unirest.http.utils.ResponseUtils;
import com.mashape.unirest.request.HttpRequestWithBody;
/**
 * {@link Downloader} implementation that performs requests through the static
 * {@link Unirest} client.
 *
 * <p>NOTE(review): all configuration done here (concurrency, proxy, timeouts)
 * goes through static {@code Unirest} setters, so it appears to be shared by
 * every user of Unirest in the JVM rather than scoped to this instance —
 * confirm before running several differently-configured downloaders.
 */
public class UnirestDownloader implements Downloader {

    private static final Log log = LogFactory.getLog(UnirestDownloader.class);

    /** Value passed to {@code Unirest.setTimeouts} when the caller supplies a non-positive timeout. */
    private static final long DEFAULT_TIMEOUT = 3000;

    /** Header name used to look up the response content type. */
    private static final String CONTENT_TYPE = "Content-Type";

    /**
     * Creates the downloader and configures Unirest's concurrency limits
     * (1000 total, 50 per route).
     */
    public UnirestDownloader() {
        Unirest.setConcurrency(1000, 50);
    }

    /**
     * Executes the given request and converts the Unirest response into a
     * gecco {@link HttpResponse}.
     *
     * <p>A {@code User-Agent} header obtained from {@code UserAgent.getUserAgent()}
     * is added to the request before it is sent, and the proxy returned by
     * {@code Proxys.getProxy()} (possibly {@code null}) is installed on Unirest.
     *
     * @param request the request to execute; {@link HttpPostRequest} instances are
     *                sent as POST with their fields, everything else as GET
     * @return the populated response (status, raw body, text body, content type, charset)
     * @throws DownloaderException if Unirest fails to execute the request
     */
    @Override
    public HttpResponse download(HttpRequest request) throws DownloaderException {
        if (log.isDebugEnabled()) {
            log.debug("downloading..." + request.getUrl());
        }
        try {
            applyProxy();
            request.addHeader("User-Agent", UserAgent.getUserAgent());
            return toResponse(request, execute(request));
        } catch (UnirestException e) {
            throw new DownloaderException(e);
        }
    }

    /** Installs the current proxy on Unirest; a {@code null} proxy is passed through as-is. */
    private void applyProxy() {
        HttpHost proxy = Proxys.getProxy();
        Unirest.setProxy(proxy);
    }

    /** Sends the request as POST (with form fields) or GET and returns the body as a string response. */
    private com.mashape.unirest.http.HttpResponse<String> execute(HttpRequest request)
            throws UnirestException {
        if (request instanceof HttpPostRequest) {
            HttpPostRequest post = (HttpPostRequest) request;
            HttpRequestWithBody httpRequestWithBody = Unirest.post(post.getUrl());
            httpRequestWithBody.headers(post.getHeaders());
            httpRequestWithBody.fields(post.getFields());
            return httpRequestWithBody.asString();
        }
        return Unirest.get(request.getUrl()).headers(request.getHeaders()).asString();
    }

    /** Copies status, bodies, content type and resolved charset into a gecco response. */
    private HttpResponse toResponse(HttpRequest request,
            com.mashape.unirest.http.HttpResponse<String> response) {
        String contentType = findContentType(response);
        HttpResponse resp = new HttpResponse();
        resp.setStatus(response.getStatus());
        resp.setRaw(response.getRawBody());
        resp.setContent(response.getBody());
        resp.setContentType(contentType);
        resp.setCharset(getCharset(request, contentType));
        return resp;
    }

    /**
     * Looks up the content type header. HTTP header names are case-insensitive,
     * so the lower-case spelling is tried when the canonical one is absent.
     */
    private String findContentType(com.mashape.unirest.http.HttpResponse<String> response) {
        String contentType = response.getHeaders().getFirst(CONTENT_TYPE);
        if (contentType == null) {
            contentType = response.getHeaders().getFirst(CONTENT_TYPE.toLowerCase());
        }
        return contentType;
    }

    /**
     * Resolves the charset for a response: first from the content type header,
     * then from the request, and finally {@code "UTF-8"}.
     *
     * @param request     the originating request, consulted when the header yields no charset
     * @param contentType the response content type header value
     * @return the resolved charset name, never {@code null}
     */
    private String getCharset(HttpRequest request, String contentType) {
        String charset = ResponseUtils.getCharsetFromContentType(contentType);
        if (charset == null) {
            charset = request.getCharset();
        }
        if (charset == null) {
            charset = "UTF-8";
        }
        return charset;
    }

    /**
     * Sets both Unirest timeouts (connection and socket) to the same value.
     *
     * @param timeout the timeout to apply; values {@code <= 0} fall back to
     *                {@value #DEFAULT_TIMEOUT} (presumably milliseconds, per
     *                Unirest's {@code setTimeouts} — confirm against its docs)
     */
    @Override
    public void timeout(long timeout) {
        long effective = timeout > 0 ? timeout : DEFAULT_TIMEOUT;
        Unirest.setTimeouts(effective, effective);
    }

    /**
     * Shuts down the Unirest client. Failures are logged rather than thrown,
     * keeping shutdown best-effort.
     */
    @Override
    public void shutdown() {
        try {
            Unirest.shutdown();
        } catch (IOException e) {
            // Report through the class logger instead of printing to stderr.
            log.error("failed to shut down Unirest", e);
        }
    }
}