package io.monokkel.core; import com.google.common.collect.ImmutableList; import com.sun.nio.sctp.IllegalReceiveException; import io.monokkel.core.domain.ResponseFeedback; import io.monokkel.domain.UrlVisitResponse; import io.monokkel.exceptions.UrlVisitException; import io.monokkel.factories.HttpClient4Builder; import org.apache.clerezza.utils.UriUtil; import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.ResponseHandler; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.util.EntityUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.Serializable; import java.net.URLEncoder; import java.util.Arrays; import java.util.BitSet; import java.util.List; import java.util.stream.Collector; import java.util.stream.Collectors; import static java.lang.String.format; import static java.lang.System.currentTimeMillis; /** * Created by tarjei on 28/05/14. */ public class UrlVisitor implements Serializable { private static final long serialVersionUID = 1L; public static final String CONTENT_TYPE = "Content-Type"; private transient Logger log = LoggerFactory.getLogger(this.getClass()); private final HttpClient4Builder httpClient4Builder; public UrlVisitor(final HttpClient4Builder httpClient4Builder) { this.httpClient4Builder = httpClient4Builder; } /** * * Visit the url and retrieve the the content of the page. The method will open a connection * and return a response object with serialized content * * @param url to visit * @return UrlVisitResponse or null */ public UrlVisitResponse visitUrlAndRespond(final String url) throws UrlVisitException{ CloseableHttpClient httpClient = httpClient4Builder.buildClientBuilder(); log.info("Visit url {}", url); UrlVisitResponse urlVisitResponse = null; try { HttpGet httpget = new HttpGet(UriUtil.encodeQuery(url)); ResponseHandler<ResponseFeedback> responseHandler = createResponseHandler(url); ResponseFeedback responseFeedback = httpClient.execute(httpget, responseHandler); urlVisitResponse = new UrlVisitResponse(responseFeedback.getResponseString(), responseFeedback.getHttpStatus(), responseFeedback.getTypesFromTheResponseHeader(), currentTimeMillis(), url); } catch (Exception e) { log.error("Failed to retrieve url {}. Exception was: ", url, e); throw new UrlVisitException(format("Failed to retrieve url %s", url),e); } finally { try { httpClient.close(); } catch (IOException e) { log.error("Failed to close the http client", e); } } return urlVisitResponse; } private ResponseHandler<ResponseFeedback> createResponseHandler(final String url) throws IOException { return response -> { final int status = response.getStatusLine().getStatusCode(); if (status >= 200 && status < 300) { final HttpEntity entity = response.getEntity(); final String responseString = entity != null ? EntityUtils.toString(entity) : null; final List<String> typesFromTheResponseHeader = Arrays.asList(response.getHeaders(CONTENT_TYPE)).stream().map(Header::getValue).collect(Collectors.toList()); return new ResponseFeedback(responseString,status,typesFromTheResponseHeader); } else { log.warn("Received status {} when visiting {}",status,url); throw new IllegalReceiveException(); } }; } }