/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package focusedCrawler.crawler.crawlercommons.fetcher;

import java.nio.charset.Charset;
import java.security.InvalidParameterException;
import java.util.Arrays;

import org.apache.tika.metadata.Metadata;

/**
 * Value holder for the outcome of a single fetch: the requested and fetched
 * URLs, the response headers and body, the HTTP status, and an optional
 * {@link Payload}.
 *
 * <p>
 * Every field except the payload is set once in the constructor. The payload
 * can be replaced later through {@link #setPayload(Payload)} and may be
 * {@code null}.
 */
public class FetchedResult {
    private final String _baseUrl;
    private final String _fetchedUrl;
    private final long _fetchTime;
    private final byte[] _content;
    private final String _contentType;
    private final int _responseRate;
    private final Metadata _headers;
    private final String _newBaseUrl;
    private final int _numRedirects;
    private final String _hostAddress;
    private final int _statusCode; // HTTP status code
    private final String _reasonPhrase; // HTTP reason phrase, or null

    private Payload _payload;

    /**
     * Creates a result from the individual pieces of a fetch.
     *
     * @param baseUrl
     *            URL that was requested; must not be null
     * @param redirectedUrl
     *            URL reported by {@link #getFetchedUrl()}; must not be null
     * @param fetchTime
     *            fetch time value, stored as given
     * @param headers
     *            response headers; must not be null
     * @param content
     *            response body; must not be null (stored by reference, not
     *            copied)
     * @param contentType
     *            content type of the response; must not be null
     * @param responseRate
     *            response rate value, stored as given
     * @param payload
     *            caller-supplied payload; may be null
     * @param newBaseUrl
     *            new base URL; not validated, so it may be null
     * @param numRedirects
     *            number of redirects
     * @param hostAddress
     *            address of the host; must not be null
     * @param statusCode
     *            HTTP status code
     * @param reasonPhrase
     *            HTTP reason phrase, or null
     * @throws InvalidParameterException
     *             if any of the arguments documented as "must not be null" is
     *             null
     */
    public FetchedResult(String baseUrl, String redirectedUrl, long fetchTime, Metadata headers, byte[] content, String contentType, int responseRate, Payload payload, String newBaseUrl, int numRedirects,
                    String hostAddress, int statusCode, String reasonPhrase) {
        _payload = payload;

        // Validate in a fixed order so the first offending argument is the
        // one that gets reported.
        requireArgument(baseUrl, "baseUrl");
        requireArgument(redirectedUrl, "redirectedUrl");
        requireArgument(headers, "headers");
        requireArgument(content, "content");
        requireArgument(contentType, "contentType");
        requireArgument(hostAddress, "hostAddress");

        _baseUrl = baseUrl;
        _fetchedUrl = redirectedUrl;
        _fetchTime = fetchTime;
        _content = content;
        _contentType = contentType;
        _responseRate = responseRate;
        _headers = headers;
        _newBaseUrl = newBaseUrl;
        _numRedirects = numRedirects;
        _hostAddress = hostAddress;
        _statusCode = statusCode;
        _reasonPhrase = reasonPhrase;
    }

    /**
     * Rejects a null constructor argument.
     *
     * @throws InvalidParameterException
     *             with the message "{@code <name> cannot be null}" if
     *             {@code value} is null
     */
    private static void requireArgument(Object value, String name) {
        if (value == null) {
            throw new InvalidParameterException(name + " cannot be null");
        }
    }

    /** @return the payload, or null if none was supplied */
    public Payload getPayload() {
        return _payload;
    }

    /**
     * Replaces the payload.
     *
     * @param payload
     *            new payload; may be null
     */
    public void setPayload(Payload payload) {
        _payload = payload;
    }

    /** @return the base URL passed to the constructor; never null */
    public String getBaseUrl() {
        return _baseUrl;
    }

    /** @return the redirected URL passed to the constructor; never null */
    public String getFetchedUrl() {
        return _fetchedUrl;
    }

    /** @return the fetch time value passed to the constructor */
    public long getFetchTime() {
        return _fetchTime;
    }

    /**
     * @return the response body; never null. This is the internal array, not
     *         a copy.
     */
    public byte[] getContent() {
        return _content;
    }

    /** @return the length of the response body in bytes */
    public int getContentLength() {
        return _content.length;
    }

    /** @return the content type; never null */
    public String getContentType() {
        return _contentType;
    }

    /** @return the response rate value passed to the constructor */
    public int getResponseRate() {
        return _responseRate;
    }

    /** @return the response headers; never null */
    public Metadata getHeaders() {
        return _headers;
    }

    /** @return the new base URL; may be null */
    public String getNewBaseUrl() {
        return _newBaseUrl;
    }

    /** @return the number of redirects */
    public int getNumRedirects() {
        return _numRedirects;
    }

    /** @return the host address; never null */
    public String getHostAddress() {
        return _hostAddress;
    }

    /** @return the HTTP status code */
    public int getStatusCode() {
        return _statusCode;
    }

    /** @return the HTTP reason phrase, or null */
    public String getReasonPhrase() {
        return _reasonPhrase;
    }

    /**
     * Produces a neat report containing everything from a
     * {@link FetchedResult}. The order of the report is based on the logical
     * population of FetchedResult entities as per a non-public algorithm
     * within {@code SimpleHttpFetcher}.
     *
     * <p>
     * Each header is written as {@code name=[value, ...]} and each payload
     * entry as {@code key=value}. A null payload produces the "PayLoad"
     * heading with no entries. The content bytes are decoded with the
     * platform default charset.
     *
     * @return returns a String report of the FetchedResult.
     */
    public String report() {
        StringBuilder report = new StringBuilder();

        report.append("FetchedResult Report:\n");
        report.append("*********************\n");
        report.append(" BaseUrl : ").append(getBaseUrl()).append('\n');

        // One line per header name, listing every value stored under it.
        report.append(" Headers : __\n");
        Metadata headers = getHeaders();
        for (String header : headers.names()) {
            String values = Arrays.toString(headers.getValues(header));
            report.append(" ").append(header).append('=').append(values).append('\n');
        }

        report.append(" StatusCode : ").append(getStatusCode()).append('\n');
        report.append(" ReasonPhrase : ").append(getReasonPhrase()).append('\n');
        report.append(" NumRedirects : ").append(getNumRedirects()).append('\n');
        report.append(" NewBaseUrl : ").append(getNewBaseUrl()).append('\n');
        report.append(" HostAddress : ").append(getHostAddress()).append('\n');
        report.append(" ResponseRate : ").append(getResponseRate()).append('\n');

        // The payload is optional (never validated, and settable to null),
        // so only list its entries when there is one.
        report.append(" PayLoad : __\n");
        Payload payload = getPayload();
        if (payload != null) {
            for (String key : payload.keySet()) {
                String entry = key + "=" + payload.get(key);
                report.append(" ").append(entry).append('\n');
            }
        }

        report.append(" FetchTime : ").append(getFetchTime()).append('\n');
        report.append(" FetchedUrl : ").append(getFetchedUrl()).append('\n');
        report.append(" ContentType : ").append(getContentType()).append('\n');
        report.append(" ContentLength : ").append(getContentLength()).append('\n');

        // Byte array to string, using the platform default charset.
        String contentText = new String(getContent(), Charset.defaultCharset());
        report.append(" Content : ").append(contentText).append('\n');

        report.append("*********************\n");
        report.append("End of Report:\n");
        return report.toString();
    }
}