package org.archive.wayback.liveweb;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.IOUtils;
import org.archive.wayback.accesscontrol.robotstxt.redis.RobotsTxtResource;
import org.archive.wayback.core.Resource;
import org.archive.wayback.exception.LiveDocumentNotAvailableException;
import org.archive.wayback.exception.LiveWebCacheUnavailableException;
import org.archive.wayback.exception.LiveWebTimeoutException;
import com.google.common.io.ByteStreams;
/**
 * Live-web "cache" that performs a direct HTTP GET for the requested URL on
 * every call and wraps the (size-limited) response body in a
 * {@link RobotsTxtResource}. Nothing is stored between calls.
 */
public class LiveRobotsNoCache extends ArcRemoteLiveWebCache {

    /**
     * Character set used to decode the fetched body. RFC 9309 specifies UTF-8
     * for robots.txt, and a fixed value keeps decoding independent of the
     * JVM's platform default.
     */
    private static final String ROBOTS_CHARSET = "UTF-8";

    /** Maximum number of response-body bytes read per fetch. */
    protected int maxRobotsSize = 512000;

    /**
     * @return the maximum number of response-body bytes read per fetch
     */
    public int getMaxRobotsSize() {
        return maxRobotsSize;
    }

    /**
     * @param maxRobotsSize the maximum number of response-body bytes to read
     *        per fetch; anything beyond this limit is not read
     */
    public void setMaxRobotsSize(int maxRobotsSize) {
        this.maxRobotsSize = maxRobotsSize;
    }

    /**
     * Fetches {@code url} with an HTTP GET (following redirects, ignoring
     * cookies) and returns at most {@link #getMaxRobotsSize()} bytes of the
     * body as a {@link RobotsTxtResource}.
     *
     * @param url the URL to fetch
     * @param maxCacheMS not used by this implementation
     * @param record not used by this implementation
     * @return a resource holding the decoded response body; a response
     *         without a body yields a resource with empty content
     * @throws LiveDocumentNotAvailableException if the final response status
     *         is below 200 or at least 400, or if an I/O error occurs while
     *         executing the request or reading the body
     * @throws LiveWebCacheUnavailableException declared by the overridden
     *         method; not thrown directly by this body
     * @throws LiveWebTimeoutException declared by the overridden method; not
     *         thrown directly by this body
     * @throws IOException declared by the overridden method; I/O failures in
     *         this body are reported as
     *         {@link LiveDocumentNotAvailableException} instead
     */
    @Override
    public Resource getCachedResource(URL url, long maxCacheMS,
            boolean record) throws LiveDocumentNotAvailableException,
            LiveWebCacheUnavailableException, LiveWebTimeoutException,
            IOException {
        HttpClient http = super.getHttpClient();
        GetMethod method = new GetMethod(url.toString());
        method.setFollowRedirects(true);
        method.getParams().setCookiePolicy(CookiePolicy.IGNORE_COOKIES);

        InputStream in = null;
        try {
            int responseStatus = http.executeMethod(method);
            if (responseStatus >= 400 || responseStatus < 200) {
                throw new LiveDocumentNotAvailableException("Invalid Status: " + responseStatus);
            }

            InputStream body = method.getResponseBodyAsStream();
            if (body == null) {
                // No response body is available. Treat it like a zero-length
                // body (which already produces empty content) instead of
                // letting ByteStreams.limit fail on a null stream.
                return new RobotsTxtResource("");
            }

            // Cap the amount read so an oversized response cannot exhaust memory.
            in = ByteStreams.limit(body, maxRobotsSize);
            return new RobotsTxtResource(IOUtils.toString(in, ROBOTS_CHARSET));
        } catch (IOException io) {
            LiveDocumentNotAvailableException unavailable =
                    new LiveDocumentNotAvailableException(io.toString());
            // Keep the original failure attached for diagnostics.
            unavailable.initCause(io);
            throw unavailable;
        } finally {
            // Close quietly: a failure while closing must neither mask the
            // result/exception already in flight nor skip the connection
            // cleanup below.
            // NOTE(review): closing the stream before abort() may drain the
            // remainder of an oversized body in commons-httpclient 3.x --
            // confirm whether abort() should come first.
            IOUtils.closeQuietly(in);
            method.abort();
            method.releaseConnection();
        }
    }

    /**
     * Intentionally a no-op: this class itself holds no resources to release.
     * NOTE(review): this override does not call {@code super.shutdown()};
     * confirm that skipping the parent's shutdown logic is intended.
     */
    @Override
    public void shutdown() {
        // Nothing to do.
    }
}