/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.wayback.accesscontrol.robotstxt;
import java.io.Serializable;
import java.util.concurrent.ConcurrentSkipListSet;
/**
 * Represents the directives that apply to a user-agent (or set of
 * user-agents): a set of disallowed path prefixes, a set of allowed path
 * prefixes, and a crawl delay.
 *
 * <p>A path is allowed unless the longest disallow entry that prefixes it is
 * strictly longer than the longest allow entry that prefixes it (ties go to
 * allow).
 */
public class RobotsDirectives implements Serializable {
    private static final long serialVersionUID = 5386542759286155383L;

    /** Path prefixes that are disallowed; never contains the empty string via {@link #addDisallow}. */
    ConcurrentSkipListSet<String> disallows = new ConcurrentSkipListSet<String>();

    /** Path prefixes that are explicitly allowed. */
    ConcurrentSkipListSet<String> allows = new ConcurrentSkipListSet<String>();

    /** Crawl delay; -1 until set (presumably meaning "not specified" -- confirm with callers). */
    float crawlDelay = -1;

    /**
     * Tests whether the given path is permitted by these directives.
     *
     * @param path path to test
     * @return {@code false} only if the longest matching disallow prefix is
     *         strictly longer than the longest matching allow prefix;
     *         {@code true} otherwise
     */
    public boolean allows(String path) {
        int disallowLength = longestPrefixLength(disallows, path);
        int allowLength = longestPrefixLength(allows, path);
        return disallowLength <= allowLength;
    }

    /**
     * Finds the longest entry of a sorted prefix set that is a prefix of the
     * given string.
     *
     * @param prefixSet sorted set of candidate prefixes
     * @param str string to match against
     * @return length of longest entry in {@code prefixSet} that prefixes {@code str}, or zero
     *         if no entry prefixes {@code str}
     */
    protected int longestPrefixLength(ConcurrentSkipListSet<String> prefixSet,
            String str) {
        // Every prefix of str sorts at or before str, and longer prefixes sort
        // after shorter ones, so the search starts at floor(str) and walks down.
        String candidate = prefixSet.floor(str);
        while (candidate != null) {
            if (str.startsWith(candidate)) {
                return candidate.length();
            }
            // The floor entry is not itself a prefix (e.g. "/a/b" for "/a/c"),
            // but a shorter entry such as "/a" may still be. Any entry that
            // prefixes str and sorts at or below candidate must also prefix
            // candidate, hence it prefixes their common prefix: retry from there.
            // The probe gets strictly shorter each round, so the loop terminates.
            int shared = commonPrefixLength(candidate, str);
            candidate = prefixSet.floor(str.substring(0, shared));
        }
        return 0;
    }

    /** Returns the number of leading characters that {@code a} and {@code b} have in common. */
    private static int commonPrefixLength(String a, String b) {
        int limit = Math.min(a.length(), b.length());
        int i = 0;
        while (i < limit && a.charAt(i) == b.charAt(i)) {
            i++;
        }
        return i;
    }

    /**
     * Adds a disallowed path prefix. Empty strings are ignored.
     *
     * @param path path prefix to disallow
     */
    public void addDisallow(String path) {
        if (path.isEmpty()) {
            // ignore empty-string disallows
            // (they really mean allow, when alone)
            return;
        }
        disallows.add(path);
    }

    /**
     * Adds an allowed path prefix.
     *
     * @param path path prefix to allow
     */
    public void addAllow(String path) {
        allows.add(path);
    }

    /**
     * Sets the crawl delay.
     *
     * @param i new crawl delay value
     */
    public void setCrawlDelay(float i) {
        crawlDelay = i;
    }

    /**
     * @return the crawl delay, or -1 if it has never been set
     */
    public float getCrawlDelay() {
        return crawlDelay;
    }
}