/**
* Copyright 2008 - CommonCrawl Foundation
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
**/
package org.commoncrawl.util;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.TimeZone;
import java.util.Vector;
import junit.framework.Assert;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.commoncrawl.service.crawler.CrawlHostImpl;
import org.commoncrawl.io.HttpCookie;
import org.commoncrawl.io.NIOHttpCookieStore;
import org.junit.Test;
/**
*
* @author rana
*
*/
public class HttpCookieUtils {
/** logging **/
private static final Log LOG = LogFactory.getLog(HttpCookieUtils.class);
public static class ParsedCookie {
// Mutable holder for one parsed "token=value" pair of a cookie line.
public static class TokenValuePair {
// The token (name) part. ParseTokenValuePairs stores "" here when the
// first pair of a cookie line has no '='.
public String first;
// The value part, as extracted by ParseTokenValuePairs with the whitespace
// around it removed; "" when the pair has no value.
public String second;
}
//typedef std::pair<String, String> TokenValuePair;
// typedef std::vector<TokenValuePair> PairList;
// The maximum length of a cookie string we will try to parse
public static final int kMaxCookieSize = 4096;
// The maximum number of Token/Value pairs. Shouldn't have more than 8.
public static final int kMaxPairs = 16;
// Construct from a cookie string like "BLAH=1; path=/; domain=.google.com".
// A line longer than kMaxCookieSize is not parsed at all, and a line that
// yields no token/value pairs is left invalid (see IsValid()).
public ParsedCookie(String cookie_line) {
  if (cookie_line.length() > kMaxCookieSize) {
    // Too long: leave pairs_ empty and is_valid_ false.
    return;
  }
  ParseTokenValuePairs(cookie_line);
  if (!pairs_.isEmpty()) {
    is_valid_ = true;
    SetupAttributes();
  }
}
// You should not call any other methods on the class if !IsValid
// (pairs_ is empty for an invalid cookie, so the pairs_.get(...) calls below would throw).
boolean IsValid() { return is_valid_; }
// Name and value of the cookie itself, i.e. the first parsed pair.
String Name() { return pairs_.get(0).first; }
// Alias for Name().
String Token() { return Name(); }
String Value() { return pairs_.get(0).second; }
// For every attribute below, an index of 0 means "attribute not present":
// index 0 is always the cookie's own name/value pair, never an attribute.
// Callers should check HasXxx() first; when the attribute is absent the
// matching getter reads pair 0 and therefore returns the cookie's value.
boolean HasPath() { return path_index_ != 0; }
String Path() { return pairs_.get(path_index_).second; }
boolean HasDomain() { return domain_index_ != 0; }
String Domain() { return pairs_.get(domain_index_).second; }
boolean HasExpires() { return expires_index_ != 0; }
String Expires() { return pairs_.get(expires_index_).second; }
boolean HasMaxAge() { return maxage_index_ != 0; }
String MaxAge() { return pairs_.get(maxage_index_).second; }
// True when a "secure" / "httponly" attribute was recorded by SetupAttributes().
boolean IsSecure() { return secure_index_ != 0; }
boolean IsHttpOnly() { return httponly_index_ != 0; }
// Number of parsed pairs after the leading name/value pair (recognized or not).
int NumberOfAttributes() { return pairs_.size() - 1; }
// Tests whether the character |c| is one of the characters of |chars|.
static boolean CharIsA(char c, String chars) {
  for (int i = 0; i < chars.length(); ++i) {
    if (chars.charAt(i) == c) {
      return true;
    }
  }
  return false;
}
// Scans |source| forward from |beginIndex| and returns the index of the first
// character that occurs in |chars|. If no such character is found before
// |endIndex|, the scan stops there and |endIndex| is returned (or |beginIndex|
// itself when it is already at or past |endIndex|).
static int SeekTo(String source,int beginIndex,int endIndex,String chars) {
  int pos = beginIndex;
  while (pos < endIndex && !CharIsA(source.charAt(pos), chars)) {
    pos++;
  }
  return pos;
}
// Scans |source| forward from |beginIndex| and returns the index of the first
// character that does NOT occur in |chars|. If every character before
// |endIndex| is in |chars|, the scan stops there and |endIndex| is returned
// (or |beginIndex| itself when it is already at or past |endIndex|).
static int SeekPast(String source,int beginIndex,int endIndex,String chars) {
  int pos = beginIndex;
  while (pos < endIndex && CharIsA(source.charAt(pos), chars)) {
    pos++;
  }
  return pos;
}
// Scans |source| backward from |beginIndex| and returns the index of the first
// character that does NOT occur in |chars|. The scan never moves past
// |endIndex|: once the position equals |endIndex| it is returned without
// testing the character there.
static int SeekBackPast(String source,int beginIndex,int endIndex,String chars) {
  int pos = beginIndex;
  while (pos != endIndex && CharIsA(source.charAt(pos), chars)) {
    pos--;
  }
  return pos;
}
static final String kTerminator = "\n\r\0";
static final String kWhitespace = " \t";
static final String kValueSeparator = ";";
static final String kTokenSeparator = ";=";
/**
 * Finds the first position in strToSearch, at or after fromIndex, that holds
 * ANY of the characters contained in tokens.
 *
 * @param strToSearch the string to scan
 * @param fromIndex   index to start scanning at; negative values are treated as 0
 * @param tokens      the set of characters to look for
 * @return the index of the first matching character, or -1 when there is none
 *         (including when fromIndex is at or past the end of strToSearch)
 **/
static int firstIndexOf(String strToSearch,int fromIndex,String tokens) {
  final int length = strToSearch.length();
  for (int i = Math.max(fromIndex, 0); i < length; i++) {
    if (tokens.indexOf(strToSearch.charAt(i)) >= 0) {
      return i;
    }
  }
  return -1;
}
// Parse all token/value pairs and populate pairs_.
//
// Parsing stops at the first terminator character (kTerminator), at the end of
// the line, or after kMaxPairs pairs, whichever comes first. Pair 0 is the
// cookie's own name/value: a first segment without '=' is stored as a value
// with an empty name, while any later segment without '=' is stored as a name
// with an empty value. The names of all pairs after the first are lower-cased.
// Throughout the method |it| is the current scan position in cookie_line.
void ParseTokenValuePairs(String cookie_line) {
pairs_.clear();
// Ok, here we go. We should be expecting to be starting somewhere
// before the cookie line, not including any header name...
int start = 0;
int end = cookie_line.length();
int it = start;
// TODO Make sure we're stripping \r\n in the network code. Then we
// can log any unexpected terminators.
int term_pos = firstIndexOf(cookie_line,0,kTerminator);
if (term_pos != -1) {
// We found a character we should treat as an end of string.
end = start + term_pos;
}
for (int pair_num = 0; pair_num < kMaxPairs && it != end; ++pair_num) {
TokenValuePair pair = new TokenValuePair();
int token_start, token_real_end, token_end;
// Seek past any whitespace before the "token" (the name).
// token_start should point at the first character in the token
if ((it = SeekPast(cookie_line,it, end, kWhitespace)) == end)
break; // No token, whitespace or empty.
token_start = it;
// Seek over the token, to the token separator.
// token_real_end should point at the token separator, i.e. '='.
// If it == end after the seek, we probably have a token-value.
it = SeekTo(cookie_line,it, end, kTokenSeparator);
token_real_end = it;
// Ignore any whitespace between the token and the token separator.
// token_end should point after the last interesting token character,
// pointing at either whitespace, or at '=' (and equal to token_real_end).
if (it != token_start) { // We could have an empty token name.
--it; // Go back before the token separator.
// Skip over any whitespace to the first non-whitespace character.
it = SeekBackPast(cookie_line,it, token_start, kWhitespace);
// Point after it.
++it;
}
token_end = it;
// Seek us back to the end of the token.
it = token_real_end;
if (it == end || cookie_line.charAt(it) != '=') {
// We have a token-value, we didn't have any token name.
if (pair_num == 0) {
// For the first time around, we want to treat single values
// as a value with an empty name. (Mozilla bug 169091).
// IE seems to also have this behavior, ex "AAA", and "AAA=10" will
// set 2 different cookies, and setting "BBB" will then replace "AAA".
pair.first = "";
// Rewind to the beginning of what we thought was the token name,
// and let it get parsed as a value.
it = token_start;
} else {
// Any not-first attribute we want to treat a value as a
// name with an empty value... This is so something like
// "secure;" will get parsed as a Token name, and not a value.
pair.first = cookie_line.substring(token_start, token_end);
}
} else {
// We have a TOKEN=VALUE.
pair.first = cookie_line.substring(token_start, token_end);
++it; // Skip past the '='.
}
// OK, now try to parse a value.
int value_start, value_end;
// Seek past any whitespace that might be in-between the token and value.
it = SeekPast(cookie_line,it, end, kWhitespace);
// value_start should point at the first character of the value.
value_start = it;
// It is unclear exactly how quoted string values should be handled.
// Major browsers do different things, for example, Firefox supports
// semicolons embedded in a quoted value, while IE does not. Looking at
// the specs, RFC 2109 and 2965 allow for a quoted-string as the value.
// However, these specs were apparently written after browsers had
// implemented cookies, and they seem very distant from the reality of
// what is actually implemented and used on the web. The original spec
// from Netscape is possibly what is closest to the cookies used today.
// This spec didn't have explicit support for double quoted strings, and
// states that ; is not allowed as part of a value. We had originally
// implemented the Firefox behavior (A="B;C"; -> A="B;C";). However, since
// there is no standard that makes sense, we decided to follow the behavior
// of IE and Safari, which is closer to the original Netscape proposal.
// This means that A="B;C" -> A="B;. This also makes the code much simpler
// and reduces the possibility for invalid cookies, where other browsers
// like Opera currently reject those invalid cookies (ex A="B" "C";).
// Just look for ';' to terminate ('=' allowed).
// We can hit the end, maybe they didn't terminate.
it = SeekTo(cookie_line,it, end, kValueSeparator);
// Will be pointed at the ; separator or the end.
value_end = it;
// Ignore any unwanted whitespace after the value.
if (value_end != value_start) { // Could have an empty value
--value_end;
value_end = SeekBackPast(cookie_line,value_end, value_start, kWhitespace);
++value_end;
}
// OK, we're finished with a Token/Value.
pair.second = cookie_line.substring(value_start, value_end);
// From RFC2109: "Attributes (names) (attr) are case-insensitive."
// Pair 0 is the cookie's own name, so its case is preserved.
if (pair_num != 0)
pair.first = pair.first.toLowerCase();
pairs_.add(pair);
// We've processed a token/value pair, we're either at the end of
// the string or a ValueSeparator like ';', which we want to skip.
if (it != end)
++it;
}
}
static final String kPathTokenName = "path";
static final String kDomainTokenName = "domain";
static final String kExpiresTokenName = "expires";
static final String kMaxAgeTokenName = "max-age";
static final String kSecureTokenName = "secure";
static final String kHttpOnlyTokenName = "httponly";
// Records, for every recognized attribute name, the position of its pair in
// pairs_. Pair 0 (the user supplied name/value) is skipped, unknown attribute
// names are ignored, and when an attribute is repeated the last occurrence wins.
void SetupAttributes() {
  final int pairCount = pairs_.size();
  for (int index = 1; index < pairCount; ++index) {
    final String attribute = pairs_.get(index).first;
    if (attribute.equals(kPathTokenName)) {
      path_index_ = index;
    } else if (attribute.equals(kDomainTokenName)) {
      domain_index_ = index;
    } else if (attribute.equals(kExpiresTokenName)) {
      expires_index_ = index;
    } else if (attribute.equals(kMaxAgeTokenName)) {
      maxage_index_ = index;
    } else if (attribute.equals(kSecureTokenName)) {
      secure_index_ = index;
    } else if (attribute.equals(kHttpOnlyTokenName)) {
      httponly_index_ = index;
    }
    // Anything else is an attribute we don't know or don't care about.
  }
}
Vector<TokenValuePair> pairs_ = new Vector<TokenValuePair>();
boolean is_valid_ = false;
// These will default to 0, but that should never be valid since the
// 0th index is the user supplied token/value, not an attribute.
// We're really never going to have more than like 8 attributes, so we
// could fit these into 3 bits each if we're worried about size...
int path_index_ =0;
int domain_index_=0;
int expires_index_=0;
int maxage_index_=0;
int secure_index_=0;
int httponly_index_=0;
// Unit tests for ParsedCookie. Only parsedCookieTests() carries @Test; it
// calls every helper below in sequence, so the first failing assertion
// aborts the remaining helpers.
static public class ParsedCookieTests {
@Test
public void parsedCookieTests() throws Exception {
TestBasic();
TestQuoted();
TestNameless();
TestAttributeCase();
TestDoubleQuotedNameless();
QuoteOffTheEnd();
MissingName();
MissingValue();
Whitespace();
MultipleEquals();
QuotedTrailingWhitespace();
TrailingWhitespace();
TooManyPairs();
InvalidWhitespace();
InvalidTooLong();
InvalidEmpty();
EmbeddedTerminator();
}
// Simplest case: a single name=value pair and no attributes.
void TestBasic() {
ParsedCookie pc = new ParsedCookie("a=b");
Assert.assertTrue(pc.IsValid());
Assert.assertFalse(pc.IsSecure());
Assert.assertEquals("a", pc.Name());
Assert.assertEquals("b", pc.Value());
}
void TestQuoted() {
// These are some quoting cases which the major browsers all
// handle differently. I've tested Internet Explorer 6, Opera 9.6,
// Firefox 3, and Safari Windows 3.2.1. We originally tried to match
// Firefox closely, however we now match Internet Explorer and Safari.
// The array holds (input, expected value) pairs at even/odd indexes.
String values[] = {
// Trailing whitespace after a quoted value. The whitespace after
// the quote is stripped in all browsers.
"\"zzz \" ", "\"zzz \"",
// Handling a quoted value with a ';', like FOO="zz;pp" ;
// IE and Safari: "zz;
// Firefox and Opera: "zz;pp"
"\"zz;pp\" ;", "\"zz",
// Handling a value with multiple quoted parts, like FOO="zzz " "ppp" ;
// IE and Safari: "zzz " "ppp";
// Firefox: "zzz ";
// Opera: <rejects cookie>
"\"zzz \" \"ppp\" ", "\"zzz \" \"ppp\"",
// A quote in a value that didn't start quoted. like FOO=A"B ;
// IE, Safari, and Firefox: A"B;
// Opera: <rejects cookie>
"A\"B", "A\"B",
};
for (int i = 0; i < values.length; i += 2) {
String input = values[i];
String expected = values[i + 1];
ParsedCookie pc = new ParsedCookie("aBc=" + input + " ; path=\"/\" ; httponly ");
Assert.assertTrue(pc.IsValid());
Assert.assertFalse(pc.IsSecure());
Assert.assertTrue(pc.IsHttpOnly());
Assert.assertTrue(pc.HasPath());
Assert.assertEquals("aBc", pc.Name());
Assert.assertEquals(expected, pc.Value());
// If a path was quoted, the path attribute keeps the quotes. This will
// make the cookie effectively useless, but path parameters aren't supposed
// to be quoted. Bug 1261605.
Assert.assertEquals("\"/\"", pc.Path());
}
}
// A first segment without '=' becomes a value with an empty name.
void TestNameless() {
ParsedCookie pc = new ParsedCookie("BLAHHH; path=/; secure;");
Assert.assertTrue(pc.IsValid());
Assert.assertTrue(pc.IsSecure());
Assert.assertTrue(pc.HasPath());
Assert.assertEquals("/", pc.Path());
Assert.assertEquals("", pc.Name());
Assert.assertEquals("BLAHHH", pc.Value());
}
// Attribute names are matched case-insensitively.
void TestAttributeCase() {
ParsedCookie pc = new ParsedCookie("BLAHHH; Path=/; sECuRe; httpONLY");
Assert.assertTrue(pc.IsValid());
Assert.assertTrue(pc.IsSecure());
Assert.assertTrue(pc.IsHttpOnly());
Assert.assertTrue(pc.HasPath());
Assert.assertEquals("/", pc.Path());
Assert.assertEquals("", pc.Name());
Assert.assertEquals("BLAHHH", pc.Value());
Assert.assertEquals(3, pc.NumberOfAttributes());
}
// Quotes and backslashes in a nameless value are kept verbatim.
void TestDoubleQuotedNameless() {
ParsedCookie pc = new ParsedCookie("\"BLA\\\"HHH\"; path=/; secure;");
Assert.assertTrue(pc.IsValid());
Assert.assertTrue(pc.IsSecure());
Assert.assertTrue(pc.HasPath());
Assert.assertEquals("/", pc.Path());
Assert.assertEquals("", pc.Name());
Assert.assertEquals("\"BLA\\\"HHH\"", pc.Value());
Assert.assertEquals(2, pc.NumberOfAttributes());
}
// An unterminated quote is simply part of the value.
void QuoteOffTheEnd() {
ParsedCookie pc = new ParsedCookie("a=\"B");
Assert.assertTrue(pc.IsValid());
Assert.assertEquals("a", pc.Name());
Assert.assertEquals("\"B", pc.Value());
Assert.assertEquals(0, pc.NumberOfAttributes());
}
// A leading '=' yields an empty name.
void MissingName() {
ParsedCookie pc = new ParsedCookie("=ABC");
Assert.assertTrue(pc.IsValid());
Assert.assertEquals("", pc.Name());
Assert.assertEquals("ABC", pc.Value());
Assert.assertEquals(0, pc.NumberOfAttributes());
}
// "NAME=" yields an empty value; whitespace around an attribute's '=' is ignored.
void MissingValue() {
ParsedCookie pc = new ParsedCookie("ABC=; path = /wee");
Assert.assertTrue(pc.IsValid());
Assert.assertEquals("ABC", pc.Name());
Assert.assertEquals("", pc.Value());
Assert.assertTrue(pc.HasPath());
Assert.assertEquals("/wee", pc.Path());
Assert.assertEquals(1, pc.NumberOfAttributes());
}
void Whitespace() {
ParsedCookie pc = new ParsedCookie(" A = BC ;secure;;; httponly");
Assert.assertTrue(pc.IsValid());
Assert.assertEquals("A", pc.Name());
Assert.assertEquals("BC", pc.Value());
Assert.assertFalse(pc.HasPath());
Assert.assertFalse(pc.HasDomain());
Assert.assertTrue(pc.IsSecure());
Assert.assertTrue(pc.IsHttpOnly());
// We parse anything between ; as attributes, so we end up with two
// attributes with an empty string name and value.
Assert.assertEquals(4, pc.NumberOfAttributes());
}
// Only the first '=' separates name from value; later ones belong to the value.
void MultipleEquals() {
ParsedCookie pc = new ParsedCookie(" A=== BC ;secure;;; httponly");
Assert.assertTrue(pc.IsValid());
Assert.assertEquals("A", pc.Name());
Assert.assertEquals("== BC", pc.Value());
Assert.assertFalse(pc.HasPath());
Assert.assertFalse(pc.HasDomain());
Assert.assertTrue(pc.IsSecure());
Assert.assertTrue(pc.IsHttpOnly());
Assert.assertEquals(4, pc.NumberOfAttributes());
}
void QuotedTrailingWhitespace() {
ParsedCookie pc = new ParsedCookie("ANCUUID=\"zohNumRKgI0oxyhSsV3Z7D\" ; "+
"expires=Sun, 18-Apr-2027 21:06:29 GMT ; "+
"path=/ ; ");
Assert.assertTrue(pc.IsValid());
Assert.assertEquals("ANCUUID", pc.Name());
// Stripping whitespace after the quotes matches all other major browsers.
Assert.assertEquals("\"zohNumRKgI0oxyhSsV3Z7D\"", pc.Value());
Assert.assertTrue(pc.HasExpires());
Assert.assertTrue(pc.HasPath());
Assert.assertEquals("/", pc.Path());
Assert.assertEquals(2, pc.NumberOfAttributes());
}
// Same as above but with an unquoted value.
void TrailingWhitespace() {
ParsedCookie pc = new ParsedCookie("ANCUUID=zohNumRKgI0oxyhSsV3Z7D ; "+
"expires=Sun, 18-Apr-2027 21:06:29 GMT ; "+
"path=/ ; ");
Assert.assertTrue(pc.IsValid());
Assert.assertEquals("ANCUUID", pc.Name());
Assert.assertEquals("zohNumRKgI0oxyhSsV3Z7D", pc.Value());
Assert.assertTrue(pc.HasExpires());
Assert.assertTrue(pc.HasPath());
Assert.assertEquals("/", pc.Path());
Assert.assertEquals(2, pc.NumberOfAttributes());
}
// blankpairs is kMaxPairs - 1 semicolons, i.e. that many empty pairs. In pc1
// "secure" is the last pair that still fits; in pc2 one more ';' pushes it
// past kMaxPairs, so it is never parsed.
void TooManyPairs() {
StringBuffer blankpairs = new StringBuffer();
blankpairs.setLength(ParsedCookie.kMaxPairs - 1);
for (int i=0;i<blankpairs.length();++i)
blankpairs.setCharAt(i, ';');
ParsedCookie pc1 = new ParsedCookie(blankpairs.toString() + "secure");
Assert.assertTrue(pc1.IsValid());
Assert.assertTrue(pc1.IsSecure());
ParsedCookie pc2 = new ParsedCookie(blankpairs + ";secure");
Assert.assertTrue(pc2.IsValid());
Assert.assertFalse(pc2.IsSecure());
}
// TODO some better test cases for invalid cookies.
void InvalidWhitespace() {
ParsedCookie pc = new ParsedCookie(" ");
Assert.assertFalse(pc.IsValid());
}
// A line of exactly kMaxCookieSize characters is accepted; one more is rejected.
void InvalidTooLong() {
StringBuffer maxstr = new StringBuffer();
maxstr.setLength(ParsedCookie.kMaxCookieSize);
for (int i=0;i<maxstr.length();++i)
maxstr.setCharAt(i, 'a');
ParsedCookie pc1 = new ParsedCookie(maxstr.toString());
Assert.assertTrue(pc1.IsValid());
ParsedCookie pc2 = new ParsedCookie(maxstr + "A");
Assert.assertFalse(pc2.IsValid());
}
void InvalidEmpty() {
ParsedCookie pc = new ParsedCookie("");
Assert.assertFalse(pc.IsValid());
}
// Everything from the first \r or \n onward is ignored. The NUL-terminator
// case (pc1) is commented out; the reason is not recorded in this file.
void EmbeddedTerminator() {
// ParsedCookie pc1 = new ParsedCookie("AAA=BB\0ZYX");
ParsedCookie pc2 = new ParsedCookie("AAA=BB\rZYX");
ParsedCookie pc3 = new ParsedCookie("AAA=BB\nZYX");
// Assert.assertTrue(pc1.IsValid());
// Assert.assertEquals("AAA", pc1.Name());
// Assert.assertEquals("BB", pc1.Value());
Assert.assertTrue(pc2.IsValid());
Assert.assertEquals("AAA", pc2.Name());
Assert.assertEquals("BB", pc2.Value());
Assert.assertTrue(pc3.IsValid());
Assert.assertEquals("AAA", pc3.Name());
Assert.assertEquals("BB", pc3.Value());
}
}
}
/*
@Override
public int compareTo(RootDomainCookie o) {
int result = _rootDomainHash - o._rootDomainHash;
if (result == 0) {
return (_cookieId < o._cookieId) ? -1 : 1;
}
return result;
}
*/
/**
 * A cookie in the form in which it is kept by the cookie store: the hash of
 * the root domain it belongs to, the domain the cookie applies to, name,
 * value, path, the secure/httponly flags and creation, last-access and expiry
 * times (all times are longs compared against System.currentTimeMillis()
 * style values by the code in this file).
 *
 * Instances created through the hash-only constructor carry no domain, name
 * or path (those fields stay null); they are used as search keys.
 */
public static class CanonicalCookie implements HttpCookie {
  /**
   * the public constructor
   *
   * @param rootDomainName root domain of the cookie; only its hashCode() is kept
   * @param fqDomainName   domain the cookie applies to
   * @param name           cookie name (may be empty)
   * @param value          cookie value
   * @param path           cookie path
   * @param secure         whether the secure attribute was set
   * @param httponly       whether the httponly attribute was set
   * @param creation       creation time
   * @param last_access    last access time
   * @param has_expires    whether the cookie has an expiry time at all
   * @param expires        expiry time; only meaningful when has_expires is true
   */
  public CanonicalCookie(String rootDomainName,String fqDomainName,String name,
      String value,
      String path,
      boolean secure,
      boolean httponly,
      long creation,
      long last_access,
      boolean has_expires,
      long expires) {
    _rootDomainHash = rootDomainName.hashCode();
    domain_ = fqDomainName;
    name_ = name;
    value_ = value;
    path_ = path;
    creation_date_ = creation;
    last_access_date_ = last_access;
    expiry_date_ = expires;
    has_expires_ = has_expires;
    secure_ = secure;
    httponly_ = httponly;
  }

  /**
   * internal specialized constructor: builds a key that carries only the root
   * domain hash. domain_, name_, value_ and path_ remain null.
   */
  public CanonicalCookie(int rootDomainHash) {
    _rootDomainHash = rootDomainHash;
  }

  /**
   * Two cookies are equal when they have the same root domain hash, the same
   * domain (ignoring case) and are equivalent per {@link #IsEquivalent}.
   * Null domains/names/paths (hash-only keys) are compared without throwing.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (!(obj instanceof CanonicalCookie)) {
      return false;
    }
    CanonicalCookie other = (CanonicalCookie) obj;
    if (_rootDomainHash != other._rootDomainHash) {
      return false;
    }
    boolean sameDomain = (domain_ == null)
        ? other.domain_ == null
        : domain_.equalsIgnoreCase(other.domain_);
    return sameDomain && IsEquivalent(other);
  }

  /** Consistent with equals(): equal cookies always share the root domain hash. */
  @Override
  public int hashCode() {
    return _rootDomainHash;
  }

  public int getRootDomainHash() { return _rootDomainHash; }

  /** cookie attributes **/
  public String Domain() { return domain_; }
  public String Name() { return name_; }
  public String Value() { return value_; }
  public String Path() { return path_; }
  public long CreationDate() { return creation_date_; }
  public long LastAccessDate() { return last_access_date_; }
  public boolean DoesExpire() { return has_expires_; }
  public boolean IsPersistent() { return DoesExpire(); }
  public long ExpiryDate() { return expiry_date_; }
  public boolean IsSecure() { return secure_; }
  public boolean IsHttpOnly() { return httponly_; }

  // A cookie without an expiry time never expires; otherwise it is expired
  // once |current| has reached the expiry time.
  boolean IsExpired(long current) {
    return has_expires_ && current >= expiry_date_;
  }

  // Are the cookies considered equivalent in the eyes of the RFC.
  // This says that the domain and path should string match identically.
  // (Only name and path are compared here; equals() compares the domain.)
  boolean IsEquivalent(CanonicalCookie ecc) {
    // It seems like it would make sense to take secure and httponly into
    // account, but the RFC doesn't specify this.
    return sameString(name_, ecc.Name()) && sameString(path_, ecc.Path());
  }

  // Null-tolerant String.equals: two nulls are the same, null never equals non-null.
  private static boolean sameString(String a, String b) {
    return (a == null) ? b == null : a.equals(b);
  }

  void SetLastAccessDate(long date) {
    last_access_date_ = date;
  }

  // Returns true when this cookie's path covers |url_path|, i.e. the cookie
  // path is a prefix of the url path that ends on a path-segment boundary.
  boolean IsOnPath(String url_path) {
    if (url_path.length() == 0)
      url_path = "/";
    // A zero length would be unsafe for our trailing '/' checks, and
    // would also make no sense for our prefix match. The code that
    // creates a CanonicalCookie should make sure the path is never zero length,
    // but we double check anyway.
    if (path_.length() == 0)
      return false;
    // The Mozilla code broke it into 3 cases, if its string lengths
    // are less than, equal, or greater. I think this is simpler:
    // Make sure the cookie path is a prefix of the url path. If the
    // url path is shorter than the cookie path, then the cookie path
    // can't be a prefix.
    if (!url_path.startsWith(path_))
      return false;
    // Now we know that url_path is >= cookie_path, and that cookie_path
    // is a prefix of url_path. If they are the same length then
    // they are identical, otherwise we need an additional check:
    // In order to avoid incorrectly matching a cookie path of /blah
    // with a request path of '/blahblah/', we need to make sure that either
    // the cookie path ends in a trailing '/', or that we prefix up to a '/'
    // in the url path. Since we know that the url path length is greater
    // than the cookie path length, it's safe to index one char past.
    if (path_.length() != url_path.length() &&
        path_.charAt(path_.length() - 1) != '/' &&
        url_path.charAt(path_.length()) != '/')
      return false;
    return true;
  }

  // SimpleDateFormat is not thread-safe, so every thread gets its own instance.
  private static final ThreadLocal<SimpleDateFormat> dateFormat = new ThreadLocal<SimpleDateFormat>() {
    @Override
    protected SimpleDateFormat initialValue() {
      return new SimpleDateFormat("yyyy.MM.dd-HH:mm:ss.SSS ZZZ");
    }
  };

  /** Multi-line, human readable dump of the cookie for logging/debugging. */
  @Override
  public String toString() {
    SimpleDateFormat format = dateFormat.get();
    return
      "domain:" + domain_ + " name:" + name_ + " path:" + path_ + "\n"
      + "created:" + format.format(new Date(creation_date_))
      + " last accessed:" + format.format(new Date(last_access_date_))
      + " expires:" + format.format(new Date(expiry_date_)) +"\n"
      + "value:" + value_ + "\n";
  }

  int _rootDomainHash;
  String domain_;
  String name_;
  String value_;
  String path_;
  long creation_date_;
  long last_access_date_;
  long expiry_date_;
  boolean has_expires_;
  boolean secure_;
  boolean httponly_;
}
public static class CookieStore implements NIOHttpCookieStore {
Vector<CanonicalCookie> _cookies = new Vector<CanonicalCookie>();
private static int kNumCookiesPerHost = 70;
private static int kNumCookiesPerHostPurge = 20;
/**
 * Appends every cookie currently held by this store to cookiesOut, in the
 * store's internal order. The CanonicalCookie objects themselves are added,
 * not copies of them.
 *
 * @param cookiesOut vector that receives the cookies; existing content is kept
 */
public synchronized void GetAllCookies(Vector<CanonicalCookie> cookiesOut) {
cookiesOut.addAll(_cookies);
}
/**
 * Builds the cookie string to send for a request to url: all matching
 * cookies as "name=value" entries joined by "; ".
 *
 * @param url the request url
 * @return the joined cookie string, or "" when no cookie matches
 */
public synchronized String GetCookies(URL url) {
  Vector<CanonicalCookie> matches = findCookiesForURL(url);
  if (matches == null) {
    return "";
  }
  // Mozilla sorts on the path length (longest first), and then it
  // sorts by creation time (oldest first).
  // The RFC says the sort order for the domain attribute is undefined.
  Collections.sort(matches, new Comparator<CanonicalCookie>() {
    @Override
    public int compare(CanonicalCookie cc1, CanonicalCookie cc2) {
      int pathLength1 = cc1.Path().length();
      int pathLength2 = cc2.Path().length();
      if (pathLength1 != pathLength2) {
        return (pathLength1 > pathLength2) ? -1 : 1;
      }
      long created1 = cc1.CreationDate();
      long created2 = cc2.CreationDate();
      if (created1 == created2) {
        return 0;
      }
      return (created1 < created2) ? -1 : 1;
    }
  });
  StringBuilder header = new StringBuilder();
  boolean first = true;
  for (CanonicalCookie cookie : matches) {
    if (!first) {
      header.append("; ");
    }
    first = false;
    // In Mozilla if you set a cookie like AAAA, it will have an empty token
    // and a value of AAAA. When it sends the cookie back, it will send AAAA,
    // so we need to avoid sending =AAAA for a blank token value.
    if (cookie.Name().length() != 0) {
      header.append(cookie.Name()).append("=");
    }
    header.append(cookie.Value());
  }
  return header.toString();
}
/**
 * Defines the order in which cookies are kept in the store: by root domain
 * hash, then domain, then name, then path. A cookie without a domain (a
 * hash-only search key) sorts before every cookie with the same hash that has
 * one, and two domain-less cookies with the same hash compare as equal.
 */
private static class CookieComparator implements Comparator<CanonicalCookie> {
  @Override
  public int compare(CanonicalCookie o1, CanonicalCookie o2) {
    // Compare the hashes explicitly: subtracting them overflows when they
    // have opposite signs and then reports the wrong order.
    if (o1._rootDomainHash != o2._rootDomainHash) {
      return (o1._rootDomainHash < o2._rootDomainHash) ? -1 : 1;
    }
    if (o1.domain_ == null || o2.domain_ == null) {
      if (o1.domain_ == null && o2.domain_ == null) {
        return 0;
      }
      return (o1.domain_ == null) ? -1 : 1;
    }
    int result = o1.domain_.compareTo(o2.domain_);
    if (result == 0) {
      result = o1.name_.compareTo(o2.name_);
    }
    if (result == 0) {
      result = o1.path_.compareTo(o2.path_);
    }
    return result;
  }
}
/**
 * Collects the stored, unexpired cookies whose domain and path match url.
 * Matching cookies get their last-access time updated; cookies that match
 * the host but have expired are removed from the store as a side effect.
 *
 * NOTE(review): this method is public but not synchronized, although it
 * mutates _cookies; GetCookies() calls it while holding the lock — confirm
 * that no other caller uses it concurrently.
 *
 * @param url the request url
 * @return the matching cookies, or null when there are none or the root
 *         domain of the url's host cannot be determined
 */
public Vector<CanonicalCookie> findCookiesForURL(URL url) {
  Vector<CanonicalCookie> cookiesOut = new Vector<CanonicalCookie>();
  long currentTime = System.currentTimeMillis();
  // get the url's root domain name
  String urlHost = url.getHost().toLowerCase();
  String urlRootDomainName = URLUtils.extractRootDomainName(urlHost);
  if (urlRootDomainName == null) {
    LOG.error("###FIND COOKIE GET ROOT DOMAIN NAME FOR HOST RETURNED NULL:" + urlHost);
    return null;
  }
  String urlSubDomainName = urlHost.substring(0,urlHost.length() - urlRootDomainName.length());
  // construct a hash-only cookie for the query
  CanonicalCookie queryCookie = new CanonicalCookie(urlRootDomainName.hashCode());
  // The query has no domain, so CookieComparator never reports it equal to a
  // stored cookie that has one (setCookie only stores cookies with a domain).
  // The search therefore yields -(insertionPoint) - 1, and the insertion
  // point is where the cookies with this root domain hash start.
  int itemPosition = Collections.binarySearch(_cookies,queryCookie,new CookieComparator());
  itemPosition = -(itemPosition + 1);
  // walk the cookies that share the root domain hash ...
  for (int i=itemPosition;i<_cookies.size();++i) {
    CanonicalCookie currentCookie = _cookies.get(i);
    if (currentCookie._rootDomainHash != queryCookie._rootDomainHash)
      break;
    String cookieDomain = currentCookie.Domain();
    String currentCookieRootDomainName = URLUtils.extractRootDomainName(cookieDomain);
    if (currentCookieRootDomainName == null) {
      LOG.error("###EXTRACT ROOT DOMAIN FOR HOST RETURNED NULL:" + cookieDomain);
      continue;
    }
    // equal hashes can still be different root domains
    if (!currentCookieRootDomainName.equalsIgnoreCase(urlRootDomainName)) {
      continue;
    }
    // Ensure |url_host| is |cookie_domain| or one of its subdomains.
    int lengthDelta = urlHost.length() - cookieDomain.length();
    boolean validMatch = false;
    if (lengthDelta == 0) {
      // same length: match when the host is the bare root domain or
      // when the sub domain parts are identical
      String currentCookieSubDomainName = cookieDomain.substring(0,cookieDomain.length() - currentCookieRootDomainName.length());
      validMatch = urlSubDomainName.length() == 0
          || urlSubDomainName.equals(currentCookieSubDomainName);
    }
    else if (lengthDelta == -1) {
      // host:   google.com
      // cookie: .google.com
      // The cookie domain must be exactly "." + host. Checking only the
      // leading dot would also accept e.g. ".x.google.com" for "a.google.com".
      validMatch = cookieDomain.charAt(0) == '.'
          && cookieDomain.regionMatches(true, 1, urlHost, 0, urlHost.length());
    }
    else if (lengthDelta >= 1 && cookieDomain.charAt(0) == '.') {
      // host:   www.google.com
      // cookie:    .google.com
      validMatch = urlHost.substring(lengthDelta).compareToIgnoreCase(cookieDomain) == 0;
    }
    if (!validMatch) {
      continue;
    }
    if (currentCookie.IsExpired(currentTime)) {
      // time to remove it from the store; step back so the element that
      // slides into slot i is not skipped
      _cookies.remove(i);
      --i;
    }
    else if (currentCookie.IsOnPath(url.getPath())) {
      // matched ... TOUCH the cookie and hand it out
      currentCookie.SetLastAccessDate(System.currentTimeMillis());
      cookiesOut.add(currentCookie);
    }
  }
  return (cookiesOut.size() != 0) ? cookiesOut : null;
}
/**
 * Parses a raw cookie line and, if it is valid, stores it for urlObject
 * using the current time as creation time.
 *
 * @param urlObject url of the response that set the cookie
 * @param cookie    the cookie line to parse
 * @return true when the cookie was stored, false when the line is invalid
 *         or the cookie was rejected
 */
public boolean setCookie(URL urlObject,String cookie) {
  ParsedCookie parsed = new ParsedCookie(cookie);
  if (!parsed.IsValid()) {
    return false;
  }
  return setCookie(urlObject, parsed, System.currentTimeMillis());
}
/**
 * Stores an already parsed cookie for url. Any stored cookie that compares
 * equal under CookieComparator is removed first; the new cookie is then
 * inserted unless it is already expired at creation_time. Afterwards the
 * cookies of the url's root domain are garbage collected.
 *
 * @param url           url of the response that set the cookie
 * @param cookie        the parsed cookie
 * @param creation_time time used as creation and last-access time
 * @return true when the cookie was inserted into the store
 */
public synchronized boolean setCookie(URL url,ParsedCookie cookie,long creation_time) {
  // validate cookie domain key against url domain ...
  String cookieDomain = getDomainKeyForCookie(url, cookie);
  if (cookieDomain == null) {
    return false;
  }
  String urlTLDName = URLUtils.extractRootDomainName(url.getHost().toLowerCase());
  if (urlTLDName == null) {
    LOG.error("###COOKIE: Unable to Extract Root Domain from Cookie Domain:" + cookieDomain);
    return false;
  }
  String canonicalPath = getCanonicalPathForCookie(url, cookie);
  // -1 means the cookie carries no expiry information
  long canonicalDate = getCanonicalTimeForCookie(cookie, creation_time);
  boolean hasExpiry = canonicalDate != -1;
  CanonicalCookie canonicalCookie = new CanonicalCookie(
      urlTLDName, cookieDomain,
      cookie.Name(), cookie.Value(), canonicalPath,
      cookie.IsSecure(), cookie.IsHttpOnly(),
      creation_time, creation_time,
      hasExpiry, canonicalDate);
  // drop the previous incarnation of this cookie, remembering its slot ...
  int deletionIndex = deleteAnyEquivalentCookie(canonicalCookie);
  // ... and only re-insert when the new cookie is still alive
  boolean stored = !canonicalCookie.IsExpired(creation_time);
  if (stored) {
    insertCookie(canonicalCookie, deletionIndex);
  }
  // ok now garbage collect based on domain ...
  garbageCollectCookies(urlTLDName);
  return stored;
}
/**
 * Limits the number of cookies stored for one root domain. Once more than
 * kNumCookiesPerHost cookies exist for rootDomainName, the least recently
 * accessed ones are removed until
 * (kNumCookiesPerHost - kNumCookiesPerHostPurge) remain.
 */
private void garbageCollectCookies(String rootDomainName) {
  final Vector<Integer> candidates = collectCookieRange(rootDomainName);
  if (candidates.size() <= kNumCookiesPerHost) {
    return;
  }
  // order the store indexes by last access time, oldest first
  Collections.sort(candidates, new Comparator<Integer>() {
    @Override
    public int compare(Integer o1, Integer o2) {
      long access1 = _cookies.get(o1).last_access_date_;
      long access2 = _cookies.get(o2).last_access_date_;
      if (access1 == access2) {
        return 0;
      }
      return (access1 < access2) ? -1 : 1;
    }
  });
  // Purge down to (|num_max| - |num_purge|) total cookies.
  int purgeCount = kNumCookiesPerHostPurge + (candidates.size() - kNumCookiesPerHost);
  // keep only the indexes of the cookies that are to be removed
  candidates.setSize(purgeCount);
  // back to index order; removing from the highest index down leaves the
  // lower indexes untouched
  Collections.sort(candidates);
  for (int k = candidates.size() - 1; k >= 0; --k) {
    int storeIndex = candidates.get(k).intValue();
    _cookies.remove(storeIndex);
  }
}
/**
 * Returns the indexes (into _cookies, ascending) of all stored cookies whose
 * root domain hash matches rootDomainNameFilter and whose domain resolves to
 * that same root domain name (ignoring case).
 */
private Vector<Integer> collectCookieRange(String rootDomainNameFilter) {
  // a hash-only cookie serves as the search key
  CanonicalCookie queryCookie = new CanonicalCookie(rootDomainNameFilter.hashCode());
  int searchResult = Collections.binarySearch(_cookies, queryCookie, new CookieComparator());
  // the search result is treated as -(insertionPoint) - 1
  int firstIndex = -(searchResult + 1);
  Vector<Integer> rangeOut = new Vector<Integer>();
  for (int i = firstIndex; i < _cookies.size(); ++i) {
    CanonicalCookie currentCookie = _cookies.get(i);
    if (currentCookie.getRootDomainHash() != queryCookie._rootDomainHash) {
      // left the run of cookies with this hash
      break;
    }
    String currentCookieRootDomainName = URLUtils.extractRootDomainName(currentCookie.Domain());
    if (currentCookieRootDomainName == null) {
      LOG.error("#### GET ROOT DOMAIN FOR HOST RETURNED NULL:" + currentCookie);
    }
    else if (currentCookieRootDomainName.equalsIgnoreCase(rootDomainNameFilter)) {
      rangeOut.add(i);
    }
  }
  return rangeOut;
}
/**
 * Inserts cookie into the sorted cookie vector.
 *
 * @param cookie            the cookie to insert
 * @param deletionIndexHint index at which an equivalent cookie was just
 *                          removed, or -1 when the position still has to be
 *                          looked up
 */
private void insertCookie(CanonicalCookie cookie,int deletionIndexHint) {
  int targetIndex = deletionIndexHint;
  if (targetIndex == -1) {
    // no hint: search for the proper location for the insertion ...
    int found = Collections.binarySearch(_cookies, cookie, new CookieComparator());
    targetIndex = (found < 0) ? -(found + 1) : found;
  }
  _cookies.insertElementAt(cookie, targetIndex);
}
/**
 * Removes the cookie that CookieComparator considers equal to the given
 * one, if the binary search over _cookies finds such an entry.
 *
 * @param cookie the cookie whose equivalent should be removed
 * @return the index the removed cookie occupied, or -1 if nothing matched
 */
private int deleteAnyEquivalentCookie(CanonicalCookie cookie) {
  final int matchIndex = Collections.binarySearch(_cookies,cookie,new CookieComparator());
  if (matchIndex < 0) {
    // negative result means no equivalent entry exists
    return -1;
  }
  _cookies.remove(matchIndex);
  return matchIndex;
}
// Date patterns for cookie Expires values (presumably SimpleDateFormat
// pattern syntax): the first has no trailing zone token, the second ends
// with one ("z").
// NOTE(review): neither constant is referenced in the code visible here;
// confirm they are still used elsewhere before relying on or removing them.
private static final String EXPIRES_PATTERN_1 = "EEE, dd-MMM-yyyy HH:mm:ss";
private static final String EXPIRES_PATTERN_2 = "EEE, dd-MMM-yyyy HH:mm:ss z";
/**
 * Computes the expiry time of a parsed cookie.
 *
 * Max-Age takes precedence: when present and parseable as a long, the
 * result is currentTime plus Max-Age converted from seconds to
 * milliseconds. The arithmetic saturates at Long.MAX_VALUE /
 * Long.MIN_VALUE instead of silently wrapping around for extreme values.
 * Otherwise the Expires attribute, if present, is handed to
 * DateUtils.parseHttpDate and its result returned unchanged.
 *
 * @param cookie the parsed Set-Cookie line
 * @param currentTime the reference time, in milliseconds
 * @return the expiry time in milliseconds, or -1 when the cookie carries
 *         neither a usable Max-Age nor an Expires attribute
 */
static long getCanonicalTimeForCookie(ParsedCookie cookie, long currentTime) {
  // First, try the Max-Age attribute.
  if (cookie.HasMaxAge()) {
    try {
      long maxAgeInSeconds = Long.parseLong(cookie.MaxAge());
      // seconds -> milliseconds, clamped so the multiplication cannot wrap
      long maxAgeInMillis;
      if (maxAgeInSeconds > Long.MAX_VALUE / 1000) {
        maxAgeInMillis = Long.MAX_VALUE;
      }
      else if (maxAgeInSeconds < Long.MIN_VALUE / 1000) {
        maxAgeInMillis = Long.MIN_VALUE;
      }
      else {
        maxAgeInMillis = maxAgeInSeconds * 1000;
      }
      long expiry = currentTime + maxAgeInMillis;
      // the addition overflowed iff both operands have a sign the sum lacks
      if (((currentTime ^ expiry) & (maxAgeInMillis ^ expiry)) < 0) {
        expiry = (maxAgeInMillis > 0) ? Long.MAX_VALUE : Long.MIN_VALUE;
      }
      return expiry;
    }
    catch (NumberFormatException ignored) {
      // Malformed Max-Age: deliberately ignored so that the Expires
      // attribute below still gets a chance.
    }
  }
  // Try the Expires attribute.
  if (cookie.HasExpires()) {
    return DateUtils.parseHttpDate(cookie.Expires());
  }
  return -1;
}
/**
 * Determines the path a cookie applies to.
 *
 * A path supplied by the cookie wins when it is non-empty and starts with
 * '/'. Otherwise the path defaults to the request URL's path up to, but
 * not including, its right-most '/'; when that would be empty (the last
 * '/' is the first character, or there is no '/' at all) the result is "/".
 *
 * @param url the URL the cookie was received from
 * @param cookie the parsed Set-Cookie line
 * @return the canonical cookie path
 */
private static String getCanonicalPathForCookie(URL url,ParsedCookie cookie) {
  if (cookie.HasPath()) {
    String suppliedPath = cookie.Path();
    // only an absolute (slash-prefixed) path is taken from the cookie
    if (suppliedPath.startsWith("/")) {
      return suppliedPath;
    }
  }
  // fall back to the directory portion of the request path
  String requestPath = url.getPath();
  int lastSlash = requestPath.lastIndexOf('/');
  return (lastSlash > 0) ? requestPath.substring(0, lastSlash) : "/";
}
/**
 * Validates the domain attribute of a cookie against the URL it came from
 * and returns the domain key the cookie should be stored under.
 *
 * The cookie domain is lower-cased and given a leading '.' if it lacks
 * one. It is accepted only when
 * URLUtils.extractRootDomainName yields a non-null value for it, it has
 * more than one dot-separated part, its root domain equals the URL host's
 * root domain, and the URL host either is the cookie domain (without the
 * leading '.') or is a subdomain of it.
 *
 * The host/domain comparison is a real string match. Previously a cookie
 * domain of the same length as the host, or exactly one character longer,
 * was accepted without comparing the labels, so a sibling such as
 * ".yoo.bar.com" was accepted for host "foo.bar.com".
 *
 * NOTE(review): when the cookie's domain value is empty this returns null
 * rather than the URL host; callers presumably handle host-only cookies
 * before calling — confirm.
 *
 * @param url the URL the cookie was received from
 * @param cookie the parsed Set-Cookie line
 * @return the dot-prefixed, lower-cased cookie domain when valid; the
 *         lower-cased URL host when HasDomain() is false but a domain
 *         value is present; null when the domain is empty or rejected
 */
private static String getDomainKeyForCookie(URL url,ParsedCookie cookie) {
  if (cookie.Domain().length() == 0) {
    return null;
  }
  String urlHost = url.getHost().toLowerCase();
  if (!cookie.HasDomain()) {
    return urlHost;
  }
  String cookieHost = cookie.Domain().toLowerCase();
  if (cookieHost.charAt(0) != '.') {
    cookieHost = '.' + cookieHost;
  }
  // validate that the host contains more than tld names
  String cookieRootDomain = URLUtils.extractRootDomainName(cookieHost);
  if (cookieRootDomain == null) {
    return null;
  }
  // validate that there are more than one parts to the cookie host domain
  if (cookieHost.indexOf('.', 1) == -1) {
    return null;
  }
  // the root domains of the url host and the cookie domain must match
  String urlRootDomain = URLUtils.extractRootDomainName(urlHost);
  if (urlRootDomain == null || !urlRootDomain.equals(cookieRootDomain)) {
    return null;
  }
  // Ensure |urlHost| is |cookieHost| or one of its subdomains.
  boolean hostMatchesDomain;
  if (urlHost.length() < cookieHost.length()) {
    // google.com
    // .google.com
    hostMatchesDomain = cookieHost.equals('.' + urlHost);
  }
  else {
    // www.google.com
    // .google.com
    hostMatchesDomain = urlHost.endsWith(cookieHost);
  }
  return hostMatchesDomain ? cookieHost : null;
}
/**
 * Hand-run test driver for CookieStore and DateUtils.parseHttpDate.
 *
 * The tests are plain methods invoked from main (they are not annotated
 * for a test runner) and use junit.framework.Assert for checks.
 *
 * NOTE(review): several tests construct a CrawlHostImpl local that is
 * never read afterwards; presumably a leftover — confirm the constructor
 * has no required side effects before removing them.
 */
public static class CookieStoreUnitTest {
  static final String kUrlGoogle= "http://www.google.izzle";
  static final String kUrlGoogleSecure= "https://www.google.izzle";
  static final String kUrlFtp= "ftp://ftp.google.izzle/";
  static final String kValidCookieLine= "A=B; path=/";
  static final String kValidDomainCookieLine= "A=B; path=/; domain=google.izzle";

  /** Runs the enabled tests in sequence; exceptions are printed to stderr. */
  public static void main(String[] args) {
    CookieStoreUnitTest utils = new CookieStoreUnitTest();
    try {
      GarbageCollectorTest();
      utils.TimeParseTest();
      utils.DomainTest();
      utils.DomainWithTrailingDotTest();
      utils.ValidSubdomainTest();
      utils.InvalidDomainTest();
      utils.DomainWithoutLeadingDotTest();
      utils.CaseInsensitiveDomainTest();
      // TestIpAddress() is currently disabled.
      utils.TestNonDottedAndTLD();
      // TestHostEndsWithDot() is currently disabled.
    }
    catch (Exception e) {
      e.printStackTrace();
    }
  }

  /**
   * One date-parsing expectation: parses the input on construction and
   * compares the outcome against the expected value in validate().
   */
  private static class TimeParseTestCase {
    private final long parsedValue;
    private final long expectedValue;
    private final boolean isValidTime;

    /**
     * @param stringToParse date string handed to DateUtils.parseHttpDate
     * @param isValidTime whether the string is expected to parse
     * @param expectedEpochValue expected result in epoch seconds (only
     *        consulted when isValidTime is true)
     */
    public TimeParseTestCase(String stringToParse,boolean isValidTime,long expectedEpochValue) {
      this.parsedValue = DateUtils.parseHttpDate(stringToParse);
      // dump the parsed instant as a GMT calendar for manual inspection
      Calendar gmtCalendar = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
      gmtCalendar.setTimeInMillis(this.parsedValue);
      System.out.println(gmtCalendar.toString());
      this.expectedValue = expectedEpochValue;
      this.isValidTime = isValidTime;
    }

    /**
     * @return true when an invalid input produced -1, or a valid input
     *         produced the expected value (parsed milliseconds / 1000)
     */
    public boolean validate() {
      if (!isValidTime) {
        return parsedValue == -1;
      }
      return (parsedValue/1000) == expectedValue;
    }
  }

  /** Asserts one date-parsing expectation, naming the input on failure. */
  private static void assertTimeParse(String stringToParse,boolean isValidTime,long expectedEpochValue) {
    Assert.assertTrue("unexpected parse result for: " + stringToParse,
        new TimeParseTestCase(stringToParse,isValidTime,expectedEpochValue).validate());
  }

  /** Exercises DateUtils.parseHttpDate with well-formed and malformed dates. */
  void TimeParseTest()throws Exception {
    assertTimeParse("Sat, 15-Apr-17 21:01:22 GMT",true,1492290082);
    assertTimeParse("Thu, 19-Apr-2007 16:00:00 GMT",true,1176998400);
    assertTimeParse("Wed, 25 Apr 2007 21:02:13 GMT",true,1177534933);
    assertTimeParse("Thu, 19/Apr\\2007 16:00:00 GMT",true,1176998400);
    assertTimeParse("Fri, 1 Jan 2010 01:01:50 GMT",true,1262307710);
    assertTimeParse("Wednesday, 1-Jan-2003 00:00:00 GMT",true,1041379200);
    assertTimeParse(", 1-Jan-2003 00:00:00 GMT",true,1041379200);
    assertTimeParse(" 1-Jan-2003 00:00:00 GMT",true,1041379200);
    assertTimeParse("1-Jan-2003 00:00:00 GMT",true,1041379200);
    assertTimeParse("Wed,18-Apr-07 22:50:12 GMT",true,1176936612);
    assertTimeParse("WillyWonka , 18-Apr-07 22:50:12 GMT",true,1176936612);
    assertTimeParse("WillyWonka , 18-Apr-07 22:50:12",true,1176936612);
    assertTimeParse("WillyWonka , 18-apr-07 22:50:12",true,1176936612);
    assertTimeParse("Mon, 18-Apr-1977 22:50:13 GMT",true,230251813);
    assertTimeParse("Mon, 18-Apr-77 22:50:13 GMT",true,230251813);
    assertTimeParse("\"Sat, 15-Apr-17\\\"21:01:22\\\"GMT\"", true, 1492290082);
    assertTimeParse("Partyday, 18- April-07 22:50:12",true,1176936612);
    assertTimeParse("Partyday, 18 - Apri-07 22:50:12",true,1176936612);
    assertTimeParse("Wednes, 1-Januar-2003 00:00:00 GMT",true,1041379200);
    assertTimeParse("Sat, 15-Apr-17 21:01:22",true,1492290082);
    assertTimeParse("Sat, 15-Apr-17 21:01:22 GMT-2",true,1492290082);
    assertTimeParse("Sat, 15-Apr-17 21:01:22 GMT BLAH",true,1492290082);
    assertTimeParse("Sat, 15-Apr-17 21:01:22 GMT-0400",true,1492290082);
    assertTimeParse("Sat, 15-Apr-17 21:01:22 GMT-0400 (EDT)",true,1492290082);
    assertTimeParse("Sat, 15-Apr-17 21:01:22 DST",true,1492290082);
    assertTimeParse("Sat, 15-Apr-17 21:01:22 -0400",true,1492290082);
    assertTimeParse("Sat, 15-Apr-17 21:01:22 (hello there)",true,1492290082);
    assertTimeParse("Sat, 15-Apr-17 21:01:22 11:22:33",true,1492290082);
    assertTimeParse("Sat, 15-Apr-17 ::00 21:01:22",true,1492290082);
    assertTimeParse("Sat, 15-Apr-17 boink:z 21:01:22",true,1492290082);
    assertTimeParse("Sat, 15-Apr-17 91:22:33 21:01:22",false,0);
    assertTimeParse("Thu Apr 18 22:50:12 2007 GMT",true,1176936612);
    assertTimeParse("22:50:12 Thu Apr 18 2007 GMT",true,1176936612);
    assertTimeParse("Thu 22:50:12 Apr 18 2007 GMT",true,1176936612);
    assertTimeParse("Thu Apr 22:50:12 18 2007 GMT",true,1176936612);
    assertTimeParse("Thu Apr 18 22:50:12 2007 GMT",true,1176936612);
    assertTimeParse("Thu Apr 18 2007 22:50:12 GMT",true,1176936612);
    assertTimeParse("Thu Apr 18 2007 GMT 22:50:12",true,1176936612);
    assertTimeParse("Sat, 15-Apr-17 21:01:22 GMT",true,1492290082);
    assertTimeParse("15-Sat, Apr-17 21:01:22 GMT",true,1492290082);
    assertTimeParse("15-Sat, Apr 21:01:22 GMT 17",true,1492290082);
    assertTimeParse("15-Sat, Apr 21:01:22 GMT 2017",true,1492290082);
    assertTimeParse("15 Apr 21:01:22 2017",true,1492290082);
    assertTimeParse("15 17 Apr 21:01:22",true,1492290082);
    assertTimeParse("Apr 15 17 21:01:22",true,1492290082);
    assertTimeParse("Apr 15 21:01:22 17",true,1492290082);
    assertTimeParse("2017 April 15 21:01:22",true,1492290082);
    assertTimeParse("15 April 2017 21:01:22",true,1492290082);
    assertTimeParse("98 April 17 21:01:22",false,0);
    assertTimeParse("Thu, 012-Aug-2008 20:49:07 GMT",false,0);
    assertTimeParse("Thu, 12-Aug-31841 20:49:07 GMT",false,0);
    assertTimeParse("Thu, 12-Aug-9999999999 20:49:07 GMT",false,0);
    assertTimeParse("Thu, 999999999999-Aug-2007 20:49:07 GMT",false,0);
    assertTimeParse("Thu, 12-Aug-2007 20:61:99999999999 GMT",false,0);
    assertTimeParse("IAintNoDateFool",false,0);
  }

  /** Host cookies versus domain cookies, and basic domain enforcement. */
  void DomainTest() throws Exception {
    URL url_google = new URL(kUrlGoogle);
    CookieStore cm = new CookieStore();
    Assert.assertTrue(cm.setCookie(url_google, "A=B"));
    Assert.assertEquals("A=B", cm.GetCookies(url_google));
    Assert.assertTrue(cm.setCookie(url_google, "C=D; domain=.google.izzle"));
    Assert.assertEquals("A=B; C=D", cm.GetCookies(url_google));
    // Verify that A=B was set as a host cookie rather than a domain
    // cookie -- should not be accessible from a sub sub-domain.
    Assert.assertEquals("C=D", cm.GetCookies(new URL("http://foo.www.google.izzle")));
    // Test and make sure we find domain cookies on the same domain.
    Assert.assertTrue(cm.setCookie(url_google, "E=F; domain=.www.google.izzle"));
    Assert.assertEquals("A=B; C=D; E=F", cm.GetCookies(url_google));
    // Test setting a domain= that doesn't start w/ a dot, should
    // treat it as a domain cookie, as if there was a pre-pended dot.
    Assert.assertTrue(cm.setCookie(url_google, "G=H; domain=www.google.izzle"));
    Assert.assertEquals("A=B; C=D; E=F; G=H", cm.GetCookies(url_google));
    // Test domain enforcement, should fail on a sub-domain or something too deep.
    Assert.assertFalse(cm.setCookie(url_google, "I=J; domain=.izzle"));
    Assert.assertEquals("", cm.GetCookies(new URL("http://a.izzle")));
    Assert.assertFalse(cm.setCookie(url_google, "K=L; domain=.bla.www.google.izzle"));
    Assert.assertEquals("C=D; E=F; G=H",
        cm.GetCookies(new URL("http://bla.www.google.izzle")));
    Assert.assertEquals("A=B; C=D; E=F; G=H", cm.GetCookies(url_google));
  }

  // FireFox recognizes domains containing trailing periods as valid.
  // IE and Safari do not. Assert the expected policy here.
  void DomainWithTrailingDotTest() throws Exception {
    CookieStore cm = new CookieStore();
    CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
    URL url_google = new URL("http://www.google.com");
    Assert.assertFalse(cm.setCookie(url_google, "a=1; domain=.www.google.com."));
    Assert.assertFalse(cm.setCookie(url_google, "b=2; domain=.www.google.com.."));
    Assert.assertEquals("", cm.GetCookies(url_google));
  }

  // Test that cookies can be set on higher level domains.
  // http://b/issue?id=896491
  void ValidSubdomainTest() throws Exception {
    CookieStore cm = new CookieStore();
    CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
    URL url_abcd = new URL("http://a.b.c.d.com");
    URL url_bcd = new URL("http://b.c.d.com");
    URL url_cd = new URL("http://c.d.com");
    URL url_d = new URL("http://d.com");
    Assert.assertTrue(cm.setCookie(url_abcd, "a=1; domain=.a.b.c.d.com"));
    Assert.assertTrue(cm.setCookie(url_abcd, "b=2; domain=.b.c.d.com"));
    Assert.assertTrue(cm.setCookie(url_abcd, "c=3; domain=.c.d.com"));
    Assert.assertTrue(cm.setCookie(url_abcd, "d=4; domain=.d.com"));
    Assert.assertEquals("a=1; b=2; c=3; d=4", cm.GetCookies(url_abcd));
    Assert.assertEquals("b=2; c=3; d=4", cm.GetCookies(url_bcd));
    Assert.assertEquals("c=3; d=4", cm.GetCookies(url_cd));
    Assert.assertEquals("d=4", cm.GetCookies(url_d));
    // Check that the same cookie can exist on different sub-domains.
    Assert.assertTrue(cm.setCookie(url_bcd, "X=bcd; domain=.b.c.d.com"));
    Assert.assertTrue(cm.setCookie(url_bcd, "X=cd; domain=.c.d.com"));
    Assert.assertEquals("b=2; c=3; d=4; X=bcd; X=cd", cm.GetCookies(url_bcd));
    Assert.assertEquals("c=3; d=4; X=cd", cm.GetCookies(url_cd));
  }

  // Test that setting a cookie which specifies an invalid domain has
  // no side-effect. An invalid domain in this context is one which does
  // not match the originating domain.
  // http://b/issue?id=896472
  void InvalidDomainTest() throws Exception{
    {
      CookieStore cm = new CookieStore();
      URL url_foobar = new URL("http://foo.bar.com");
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      // More specific sub-domain than allowed.
      Assert.assertFalse(cm.setCookie(url_foobar, "a=1; domain=.yo.foo.bar.com"));
      Assert.assertFalse(cm.setCookie(url_foobar, "b=2; domain=.foo.com"));
      Assert.assertFalse(cm.setCookie(url_foobar, "c=3; domain=.bar.foo.com"));
      // Different TLD, but the rest is a substring.
      Assert.assertFalse(cm.setCookie(url_foobar, "d=4; domain=.foo.bar.com.net"));
      // A substring that isn't really a parent domain.
      Assert.assertFalse(cm.setCookie(url_foobar, "e=5; domain=ar.com"));
      // Completely invalid domains:
      Assert.assertFalse(cm.setCookie(url_foobar, "f=6; domain=."));
      Assert.assertFalse(cm.setCookie(url_foobar, "g=7; domain=/"));
      Assert.assertFalse(cm.setCookie(url_foobar, "h=8; domain=http://foo.bar.com"));
      Assert.assertFalse(cm.setCookie(url_foobar, "i=9; domain=..foo.bar.com"));
      Assert.assertFalse(cm.setCookie(url_foobar, "j=10; domain=..bar.com"));
      // Make sure there isn't something quirky in the domain canonicalization
      // that supports full URL semantics.
      Assert.assertFalse(cm.setCookie(url_foobar, "k=11; domain=.foo.bar.com?blah"));
      Assert.assertFalse(cm.setCookie(url_foobar, "l=12; domain=.foo.bar.com/blah"));
      Assert.assertFalse(cm.setCookie(url_foobar, "m=13; domain=.foo.bar.com:80"));
      Assert.assertFalse(cm.setCookie(url_foobar, "n=14; domain=.foo.bar.com:"));
      Assert.assertFalse(cm.setCookie(url_foobar, "o=15; domain=.foo.bar.com#sup"));
      Assert.assertEquals("", cm.GetCookies(url_foobar));
    }
    {
      // Make sure the cookie code hasn't gotten its subdomain string handling
      // reversed, missed a suffix check, etc. It's important here that the two
      // hosts below have the same domain + registry.
      CookieStore cm = new CookieStore();
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      URL url_foocom = new URL("http://foo.com.com");
      Assert.assertFalse(cm.setCookie(url_foocom, "a=1; domain=.foo.com.com.com"));
      Assert.assertEquals("", cm.GetCookies(url_foocom));
    }
  }

  // Test the behavior of omitting dot prefix from domain, should
  // function the same as FireFox.
  // http://b/issue?id=889898
  void DomainWithoutLeadingDotTest() throws Exception{
    { // The omission of dot results in setting a domain cookie.
      CookieStore cm = new CookieStore();
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      URL url_hosted = new URL("http://manage.hosted.filefront.com");
      URL url_filefront = new URL("http://www.filefront.com");
      Assert.assertTrue(cm.setCookie(url_hosted, "sawAd=1; domain=filefront.com"));
      Assert.assertEquals("sawAd=1", cm.GetCookies(url_hosted));
      Assert.assertEquals("sawAd=1", cm.GetCookies(url_filefront));
    }
    { // Even when the domains match exactly, don't consider it host cookie.
      CookieStore cm = new CookieStore();
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      URL url = new URL("http://www.google.com");
      Assert.assertTrue(cm.setCookie(url, "a=1; domain=www.google.com"));
      Assert.assertEquals("a=1", cm.GetCookies(url));
      Assert.assertEquals("a=1", cm.GetCookies(new URL("http://sub.www.google.com")));
      Assert.assertEquals("", cm.GetCookies(new URL("http://something-else.com")));
    }
  }

  // Test that the domain specified in cookie string is treated case-insensitive
  // http://b/issue?id=896475.
  void CaseInsensitiveDomainTest()throws Exception {
    CookieStore cm = new CookieStore();
    CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
    URL url_google = new URL("http://www.google.com");
    Assert.assertTrue(cm.setCookie(url_google, "a=1; domain=.GOOGLE.COM"));
    Assert.assertTrue(cm.setCookie(url_google, "b=2; domain=.wWw.gOOgLE.coM"));
    Assert.assertEquals("a=1; b=2", cm.GetCookies(url_google));
  }

  /** Cookies on IP-address hosts (not invoked from main at present). */
  void TestIpAddress() throws Exception {
    URL url_ip = new URL("http://1.2.3.4/weee");
    {
      CookieStore cm = new CookieStore();
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      Assert.assertTrue(cm.setCookie(url_ip, kValidCookieLine));
      Assert.assertEquals("A=B", cm.GetCookies(url_ip));
    }
    { // IP addresses should not be able to set domain cookies.
      CookieStore cm = new CookieStore();
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      Assert.assertFalse(cm.setCookie(url_ip, "b=2; domain=.1.2.3.4"));
      Assert.assertFalse(cm.setCookie(url_ip, "c=3; domain=.3.4"));
      Assert.assertEquals("", cm.GetCookies(url_ip));
      // It should be allowed to set a cookie if domain= matches the IP address
      // exactly. This matches IE/Firefox, even though it seems a bit wrong.
      Assert.assertFalse(cm.setCookie(url_ip, "b=2; domain=1.2.3.3"));
      Assert.assertEquals("", cm.GetCookies(url_ip));
      Assert.assertTrue(cm.setCookie(url_ip, "b=2; domain=1.2.3.4"));
      Assert.assertEquals("b=2", cm.GetCookies(url_ip));
    }
  }

  // Test host cookies, and setting of cookies on TLD.
  void TestNonDottedAndTLD() throws Exception{
    {
      CookieStore cm = new CookieStore();
      URL url = new URL("http://com/");
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      // Allow setting on "com", (but only as a host cookie).
      Assert.assertTrue(cm.setCookie(url, "a=1"));
      Assert.assertFalse(cm.setCookie(url, "b=2; domain=.com"));
      Assert.assertFalse(cm.setCookie(url, "c=3; domain=com"));
      Assert.assertEquals("a=1", cm.GetCookies(url));
      // Make sure it doesn't show up for a normal .com, it should be a host
      // not a domain cookie.
      Assert.assertEquals("", cm.GetCookies(new URL("http://hopefully-no-cookies.com/")));
      Assert.assertEquals("", cm.GetCookies(new URL("http://.com/")));
    }
    { // http://com. should be treated the same as http://com.
      CookieStore cm = new CookieStore();
      URL url = new URL("http://com./index.html");
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      Assert.assertTrue(cm.setCookie(url, "a=1"));
      Assert.assertEquals("a=1", cm.GetCookies(url));
      Assert.assertEquals("", cm.GetCookies(new URL("http://hopefully-no-cookies.com./")));
    }
    { // Should not be able to set host cookie from a subdomain.
      CookieStore cm = new CookieStore();
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      URL url = new URL("http://a.b");
      Assert.assertFalse(cm.setCookie(url, "a=1; domain=.b"));
      Assert.assertFalse(cm.setCookie(url, "b=2; domain=b"));
      Assert.assertEquals("", cm.GetCookies(url));
    }
    { // Same test as above, but explicitly on a known TLD (com).
      CookieStore cm = new CookieStore();
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      URL url = new URL("http://google.com");
      Assert.assertFalse(cm.setCookie(url, "a=1; domain=.com"));
      Assert.assertFalse(cm.setCookie(url, "b=2; domain=com"));
      Assert.assertEquals("", cm.GetCookies(url));
    }
    { // Make sure can't set cookie on TLD which is dotted.
      CookieStore cm = new CookieStore();
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      URL url = new URL("http://google.co.uk");
      Assert.assertFalse(cm.setCookie(url, "a=1; domain=.co.uk"));
      Assert.assertFalse(cm.setCookie(url, "b=2; domain=.uk"));
      Assert.assertEquals("", cm.GetCookies(url));
      Assert.assertEquals("", cm.GetCookies(new URL("http://something-else.co.uk")));
      Assert.assertEquals("", cm.GetCookies(new URL("http://something-else.uk")));
    }
    { // Intranet URLs should only be able to set host cookies.
      CookieStore cm = new CookieStore();
      CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
      URL url = new URL("http://b");
      Assert.assertTrue(cm.setCookie(url, "a=1"));
      Assert.assertFalse(cm.setCookie(url, "b=2; domain=.b"));
      Assert.assertFalse(cm.setCookie(url, "c=3; domain=b"));
      Assert.assertEquals("a=1", cm.GetCookies(url));
    }
  }

  // Test reading/writing cookies when the domain ends with a period,
  // as in "www.google.com."
  // (Not invoked from main at present.)
  void TestHostEndsWithDot()throws Exception {
    CookieStore cm = new CookieStore();
    CrawlHostImpl crawlHost = new CrawlHostImpl(null,1234);
    URL url = new URL("http://www.google.com");
    URL url_with_dot = new URL("http://www.google.com.");
    Assert.assertTrue(cm.setCookie(url, "a=1"));
    Assert.assertEquals("a=1", cm.GetCookies(url));
    // Do not share cookie space with the dot version of domain.
    // Note: this is not what FireFox does, but it _is_ what IE+Safari do.
    Assert.assertFalse(cm.setCookie(url, "b=2; domain=.www.google.com."));
    Assert.assertEquals("a=1", cm.GetCookies(url));
    Assert.assertTrue(cm.setCookie(url_with_dot, "b=2; domain=.google.com."));
    Assert.assertEquals("b=2", cm.GetCookies(url_with_dot));
    // Make sure there weren't any side effects.
    Assert.assertEquals("", cm.GetCookies(new URL("http://hopefully-no-cookies.com/")));
    Assert.assertEquals("", cm.GetCookies(new URL("http://.com/")));
  }

  /**
   * Verifies that exceeding the per-host cookie limit purges the least
   * recently touched cookies. Temporarily lowers kNumCookiesPerHost and
   * kNumCookiesPerHostPurge and always restores them, even when an
   * assertion fails.
   */
  static void GarbageCollectorTest() throws Exception {
    CookieStore cm = new CookieStore();
    int oldCookiesPerHost = kNumCookiesPerHost;
    int oldCookiesPerHostPurge = kNumCookiesPerHostPurge;
    kNumCookiesPerHost = 10;
    kNumCookiesPerHostPurge =5;
    try {
      URL url = new URL("http://www.google.com/foo");
      String[] initialCookies = { "a=1", "b=2", "c=3", "d=4", "e=5", "f=6", "g=7", "h=8", "i=9", "j=10" };
      for (String nameValue : initialCookies) {
        cm.setCookie(url, nameValue + "; domain=www.google.com");
      }
      // let the clock advance so the re-touched cookies get a later timestamp
      Thread.sleep(1);
      // touch first guys again ...
      String[] touchedCookies = { "a=1", "b=2", "c=3", "d=4" };
      for (String nameValue : touchedCookies) {
        cm.setCookie(url, nameValue + "; domain=www.google.com");
      }
      // one more cookie pushes the host over the limit
      cm.setCookie(url, "k=11; domain=www.google.com");
      System.out.println(cm.GetCookies(url));
      Assert.assertEquals("a=1; b=2; c=3; d=4; k=11", cm.GetCookies(url));
    }
    finally {
      kNumCookiesPerHost = oldCookiesPerHost;
      kNumCookiesPerHostPurge = oldCookiesPerHostPurge;
    }
  }
}
}
}