/**
*
*/
package ecologylab.collections;
import java.util.ArrayList;
import ecologylab.net.ParsedURL;
/**
* An optimized data structure for managing a hierarchical collection of prefixes, automatically
* merging and removing entries, and providing a fast matching function.
*
* @author andruid
*/
public class PrefixCollection extends PrefixPhrase
{
	/** Character that delimits successive phrases inside a prefix string. */
	final char	separator;

	/**
	 * Selects which string form of a URL {@link #add(ParsedURL)} hands to the host entry:
	 * true passes purl.toString() (intended to include the file portion of the path);
	 * false passes purl.pathDirectoryString() (host and directory portions only).
	 */
	boolean			usePathFile;

	/**
	 * Construct a PrefixCollection that uses '/' as the separator and does not use the
	 * file portion of the path when creating entries.
	 */
	public PrefixCollection()
	{
		this(false);
	}

	/**
	 * Construct a PrefixCollection that uses '/' as the separator to split prefixes
	 * into phrases.
	 *
	 * @param usePathFile	true to use the file portion of the path when creating entries;
	 * 						false to use the host and directory portions only.
	 */
	public PrefixCollection(boolean usePathFile)
	{
		this('/', usePathFile);
	}

	/**
	 * Construct a PrefixCollection that splits prefixes with the given separator and
	 * does not use the file portion of the path when creating entries.
	 *
	 * @param separator		character used to split each prefix into phrases.
	 */
	public PrefixCollection(char separator)
	{
		this(separator, false);
	}

	/**
	 * Construct a PrefixCollection in which each prefix can be parsed into PrefixPhrases,
	 * using the separator to split the phrases. This is the constructor that every other
	 * constructor of this class ends up delegating to.
	 *
	 * @param separator		character used to split each prefix into phrases.
	 * @param usePathFile	true to use the file portion of the path when creating entries;
	 * 						false to use the host and directory portions only.
	 */
	public PrefixCollection(char separator, boolean usePathFile)
	{
		// this object is the root of the hierarchy, so both superclass arguments are null
		super(null, null);
		this.separator		= separator;
		this.usePathFile	= usePathFile;
	}

	/**
	 * @return	the separator character this collection was constructed with.
	 */
	public char separator()
	{
		return separator;
	}

	/**
	 * Add an entry for the URL: obtain the entry for its host directly below this root,
	 * then add the URL's path string beneath that host entry.
	 *
	 * @param purl	the URL to add; its host comes from purl.url().getHost().
	 * @return		the result of adding the path string to the host entry when getPrefix()
	 * 				supplied one; otherwise whatever lookupChild() finds for the host.
	 */
	public PrefixPhrase add(ParsedURL purl)
	{
		String host				= purl.url().getHost();
		// the host entry is a child of this, the root (with no parent)
		PrefixPhrase hostEntry	= getPrefix(null, host);

		// the string whose phrases become children of the host entry
		String toParse;
		if (usePathFile)
			toParse				= purl.toString();
		else
			toParse				= purl.pathDirectoryString();

		// NOTE(review): getPrefix() is defined outside this file; presumably null means there
		// is nothing further to add below the host -- confirm against PrefixPhrase.
		if (hostEntry == null)
			return lookupChild(host);

		return hostEntry.add(toParse, separator);
	}

	/**
	 * Find the stored prefix that matches the URL, searching below the entry for its host.
	 *
	 * @param purl	the URL to look up.
	 * @return		null when there is no entry for the URL's host; otherwise the result of
	 * 				asking the host entry for the prefix matching purl.path().
	 */
	public PrefixPhrase getMatchingPrefix(ParsedURL purl)
	{
		String host				= purl.url().getHost();
		// the host entry is a child of this, the root (with no parent)
		PrefixPhrase hostEntry	= lookupChild(host);
		String urlPath			= purl.path();

		if (hostEntry == null)
			return null;

		// start at index 1, to skip over the starting '/'
		return hostEntry.getMatchingPrefix(urlPath, 1, separator);
	}

	/**
	 * Test whether the URL is matched by an entry in this collection.
	 * <p/>
	 * NOTE(review): this always matches against purl.pathDirectoryString(), even when
	 * usePathFile is true, whereas add() switches on usePathFile -- confirm that is intended.
	 *
	 * @param purl	the URL to test.
	 * @return		false when there is no entry for the URL's host; otherwise the result of
	 * 				matching the URL's directory string against the host entry.
	 */
	public boolean match(ParsedURL purl)
	{
		String host				= purl.url().getHost();
		// the host entry is a child of this, the root (with no parent)
		PrefixPhrase hostEntry	= lookupChild(host);

		// the directory string is only computed when a host entry exists
		return hostEntry != null && hostEntry.match(purl.pathDirectoryString(), separator);
	}

	/**
	 * Convenience form of values(char) that supplies this collection's own separator.
	 *
	 * @return	whatever values(separator) produces; that overload is defined outside this class.
	 */
	public ArrayList<String> values()
	{
		return values(separator);
	}

	/**
	 * URLs that main() adds to the collection. The commented-out entries are alternative
	 * inputs that are currently disabled.
	 */
	static final ParsedURL[]	TEST_ADD	=
	{
//		ParsedURL.getAbsolute("http://nytimes.com"),
//		ParsedURL.getAbsolute("http://www.nytimes.com/2008"),
//		ParsedURL.getAbsolute("http://nytimes.com/pages/sports/foo/bar/baz/bloch"),
//		ParsedURL.getAbsolute("http://nytimes.com/pages/sports/"),
//		ParsedURL.getAbsolute("http://nytimes.com/pages/sports/foo/"),
//		ParsedURL.getAbsolute("http://nytimes.com/pages/sports/foo/bar/baz/bloch"),
//		ParsedURL.getAbsolute("http://nytimes.com/pages/arts/interactive"),
//		ParsedURL.getAbsolute("http://nytimes.com/pages/sports/foo"),
//		ParsedURL.getAbsolute("http://nytimes.com/pages/arts"),
//		ParsedURL.getAbsolute("http://www.nytimes.com/2008/01/26/sports/football/26giants.html?ref=sports"),
		ParsedURL.getAbsolute("http://www.amazon.com/*/lm"),
	};

	/**
	 * URLs that main() tests against the collection once TEST_ADD has been added. The
	 * commented-out entries are alternative inputs that are currently disabled.
	 */
	static final ParsedURL[]	TEST_MATCH	=
	{
//		ParsedURL.getAbsolute("http://nytimes.com/pages/sports/hoops"),
//		ParsedURL.getAbsolute("http://nytimes.com/pages/"),
//		ParsedURL.getAbsolute("http://nytimes.com/pages/sports/"),
//		ParsedURL.getAbsolute("http://nytimes.mom/"),
//		ParsedURL.getAbsolute("http://nytimes.com/pages/arts/interactive"),
//		ParsedURL.getAbsolute("http://nytimes.com/"),
//		ParsedURL.getAbsolute("http://nytimes.com/pages/arts/"),
//		ParsedURL.getAbsolute("http://www.nytimes.com/2008/01/26/sports/baseball"),
		ParsedURL.getAbsolute("http://www.amazon.com/Cook-Books-amp-more/lm/R1PADW7FZALCHA"),
	};

	/**
	 * Manual test driver: adds every TEST_ADD URL and prints what each add returned, prints
	 * the match result for every TEST_MATCH URL, then prints every value in the collection.
	 *
	 * @param s		command line arguments; not used.
	 */
	public static void main(String[] s)
	{
		char delimiter				= '/';
		PrefixCollection collection	= new PrefixCollection(delimiter);

		// one builder, emptied before each use
		StringBuilder scratch		= new StringBuilder(32);
		for (ParsedURL toAdd : TEST_ADD)
		{
			// NOTE(review): the result of add() is dereferenced without a null check
			PrefixPhrase added		= collection.add(toAdd);
			scratch.setLength(0);
			added.toStringBuilder(scratch, delimiter);
			println(scratch);
		}

		println("\n");
		for (ParsedURL candidate : TEST_MATCH)
		{
			println(candidate.toString() + "\t" + collection.match(candidate));
		}

		println("\n");
		for (String phrase : collection.values())
		{
			println(phrase);
		}
	}
}