package edu.princeton.cs.algs4;
import edu.princeton.cs.algs4.ch13.Queue;
import edu.princeton.cs.introcs.*;
/*************************************************************************
* Compilation: javac TrieSET.java
* Execution: java TrieSET < words.txt
* Dependencies: Queue.java StdIn.java StdOut.java
*
* A set for extended ASCII strings, implemented using a 256-way trie.
*
* Sample client reads in a list of words from standard input and
* prints out each word, removing any duplicates.
*
*************************************************************************/
import java.util.Iterator;
/**
 * The {@code TrieSET} class represents an ordered set of strings over
 * the extended ASCII alphabet.
 * It supports the usual <em>add</em>, <em>contains</em>, and <em>delete</em>
 * methods. It also provides character-based methods for finding the string
 * in the set that is the <em>longest prefix</em> of a given string,
 * finding all strings in the set that <em>start with</em> a given prefix,
 * and finding all strings in the set that <em>match</em> a given pattern.
 * <p>
 * This implementation uses a 256-way trie.
 * The <em>add</em>, <em>contains</em>, <em>delete</em>, and
 * <em>longest prefix</em> methods take time proportional to the length
 * of the key (in the worst case). Construction takes constant time.
 * <p>
 * Only characters in the range 0..255 can be stored. {@link #add(String)}
 * rejects any other character; the query and delete methods simply treat a
 * string containing such a character as absent from the set.
 * <p>
 * For additional documentation, see
 * <a href="http://algs4.cs.princeton.edu/52trie">Section 5.2</a> of
 * <i>Algorithms in Java, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
 *
 * @author Robert Sedgewick
 * @author Kevin Wayne
 */
public class TrieSET implements Iterable<String> {
    private static final int R = 256;   // extended ASCII

    private Node root;                  // root of trie (null while the set is empty)
    private int n;                      // number of keys in trie

    // R-way trie node: one child slot per character, plus an end-of-key marker
    private static class Node {
        private final Node[] next = new Node[R];
        private boolean isString;
    }

    /**
     * Initializes an empty set of strings.
     */
    public TrieSET() {
    }

    // Rejects null arguments up front so that the documented
    // NullPointerException is thrown even when the trie is empty
    // (the traversal helpers return early on a null root and would
    // otherwise never look at the argument).
    private static void checkNotNull(String s, String name) {
        if (s == null) {
            throw new NullPointerException(name + " is null");
        }
    }

    /**
     * Does the set contain the given key?
     * @param key the key
     * @return {@code true} if the set contains {@code key} and
     *         {@code false} otherwise
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public boolean contains(String key) {
        checkNotNull(key, "key");
        Node x = get(root, key, 0);
        return x != null && x.isString;
    }

    // Returns the node reached from x by following key[d..], or null if the
    // path does not exist (including when key holds a character >= R, which
    // can never be stored in this trie).
    private Node get(Node x, String key, int d) {
        while (x != null && d < key.length()) {
            char c = key.charAt(d);
            if (c >= R) return null;
            x = x.next[c];
            d++;
        }
        return x;
    }

    /**
     * Adds the key to the set if it is not already present.
     * @param key the key to add
     * @throws NullPointerException if {@code key} is {@code null}
     * @throws IllegalArgumentException if {@code key} contains a character
     *         outside the extended ASCII range (0..255)
     */
    public void add(String key) {
        checkNotNull(key, "key");
        // validate before touching the trie so a bad key never allocates nodes
        for (int i = 0; i < key.length(); i++) {
            if (key.charAt(i) >= R) {
                throw new IllegalArgumentException(
                    "key contains a character outside extended ASCII at index " + i);
            }
        }
        root = add(root, key, 0);
    }

    // Inserts key[d..] below x, creating nodes as needed, and returns the
    // (possibly new) subtrie root. The key count grows only when the final
    // node was not already marked as a key.
    private Node add(Node x, String key, int d) {
        if (x == null) x = new Node();
        if (d == key.length()) {
            if (!x.isString) n++;
            x.isString = true;
        }
        else {
            char c = key.charAt(d);
            x.next[c] = add(x.next[c], key, d + 1);
        }
        return x;
    }

    /**
     * Returns the number of strings in the set.
     * @return the number of strings in the set
     */
    public int size() {
        return n;
    }

    /**
     * Is the set empty?
     * @return {@code true} if the set is empty, and {@code false} otherwise
     */
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     * Returns all of the keys in the set, as an iterator.
     * To iterate over all of the keys in a set named {@code set}, use the
     * foreach notation: {@code for (String key : set)}.
     * @return an iterator to all of the keys in the set
     */
    @Override
    public Iterator<String> iterator() {
        return keysWithPrefix("").iterator();
    }

    /**
     * Returns all of the keys in the set that start with {@code prefix}.
     * @param prefix the prefix
     * @return all of the keys in the set that start with {@code prefix},
     *         as an iterable
     * @throws NullPointerException if {@code prefix} is {@code null}
     */
    public Iterable<String> keysWithPrefix(String prefix) {
        checkNotNull(prefix, "prefix");
        Queue<String> results = new Queue<String>();
        Node x = get(root, prefix, 0);
        collect(x, new StringBuilder(prefix), results);
        return results;
    }

    // Enqueues every key in the subtrie rooted at x; prefix holds the
    // characters on the path from the trie root to x and is restored
    // to its incoming contents before returning.
    private void collect(Node x, StringBuilder prefix, Queue<String> results) {
        if (x == null) return;
        if (x.isString) results.enqueue(prefix.toString());
        for (char c = 0; c < R; c++) {
            Node child = x.next[c];
            if (child == null) continue;    // nothing below: skip the append/undo
            prefix.append(c);
            collect(child, prefix, results);
            prefix.deleteCharAt(prefix.length() - 1);
        }
    }

    /**
     * Returns all of the keys in the set that match {@code pattern},
     * where . symbol is treated as a wildcard character.
     * @param pattern the pattern
     * @return all of the keys in the set that match {@code pattern},
     *         as an iterable, where . is treated as a wildcard character.
     * @throws NullPointerException if {@code pattern} is {@code null}
     */
    public Iterable<String> keysThatMatch(String pattern) {
        checkNotNull(pattern, "pattern");
        Queue<String> results = new Queue<String>();
        StringBuilder prefix = new StringBuilder();
        collect(root, prefix, pattern, results);
        return results;
    }

    // Enqueues every key of length pattern.length() below x that matches
    // pattern; prefix.length() is the current depth in the trie.
    private void collect(Node x, StringBuilder prefix, String pattern, Queue<String> results) {
        if (x == null) return;
        int d = prefix.length();
        if (d == pattern.length()) {
            if (x.isString) results.enqueue(prefix.toString());
            return;
        }
        char c = pattern.charAt(d);
        if (c == '.') {
            // wildcard: try every existing child
            for (char ch = 0; ch < R; ch++) {
                Node child = x.next[ch];
                if (child == null) continue;
                prefix.append(ch);
                collect(child, prefix, pattern, results);
                prefix.deleteCharAt(prefix.length() - 1);
            }
        }
        else if (c < R) {
            prefix.append(c);
            collect(x.next[c], prefix, pattern, results);
            prefix.deleteCharAt(prefix.length() - 1);
        }
        // otherwise c cannot occur in any stored key, so nothing matches
    }

    /**
     * Returns the string in the set that is the longest prefix of {@code query},
     * or {@code null}, if no such string.
     * @param query the query string
     * @throws NullPointerException if {@code query} is {@code null}
     * @return the string in the set that is the longest prefix of {@code query},
     *         or {@code null} if no such string
     */
    public String longestPrefixOf(String query) {
        checkNotNull(query, "query");
        int length = longestPrefixOf(root, query, 0, -1);
        if (length == -1) return null;
        return query.substring(0, length);
    }

    // returns the length of the longest string key in the subtrie
    // rooted at x that is a prefix of the query string,
    // assuming the first d characters match and we have already
    // found a prefix match of length length
    private int longestPrefixOf(Node x, String query, int d, int length) {
        while (x != null) {
            if (x.isString) length = d;
            if (d == query.length()) break;
            char c = query.charAt(d);
            if (c >= R) break;              // no stored key continues past this character
            x = x.next[c];
            d++;
        }
        return length;
    }

    /**
     * Removes the key from the set if the key is present.
     * @param key the key
     * @throws NullPointerException if {@code key} is {@code null}
     */
    public void delete(String key) {
        checkNotNull(key, "key");
        root = delete(root, key, 0);
    }

    // Removes key[d..] from the subtrie rooted at x and returns the new
    // subtrie root, or null if the subtrie no longer holds any key.
    private Node delete(Node x, String key, int d) {
        if (x == null) return null;
        if (d == key.length()) {
            if (x.isString) n--;
            x.isString = false;
        }
        else {
            char c = key.charAt(d);
            if (c >= R) return x;           // key cannot be present; leave subtrie untouched
            x.next[c] = delete(x.next[c], key, d + 1);
        }

        // remove subtrie rooted at x if it is completely empty
        if (x.isString) return x;
        for (int c = 0; c < R; c++) {
            if (x.next[c] != null) {
                return x;
            }
        }
        return null;
    }

    /**
     * Unit tests the {@code TrieSET} data type.
     * Reads whitespace-delimited keys from standard input, adds them to a
     * set, and prints the results of several queries.
     */
    public static void main(String[] args) {
        TrieSET set = new TrieSET();
        while (!StdIn.isEmpty()) {
            String key = StdIn.readString();
            set.add(key);
        }

        // print results
        if (set.size() < 100) {
            StdOut.println("keys(\"\"):");
            for (String key : set) {
                StdOut.println(key);
            }
            StdOut.println();
        }

        StdOut.println("longestPrefixOf(\"shellsort\"):");
        StdOut.println(set.longestPrefixOf("shellsort"));
        StdOut.println();

        StdOut.println("longestPrefixOf(\"xshellsort\"):");
        StdOut.println(set.longestPrefixOf("xshellsort"));
        StdOut.println();

        StdOut.println("keysWithPrefix(\"shor\"):");
        for (String s : set.keysWithPrefix("shor")) {
            StdOut.println(s);
        }
        StdOut.println();

        StdOut.println("keysWithPrefix(\"shortening\"):");
        for (String s : set.keysWithPrefix("shortening")) {
            StdOut.println(s);
        }
        StdOut.println();

        StdOut.println("keysThatMatch(\".he.l.\"):");
        for (String s : set.keysThatMatch(".he.l.")) {
            StdOut.println(s);
        }
    }
}
/*************************************************************************
* Copyright 2002-2012, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4-package.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4-package.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4-package.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with algs4-package.jar. If not, see http://www.gnu.org/licenses.
*************************************************************************/