package com.miguelfonseca.completely.text.index;
import com.miguelfonseca.completely.common.Strings;
import com.miguelfonseca.completely.data.ScoredObject;
import com.miguelfonseca.completely.text.match.Automaton;
import com.miguelfonseca.completely.text.match.EqualityAutomaton;
import com.miguelfonseca.completely.util.ArraySet;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import static com.miguelfonseca.completely.common.Precondition.checkPointer;
/**
* Trie based implementation of the {@link FuzzyIndex} interface.
*
* <p>Note that this implementation is not synchronized.
*/
public class PatriciaTrie<V> extends AbstractIndex<V> implements FuzzyIndex<V>
{
// Topmost node of the trie; clear() swaps it for a fresh instance.
private Node root;

/**
 * Creates an empty {@link PatriciaTrie} consisting of a single root node.
 */
public PatriciaTrie()
{
    this.root = new Node();
}
/**
 * Drops every stored key and value by replacing the root with a new, empty node.
 */
@Override
public void clear()
{
    this.root = new Node();
}
/**
 * Looks up the node reached by {@code key} and returns a copy of its values.
 *
 * @param key the key to look up; validated with {@code checkPointer}
 * @return a new {@link HashSet} holding the values stored at the key, empty when no
 *         node is found for it
 */
@Override
public Set<V> getAll(String key)
{
    checkPointer(key != null);
    Node located = find(root, key);
    if (located == null)
    {
        return new HashSet<>();
    }
    // Copy so that callers never hold the node's internal set.
    return new HashSet<>(located.values());
}
/**
 * Collects the scored values of every subtree matched by an {@link EqualityAutomaton}
 * built from {@code fragment}.
 *
 * @param fragment the text handed to the automaton; validated with {@code checkPointer}
 * @return a new set with the scored values of all matches, possibly empty
 */
@Override
public Set<ScoredObject<V>> getAny(String fragment)
{
    checkPointer(fragment != null);
    Set<ScoredObject<V>> scored = new HashSet<>();
    Collection<FuzzyMatch> hits = findAll(root, new EqualityAutomaton(fragment), "");
    for (FuzzyMatch hit : hits)
    {
        Set<ScoredObject<V>> subtree = values(hit.getNode(), hit.getMatcher());
        scored.addAll(subtree);
    }
    return scored;
}
/**
 * Collects the scored values of every subtree matched by the supplied automaton,
 * starting the search at the root with an empty word.
 *
 * @param matcher the automaton driving the search; validated with {@code checkPointer}
 * @return a new set with the scored values of all matches, possibly empty
 */
@Override
public Set<ScoredObject<V>> getAny(Automaton matcher)
{
    checkPointer(matcher != null);
    Set<ScoredObject<V>> scored = new HashSet<>();
    Collection<FuzzyMatch> hits = findAll(root, matcher, "");
    for (FuzzyMatch hit : hits)
    {
        Set<ScoredObject<V>> subtree = values(hit.getNode(), hit.getMatcher());
        scored.addAll(subtree);
    }
    return scored;
}
/**
 * Reports whether the root node has neither children nor values.
 *
 * @return {@code true} when the root node is empty
 */
@Override
public boolean isEmpty()
{
    return this.root.isEmpty();
}
/**
 * Associates all of {@code values} with {@code key}, creating or splitting trie edges
 * as required.
 *
 * <p>An empty {@code values} collection is a no-op: the trie is left untouched so that
 * no value-less node is created for the key.
 *
 * @param key    the key to store the values under; validated with {@code checkPointer}
 * @param values the values to add; validated with {@code checkPointer}
 * @return the result of adding the values to the key's node, {@code false} when
 *         {@code values} is empty
 */
@Override
public boolean putAll(String key, Collection<V> values)
{
    checkPointer(key != null);
    checkPointer(values != null);
    // Descending with nothing to store would still create (or bisect) nodes, leaving an
    // empty leaf behind that makes isEmpty() report false while size() stays at zero.
    if (values.isEmpty())
    {
        return false;
    }
    return putAll(root, key, values);
}
/**
 * Removes the given values from the whole trie, starting at the root.
 *
 * @param values the values to remove; validated with {@code checkPointer}
 * @return {@code true} when the recursive removal reported a change
 */
@Override
public boolean removeAll(Collection<V> values)
{
    checkPointer(values != null);
    boolean changed = removeAll(this.root, values);
    return changed;
}
/**
 * Removes every value stored under {@code key}.
 *
 * @param key the key whose values are removed; validated with {@code checkPointer}
 * @return the values that were removed, empty when the key is not found
 */
@Override
public Set<V> removeAll(String key)
{
    checkPointer(key != null);
    Set<V> removed = removeAll(this.root, key);
    return removed;
}
/**
 * Removes the given values from the node stored under {@code key}.
 *
 * @param key    the key to remove from; validated with {@code checkPointer}
 * @param values the values to remove; validated with {@code checkPointer}
 * @return {@code true} when the removal reported a change, {@code false} otherwise
 *         (including when the key is not found)
 */
@Override
public boolean removeAll(String key, Collection<V> values)
{
    checkPointer(key != null);
    checkPointer(values != null);
    boolean changed = removeAll(this.root, key, values);
    return changed;
}
/**
 * Counts the values held by all nodes of the trie.
 *
 * @return the sum of the value-set sizes over every node reachable from the root
 */
@Override
public int size()
{
    return size(this.root);
}
/**
 * Descends from {@code node} along edges that are entirely covered by the remaining
 * key until the key is used up.
 *
 * @param node the node to start from
 * @param key  the key still to be consumed
 * @return the node reached once the key is exhausted, or {@code null} when at some
 *         level no child edge is fully matched by the remaining key
 */
private Node find(Node node, String key)
{
    assert node != null;
    assert key != null;
    Node current = node;
    String remaining = key;
    while (!remaining.isEmpty())
    {
        Node next = null;
        boolean descended = false;
        for (Entry<String, Node> entry : current.childEntries())
        {
            String edge = entry.getKey();
            int shared = Strings.getCommonPrefixLength(edge, remaining);
            // The whole edge is a prefix of what is left: follow it.
            if (shared >= edge.length())
            {
                next = entry.getValue();
                remaining = remaining.substring(shared);
                descended = true;
                break;
            }
        }
        if (!descended)
        {
            return null;
        }
        assert next != null;
        current = next;
    }
    return current;
}
/**
 * Collects the topmost nodes below {@code node} at which {@code matcher} accepts the
 * word spelled out by the path taken so far.
 *
 * @param node    the node currently being visited
 * @param matcher the automaton state after being stepped along the path to {@code node}
 * @param word    the concatenation of the edge labels leading to {@code node}
 * @return the matches found in this subtree; a single match when the matcher already
 *         accepts, an empty list when it has rejected the word
 */
private Collection<FuzzyMatch> findAll(Node node, Automaton matcher, String word)
{
    assert node != null;
    assert matcher != null;
    assert word != null;
    if (matcher.isWordAccepted())
    {
        // The matcher may have accepted part-way through the last edge; feed it the
        // remainder of the path so that its word lines up with this node.
        Automaton resumed = matcher;
        int consumed = matcher.getWord().length();
        if (consumed < word.length())
        {
            resumed = matcher.step(word.substring(consumed));
        }
        // singletonList avoids the varargs array that Arrays.asList would allocate.
        return Collections.singletonList(new FuzzyMatch(node, resumed));
    }
    if (matcher.isWordRejected())
    {
        return Collections.emptyList();
    }
    // Neither accepted nor rejected yet: keep exploring every outgoing edge.
    List<FuzzyMatch> result = new LinkedList<>();
    for (Entry<String, Node> entry : node.childEntries())
    {
        String edge = entry.getKey();
        Node child = entry.getValue();
        result.addAll(findAll(child, matcher.stepUntilWordAccepted(edge), word + edge));
    }
    return result;
}
/**
 * Inserts {@code values} below {@code node} under the remaining {@code key}.
 *
 * <p>At each level one of three things happens: an edge fully covered by the key is
 * followed, an edge sharing only a prefix with the key is bisected at that prefix, or
 * a new edge labelled with the whole remaining key is added.
 *
 * @param node   the node to insert below
 * @param key    the part of the key not yet consumed
 * @param values the values to add at the final node
 * @return the result of adding the values to the final node
 */
private boolean putAll(Node node, String key, Collection<V> values)
{
    assert node != null;
    assert key != null;
    assert values != null;
    if (key.isEmpty())
    {
        return node.addAllValues(values);
    }

    Node target = null;
    int consumed = 0;
    for (Entry<String, Node> entry : node.childEntries())
    {
        String edge = entry.getKey();
        int shared = Strings.getCommonPrefixLength(edge, key);
        if (shared >= edge.length())
        {
            // The edge is a prefix of the key: descend into the existing child.
            target = entry.getValue();
            consumed = shared;
            break;
        }
        if (shared > 0)
        {
            // Only part of the edge matches: split it and continue from the new
            // intermediate node. The loop ends right after, so the structural change
            // does not disturb the iteration.
            target = node.bisect(edge, shared);
            consumed = shared;
            break;
        }
    }

    if (target == null)
    {
        // No edge shares anything with the key: hang the whole remainder off this node.
        target = new Node();
        consumed = key.length();
        node.putChild(key, target);
    }
    return putAll(target, key.substring(consumed), values);
}
/**
 * Removes {@code values} from {@code node} and from every node below it, pruning
 * children that end up empty and squashing children that end up unary.
 *
 * @param node   the root of the subtree to clean
 * @param values the values to remove
 * @return {@code true} when any node in the subtree reported a change
 */
private boolean removeAll(Node node, Collection<V> values)
{
    assert node != null;
    assert values != null;
    boolean changed = node.removeAllValues(values);
    // Squashing rewrites the child map, so it is deferred until iteration is over.
    List<String> squashable = new LinkedList<>();
    for (Iterator<Entry<String, Node>> cursor = node.childEntries().iterator(); cursor.hasNext();)
    {
        Entry<String, Node> entry = cursor.next();
        String edge = entry.getKey();
        Node child = entry.getValue();
        // Non-short-circuit OR: the recursive call must always run.
        changed |= removeAll(child, values);
        if (child.isEmpty())
        {
            cursor.remove();
        }
        else if (child.isUnary())
        {
            squashable.add(edge);
        }
    }
    for (String edge : squashable)
    {
        node.squash(edge);
    }
    return changed;
}
/**
 * Removes and returns all values stored under {@code key} below {@code node}, then
 * prunes or squashes the child that was descended into.
 *
 * @param node the node to search below
 * @param key  the part of the key not yet consumed
 * @return the removed values, or an empty set when no edge fully matches the key
 */
private Set<V> removeAll(Node node, String key)
{
    assert node != null;
    assert key != null;
    if (key.isEmpty())
    {
        return node.removeAllValues();
    }
    for (Entry<String, Node> entry : node.childEntries())
    {
        String edge = entry.getKey();
        int shared = Strings.getCommonPrefixLength(edge, key);
        if (shared < edge.length())
        {
            // Edge is not fully covered by the key; try the next one.
            continue;
        }
        Node child = entry.getValue();
        Set<V> removed = removeAll(child, key.substring(shared));
        // Restructure only after the recursion; returning right away keeps the
        // modified child map from being iterated any further.
        if (child.isEmpty())
        {
            node.removeChild(edge);
        }
        else if (child.isUnary())
        {
            node.squash(edge);
        }
        return removed;
    }
    return Collections.emptySet();
}
/**
 * Removes {@code values} from the node stored under {@code key} below {@code node},
 * then prunes or squashes the child that was descended into.
 *
 * @param node   the node to search below
 * @param key    the part of the key not yet consumed
 * @param values the values to remove from the final node
 * @return the result of the removal at the final node, or {@code false} when no edge
 *         fully matches the key
 */
private boolean removeAll(Node node, String key, Collection<V> values)
{
    assert node != null;
    assert key != null;
    assert values != null;
    if (key.isEmpty())
    {
        return node.removeAllValues(values);
    }
    for (Entry<String, Node> entry : node.childEntries())
    {
        String edge = entry.getKey();
        int shared = Strings.getCommonPrefixLength(edge, key);
        if (shared < edge.length())
        {
            // Edge is not fully covered by the key; try the next one.
            continue;
        }
        Node child = entry.getValue();
        boolean changed = removeAll(child, key.substring(shared), values);
        // Restructure only after the recursion; returning right away keeps the
        // modified child map from being iterated any further.
        if (child.isEmpty())
        {
            node.removeChild(edge);
        }
        else if (child.isUnary())
        {
            node.squash(edge);
        }
        return changed;
    }
    return false;
}
/**
 * Counts the values stored at {@code node} and at every node below it.
 *
 * @param node the root of the subtree to count
 * @return the total number of values in the subtree
 */
private int size(Node node)
{
    assert node != null;
    int descendants = 0;
    for (Node child : node.childNodes())
    {
        descendants += size(child);
    }
    return node.values().size() + descendants;
}
/**
 * Gathers every value in the subtree rooted at {@code node}, pairing each with the
 * score of the matcher state that applies at the node holding it.
 *
 * @param node    the root of the subtree to gather from
 * @param matcher the automaton state corresponding to {@code node}; it is stepped by
 *                each edge label before descending into the respective child
 * @return a new set of scored values for the whole subtree
 */
private Set<ScoredObject<V>> values(Node node, Automaton matcher)
{
    assert node != null;
    assert matcher != null;
    Set<ScoredObject<V>> collected = new HashSet<>();
    for (V value : node.values())
    {
        ScoredObject<V> scored = new ScoredObject<>(value, matcher.getScore());
        collected.add(scored);
    }
    for (Entry<String, Node> entry : node.childEntries())
    {
        Automaton advanced = matcher.step(entry.getKey());
        collected.addAll(values(entry.getValue(), advanced));
    }
    return collected;
}
/**
 * A trie node: a lazily allocated map of labelled edges to child nodes plus a lazily
 * allocated set of values stored at this node.
 */
private class Node
{
    // Outgoing edges keyed by their label; null until the first putChild.
    private Map<String, Node> children;
    // Values stored at this node; null until the first addAllValues.
    private Set<V> values;

    /**
     * Adds the given values to this node, allocating the value set on first use.
     *
     * @param values the values to add
     * @return the result of {@code addAll} on the underlying set
     */
    @SuppressWarnings("checkstyle:hiddenfield")
    boolean addAllValues(Collection<V> values)
    {
        assert values != null;
        if (this.values == null)
        {
            this.values = new ArraySet<>();
        }
        return this.values.addAll(values);
    }

    /**
     * Splits the edge labelled {@code key} at index {@code pivot}. A new intermediate
     * node is attached under the prefix, and the former child is re-attached beneath it
     * under the suffix.
     *
     * @param key   the label of the edge to split
     * @param pivot the index at which the label is cut
     * @return the new intermediate node
     */
    Node bisect(String key, int pivot)
    {
        assert key != null;
        assert pivot >= 0;
        String prefix = key.substring(0, pivot);
        String suffix = key.substring(pivot);
        Node child = new Node();
        child.putChild(suffix, removeChild(key));
        putChild(prefix, child);
        return child;
    }

    /**
     * Returns the live entry view of the child map, or an empty list when no child was
     * ever added.
     */
    Collection<Entry<String, Node>> childEntries()
    {
        if (children == null)
        {
            return Collections.emptyList();
        }
        return children.entrySet();
    }

    /**
     * Returns the live view of the child nodes, or an empty list when no child was ever
     * added.
     */
    Collection<Node> childNodes()
    {
        if (children == null)
        {
            return Collections.emptyList();
        }
        return children.values();
    }

    /**
     * Returns {@code true} when this node has neither children nor values.
     */
    boolean isEmpty()
    {
        return (children == null || children.isEmpty()) && (values == null || values.isEmpty());
    }

    /**
     * Returns {@code true} when this node has no values and exactly one child.
     */
    boolean isUnary()
    {
        return (values == null || values.isEmpty()) && children != null && children.size() == 1;
    }

    /**
     * Attaches {@code value} under the edge labelled {@code key}, allocating the child
     * map on first use.
     *
     * @return the node previously attached under that label, or {@code null}
     */
    Node putChild(String key, Node value)
    {
        assert key != null;
        assert value != null;
        if (children == null)
        {
            children = new HashMap<>(4);
        }
        return children.put(key, value);
    }

    /**
     * Detaches and returns this node's value set, leaving the node without values.
     *
     * @return the former value set, or a new empty set when none was allocated
     */
    Set<V> removeAllValues()
    {
        if (values == null)
        {
            return new ArraySet<>();
        }
        Set<V> result = values;
        values = null;
        return result;
    }

    /**
     * Removes the given values from this node.
     *
     * @param values the values to remove
     * @return the result of {@code removeAll} on the underlying set, or {@code false}
     *         when no value set was allocated
     */
    @SuppressWarnings("checkstyle:hiddenfield")
    boolean removeAllValues(Collection<V> values)
    {
        assert values != null;
        if (this.values == null)
        {
            return false;
        }
        return this.values.removeAll(values);
    }

    /**
     * Detaches the child under the edge labelled {@code key}.
     *
     * @return the detached child, or {@code null} when there is no such edge
     */
    Node removeChild(String key)
    {
        assert key != null;
        if (children == null)
        {
            return null;
        }
        return children.remove(key);
    }

    /**
     * Removes the child under the edge labelled {@code key} and re-attaches each of its
     * children directly to this node, under the concatenation of {@code key} and the
     * grandchild's own label.
     *
     * @param key the label of the edge to collapse
     * @return the removed child (now without children), or {@code null} when there is
     *         no such edge
     */
    Node squash(String key)
    {
        assert key != null;
        Node child = removeChild(key);
        if (child == null)
        {
            // Nothing hangs under this label, so there is nothing to collapse.
            return null;
        }
        // Detach through the iterator: removing entries from the child's map directly
        // while iterating it would throw ConcurrentModificationException as soon as the
        // child has more than one entry.
        Iterator<Entry<String, Node>> iterator = child.childEntries().iterator();
        while (iterator.hasNext())
        {
            Entry<String, Node> entry = iterator.next();
            putChild(key.concat(entry.getKey()), entry.getValue());
            iterator.remove();
        }
        return child;
    }

    /**
     * Returns this node's internal value set, or an immutable empty set when none was
     * allocated.
     */
    Set<V> values()
    {
        if (values == null)
        {
            return Collections.emptySet();
        }
        return values;
    }
}
/**
 * Immutable pairing of a matched node with the automaton state that applies to it.
 */
private class FuzzyMatch
{
    private final Node node;
    private final Automaton matcher;

    /**
     * @param node    the node at which the match was found
     * @param matcher the automaton state associated with that node
     */
    FuzzyMatch(Node node, Automaton matcher)
    {
        assert node != null;
        assert matcher != null;
        this.matcher = matcher;
        this.node = node;
    }

    /** Returns the matched node. */
    Node getNode()
    {
        return this.node;
    }

    /** Returns the automaton state associated with the matched node. */
    Automaton getMatcher()
    {
        return this.matcher;
    }
}
}