package com.github.liblevenshtein.transducer.factory;
import java.io.Serializable;
import java.util.Collection;
import java.util.Collections;
import lombok.NonNull;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import com.github.liblevenshtein.collection.dictionary.Dawg;
import com.github.liblevenshtein.collection.dictionary.DawgNode;
import com.github.liblevenshtein.collection.dictionary.factory.DawgFactory;
import com.github.liblevenshtein.transducer.Algorithm;
import com.github.liblevenshtein.transducer.DistanceFunction;
import com.github.liblevenshtein.transducer.ITransducer;
import com.github.liblevenshtein.transducer.MergeFunction;
import com.github.liblevenshtein.transducer.SpecialPositionComparator;
import com.github.liblevenshtein.transducer.StandardPositionComparator;
import com.github.liblevenshtein.transducer.State;
import com.github.liblevenshtein.transducer.SubsumesFunction;
import com.github.liblevenshtein.transducer.Transducer;
import com.github.liblevenshtein.transducer.TransducerAttributes;
import com.github.liblevenshtein.transducer.UnsubsumeFunction;
/**
 * Fluently-builds Levenshtein transducers.  Configure the builder through its
 * setters (derived by Lombok's {@link Setter} from the mutable fields below)
 * and {@link #dictionary(Collection, boolean)}, then call {@link #build()}.
 * @author Dylon Edwards
 * @since 2.1.0
 */
@Slf4j
@Setter
@SuppressWarnings("checkstyle:classdataabstractioncoupling")
public class TransducerBuilder implements Serializable {

  private static final long serialVersionUID = 1L;

  /**
   * Builds DAWG collections from dictionaries.
   */
  private final DawgFactory dawgFactory = new DawgFactory();

  /**
   * Terms from which the dictionary automaton is built when {@link #build()}
   * is called.  Defaults to an empty collection; {@code null} is rejected up
   * front so that a bad value cannot surface later, inside {@link #build()}.
   */
  @Setter
  @NonNull
  private Collection<String> dictionary = Collections.emptyList();

  /**
   * Whether {@link #dictionary} is sorted.
   */
  @Setter
  private boolean isSorted = false;

  /**
   * Desired Levenshtein algorithm for searching.
   */
  @Setter
  @NonNull
  private Algorithm algorithm = Algorithm.STANDARD;

  /**
   * Default maximum number of errors tolerated between each spelling candidate
   * and the query term.
   */
  @Setter
  private int defaultMaxDistance = 2;

  /**
   * Whether the distances between each spelling candidate and the query term
   * should be included in the collections of spelling candidates.
   */
  @Setter
  private boolean includeDistance = true;

  /**
   * Specifies the collection of dictionary terms for the dictionary automaton,
   * together with whether those terms are sorted.
   * @param dictionary Collection of dictionary terms to consider when
   *   generating spelling candidates.  Must not be {@code null}.
   * @param isSorted Whether the dictionary is already sorted.  The flag is
   *   handed to the {@link DawgFactory} along with the terms when
   *   {@link #build()} is called.
   * @return This {@link TransducerBuilder}, for fluency.
   */
  public TransducerBuilder dictionary(
      @NonNull final Collection<String> dictionary,
      final boolean isSorted) {
    this.dictionary = dictionary;
    this.isSorted = isSorted;
    return this;
  }

  /**
   * Builds a Levenshtein transducer according to the parameters set for this
   * {@link TransducerBuilder}.
   * @param <CandidateType> Implicit type of the spelling candidates generated
   *   by the transducer.
   * @return Levenshtein transducer for seeking spelling candidates for query
   *   terms (fuzzy searching!).
   * @throws IllegalArgumentException If {@link #algorithm} is not one of the
   *   algorithms handled by the factory methods of this class.
   */
  @SuppressWarnings("unchecked")
  public <CandidateType> ITransducer<CandidateType> build() {
    log.info("Building transducer out of [{}] terms with isSorted [{}], "
        + "algorithm [{}], defaultMaxDistance [{}], and includeDistance [{}]",
      dictionary.size(), isSorted, algorithm, defaultMaxDistance,
      includeDistance);

    // Named "dawg" rather than "dictionary" so the automaton is not confused
    // with (and does not shadow) the field holding the raw terms.
    final Dawg dawg = dawgFactory.build(dictionary, isSorted);

    final PositionFactory positionFactory = new PositionFactory();
    final StateFactory stateFactory = new StateFactory();

    // Both transition factories share the same state and position factories.
    final PositionTransitionFactory positionTransitionFactory =
      positionTransitionFactory();
    positionTransitionFactory.stateFactory(stateFactory);
    positionTransitionFactory.positionFactory(positionFactory);

    final StateTransitionFactory stateTransitionFactory =
      stateTransitionFactory();
    stateTransitionFactory.stateFactory(stateFactory);
    stateTransitionFactory.positionTransitionFactory(positionTransitionFactory);

    // The initial state holds the single position built from (0, 0).
    final State initialState = stateFactory.build(positionFactory.build(0, 0));

    final TransducerAttributes<DawgNode, CandidateType> attributes =
      TransducerAttributes.<DawgNode, CandidateType>builder()
        .maxDistance(defaultMaxDistance)
        .stateTransitionFactory(stateTransitionFactory)
        .candidateFactory(candidateFactory())
        .minDistance(minDistance())
        .isFinal(dawgFactory.finalFunction(dawg))
        .dictionaryTransition(dawgFactory.transitionFunction(dawg))
        .dictionaryRoot(dawg.root())
        .initialState(initialState)
        .dictionary(dawg)
        .algorithm(algorithm)
        .includeDistance(includeDistance)
        .build();

    return new Transducer<>(attributes);
  }

  /**
   * Builds the factory for spelling candidates, according to whether they
   * should include the candidates' distances from query terms.
   * @param <CandidateType> Implicit type of the spelling candidates generated
   *   by the transducer.
   * @return Factory for spelling candidates: the distance-carrying variant
   *   when {@link #includeDistance} is set, the plain variant otherwise.
   */
  @SuppressWarnings("unchecked")
  protected <CandidateType> CandidateFactory<CandidateType> candidateFactory() {
    // The caller picks CandidateType implicitly, hence the unchecked cast.
    return (CandidateFactory<CandidateType>)
      (includeDistance
        ? new CandidateFactory.WithDistance()
        : new CandidateFactory.WithoutDistance());
  }

  /**
   * Builds the function that finds the distance between spelling candidates and
   * the query term.
   * @return Levenshtein algorithm-specific, distance function.
   * @throws IllegalArgumentException If {@link #algorithm} is not handled here.
   */
  protected DistanceFunction minDistance() {
    switch (algorithm) {
      case STANDARD:
        return new DistanceFunction.ForStandardPositions();
      case TRANSPOSITION: // fall through
      case MERGE_AND_SPLIT:
        // Both extended algorithms share the special-position distance function.
        return new DistanceFunction.ForSpecialPositions();
      default:
        throw new IllegalArgumentException(unsupportedAlgorithm(algorithm));
    }
  }

  /**
   * Builds an {@link #algorithm}-specific, position-transition factory.
   * @return {@link #algorithm}-specific, position-transition factory.
   * @throws IllegalArgumentException If {@link #algorithm} is not handled here.
   */
  protected PositionTransitionFactory positionTransitionFactory() {
    switch (algorithm) {
      case STANDARD:
        return new PositionTransitionFactory.ForStandardPositions();
      case TRANSPOSITION:
        return new PositionTransitionFactory.ForTranspositionPositions();
      case MERGE_AND_SPLIT:
        return new PositionTransitionFactory.ForMergeAndSplitPositions();
      default:
        throw new IllegalArgumentException(unsupportedAlgorithm(algorithm));
    }
  }

  /**
   * Builds a state-transition factory from the parameters specified at the time
   * {@link #build()} was called.  Only the {@link #algorithm}-specific parts
   * (comparator, merge function and unsubsume function) are configured here;
   * {@link #build()} supplies the state and position-transition factories.
   * @return New state-transition factory.
   * @throws IllegalArgumentException If {@link #algorithm} is not handled here.
   */
  protected StateTransitionFactory stateTransitionFactory() {
    switch (algorithm) {
      case STANDARD:
        return new StateTransitionFactory()
          .comparator(new StandardPositionComparator())
          .merge(new MergeFunction.ForStandardPositions())
          .unsubsume(new UnsubsumeFunction.ForStandardPositions()
            .subsumes(new SubsumesFunction.ForStandardAlgorithm()));
      case TRANSPOSITION:
        return new StateTransitionFactory()
          .comparator(new SpecialPositionComparator())
          .merge(new MergeFunction.ForSpecialPositions())
          .unsubsume(new UnsubsumeFunction.ForSpecialPositions()
            .subsumes(new SubsumesFunction.ForTransposition()));
      case MERGE_AND_SPLIT:
        // Differs from TRANSPOSITION only in the subsumes function.
        return new StateTransitionFactory()
          .comparator(new SpecialPositionComparator())
          .merge(new MergeFunction.ForSpecialPositions())
          .unsubsume(new UnsubsumeFunction.ForSpecialPositions()
            .subsumes(new SubsumesFunction.ForMergeAndSplit()));
      default:
        throw new IllegalArgumentException(unsupportedAlgorithm(algorithm));
    }
  }

  /**
   * Generates a message for algorithms that aren't supported by various
   * methods.
   * @param algorithm The unsupported algorithm.
   * @return A message stating that some algorithm is unsupported.
   */
  protected String unsupportedAlgorithm(final Algorithm algorithm) {
    return String.format("Unsupported algorithm [%s]", algorithm);
  }
}