package com.github.liblevenshtein.serialization;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import it.unimi.dsi.fastutil.chars.Char2ObjectMap;
import it.unimi.dsi.fastutil.chars.Char2ObjectRBTreeMap;
import it.unimi.dsi.fastutil.chars.Char2ObjectSortedMap;
import com.google.protobuf.CodedInputStream;
import lombok.EqualsAndHashCode;
import lombok.NonNull;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import com.github.liblevenshtein.collection.dictionary.Dawg;
import com.github.liblevenshtein.collection.dictionary.DawgNode;
import com.github.liblevenshtein.collection.dictionary.FinalDawgNode;
import com.github.liblevenshtein.collection.dictionary.SortedDawg;
import com.github.liblevenshtein.proto.LibLevenshteinProtos;
import com.github.liblevenshtein.transducer.Algorithm;
import com.github.liblevenshtein.transducer.Transducer;
import com.github.liblevenshtein.transducer.TransducerAttributes;
import com.github.liblevenshtein.transducer.factory.TransducerBuilder;
/**
 * (De)Serializer for Google's Protocol Buffers data-interchange format.
 *
 * <p>Two kinds of objects are supported: {@link SortedDawg} dictionaries and
 * {@link Transducer}s.  Any other type is rejected with an
 * {@link IllegalArgumentException}.
 */
@Slf4j
@ToString(callSuper = false)
@SuppressWarnings("unchecked")
@EqualsAndHashCode(callSuper = false)
public class ProtobufSerializer extends AbstractSerializer {

  // Serializers
  // ---------------------------------------------------------------------------

  /**
   * {@inheritDoc}
   *
   * <p>Converts the object to its prototype and writes the prototype to the
   * stream.  The stream is neither flushed nor closed here.
   *
   * @throws IllegalArgumentException When the object is neither a
   *   {@link SortedDawg} nor a {@link Transducer}.
   */
  @Override
  public void serialize(
      @NonNull final Serializable object,
      @NonNull final OutputStream stream) throws Exception {
    log.info("Serializing an instance of [{}] to a stream", object.getClass());

    if (object instanceof SortedDawg) {
      final SortedDawg dawg = (SortedDawg) object;
      final LibLevenshteinProtos.Dawg proto = protoOf(dawg);
      proto.writeTo(stream);
      return;
    }

    if (object instanceof Transducer) {
      final Transducer<DawgNode, Object> transducer =
        (Transducer<DawgNode, Object>) object;
      final LibLevenshteinProtos.Transducer proto = protoOf(transducer);
      proto.writeTo(stream);
      return;
    }

    throw unknownType(object.getClass());
  }

  /**
   * {@inheritDoc}
   *
   * <p>Converts the object to its prototype and returns the bytes of the
   * prototype.
   *
   * @throws IllegalArgumentException When the object is neither a
   *   {@link SortedDawg} nor a {@link Transducer}.
   */
  @Override
  public byte[] serialize(@NonNull final Serializable object) throws Exception {
    log.info("Serializing an instance of [{}] to a byte array", object.getClass());

    if (object instanceof SortedDawg) {
      final SortedDawg dawg = (SortedDawg) object;
      final LibLevenshteinProtos.Dawg proto = protoOf(dawg);
      return proto.toByteArray();
    }

    if (object instanceof Transducer) {
      final Transducer<DawgNode, Object> transducer =
        (Transducer<DawgNode, Object>) object;
      final LibLevenshteinProtos.Transducer proto = protoOf(transducer);
      return proto.toByteArray();
    }

    throw unknownType(object.getClass());
  }

  // Deserializers
  // ---------------------------------------------------------------------------

  /**
   * {@inheritDoc}
   *
   * <p>The requested type selects the prototype to parse: {@link SortedDawg}
   * (or a subtype) parses a dictionary, {@link Transducer} (or a subtype)
   * parses a transducer.
   *
   * @throws IllegalArgumentException When the requested type is neither of
   *   the above.
   */
  @Override
  public <Type extends Serializable> Type deserialize(
      @NonNull final Class<Type> type,
      @NonNull final InputStream stream) throws Exception {
    log.info("Deserializing an instance of [{}] from a stream", type);

    // Every edge embeds its target node, so the nesting depth of a message
    // grows with the length of the longest term.  Both parser limits are lifted
    // to allow that.
    // NOTE(review): lifting the limits also removes the protection they give
    // against hostile input; this should only be fed trusted data.
    final CodedInputStream protoStream = CodedInputStream.newInstance(stream);
    protoStream.setRecursionLimit(Integer.MAX_VALUE);
    protoStream.setSizeLimit(Integer.MAX_VALUE);

    if (SortedDawg.class.isAssignableFrom(type)) {
      final LibLevenshteinProtos.Dawg proto =
        LibLevenshteinProtos.Dawg.parseFrom(protoStream);
      return (Type) modelOf(proto);
    }

    if (Transducer.class.isAssignableFrom(type)) {
      final LibLevenshteinProtos.Transducer proto =
        LibLevenshteinProtos.Transducer.parseFrom(protoStream);
      return (Type) modelOf(proto);
    }

    throw unknownType(type);
  }

  /**
   * {@inheritDoc}
   *
   * <p>Wraps the bytes in a stream and delegates to
   * {@link #deserialize(Class, InputStream)}.
   */
  @Override
  public <Type extends Serializable> Type deserialize(
      @NonNull final Class<Type> type,
      @NonNull final byte[] bytes) throws Exception {
    log.info("Deserializing an instance of [{}] from a byte array", type);
    try (final InputStream stream = new ByteArrayInputStream(bytes)) {
      return deserialize(type, stream);
    }
  }

  // Models
  // ---------------------------------------------------------------------------

  /**
   * Returns the node of the prototype, converting its outgoing edges
   * recursively.
   * @param proto Prototype of the node.
   * @param nodes Tracks {@link DawgNode}s that have already been deserialized,
   *   to avoid deserializing a full trie.  A prototype found in this mapping
   *   is not converted again; every newly-converted prototype is added to it.
   * @return Node of the prototype.
   */
  protected DawgNode modelOf(
      final LibLevenshteinProtos.DawgNode proto,
      final Map<LibLevenshteinProtos.DawgNode, DawgNode> nodes) {
    // Values stored by this method are never null, so a single lookup is
    // enough to tell whether the prototype was already converted.
    final DawgNode known = nodes.get(proto);
    if (known != null) {
      return known;
    }

    final Char2ObjectSortedMap<DawgNode> edges = new Char2ObjectRBTreeMap<>();
    for (final LibLevenshteinProtos.DawgNode.Edge edge : proto.getEdgeList()) {
      final char label = (char) edge.getCharKey();
      edges.put(label, modelOf(edge.getValue(), nodes));
    }

    final DawgNode node = proto.getIsFinal()
      ? new FinalDawgNode(edges)
      : new DawgNode(edges);

    nodes.put(proto, node);
    return node;
  }

  /**
   * Returns the dictionary of the prototype.
   * @param proto Prototype of the dictionary.
   * @return Dictionary of the prototype.
   */
  protected SortedDawg modelOf(final LibLevenshteinProtos.Dawg proto) {
    // A parsed message holds a separate instance for every nested node, even
    // where the serialized graph shared one, so an identity-keyed mapping
    // would never hit and a full trie would be rebuilt.  Keying by value
    // (relying on the value-based equals/hashCode of the protobuf messages)
    // lets structurally equal sub-graphs be converted once and shared again.
    final Map<LibLevenshteinProtos.DawgNode, DawgNode> nodes = new HashMap<>();
    final DawgNode root = modelOf(proto.getRoot(), nodes);
    return new SortedDawg(proto.getSize(), root);
  }

  /**
   * Returns the transducer of the prototype, built from the prototype's
   * dictionary, algorithm, default maximum distance and include-distance flag.
   * @param proto Prototype of the transducer.
   * @return Transducer of the prototype.
   */
  protected Transducer<DawgNode, Object> modelOf(final LibLevenshteinProtos.Transducer proto) {
    return (Transducer<DawgNode, Object>)
      new TransducerBuilder()
        .dictionary(modelOf(proto.getDictionary()))
        .algorithm(modelOf(proto.getAlgorithm()))
        .defaultMaxDistance(proto.getDefaultMaxDistance())
        .includeDistance(proto.getIncludeDistance())
        .build();
  }

  /**
   * Returns the Levenshtein algorithm for the prototype.
   * @param proto Levenshtein algorithm prototype.
   * @return Levenshtein algorithm for the prototype.
   * @throws IllegalArgumentException When the prototype has no corresponding
   *   {@link Algorithm}.
   */
  protected Algorithm modelOf(final LibLevenshteinProtos.Transducer.Algorithm proto) {
    switch (proto) {
      case STANDARD:
        return Algorithm.STANDARD;
      case TRANSPOSITION:
        return Algorithm.TRANSPOSITION;
      case MERGE_AND_SPLIT:
        return Algorithm.MERGE_AND_SPLIT;
      default:
        throw unknownAlgorithm(proto);
    }
  }

  // Prototypes
  // ---------------------------------------------------------------------------

  /**
   * Returns the prototype of a transducer, populated from the transducer's
   * attributes.
   * @param transducer Transducer whose prototype is to be returned.
   * @return Prototype of the transducer.
   */
  protected LibLevenshteinProtos.Transducer protoOf(final Transducer<DawgNode, Object> transducer) {
    final TransducerAttributes<DawgNode, Object> attributes =
      transducer.attributes();
    return LibLevenshteinProtos.Transducer.newBuilder()
      .setDefaultMaxDistance(attributes.maxDistance())
      .setIncludeDistance(attributes.includeDistance())
      .setAlgorithm(protoOf(attributes.algorithm()))
      .setDictionary(protoOf(attributes.dictionary()))
      .build();
  }

  /**
   * Returns the prototype of the Levenshtein algorithm.
   * @param algorithm Levenshtein algorithm whose prototype is to be returned.
   * @return Prototype of the Levenshtein algorithm.
   * @throws IllegalArgumentException When the algorithm has no corresponding
   *   prototype.
   */
  protected LibLevenshteinProtos.Transducer.Algorithm protoOf(final Algorithm algorithm) {
    switch (algorithm) {
      case STANDARD:
        return LibLevenshteinProtos.Transducer.Algorithm.STANDARD;
      case TRANSPOSITION:
        return LibLevenshteinProtos.Transducer.Algorithm.TRANSPOSITION;
      case MERGE_AND_SPLIT:
        return LibLevenshteinProtos.Transducer.Algorithm.MERGE_AND_SPLIT;
      default:
        throw unknownAlgorithm(algorithm);
    }
  }

  /**
   * Returns the prototype of the dictionary.  Only {@link SortedDawg}s are
   * supported.
   * @param dawg Dictionary whose prototype is to be returned.
   * @return Prototype of the dictionary.
   * @throws IllegalArgumentException When the dictionary is not a
   *   {@link SortedDawg}.
   */
  protected LibLevenshteinProtos.Dawg protoOf(final Dawg dawg) {
    if (dawg instanceof SortedDawg) {
      return protoOf((SortedDawg) dawg);
    }

    final String message =
      String.format("Unsupported Dawg type [%s]", dawg.getClass());
    throw new IllegalArgumentException(message);
  }

  /**
   * Returns the prototype of the dictionary, consisting of its size and the
   * prototype of its root node.
   * @param dawg Dictionary whose prototype is to be returned.
   * @return Prototype of the dictionary.
   */
  protected LibLevenshteinProtos.Dawg protoOf(final SortedDawg dawg) {
    // Keyed by identity, so a node reachable over several paths is converted
    // once and its prototype instance is reused.
    final Map<DawgNode, LibLevenshteinProtos.DawgNode> nodes =
      new IdentityHashMap<>();
    return LibLevenshteinProtos.Dawg.newBuilder()
      .setSize(dawg.size())
      .setRoot(protoOf(dawg.root(), nodes))
      .build();
  }

  /**
   * Returns the prototype of a node, converting its outgoing edges
   * recursively.
   * @param node Node whose prototype is to be returned.
   * @param nodes Mapping of {@link DawgNode}s to
   *   {@link LibLevenshteinProtos.DawgNode}s, to avoid constructing a full trie.
   *   A node found in this mapping is not converted again; every
   *   newly-converted node is added to it.
   * @return The prototype of the node.
   */
  protected LibLevenshteinProtos.DawgNode protoOf(
      final DawgNode node,
      final Map<DawgNode, LibLevenshteinProtos.DawgNode> nodes) {
    // Values stored by this method are never null, so a single lookup is
    // enough to tell whether the node was already converted.
    final LibLevenshteinProtos.DawgNode known = nodes.get(node);
    if (known != null) {
      return known;
    }

    final LibLevenshteinProtos.DawgNode.Builder builder =
      LibLevenshteinProtos.DawgNode.newBuilder();
    builder.setIsFinal(node.isFinal());

    for (final Char2ObjectMap.Entry<DawgNode> edge : node.edges().char2ObjectEntrySet()) {
      builder.addEdge(protoOf(edge.getCharKey(), edge.getValue(), nodes));
    }

    final LibLevenshteinProtos.DawgNode proto = builder.build();
    nodes.put(node, proto);
    return proto;
  }

  /**
   * Returns the prototype of an edge.
   * @param label Annotation leading out of the current {@link DawgNode} to the
   *   target {@link DawgNode}.
   * @param node Target {@link DawgNode} for the transition.
   * @param nodes Mapping of {@link DawgNode}s to
   *   {@link LibLevenshteinProtos.DawgNode}s, to avoid constructing a full trie.
   * @return The prototype of an edge.
   */
  protected LibLevenshteinProtos.DawgNode.Edge protoOf(
      final char label,
      final DawgNode node,
      final Map<DawgNode, LibLevenshteinProtos.DawgNode> nodes) {
    return LibLevenshteinProtos.DawgNode.Edge.newBuilder()
      .setCharKey(label)
      .setValue(protoOf(node, nodes))
      .build();
  }

  // Utilities
  // ---------------------------------------------------------------------------

  /**
   * Returns an {@link IllegalArgumentException} for an unsupported class.  The
   * exception is returned rather than thrown, so that call sites can
   * {@code throw} it themselves.
   * @param type Subject of the exception.
   * @return An {@link IllegalArgumentException} for an unsupported class.
   */
  private static IllegalArgumentException unknownType(final Class<?> type) {
    final String message = String.format("Unknown type [%s]", type);
    return new IllegalArgumentException(message);
  }

  /**
   * Returns an {@link IllegalArgumentException} for an unsupported algorithm.
   * The exception is returned rather than thrown, so that call sites can
   * {@code throw} it themselves.
   * @param algorithm Subject of the exception.
   * @param <AlgorithmType> Generic type of the unsupported algorithm.
   * @return An {@link IllegalArgumentException} for an unsupported algorithm.
   */
  private static <AlgorithmType> IllegalArgumentException unknownAlgorithm(
      final AlgorithmType algorithm) {
    final String message = String.format("Unknown Algorithm [%s]", algorithm);
    return new IllegalArgumentException(message);
  }
}