package com.github.liblevenshtein.serialization;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.TreeSet;
import com.google.common.base.Joiner;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.ToString;
import lombok.experimental.ExtensionMethod;
import lombok.extern.slf4j.Slf4j;
import com.github.liblevenshtein.collection.dictionary.Dawg;
import com.github.liblevenshtein.collection.dictionary.DawgNode;
import com.github.liblevenshtein.collection.dictionary.SortedDawg;
import com.github.liblevenshtein.transducer.Algorithm;
import com.github.liblevenshtein.transducer.Transducer;
import com.github.liblevenshtein.transducer.TransducerAttributes;
import com.github.liblevenshtein.transducer.factory.TransducerBuilder;
/**
* (De)Serializer for plain text files. Dictionaries have their terms
* serialized to a newline-delimited, text file. Transducers are serialized to
* a plain text, property file.
*/
@Slf4j
@NoArgsConstructor
@AllArgsConstructor
@ToString(callSuper = false)
@SuppressWarnings("unchecked")
@EqualsAndHashCode(callSuper = false)
@ExtensionMethod(PlainTextSerializer.PropertiesExtensions.class)
public class PlainTextSerializer extends AbstractSerializer {
/**
* "maxDistance" literal for accessors.
*/
private static final String MAX_DISTANCE = "maxDistance";
/**
* "includeDistance" literal for accessors.
*/
private static final String INCLUDE_DISTANCE = "includeDistance";
/**
* "algorithm" literal for accessors.
*/
private static final String ALGORITHM = "algorithm";
/**
* "isSorted" literal for accessors.
*/
private static final String IS_SORTED = "isSorted";
/**
* "dictionary" literal for accessors.
*/
private static final String DICTIONARY = "dictionary";
/**
* Specifies that this {@link PlainTextSerializer} only deals with sorted
* collections. If you do not know whether the collections you'll be
* (de)serializing are sorted, you should leave this false.
*/
private boolean isSorted = false;
/**
* Serializes SortedDawg dictionaries for Transducer, Properties files.
*/
private final Serializer serializer = new ProtobufSerializer();
// Serializers
// ---------------------------------------------------------------------------
/**
* {@inheritDoc}
*/
@Override
public void serialize(
@NonNull final Serializable object,
@NonNull final OutputStream stream) throws Exception {
log.info("Serializing instance of [{}] to stream", object.getClass());
if (object instanceof SortedDawg) {
final Collection<String> dictionary = dictionaryFor((SortedDawg) object);
try (final BufferedWriter writer =
new BufferedWriter(new OutputStreamWriter(stream, StandardCharsets.UTF_8))) {
for (final String term : dictionary) {
writer.write(term);
writer.newLine();
}
}
return;
}
if (object instanceof Transducer) {
final Transducer<DawgNode, Object> transducer =
(Transducer<DawgNode, Object>) object;
final TransducerAttributes<DawgNode, Object> attributes =
transducer.attributes();
final Collection<String> dictionary = dictionaryFor(attributes.dictionary());
new Properties()
.setInteger(MAX_DISTANCE, attributes.maxDistance())
.setBoolean(INCLUDE_DISTANCE, attributes.includeDistance())
.setAlgorithm(ALGORITHM, attributes.algorithm())
.setBoolean(IS_SORTED, isSorted)
.setCollection(DICTIONARY, dictionary)
.store(stream,
MessageFormat.format("Serialized on {0,date,long} at {0,time,full}",
new Date()));
return;
}
throw unsupportedType(object.getClass());
}
/**
* {@inheritDoc}
*/
@Override
public byte[] serialize(@NonNull final Serializable object) throws Exception {
log.info("Serializing instance of [{}] to byte array", object.getClass());
try (final ByteArrayOutputStream stream = new ByteArrayOutputStream()) {
serialize(object, stream);
return stream.toByteArray();
}
}
// Deserializers
// ---------------------------------------------------------------------------
/**
* {@inheritDoc}
*/
@Override
public <Type extends Serializable> Type deserialize(
@NonNull final Class<Type> type,
@NonNull final InputStream stream) throws Exception {
log.info("Deserializing an instance of [{}] from a stream", type);
if (SortedDawg.class.isAssignableFrom(type)) {
try (final BufferedReader reader =
new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
if (isSorted) {
log.info("Assuming the dictionary is sorted for deserialization");
final SortedDawg dictionary = new SortedDawg();
for (String term = reader.readLine(); null != term; term = reader.readLine()) {
dictionary.add(term);
}
dictionary.finish();
return (Type) dictionary;
}
final Collection<String> dictionary = new TreeSet<>();
for (String term = reader.readLine(); null != term; term = reader.readLine()) {
dictionary.add(term);
}
return (Type) new SortedDawg(dictionary);
}
}
if (Transducer.class.isAssignableFrom(type)) {
final Properties properties = new Properties();
properties.load(stream);
return (Type) new TransducerBuilder()
.dictionary(
properties.getCollection(DICTIONARY),
properties.getBoolean(IS_SORTED))
.algorithm(properties.getAlgorithm(ALGORITHM))
.defaultMaxDistance(properties.getInteger(MAX_DISTANCE))
.includeDistance(properties.getBoolean(INCLUDE_DISTANCE))
.build();
}
throw unsupportedType(type);
}
/**
* {@inheritDoc}
*/
@Override
public <Type extends Serializable> Type deserialize(
@NonNull final Class<Type> type,
@NonNull final byte[] bytes) throws Exception {
log.info("Deserializing an instance of [{}] from a byte array", type);
try (final InputStream stream = new ByteArrayInputStream(bytes)) {
return deserialize(type, stream);
}
}
/**
* Returns a sorted {@link Collection} of the dictionary.
* @param dictionary {@link Collection} to sort.
* @return Sorted version of dictionary.
*/
private Collection<String> dictionaryFor(@NonNull final Dawg dictionary) {
if (!isSorted) {
return dictionary;
}
final List<String> sorted = new ArrayList<>(dictionary);
Collections.sort(sorted);
return sorted;
}
/**
* Builds an {@link IllegalArgumentException} for a method that does not
* support some type of object.
* @param type Unsupported type to specify in the exception.
* @return New {@link IllegalArgumentException} for the unsupported type.
*/
private IllegalArgumentException unsupportedType(final Class<?> type) {
final String message = String.format(
"Unsupported type [%s] for serializer [%s]",
type, getClass());
return new IllegalArgumentException(message);
}
/**
* Extension methods for {@link Properties}.
*/
public static class PropertiesExtensions {
/**
* Returns the property mapped-to by key.
* @param self Contains the property value to return.
* @param key Name of the property to return.
* @return Property value mapped-to by key.
* @throws IllegalArgumentException When no value is mapped-to by key.
*/
public static String getValue(final Properties self, final String key) {
final String value = self.getProperty(key);
assertNotNull(key, value);
return value;
}
/**
* Sets an integer property.
* @param self Contains the integer property.
* @param key Name of the integer property.
* @param value Integer value to set.
* @return self for fluency.
*/
public static Properties setInteger(
final Properties self,
final String key,
final int value) {
self.setProperty(key, Integer.toString(value));
return self;
}
/**
* Returns an integer property.
* @param self Contains the integer property.
* @param key Name of the integer property.
* @return Integer mapped-to by key.
* @throws IllegalArgumentException When no valid integer is mapped-to by
* key.
*/
public static int getInteger(final Properties self, final String key) {
final String value = getValue(self, key);
try {
return Integer.parseInt(value);
}
catch (final NumberFormatException exception) {
final String message =
String.format("Invalid integer [%s] for property [%s]",
value, key);
throw new IllegalArgumentException(message, exception);
}
}
/**
* Sets a boolean property.
* @param self Contains the boolean property.
* @param key Name of the boolean property.
* @param value Boolean to set.
* @return self for fluency.
*/
public static Properties setBoolean(
final Properties self,
final String key,
final boolean value) {
self.setProperty(key, Boolean.toString(value));
return self;
}
/**
* Returns a boolean property.
* @param self Contains the boolean property.
* @param key Name of the boolean property.
* @return Boolean mapped-to by key.
* @throws IllegalArgumentException When no valid boolean is mapped-to by
* key.
*/
public static boolean getBoolean(final Properties self, final String key) {
final String value = getValue(self, key);
if ("true".equals(value)) {
return true;
}
if ("false".equals(value)) {
return false;
}
final String message =
String.format("Invalid boolean [%s] for property [%s]",
value, key);
throw new IllegalArgumentException(message);
}
/**
* Sets a {@link Collection} property.
* @param self Contains the {@link Collection} property.
* @param key Name of the {@link Collection} property.
* @param collection {@link Collection} to set.
* @return self for fluency.
*/
public static Properties setCollection(
final Properties self,
final String key,
final Collection<String> collection) {
self.setProperty(key, Joiner.on("\\n").join(collection));
return self;
}
/**
* Returns a {@link Collection} property.
* @param self Contains the {@link Collection} property.
* @param key Name of the {@link Collection} property.
* @return {@link Collection} mapped-to by key.
* @throws IllegalArgumentException When no {@link Collection} is mapped-to
* by key.
*/
public static Collection<String> getCollection(
final Properties self,
final String key) {
final String value = getValue(self, key);
final String[] collection = value.split("\\\\n");
return Arrays.asList(collection);
}
/**
* Sets an {@link Algorithm} property.
* @param self Contains the {@link Algorithm} property.
* @param key Name of the {@link Algorithm} property.
* @param algorithm {@link Algorithm} to set.
* @return self for fluency.
*/
public static Properties setAlgorithm(
final Properties self,
final String key,
final Algorithm algorithm) {
self.setProperty(key, algorithm.name());
return self;
}
/**
* Returns an {@link Algorithm} property.
* @param self Contains the {@link Algorithm} property.
* @param key Name of the {@link Algorithm} property.
* @return {@link Algorithm} mapped-to by key.
* @throws IllegalArgumentException When no {@link Algorithm} is mapped-to
* by key.
*/
public static Algorithm getAlgorithm(
final Properties self,
final String key) {
final String value = getValue(self, key);
return Algorithm.valueOf(value);
}
/**
* Asserts that value is not null.
* @param key Identifier for the value.
* @param value Value to assert-against.
* @throws IllegalArgumentException When value is null.
*/
private static void assertNotNull(final String key, final String value) {
if (null == value) {
final String message =
String.format("No value defined for property [%s]", key);
throw new IllegalArgumentException(message);
}
}
}
}