NaturalLanguageCipher.java example

Explorer
SquidLib-master
package squidpony;

import regexodus.*;
import squidpony.squidmath.*;

import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;

/**
 * Class that builds up a dictionary of words in a source text to words generated by a FakeLanguageGen, and can
 * "translate" a source text to a similarly-punctuated, similarly-capitalized fake text. Uses a hash of each word in the
 * source text to determine the RNG seed that FakeLanguageGen will use, so the translation is not random. Can cipher a
 * typically English text and generate a text with FakeLanguageGen, but also decipher such a generated text with a
 * fully-complete, partially-complete, or partially-incorrect vocabulary.
 * <br>
 * This defaults to caching source-language words to their generated-language word translations in the field table, as
 * well as the reverse translation in reverse. This can be changed to reduce memory usage for large vocabularies with
 * {@code setCacheLevel()}, where it starts at 2 (writing to table and reverse), and can be lowered to 1 (writing to
 * table only) if you don't need reverse to decipher a language easily, or to 0 (writing to neither) if you expect that
 * memory will be at a premium and don't mind re-generating the same word each time it occurs in a source text. If
 * cacheLevel is 1 or less, then this will not check for overlap between previously-generated words (it won't have an
 * easy way to look up previously-generated ones) and so may be impossible to accurately decipher. As an example, one
 * test of level 1 generated "he" as the translation for both "a" and "at", so every time "a" had been ciphered and then
 * deciphered, the reproduced version said "at" instead. This won't happen by default, but the default instead relies on
 * words being entered as inputs to cipher() or lookup() in the same order. If words are entered in two different orders
 * to different runs of the program, they may have different generated results if cacheLevel is 2. One way to handle
 * this is to use cacheLevel 2 and cipher the whole game script, or just the unique words in it (maybe just a large word
 * list, such as http://wordlist.aspell.net/12dicts/ ), then serialize the NaturalLanguageCipher for later usage.
 * @author Tommy Ettinger
 * Created by Tommy Ettinger on 5/1/2016.
 */
public class NaturalLanguageCipher implements Serializable{

    /**
     * A deliberately simple StatefulRandomness: every draw adds a fixed increment to the state and derives the result
     * from the new state, so the output sequence is fully determined by the state that was last set.
     */
    private static class SemiRandom implements StatefulRandomness, Serializable{
        private static final long serialVersionUID = 1287835632461186341L;
        /** Fixed amount added to the state each time a value is drawn. */
        private static final long INCREMENT = 0x41041041041041L;
        private long state;

        /**
         * Creates a generator whose starting state is derived from {@link Math#random()}.
         */
        public SemiRandom()
        {
            state = (long) (Long.MAX_VALUE * (Math.random() * 2.0 - 1.0));
        }

        /**
         * Creates a generator that starts from the given state.
         *
         * @param state the initial state
         */
        public SemiRandom(long state)
        {
            this.state = state;
        }

        /**
         * Get the current internal state of the StatefulRandomness as a long.
         *
         * @return the current internal state of this object.
         */
        @Override
        public long getState() {
            return state;
        }

        /**
         * Set the current internal state of this StatefulRandomness with a long.
         *
         * @param state a 64-bit long; used as-is
         */
        @Override
        public void setState(long state) {
            this.state = state;
        }

        /**
         * Advances the state and returns its lowest {@code bits} bits.
         *
         * @param bits the number of bits to be returned, from 0 to 32
         * @return an int containing the lowest {@code bits} bits of the advanced state
         */
        @Override
        public int next(int bits) {
            // The mask is built as a long on purpose: an int shift count is taken modulo 32, so the former
            // ~(-1 << 32) evaluated to 0 and next(32) always returned 0. Results for 0..31 bits are unchanged.
            return (int) ((state += INCREMENT) & ~(-1L << bits));
        }

        /**
         * Advances the state and returns it.
         *
         * @return the state after adding the fixed increment
         */
        @Override
        public long nextLong() {
            return state += INCREMENT;
        }

        /**
         * Advances the state and converts its upper 52 bits into a double.
         *
         * @return a double that is at least 0.0 and less than 1.0
         */
        public double nextDouble()
        {
            // 0x3FF in the exponent field with a 52-bit mantissa is a double in [1.0, 2.0); subtracting 1.0 shifts
            // that range down to [0.0, 1.0).
            return NumberTools.longBitsToDouble(0x3FFL << 52 | (state += INCREMENT) >>> 12) - 1.0;
        }

        /**
         * Produces an independent generator that currently has the same state as this one, so both will produce the
         * same sequence from this point on.
         *
         * @return a copy of this RandomnessSource
         */
        @Override
        public RandomnessSource copy() {
            return new SemiRandom(state);
        }
    }

    /** Serialization version identifier for this class. */
    private static final long serialVersionUID = 1287835632461186341L;
    /**
     * The FakeLanguageGen this will use to construct words; normally one of the static fields in FakeLanguageGen or a
     * FakeLanguageGen produced by using the mix() method of one of them. Manually constructing FakeLanguageGen objects
     * isn't especially easy, and if you decide to do that it's recommended you look at SquidLib's source to see how the
     * existing calls to constructors work.
     */
    public FakeLanguageGen language;
    /** State-holding randomness source wrapped by rng; lookup() sets its state from each word's hash. */
    private SemiRandom rs;
    /** The RNG handed to the language generator; constructed around rs. */
    private RNG rng;

    /**
     * Generated affixes in the fake language. Each one is produced by addPart() in the constructor (or copied from
     * another cipher) and is attached to a generated word by conjugate() when the matching flag is set.
     */
    String pluralSuffix, verbingSuffix, verbedSuffix, verberSuffix, verbationSuffix,
            verbmentSuffix, nounySuffix, nounenSuffix, nounistSuffix, nounismSuffix,
            nounicSuffix, nouniveSuffix, adjectivelySuffix, adjectivestSuffix,
            reverbPrefix, ennounPrefix, preverbPrefix, postverbPrefix,
            proverbPrefix, antiverbPrefix, disnounPrefix;

    /**
     * Single-bit flags, one per affix field above. lookup() ORs them together into a "mods" value as it recognizes
     * affixes on the source word, and conjugate() tests them to decide which generated affixes to attach.
     */
    private static final long PLURAL = 1L, VERBING = 1L << 1, VERBED = 1L << 2, VERBER = 1L << 3,
            VERBATION = 1L << 4, VERBMENT = 1L << 5, NOUNY = 1L << 6, NOUNEN = 1L << 7, NOUNIST = 1L << 8,
            NOUNISM = 1L << 9, NOUNIC = 1L << 10, NOUNIVE = 1L << 11, ADJECTIVELY = 1L << 12,
            ADJECTIVEST = 1L << 13, REVERB = 1L << 14, PREVERB = 1L << 15, POSTVERB = 1L << 16,  ENNOUN = 1L << 17,
            PROVERB = 1L << 18,  ANTIVERB = 1L << 19,  DISNOUN = 1L << 20;

    /*
    Sketch of the spelling-normalization rules (pattern->replacement). The preproc array below is authoritative and
    differs from this sketch in places: it matches "ge" and "we" only at the end of a word, adds nc->yk, drops a
    trailing "e", and its i...e rule does not include "v" in the consonant class.

    qu->kw
x->ks
y->i
kh->q
ck->k
ch->x
cq->kh
tx->x
zh->j
ge->j
ew->eu
eigh->ae
p[fh]->f
n([gk])->y$1
a([bdfjlmnprtvz])e->ae$1
e([bdjlmnptvz])e->ee$1
i([bdfjlmnprtvz])e->ai$1
o([bdfjlmnprtvz])e->oa$1
u([bdfjlmnprtvz])e->uu$1
([bdfgklmnpqrtvwxz])\1+->$1
ace$->aes
ece$->ees
ice$->ais
oce$->oas
uce$->uus
se$->z
^[pc]([nts])->$1
^fth->t

     */
    /**
     * preproc: replacements that lookup() applies, in array order, to the lower-cased, accent-stripped source word
     * before it is hashed. Order matters because later rules see the output of earlier ones.
     * <br>
     * conjugationProc: replacements that conjugate() applies, in array order, to a generated word after its affixes
     * have been attached. These patterns are compiled with flags 17 (see the inline note on the array).
     */
    private static final Replacer[] preproc = {
            // collapse a doubled (or longer) run of the same consonant into one
            new Replacer(Pattern.compile("([bdfgklmnpqrtvwxz])\\1+"), "$1"),
            new Replacer(Pattern.compile("qu"), "kw", false),
            new Replacer(Pattern.compile("x"), "ks", false),
            new Replacer(Pattern.compile("y"), "i", false),
            new Replacer(Pattern.compile("kh"), "q", false),
            new Replacer(Pattern.compile("ck"), "k", false),
            new Replacer(Pattern.compile("ch"), "x", false),
            new Replacer(Pattern.compile("cq"), "kh", false),
            new Replacer(Pattern.compile("tx"), "x", false),
            new Replacer(Pattern.compile("zh"), "j", false),
            new Replacer(Pattern.compile("ge$"), "j", false),
            new Replacer(Pattern.compile("we$"), "w", false),
            new Replacer(Pattern.compile("ew"), "eu", false),
            new Replacer(Pattern.compile("eigh"), "ae", false),
            new Replacer(Pattern.compile("p[fh]"), "f", false),
            new Replacer(Pattern.compile("nc"), "yk", false),
            new Replacer(Pattern.compile("n([gk])"), "y$1"),
            // vowel + consonant + "e" becomes a two-letter vowel followed by the consonant
            new Replacer(Pattern.compile("a([bdfjlmnprtvz])e"), "ae$1"),
            new Replacer(Pattern.compile("e([bdjlmnptvz])e"), "ee$1"),
            // NOTE(review): unlike the a/o/u rules, this class has no 'v' -- confirm whether that is intentional
            new Replacer(Pattern.compile("i([bdfjlmnprtz])e"), "ai$1"),
            new Replacer(Pattern.compile("o([bdfjlmnprtvz])e"), "oa$1"),
            new Replacer(Pattern.compile("u([bdfjlmnprtvz])e"), "uu$1"),
            new Replacer(Pattern.compile("ace$"), "aes", false),
            new Replacer(Pattern.compile("ece$"), "ees", false),
            new Replacer(Pattern.compile("ice$"), "ais", false),
            new Replacer(Pattern.compile("oce$"), "oas", false),
            new Replacer(Pattern.compile("uce$"), "uus", false),
            new Replacer(Pattern.compile("se$"), "z", false),
            // drop any remaining word-final "e"
            new Replacer(Pattern.compile("e$"), "", false),
            new Replacer(Pattern.compile("^[pc]([nts])"), "$1"),
            new Replacer(Pattern.compile("^fth"), "t", false),
    }, conjugationProc = { // 17 is REFlags.UNICODE | REFlags.IGNORE_CASE
            new Replacer(Pattern.compile("([^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]+)" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])\\2" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])", 17), "$1$2$1$2$3"),
            new Replacer(Pattern.compile("([^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]+)" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])\\3", 17), "$1$2$3$1$3"),
            // keep only the first three characters of a run of four or more characters outside the vowel class
            new Replacer(Pattern.compile("([^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]{3})" +
                    "(?:[^àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя]+)", 17), "$1"),
            // collapse an immediately repeated pair of vowels (e.g. "aiai") to a single pair
            new Replacer(Pattern.compile("([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])" +
                    "([àáâãäåæāăąǻǽaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőœǿoùúûüũūŭůűųuýÿŷỳyαοειυаеёийоуъыэюя])(?:\\1\\2)+", 17), "$1$2"),
            new Replacer(Pattern.compile("[æǽœìíîïĩīĭįıiùúûüũūŭůűųuýÿŷỳy]([æǽœýÿŷỳy])", 17), "$1"),
            // a word-final "qu" gets an "e" appended
            new Replacer(Pattern.compile("q([ùúûüũūŭůűųu])$", 17), "q$1e"),
            new Replacer(Pattern.compile("([ìíîïĩīĭįıi])[ìíîïĩīĭįıi]", 17), "$1"),
            new Replacer(Pattern.compile("([æǽœìíîïĩīĭįıiùúûüũūŭůűųuýÿŷỳy])[wŵẁẃẅ]$", 17), "$1"),
            // swap a u-like vowel followed by an o-like vowel
            new Replacer(Pattern.compile("([ùúûüũūŭůűųu])([òóôõöøōŏőǿo])", 17), "$2$1"),
            new Replacer(Pattern.compile("[àáâãäåāăąǻaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőǿoùúûüũūŭůűųuýÿŷỳy]([æǽœ])", 17), "$1"),
            new Replacer(Pattern.compile("([æǽœ])[àáâãäåāăąǻaèéêëēĕėęěeìíîïĩīĭįıiòóôõöøōŏőǿoùúûüũūŭůűųuýÿŷỳy]", 17), "$1"),
            new Replacer(Pattern.compile("([wŵẁẃẅ])[wŵẁẃẅ]", 17), "$1"),
            // collapse any run of q into a single q
            new Replacer(Pattern.compile("q{2,}", 17), "q")
    };

    /**
     * Lookup table read by phoneticHash64(), laid out as 26 rows of 27 entries and indexed as
     * {@code 27 * first + second}. Rows are the current letter ('a' to 'z'); columns 0 to 25 are the following letter
     * ('a' to 'z') and column 26 is used when the current letter is the last one in the hashed range.
     * <br>
     * Each entry is read as follows: 0 means the pair contributes nothing; otherwise the lowest bit says whether the
     * following letter is consumed along with the current one, and the remaining bits (the entry shifted right by
     * one) are the code that gets appended to the hash. Codes from 1 to 11 are treated as vowels by phoneticHash64().
     */
    static final long[] bigrams = {
//a
            5, 22, 20, 22, 21, 22, 22, 5, 11, 20, 22, 4, 22, 22, 20, 22, 4, 4, 22, 22, 5, 22, 5, 22, 20, 22, 8,
//b
            52, 52, 52, 52, 52, 52, 52, 53, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
//c
            58, 58, 58, 58, 33, 58, 39, 58, 32, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
//d
            60, 60, 60, 60, 60, 60, 60, 61, 60, 39, 60, 60, 60, 60, 60, 60, 60, 60, 60, 63, 60, 60, 60, 60, 60, 60, 60,
//e
            19, 6, 18, 6, 19, 6, 6, 7, 19, 6, 6, 6, 6, 6, 18, 6, 6, 6, 6, 6, 16, 6, 6, 6, 6, 6, 0,
//f
            42, 42, 42, 42, 42, 42, 42, 43, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
//g
            56, 56, 56, 56, 56, 56, 56, 41, 56, 56, 56, 56, 56, 51, 56, 56, 56, 56, 56, 57, 56, 56, 56, 56, 56, 56, 56,
//h
            24, 53, 59, 61, 24, 43, 57, 24, 24, 39, 59, 31, 49, 51, 24, 55, 47, 29, 33, 63, 24, 41, 27, 47, 51, 35, 0,
//i
            16, 16, 16, 16, 11, 16, 16, 17, 11, 16, 16, 16, 16, 16, 18, 16, 16, 2, 16, 16, 16, 16, 16, 16, 18, 16, 18,
//j
            38, 38, 38, 38, 38, 38, 38, 39, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
//k
            58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
//l
            30, 30, 30, 30, 30, 30, 30, 31, 30, 30, 30, 31, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
//m
            48, 49, 48, 48, 48, 48, 48, 49, 48, 48, 48, 48, 48, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
//n
            50, 50, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 51, 50, 50,
//o
            14, 4, 4, 4, 14, 4, 4, 15, 2, 4, 4, 14, 4, 4, 2, 4, 4, 14, 4, 4, 12, 4, 14, 4, 4, 4, 14,
//p
            54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
//q
            46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46, 46,
//r
            28, 28, 28, 28, 28, 28, 28, 29, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 28, 28, 28, 28, 28, 28, 28, 28, 28,
//s
            32, 32, 32, 34, 32, 32, 32, 37, 32, 32, 32, 32, 34, 32, 32, 32, 32, 32, 33, 32, 32, 32, 32, 32, 32, 34, 34,
//t
            62, 62, 62, 63, 62, 62, 62, 45, 62, 62, 62, 58, 62, 62, 62, 62, 62, 62, 62, 63, 62, 62, 62, 47, 62, 62, 62,
//u
            26, 8, 8, 8, 12, 8, 8, 9, 26, 8, 8, 2, 8, 8, 2, 8, 8, 2, 8, 8, 13, 8, 13, 8, 8, 8, 12,
//v
            40, 40, 40, 40, 40, 40, 40, 41, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 40, 40, 40, 40, 40,
//w
            26, 53, 59, 61, 26, 43, 57, 24, 26, 39, 59, 31, 49, 51, 26, 55, 47, 29, 35, 63, 24, 41, 27, 47, 51, 35, 0,
//x
            46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 46, 46, 46,
//y
            50, 50, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 51, 50, 50, 50, 50, 50, 50, 50, 50, 46, 50, 51, 50, 50,
//z
            34, 34, 34, 34, 34, 34, 34, 39, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 35, 34, 34, 34, 34, 34, 34, 34, 34,
    };


    // not an OrderedMap because a random element should never need to be requested from this
    /**
     * The mapping of lower-case word keys to lower-case word values, where keys are in the source language and values
     * are generated by language. Written by lookup() when cacheLevel is 1 or 2.
     */
    public HashMap<String, String> table,
    /**
     * The mapping of lower-case word keys to lower-case word values, where keys are generated by language and values
     * are in the source language. Can be used as a complete vocabulary when passed to decipher. Written by lookup()
     * only when cacheLevel is 2, and consulted there to avoid reusing an already-generated word.
     */
    reverse;
    /**
     * Matches either a run of letters, or a run of letters and hyphens that starts and ends with a letter.
     * NOTE(review): not used in the part of the class reviewed here; presumably used to split text into words.
     */
    private static final Pattern wordMatch = Pattern.compile("(\\pL+)|(\\pL[\\pL-]*\\pL)");

    /**
     * The degree of vocabulary to cache to speed up future searches at the expense of memory usage.
     * <ul>
     * <li>2 will cache source words to generated words in table, and generated to source in reverse.</li>
     * <li>1 will cache source words to generated words in table, and won't write to reverse.</li>
     * <li>0 won't write to table or reverse.</li>
     * </ul>
     * Defaults to 2, writing to both table and reverse.
     */
    public int cacheLevel = 2;

    /**
     * Value that alters which words are generated: it is added to the fixed seed used for the affixes in the
     * constructor, and lookup() XORs it into each word's hash before seeding the generator.
     */
    public final long shift;

    /**
     * Constructs a NaturalLanguageCipher that generates its text with {@code FakeLanguageGen.ENGLISH}; delegates to
     * the constructor that takes a FakeLanguageGen, which in turn uses a shift of 0.
     */
    public NaturalLanguageCipher()
    {
        this(FakeLanguageGen.ENGLISH);
    }

    /**
     * Constructs a NaturalLanguageCipher that will use the given style of language generator to produce its text,
     * with a shift of 0.
     * @param language a FakeLanguageGen, typically one of the static constants in that class or a mix of them.
     */
    public NaturalLanguageCipher(FakeLanguageGen language)
    {
        this(language, 0);
    }

    /**
     * Extra patterns that addPart() passes to {@code language.word(...)} when generating an affix:
     * additionalPrefixChecks (all anchored at the end of the text with {@code $}) for prefixes, and
     * additionalSuffixChecks (all anchored at the start with {@code ^}) for suffixes. Presumably the generator
     * rejects candidate words that match any of them -- confirm against FakeLanguageGen.word().
     * <br>
     * NOTE(review): these arrays are never reassigned in the code reviewed here, yet they are per-instance and so are
     * recompiled for every cipher and are part of its serialized form; consider making them static final.
     */
    private Pattern[] additionalPrefixChecks = {
            //17 is REFlags.UNICODE | REFlags.IGNORE_CASE
            Pattern.compile("(?:(?:[pрρ][hн])|[fd])[aаαiτιuμυνv]$", 17),
            Pattern.compile("[kкκcсςq][uμυνv]$", 17),
            Pattern.compile("[bъыбвβЪЫБ][iτι][tтτг]$", 17),
            Pattern.compile("[sξζzcсς](?:[hн]?)[iτιyуλγУ]$", 17),
            Pattern.compile("[aаαΛ][nи][aаαΛiτιyуλγУuμυνvoоюσο]*$", 17),
            Pattern.compile("[tтτΓг][iτιyуλγУ]+$", 17),
            Pattern.compile("[cсςkкκq][lι]?[iτιyуλγУ]+$", 17),
            Pattern.compile("[aаαΛ][sξζz]$", 17),
            Pattern.compile("[nиfvν][iτιyуλγУaаαΛ]+$", 17),
            Pattern.compile("[pрρ][eезξεЗΣoоюσοiτιyуλγУuμυνv]+$", 17),
            Pattern.compile("[g][hн]?[aаαΛeезξεЗΣyуλγУ]+$", 17),
            Pattern.compile("[wψшщuμυνv](?:[hн]?)[aаαΛeезξεЗΣoоюσοuμυνv]+$", 17),
    }, additionalSuffixChecks = {
            Pattern.compile("^(?:[aаαeезξεЗΣoоюσοuμυ]*)(?:[nи]+)[tтτΓгdgkкκcсςq]", 17),
            Pattern.compile("^(?:[aаαeезξεЗΣoоюσοuμυ]+)(?:[nи]*)[tтτΓгdgkкκcсςq]", 17),
            Pattern.compile("^(?:[iτιyуλγУaаαΛ]*)[gj]", 17),
            Pattern.compile("^[nи]..?[Ssξlιζz]", 17),
            Pattern.compile("^[iτιyуλγУaаαΛ][dtтτΓг]", 17),
            Pattern.compile("^[iτιyуλγУaаαΛ][kкκcсςq][kкκcсςq]", 17),
            Pattern.compile("^[uμυ]*[mм]", 17),
    };

    /**
     * Generates the fake-language form of an affix and records it according to cacheLevel.
     * <br>
     * An affix written with a trailing hyphen (such as "re-") is generated with the prefix checks, one written with a
     * leading hyphen (such as "-ing") with the suffix checks, and anything else with no extra checks. While cacheLevel
     * is 2 or more and the generated word is already a key in reverse, generation is retried, with the requested
     * syllable count growing by one on every second attempt, until the internal counter passes 5.
     *
     * @param original  the source-language affix, including its hyphen
     * @param syllables the syllable count requested for the first attempt
     * @return the generated affix
     */
    private String addPart(String original, int syllables)
    {
        final Pattern[] checks;
        if(original.endsWith("-"))
            checks = additionalPrefixChecks;
        else if(original.startsWith("-"))
            checks = additionalSuffixChecks;
        else
            checks = null;

        // Counts in half-syllables, so the syllable count actually requested rises once per two attempts.
        int halfSteps = syllables << 1;
        String generated;
        for(;;)
        {
            generated = language.word(rng, false, halfSteps >> 1, checks);
            // Without the reverse map there is nothing to compare against, so the first result is final.
            if(cacheLevel < 2)
                break;
            if(++halfSteps > 5)
                break;
            if(!reverse.containsKey(generated))
                break;
        }

        if(cacheLevel == 2)
            reverse.put(generated, original);
        if(cacheLevel == 1 || cacheLevel == 2)
            table.put(original, generated);
        return generated;
    }
    /**
     * Constructs a NaturalLanguageCipher that will use the given style of language generator to produce its text.
     * Works on a copy of the given language, seeds the internal generator from a fixed constant plus shift, and then
     * generates every suffix and prefix with addPart() in a fixed order, so the affixes depend only on language and
     * shift. The table and reverse maps are emptied again afterwards, so they start out with no entries.
     * @param language a FakeLanguageGen, typically one of the static constants in that class or a mix of them.
     * @param shift any long; this will be used to alter the specific words generated unless it is 0
     */
    public NaturalLanguageCipher(FakeLanguageGen language, long shift)
    {
        this.shift = shift;
        this.language = language.copy();
        rs = new SemiRandom(0xDF58476D1CE4E5B9L + shift);
        rng = new RNG(rs);
        table = new HashMap<>(512);
        reverse = new HashMap<>(512);
        // The order of the calls below must not change: each one advances rng, so reordering them (or the
        // nextIntHasty() draws in their arguments) would produce different affixes for the same language and shift.
        pluralSuffix = addPart("-s", 0);
        nounySuffix = addPart("-y", 0);
        nounicSuffix = addPart("-ic", 0);
        nouniveSuffix = addPart("-ive", 0);
        nounistSuffix = addPart("-ist", 0);
        nounismSuffix = addPart("-ism", 1 + (rng.nextIntHasty(3) >> 1));
        nounenSuffix = addPart("-en", 0);
        verbedSuffix = addPart("-ed", 0);
        verberSuffix = addPart("-er", 0);
        verbingSuffix = addPart("-ing", 1);
        verbmentSuffix = addPart("-ment", 0);
        verbationSuffix = addPart("-ation", rng.nextIntHasty(2) + 1);
        adjectivelySuffix = addPart("-ly", 0);
        adjectivestSuffix = addPart("-est", 0);
        reverbPrefix = addPart("re-", 0);
        ennounPrefix = addPart("en-", 0);
        preverbPrefix = addPart("pre-", 0);
        proverbPrefix = addPart("pro-", 0);
        postverbPrefix = addPart("post-", 0);
        antiverbPrefix = addPart("anti-", 2 - (rng.nextIntHasty(3) >> 1));
        disnounPrefix = addPart("dis-", 0);
        // addPart() cached the affixes while it was avoiding duplicates; drop those entries again.
        table.clear();
        reverse.clear();
    }

    /**
     * Copies another NaturalLanguageCipher and constructs this one with the information in the other. Copies the
     * dictionary of known words (table and reverse, as new maps), the FakeLanguageGen (via its copy() method), the
     * shift, the cacheLevel and all generated affixes. The internal random state is not copied; it starts from a
     * fresh SemiRandom, which is fine because lookup() sets the state from each word's hash before generating.
     * @param other a previously-constructed NaturalLanguageCipher.
     */
    public NaturalLanguageCipher(NaturalLanguageCipher other)
    {
        language = other.language.copy();
        rs = new SemiRandom();
        rng = new RNG(rs);
        table = new HashMap<>(other.table);
        reverse = new HashMap<>(other.reverse);
        shift = other.shift;
        // Previously left at the default of 2, so a copy of a level 0 or 1 cipher started caching (and checking
        // for duplicate generated words) even though the original had been configured not to.
        cacheLevel = other.cacheLevel;
        pluralSuffix = other.pluralSuffix;
        nounySuffix = other.nounySuffix;
        nounicSuffix = other.nounicSuffix;
        nouniveSuffix = other.nouniveSuffix;
        nounistSuffix = other.nounistSuffix;
        nounismSuffix = other.nounismSuffix;
        nounenSuffix = other.nounenSuffix;
        verbedSuffix = other.verbedSuffix;
        verberSuffix = other.verberSuffix;
        verbingSuffix = other.verbingSuffix;
        verbmentSuffix = other.verbmentSuffix;
        verbationSuffix = other.verbationSuffix;
        adjectivelySuffix = other.adjectivelySuffix;
        adjectivestSuffix = other.adjectivestSuffix;
        reverbPrefix = other.reverbPrefix;
        ennounPrefix = other.ennounPrefix;
        preverbPrefix = other.preverbPrefix;
        postverbPrefix = other.postverbPrefix;
        proverbPrefix = other.proverbPrefix;
        antiverbPrefix = other.antiverbPrefix;
        disnounPrefix = other.disnounPrefix;
    }

    /**
     * Computes a 64-bit hash of a lower-case word from how it is spelled, so that similarly spelled words tend to get
     * similar hashes. Letters are read from the bigrams table one or two at a time; each table hit contributes a
     * 6-bit code. At most 10 scan steps are taken. The codes fill the low 60 bits of the result (repeated to fill the
     * space when the word is short), and the top 4 bits hold the number of vowel runs that were seen, which is at
     * least 1 and is masked to 4 bits.
     * <br>
     * Only the letters 'a' to 'z' are hashed. Any other character (upper case, digits, hyphens, accented or
     * non-Latin letters) is skipped, as is a letter that is directly followed by such a character. Earlier versions
     * only skipped characters above 'z' and failed with an ArrayIndexOutOfBoundsException, or read an unrelated table
     * entry, for characters below 'a'. Results for ranges that contain only 'a' to 'z' are unchanged.
     *
     * @param data  the characters to hash; may be null
     * @param start index of the first character to hash, inclusive; values below 0 are treated as 0
     * @param end   index to stop before, exclusive; values above {@code data.length} are treated as that length
     * @return the hash, or 0 if data is null or the range is empty
     */
    public static long phoneticHash64(char[] data, int start, int end)
    {
        long h = 0;
        if(data == null || end <= start || start >= data.length)
            return h;
        // Clamp the range to the array so an out-of-range bound cannot fail part-way through the scan.
        if(start < 0)
            start = 0;
        if(end > data.length)
            end = data.length;
        if(end <= start)
            return h;
        int current, next, count = 0, used = 0;
        long got, vc = 0;
        boolean vowelStream = false;
        for (int i = start; i < end && count < 10; i++, count++) {
            current = data[i] - 'a';
            // bigrams has rows only for 'a'..'z'
            if(current < 0 || current >= 26)
                continue;
            if(i + 1 < end) {
                next = data[i + 1] - 'a';
                if(next < 0 || next >= 26)
                    continue;
            }
            else
                next = 26; // column 26 stands for "no following letter"
            got = bigrams[27 * current + next];
            if(got == 0)
                continue;
            h <<= 6;
            // the lowest bit of the entry says whether the following letter is consumed as well
            i += got & 1L;
            h |= got >>= 1;
            used++;
            // codes 1..11 are vowels; vc counts each completed run of vowels
            if(count == 0) {
                vowelStream = got > 0 && got < 12;
            }else if (vowelStream != (got > 0 && got < 12)) {
                vc+= vowelStream ? 1 : 0;
                vowelStream = !vowelStream;
            }
        }
        // a run of vowels that reaches the end of the scan is counted too
        vc += vowelStream ? 1 : 0;

        if(used > 0 && count > 0) {
            // repeat the collected codes at higher positions so short words still fill the 60 bits
            got = h;
            for (; count < 11; count += used) {
                h |= got << (6 * count);
            }
            h &= 0xFFFFFFFFFFFFFFFL; // 60 bits
        }
        vc = Math.max(1L, vc);
        return h | ((vc & 15L) << 60);
    }

    /**
     * Attaches this cipher's generated prefixes and suffixes to a generated root word and then tidies up the result.
     * Prefixes are applied so that, from the outside in, the order is pre, post, pro, anti, re, dis, en; suffixes are
     * appended in the order en, er, ment, ation, ive, ism, ist, ic, est, ed, ing, y, ly, plural. Finally every
     * replacement in conjugationProc is applied in order.
     *
     * @param data the generated root word; if null, an empty String is returned
     * @param mods a combination of the affix bit flags (PLURAL, VERBING, and so on)
     * @return the root with the selected affixes attached and cleaned up
     */
    String conjugate(String data, long mods)
    {
        if(data == null)
            return "";
        StringBuilder word = new StringBuilder();

        // prefixes, outermost first
        if((mods & PREVERB) != 0)
            word.append(preverbPrefix);
        if((mods & POSTVERB) != 0)
            word.append(postverbPrefix);
        if((mods & PROVERB) != 0)
            word.append(proverbPrefix);
        if((mods & ANTIVERB) != 0)
            word.append(antiverbPrefix);
        if((mods & REVERB) != 0)
            word.append(reverbPrefix);
        if((mods & DISNOUN) != 0)
            word.append(disnounPrefix);
        if((mods & ENNOUN) != 0)
            word.append(ennounPrefix);

        word.append(data);

        // suffixes, innermost first
        if((mods & NOUNEN) != 0)
            word.append(nounenSuffix);
        if((mods & VERBER) != 0)
            word.append(verberSuffix);
        if((mods & VERBMENT) != 0)
            word.append(verbmentSuffix);
        if((mods & VERBATION) != 0)
            word.append(verbationSuffix);
        if((mods & NOUNIVE) != 0)
            word.append(nouniveSuffix);
        if((mods & NOUNISM) != 0)
            word.append(nounismSuffix);
        if((mods & NOUNIST) != 0)
            word.append(nounistSuffix);
        if((mods & NOUNIC) != 0)
            word.append(nounicSuffix);
        if((mods & ADJECTIVEST) != 0)
            word.append(adjectivestSuffix);
        if((mods & VERBED) != 0)
            word.append(verbedSuffix);
        if((mods & VERBING) != 0)
            word.append(verbingSuffix);
        if((mods & NOUNY) != 0)
            word.append(nounySuffix);
        if((mods & ADJECTIVELY) != 0)
            word.append(adjectivelySuffix);
        if((mods & PLURAL) != 0)
            word.append(pluralSuffix);

        String result = word.toString();
        for(Replacer cleanup : conjugationProc)
        {
            result = cleanup.replace(result);
        }
        return result;
    }
    /**
     * Given a word in the source language (usually English), looks up an existing translation for that word, or if none
     * exists, generates a new word based on the hash of the source word and this LanguageCipher's FakeLanguageGen.
     * @param source a word in the source language
     * @return a word in the fake language
     */
    public String lookup(String source)
    {
        if(source == null || source.isEmpty())
            return "";
        String s2 = source.toLowerCase(), ciphered;
        if(table.containsKey(s2))
            ciphered = table.get(s2);
        else {
            CharSequence altered = FakeLanguageGen.removeAccents(s2);
            for (int i = 0; i < preproc.length; i++) {
                altered = preproc[i].replace(altered);
            }

            char[] sc = ((String)altered).toCharArray(), scO = s2.toCharArray();
            int start = 0, end = sc.length, endO = scO.length;
            long mods = 0;
            /*
            boolean plural = false, verbing = false, verbed = false, verber = false, verbation = false,
                    verbment = false, nouny = false, nounen = false, nounist = false, nounism = false,
                    nounic = false, nounive = false, adjectively = false, adjectivest = false,
                    //prefixes
                    reverb = false, ennoun = false, preverb = false, postverb = false,
                    proverb = false, antiverb = false, disnoun = false;
            */
            if(end >= 4 && endO >= 4 && sc[end-1]=='s')
            {
                mods |= PLURAL;
                end--;
                endO--;
                if(scO[endO-1] == 'e')
                {
                    end--;
                    endO--;
                }
            }
            if(end >= 5 && endO >= 5 && sc[end - 2] == 'l' && sc[end-1] == 'y')
            {
                mods |= ADJECTIVELY;
                end -= 2;
                endO -= 2;
            }
            /*
            else if(end >= 4 && endO >= 4 && scO[endO-1] == 'y')
            {
                mods |= NOUNY;
                end--;
                endO--;
            }*/
            if(end >= 5 && endO >= 5 && scO[endO-3] == 'i' && scO[endO-2] == 'n' && scO[endO-1]=='g')
            {
                mods |= VERBING;
                end-=3;
                endO -= 3;
            }
            if(end >= 4 && endO >= 4 && (scO[endO-3] == 'a' || scO[endO-3] == 'o') && scO[endO-2] == 'd' && scO[endO-1]=='e')
            {
                mods |= VERBED;
                end-=3;
                endO-=3;
            }
            else if(end >= 4 && endO >= 4 && scO[endO-2] == 'e' && scO[endO-1] == 'd')
            {
                mods |= VERBED;
                end-=2;
                endO-=2;
            }
            else if(end >= 5 && endO >= 5 && sc[end - 3] == 'e' && sc[end - 2] == 's' && sc[end-1] == 't')
            {
                mods |= ADJECTIVEST;
                end -= 3;
                endO -= 3;
            }
            if(end >= 5 && endO >= 5 && scO[endO-2] == 'i' && scO[endO-1] == 'c')
            {
                mods |=NOUNIC;
                end -= 2;
                endO-=2;
            }
            else if(end >= 6 && endO >= 6 && scO[endO-3] == 'i' && scO[endO-2] == 'v' && scO[endO-1] == 'e') {
                mods |= NOUNIVE;
                end -= 3;
                endO -= 3;
                if (end >= 4 && endO >= 4 && (scO[endO - 2] == 'a' || scO[endO - 2] == 'i') && scO[endO - 1] == 't') {
                    end -= 2;
                    endO -= 2;
                }
            }
            if(end >= 5 && sc[end-3] == 'i' && sc[end-2] == 's' && sc[end-1] == 't')
            {
                mods |=NOUNIST;
                end -= 3;
                endO-=3;
                if(endO >= 5 && scO[endO-2] == 'i' && scO[endO-1] == 'v')
                {
                    mods |= NOUNIVE;
                    end-=2;
                    endO-=2;
                }
            }
            if(end >= 5 && sc[end-3] == 'i' && sc[end-2] == 's' && sc[end-1] == 'm')
            {
                mods |=NOUNISM;
                end -= 3;
                endO -= 3;
                if(endO >= 5 && scO[endO-2] == 'i' && scO[endO-1] == 'v')
                {
                    mods |= NOUNIVE;
                    end-=2;
                    endO-=2;
                }

            }
            if(end >= 8 && endO >= 8 && (scO[endO - 4] == 't' || scO[endO - 4] == 's' || scO[endO - 4] == 'c') && scO[endO-3] == 'i' && scO[endO-2] == 'o' && scO[endO-1]=='n')
            {
                mods |=VERBATION;
                end-=4;
                endO -= 4;
            }
            if(end >= 6 && sc[end-4] == 'm' && sc[end-3] == 'e' && sc[end-2] == 'n' && sc[end-1] == 't')
            {
                mods |=VERBMENT;
                end-=4;
                endO -= 4;
            }
            if(end >= 7 && endO >= 7 && scO[endO-3] == 'i' && scO[endO-2] == 'a' && scO[endO-1]=='n')
            {
                mods |=VERBER;
                end-=3;
                endO-=3;
            }
            else if(end >= 4 && endO >= 4 && (sc[end-2] == 'e' || sc[end-2] == 'o') && sc[end-1] == 'r')
            {
                mods |= VERBER;
                end-=2;
            }
            if(end >= 4 && sc[end-2] == 'e' && sc[end-1]=='n')
            {
                mods |=NOUNEN;
                end-=2;
            }
            if(end - start >= 5 && sc[start] == 'p' && sc[start+1] == 'r' && sc[start+2] == 'e')
            {
                mods |=PREVERB;
                start += 3;
            }
            if(end - start >= 6 && sc[start] == 'p' && sc[start+1] == 'o' && sc[start+2] == 's' && sc[start+3] == 't')
            {
                mods |= POSTVERB;
                start += 4;
            }

            if(end - start >= 5 && sc[start] == 'p' && sc[start+1] == 'r' && sc[start+2] == 'o')
            {
                mods |= PROVERB;
                start += 3;
            }
            else {
                if (end - start >= 6 && sc[start] == 'a' && sc[start + 1] == 'n' && sc[start + 2] == 't' && sc[start + 3] == 'i') {
                    mods |= ANTIVERB;
                    start += 4;
                }
                else if (end - start >= 8 && sc[start] == 'c' && sc[start + 1] == 'o' && sc[start + 2] == 'n' && sc[start + 3] == 't' && sc[start + 4] == 'r' && sc[start + 5] == 'a') {
                    mods |= ANTIVERB;
                    start += 6;
                }
            }
            if(end - start >= 4 && sc[start] == 'r' && sc[start+1] == 'e')
            {
                mods |= REVERB;
                start += 2;
            }
            if(end - start >= 5 && sc[start] == 'd' && sc[start+1] == 'i' && sc[start+2] == 's')
            {
                mods |= DISNOUN;
                start += 3;
            }
            if(end - start >= 4 && sc[start] == 'u' && sc[start+1] == 'n')
            {
                mods |= ANTIVERB;
                start += 2;
            }
            if(end - start >= 4 && (sc[start] == 'e' || sc[start] == 'i') && sc[start+1] == 'n')
            {
                mods |= ENNOUN;
                start += 2;
            }
            long h = phoneticHash64(sc, start, end) ^ (shift & 0xFFFFFFFFFFFFFFFL) ^ (shift >>> 14), frustration = 0;
            //System.out.print(source + ":" + ((h >>> 60) & 7) + ":" + StringKit.hex(h) + ", ");
            rs.setState(h);
            do {
                ciphered = conjugate(language.word(rng, false, (int) Math.ceil((h >>> 60) / (0.9 + 0.5 * rng.nextDouble()))), mods);
                if(cacheLevel < 2 || frustration++ > 9)
                    break;
            }while (reverse.containsKey(ciphered));
            switch (cacheLevel) {
                case 2: reverse.put(ciphered, s2);
                case 1: table.put(s2, ciphered);
            }
        }
        char[] chars = ciphered.toCharArray();
        // Lu is the upper case letter category in Unicode; we're using regexodus for this because GWT won't
        // respect unicode case data on its own (see
        // https://github.com/gwtproject/gwt/blob/2.6.1/user/super/com/google/gwt/emul/java/lang/Character.java#L54-L61
        // ). We are using GWT to capitalize, though, which appears to work in practice and the docs agree.
        if(Category.Lu.contains(source.charAt(0)))
            chars[0] = Character.toUpperCase(chars[0]);
        if(source.length() > 1 && Category.Lu.contains(source.charAt(1))) {
            for (int i = 1; i < chars.length; i++) {
                chars[i] = Character.toUpperCase(chars[i]);
            }
        }
        return new String(chars);
    }

    /**
     * Given a String that should contain words in the source language, this translates each word to the fake
     * language, using existing translations if previous calls to cipher() or lookup() had translated that word.
     * Any '-' characters in text are replaced with en dashes (U+2013) before the words are translated.
     * @param text a String that contains words in the source language
     * @return a String of the translated text.
     */
    public String cipher(String text)
    {
        // Each word matched by wordMatch is handed to CipherSubstitution, which swaps in lookup()'s result.
        final Replacer substituter = wordMatch.replacer(new CipherSubstitution());
        // Every ASCII hyphen becomes an en dash (U+2013) before the replacer sees the text.
        final String dashed = text.replace('-', '\u2013');
        return substituter.replace(dashed);
    }

    /**
     * Substitution used by cipher(): every matched source-language word is replaced by whatever lookup() returns
     * for the full text of that match.
     */
    private class CipherSubstitution implements Substitution
    {
        @Override
        public void appendSubstitution(MatchResult match, TextBuffer dest) {
            // group(0) is the entire matched word
            final String sourceWord = match.group(0);
            final String fakeWord = lookup(sourceWord);
            dest.append(fakeWord);
        }
    }
    private class DecipherSubstition implements Substitution
    {
        private final Map<String, String> vocabulary;
        DecipherSubstition(final Map<String, String> vocabulary)
        {
            this.vocabulary = vocabulary;
        }
        public void appendSubstitution(MatchResult match, TextBuffer dest) {
            String translated = match.group(0);
            if(translated == null) {
                return;
            }
            translated = translated.toLowerCase();
            translated = vocabulary.get(translated);
            if(translated == null) {
                dest.append(match.group(0));
                return;
            }
            char[] chars = translated.toCharArray();
            if(Category.Lu.contains(match.charAt(0)))
                chars[0] = Character.toUpperCase(chars[0]);
            if(match.length() > 1 && Category.Lu.contains(match.charAt(1))) {
                for (int i = 1; i < chars.length; i++) {
                    chars[i] = Character.toUpperCase(chars[i]);
                }
            }
            dest.append(chars, 0, chars.length);
        }
    }

    /**
     * Deciphers words in an already-ciphered text with a given String-to-String Map for a vocabulary. This Map could be
     * the reverse field of this NaturalLanguageCipher, which would give a complete translation, or it could be a
     * partially-complete or partially-correct vocabulary of words the player has learned. The vocabulary should
     * typically have entries added using the quick and accurate learnTranslations() method, unless you want to add
     * translations one word at a time (then use learnTranslation() ) or you want incorrect or biased translations added
     * (then use mismatchTranslation() ). You don't need to use one of these methods if you just pass the whole of the
     * reverse field as a vocabulary, which will translate every word. If making your own vocabulary without the learn
     * methods, the keys need to be lower-case because while regex Patterns can be case-insensitive, Map lookups cannot.
     * @param text a text in the fake language
     * @param vocabulary a Map of Strings in the fake language to Strings in the source language
     * @return a deciphered version of text that has any words as keys in vocabulary translated to the source language
     */
    public String decipher(String text, final Map<String, String> vocabulary)
    {
        // With no vocabulary there is nothing to translate. Building the alternation below from zero keys would
        // yield the malformed group "(?)", so hand the text back untouched instead of compiling that pattern.
        if(vocabulary.isEmpty())
            return text;

        // One non-capturing alternation over every known fake-language word, each quoted literally with \Q...\E.
        StringBuilder alternation = new StringBuilder(128);
        alternation.append("(?:");
        boolean first = true;
        for(String k : vocabulary.keySet())
        {
            // separator goes between alternatives only, so nothing has to be trimmed off afterwards
            if(first)
                first = false;
            else
                alternation.append('|');
            alternation.append("(?:\\Q");
            alternation.append(k);
            alternation.append("\\E)");
        }
        alternation.append(')');

        // The surrounding lookbehind/lookahead keep a vocabulary word from matching when it is directly adjacent to
        // another letter, '&' or '-', i.e. when it is only part of a longer word. The "ui" flag string is passed
        // through to regexodus unchanged (presumably Unicode + ignore-case; confirm against regexodus docs).
        Pattern pat = Pattern.compile("(?<![\\pL\\&-])(?=[\\pL\\&-])" + alternation + "(?![\\pL\\&-])", "ui");

        // DecipherSubstition performs the actual vocabulary lookup and re-capitalization for each match.
        Replacer rep = pat.replacer(new DecipherSubstition(vocabulary));
        return rep.replace(text);
    }

    /**
     * Adds a translation pair to vocabulary so it can be used in decipher, giving a correct translation for sourceWord.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. Can be used to correct a mismatched
     * translation added to vocabulary with mismatchTranslation.
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param sourceWord a word in the source language, typically English; the meaning will be "learned" for decipher
     * @return this, for chaining
     */
    public NaturalLanguageCipher learnTranslation(Map<String, String> vocabulary, String sourceWord)
    {
        // The key is the ciphered form of the lower-cased word; the value keeps sourceWord exactly as given.
        final String cipheredKey = lookup(sourceWord.toLowerCase());
        vocabulary.put(cipheredKey, sourceWord);
        return this;
    }

    /**
     * Adds translation pairs to vocabulary so it can be used in decipher, giving a correct translation for sourceWords.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. Can be used to correct mismatched
     * translations added to vocabulary with mismatchTranslation.
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param sourceWords an array or vararg of words in the source language, typically English; their meanings will
     *                    be "learned" for decipher
     * @return this, for chaining
     */
    public NaturalLanguageCipher learnTranslations(Map<String, String> vocabulary, String... sourceWords)
    {
        // Learn the words one at a time, in the order they were passed.
        for (String word : sourceWords)
        {
            learnTranslation(vocabulary, word);
        }
        return this;
    }

    /**
     * Adds translation pairs to vocabulary so it can be used in decipher, giving a correct translation for sourceWords.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. Can be used to correct mismatched
     * translations added to vocabulary with mismatchTranslation.
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param sourceWords an Iterable of words in the source language, typically English; their meanings will be
     *                   "learned" for decipher
     * @return this, for chaining
     */
    public NaturalLanguageCipher learnTranslations(Map<String, String> vocabulary, Iterable<String> sourceWords)
    {
        // Each word is delegated to learnTranslation() in the Iterable's own iteration order.
        for (String word : sourceWords)
        {
            learnTranslation(vocabulary, word);
        }
        return this;
    }

    /**
     * Adds a translation pair to vocabulary so it can be used in decipher, giving a typically-incorrect translation for
     * correctWord where it provides mismatchWord instead when the ciphered version of correctWord appears.
     * Modifies vocabulary in-place and returns this NaturalLanguageCipher for chaining. You can use learnTranslation() to
     * correct a mismatched vocabulary word, or mismatchTranslation() again to change the mismatched word.
     * @param vocabulary a Map of String keys to String values that will be modified in-place
     * @param correctWord a word in the source language, typically English; where the ciphered version of this
     *                    appears and the text is deciphered, mismatchWord will be used instead
     * @param mismatchWord a String that will be used for deciphering in place of the translation of correctWord.
     * @return this, for chaining
     */
    public NaturalLanguageCipher mismatchTranslation(Map<String, String> vocabulary, String correctWord, String mismatchWord)
    {
        // Key on the ciphered form of the lower-cased correct word, but store the caller's substitute as the value.
        final String cipheredKey = lookup(correctWord.toLowerCase());
        vocabulary.put(cipheredKey, mismatchWord);
        return this;
    }

    public int getCacheLevel() {
        // Current caching level; setCacheLevel() keeps any value it stores within 0..2.
        return this.cacheLevel;
    }

    public void setCacheLevel(int cacheLevel) {
        // Clamp the requested level into the range 0..2: anything at or above 2 becomes 2, anything at or
        // below 0 becomes 0, and 1 is stored as-is.
        this.cacheLevel = Math.max(0, Math.min(2, cacheLevel));
    }
}