HebrewUtils.java example

Explorer
step-master
package com.tyndalehouse.step.core.utils.language;

import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.tyndalehouse.step.core.utils.language.hebrew.ConsonantType;
import com.tyndalehouse.step.core.utils.language.hebrew.HebrewLetter;
import com.tyndalehouse.step.core.utils.language.hebrew.HebrewLetterType;
import com.tyndalehouse.step.core.utils.language.hebrew.SoundingType;
import com.tyndalehouse.step.core.utils.language.hebrew.VowelLengthType;
import com.tyndalehouse.step.core.utils.language.hebrew.VowelStressType;
import com.tyndalehouse.step.core.utils.language.transliteration.StringToStringRule;
import com.tyndalehouse.step.core.utils.language.transliteration.TransliterationRule;

/**
 * Utilities for doing Hebrew transliteration
 *
 * @author chrisburrell
 */
public final class HebrewUtils {
    private static final Logger LOGGER = LoggerFactory.getLogger(HebrewUtils.class);

    /** Separator written into the transliteration between syllables and between doubled letters. */
    public static final char HYPHEN = '.';

    /** Character written into the transliteration in place of a Hebrew maqaf. */
    public static final char MAQAF_HYPHEN = '-';

    /**
     * Lazily created list of transliteration rules. The field is read without holding a lock (see
     * ensureTransliterationRules) and written inside a synchronized method, so it has to be volatile
     * for a reader to be guaranteed a fully populated list once it observes a non-null reference.
     */
    private static volatile List<TransliterationRule> transliterationRules;

    // Latin characters that removeHebrewTranslitMarkUpForIndexing strips or reduces to plain ASCII
    private static final char CLOSED_QUOTE = '\u2019';
    private static final char OPEN_QUOTE = '\u2018';
    private static final char K_WITH_LINE = '\u1e35';
    private static final char T_WITH_DOT = '\u1e6d';
    private static final char H_WITH_DOT = '\u1e25';
    private static final char B_WITH_LINE = '\u1E07';

    // Transliterated as 'o'; sits outside the contiguous run of points below
    private static final char QAMATS_QATAN = 0x5C7;

    // Hebrew points, U+05B0 to U+05BD
    private static final char SHEVA = 0x05B0;
    private static final char HATAF_SEGOL = 0x5B1;
    private static final char HATAF_PATAH = 0x5B2;
    private static final char HATAF_QAMATS = 0x5B3;
    private static final char HIRIQ = 0x5B4;
    private static final char TSERE = 0x5B5;
    private static final char SEGOL = 0x5B6;
    private static final char PATAH = 0x5B7;
    private static final char QAMATS = 0x5B8;
    private static final char HOLAM = 0x5B9;
    private static final char HOLAM_HASER = 0x5BA;
    private static final char QUBUTS = 0x5BB;
    private static final char DAGESH = 0x5BC;
    private static final char METEG = 0x05BD;

    // Other marks; ETNAHTA..ZINOR is the range processNonDagesh classifies as accents
    private static final char SHIN_DOT = 0x05C1;
    private static final int ETNAHTA = 0x0591;
    private static final char GERESH = 0x059C;
    private static final char GERESH_MUQDAM = 0x059D;
    private static final int ZINOR = 0x05AE;

    /**
     * Distance between a "letter with dagesh" presentation form and its plain letter. U+FB44 is PE
     * with dagesh and plain PE is U+05E4 (U+05E3 is FINAL PE), so the offset is 0xF560; with it
     * U+FB31 (bet with dagesh) maps to U+05D1 (bet).
     */
    private static final int DAGESH_GAP = 0xFB44 - 0x05E4;

    // Consonants, U+05D0 to U+05EA
    private static final int ALEPH = 0x05D0;
    // Upper bound of the character range that unPoint inspects
    private static final char ALEPH_LAMED = 0xFB4F;
    private static final char BET = 0x5D1;
    private static final char GIMEL = 0x5D2;
    private static final char DALET = 0x5D3;
    private static final int HE = 0x5D4;
    private static final char VAV = 0x5D5;
    private static final int ZAYIN = 0x5D6;
    private static final char HET = 0x5D7;
    private static final int TET = 0x5D8;
    private static final char YOD = 0x5D9;
    private static final char FINAL_KAF = 0x5DA;
    private static final char KAF = 0x5DB;
    private static final char LAMED = 0x5DC;
    private static final int FINAL_MEM = 0x5DD;
    private static final int MEM = 0x5DE;
    private static final int FINAL_NUN = 0x5DF;
    private static final int NUN = 0x5E0;
    private static final int SAMEKH = 0x5E1;
    private static final int AYIN = 0x5E2;
    private static final int FINAL_PE = 0x5E3;
    private static final char PE = 0x5E4;
    private static final int FINAL_TSADI = 0x5E5;
    private static final int TSADI = 0x5E6;
    private static final char QOF = 0x5E7;
    private static final char RESH = 0x5E8;
    // Rendered as "s", or as "sh" when the letter has been flagged as a shin
    private static final int SIN = 0x5E9;
    private static final char TAV = 0x5EA;
    private static final char MAQAF = 0x05BE;
    // Lower bound of the presentation-form range that unPoint rewrites
    private static final char HEBREW_COMBINED_RANGE_START = 0xFB1D;

    /**
     * Prevents instantiation: the constructor is private and deliberately empty.
     */
    private HebrewUtils() {
        // do nothing
    }

    /**
     * Decides whether a word is Hebrew by looking at its first character only.
     *
     * @param rawForm the raw form of the word, may be null or empty
     * @return true if the first character is a Hebrew character; false for null or empty input
     */
    public static boolean isHebrewText(final String rawForm) {
        // nothing to inspect: treat as "not Hebrew" rather than failing on charAt(0)
        if (rawForm == null || rawForm.isEmpty()) {
            return false;
        }
        return isHebrewCharacter(rawForm.charAt(0));
    }

    /**
     * Tests a single character against the two Unicode ranges that hold Hebrew: the Hebrew block
     * (U+0591 to U+05FF) and the Hebrew presentation forms (U+FB1D to U+FB4F). The presentation
     * range starts at U+FB1D so that the Armenian ligatures just below it are not reported as Hebrew.
     *
     * @param firstCharacter the character that we are testing
     * @return true to indicate we are dealing with the Hebrew set of characters
     */
    public static boolean isHebrewCharacter(final int firstCharacter) {
        final boolean inHebrewBlock = firstCharacter > 0x590 && firstCharacter < 0x600;
        final boolean inPresentationForms = firstCharacter >= 0xFB1D && firstCharacter < 0xFB50;
        return inHebrewBlock || inPresentationForms;
    }


    /**
     * Removes the pointing from a word, vowels included. Shorthand for the two-argument overload
     * with {@code unpointVowels} set to true.
     *
     * @param word text with pointing
     * @return text without pointing
     */
    public static String unPoint(final String word) {
        final boolean alsoStripVowels = true;
        return unPoint(word, alsoStripVowels);
    }
    
    /**
     * Removes pointing from a word. Characters outside the range ETNAHTA..ALEPH_LAMED are copied
     * unchanged. Inside that range, every character below a threshold is dropped: the threshold is
     * ALEPH when vowels are to be removed as well, and SHEVA otherwise. Characters from
     * HEBREW_COMBINED_RANGE_START up to (but excluding) ALEPH_LAMED are shifted down by DAGESH_GAP.
     *
     * @param word text with pointing
     * @param unpointVowels true to indicate we also want to exclude vowels
     * @return text without pointing
     */
    public static String unPoint(final String word, boolean unpointVowels) {
        final char firstKeptChar = unpointVowels ? ALEPH : SHEVA;

        final StringBuilder unpointed = new StringBuilder(word.length());
        for (final char c : word.toCharArray()) {
            final boolean insideHebrewRange = c >= ETNAHTA && c <= ALEPH_LAMED;
            if (insideHebrewRange && c < firstKeptChar) {
                // pointing: leave it out of the result
                continue;
            }

            if (c >= HEBREW_COMBINED_RANGE_START && c < ALEPH_LAMED) {
                // NOTE(review): a constant shift only looks meaningful for the "letter with dagesh"
                // forms; confirm what the other code points in this range should map to
                unpointed.append((char) (c - DAGESH_GAP));
            } else {
                unpointed.append(c);
            }
        }
        return unpointed.toString();
    }

    /**
     * Prepares a transliteration for indexing: mark-up characters (dot, hyphen, apostrophe, asterisk
     * and the two curly quotes) are dropped, letters carrying a diacritic are reduced to their plain
     * ASCII letter, and a letter that repeats the previously kept letter is dropped.
     * <p>
     * The repeat check compares the characters as they appear in the input, i.e. before any
     * diacritic is stripped, and dropped mark-up does not interrupt a run of repeated letters.
     *
     * @param stepTransliteration the transliteration to be cleaned up
     * @return the new transliteration
     */
    public static String removeHebrewTranslitMarkUpForIndexing(final String stepTransliteration) {
        final StringBuilder cleaned = new StringBuilder(stepTransliteration.length());

        // the last character that survived the mark-up filter, in its input form
        char previousKept = 0x0;
        for (final char original : stepTransliteration.toCharArray()) {
            final char replacement;
            switch (original) {
                case '.':
                case '-':
                case '\'':
                case '*':
                case CLOSED_QUOTE:
                case OPEN_QUOTE:
                    // mark-up only: skipped without touching previousKept
                    continue;
                case K_WITH_LINE:
                    replacement = 'k';
                    break;
                case T_WITH_DOT:
                    replacement = 't';
                    break;
                case H_WITH_DOT:
                    replacement = 'h';
                    break;
                case B_WITH_LINE:
                    replacement = 'b';
                    break;
                case 'é':
                    replacement = 'e';
                    break;
                default:
                    replacement = original;
                    break;
            }

            if (original != previousKept) {
                cleaned.append(replacement);
                previousKept = original;
            }
        }

        return cleaned.toString();
    }

    /**
     * Returns the Hebrew transliteration rules, building them first if this is the first request.
     *
     * @return gives the hebrew list of transliteration rules
     */
    public static List<TransliterationRule> getTransliterationRules() {
        ensureTransliterationRules();
        final List<TransliterationRule> rules = transliterationRules;
        return rules;
    }

    /**
     * Triggers creation of the transliteration rules when they have not been created yet; does
     * nothing otherwise.
     */
    private static void ensureTransliterationRules() {
        if (transliterationRules == null) {
            createTransliterationRules();
        }
    }

    /**
     * Builds the rule list and publishes it. The method is synchronized so that two threads never
     * build the list at the same time; a thread that arrives after the list has been published
     * returns immediately.
     */
    private static synchronized void createTransliterationRules() {
        // another thread may have finished the job while we were waiting for the lock
        if (transliterationRules != null) {
            return;
        }

        final List<TransliterationRule> created = new ArrayList<TransliterationRule>();
        created.add(new StringToStringRule("b", new String[] {"v"}));
        created.add(new StringToStringRule("v", new String[] {"b", "w"}));
        created.add(new StringToStringRule("w", new String[] {"v"}));
        created.add(new StringToStringRule("h", new String[] {"ch", "chch"}));
        created.add(new StringToStringRule("ch", new String[] {"h", "chch"}));
        created.add(new StringToStringRule("j", new String[] {"y"}));
        created.add(new StringToStringRule("x", new String[] {"h", "ch", "chch"}));
        created.add(new StringToStringRule("+", new String[] {"t"}));
        created.add(new StringToStringRule("$", new String[] {"s"}));
        created.add(new StringToStringRule("s", new String[] {"sh", "ts", "shsh", "tsts"}));
        created.add(new StringToStringRule("sh", new String[] {"shsh"}));
        created.add(new StringToStringRule("gh", new String[] {"g"}));
        created.add(new StringToStringRule("kh", new String[] {"k"}));
        created.add(new StringToStringRule("k", new String[] {"kh"}));
        created.add(new StringToStringRule("dh", new String[] {"d"}));
        created.add(new StringToStringRule("th", new String[] {"t"}));
        created.add(new StringToStringRule("ph", new String[] {"p"}));
        created.add(new StringToStringRule("p", new String[] {"ph"}));
        created.add(new StringToStringRule("tz", new String[] {"ts", "tsts"}));
        created.add(new StringToStringRule("y", new String[] {""}));
        created.add(new StringToStringRule("a", new String[] {""}));
        created.add(new StringToStringRule("e", new String[] {""}));
        created.add(new StringToStringRule("é", new String[] {"e"}));

        // publish only once the list is complete
        transliterationRules = created;
    }

    /**
     * Vowel-based Hebrew transliteration. Every input character is first wrapped in a
     * {@link HebrewLetter} and classified (consonant, vowel, dagesh or other), then three passes
     * annotate the letters before the output string is rendered.
     * <p>
     * If a {@link RuntimeException} is raised anywhere in that process, the state reached so far is
     * logged at error level and the exception is rethrown unchanged.
     *
     * @param inputString the input string
     * @return the transliteration
     */
    public static String transliterateHebrew(final String inputString) {
        final HebrewLetter[] letters = new HebrewLetter[inputString.length()];
        final char[] input = inputString.toCharArray();
        try {
            // classification: one HebrewLetter per input character
            for (int pos = 0; pos < input.length; pos++) {
                final char c = input[pos];
                if (isHebrewConsonant(c)) {
                    processHebrewConsonant(letters, input, pos);
                    processForteDagesh(input, pos, letters);
                } else if (isHebrewVowel(c)) {
                    processHebrewVowel(letters, input, pos);
                } else if (c == DAGESH) {
                    // a dagesh is stored as an untyped letter
                    letters[pos] = new HebrewLetter(c);
                } else {
                    processNonDagesh(letters, input, pos);
                }
            }

            final boolean stressedWord = firstPass(letters, input);
            secondPass(letters, input);
            thirdPass(letters, input);

            final String transliteration = transliterate(input, letters, stressedWord);
            if (LOGGER.isTraceEnabled()) {
                outputAnalysis(letters, inputString, transliteration);
            }
            return transliteration;
            // CHECKSTYLE:OFF
        } catch (final RuntimeException ex) {
            // dump whatever has been worked out so far, one entry per input character
            LOGGER.error("==================================================================");
            for (int pos = 0; pos < input.length; pos++) {
                LOGGER.error("[{}]: c:[{}]", pos, input[pos]);

                final HebrewLetter analysed = letters[pos];
                if (analysed == null) {
                    // the failure happened before this character was classified
                    continue;
                }
                LOGGER.error(
                        "char=[0x{}]\tletter=[{}]\tconsonant=[{}]\tvLength[{}]\tvStress[{}]\tsounding[{}]",
                        Integer.toString(analysed.getC(), 16), analysed.getHebrewLetterType(),
                        analysed.getConsonantType(), analysed.getVowelLengthType(),
                        analysed.getVowelStressType(), analysed.getSoundingType());
            }
            LOGGER.error("Error occured during Hebrew transliteration. Analysis is above", ex);
            throw ex;
        }
        // CHECKSTYLE:ON
    }

    /**
     * Decides whether each Aleph and Ayin is sounded. Starting right after the letter and staying
     * within the same word, the first consonant or vowel encountered settles it: a consonant marks
     * the Aleph/Ayin as silent, a vowel marks it as sounding. If neither is found before the word
     * ends, the letter is left untouched.
     *
     * @param letters our current set of letters
     * @param input   the input set of letters
     */
    private static void thirdPass(final HebrewLetter[] letters, final char[] input) {
        for (int pos = 0; pos < letters.length; pos++) {
            if (input[pos] != AYIN && input[pos] != ALEPH) {
                continue;
            }

            int next = pos + 1;
            while (untilEndOfWord(letters, next)) {
                final HebrewLetter following = letters[next];
                if (following.isConsonant()) {
                    letters[pos].setSoundingType(SoundingType.SILENT);
                    break;
                }
                if (following.isVowel()) {
                    letters[pos].setSoundingType(SoundingType.SOUNDING);
                    break;
                }
                next++;
            }
        }
    }

    /**
     * Wraps the character at {@code position} in a {@link HebrewLetter} typed as a vowel, works out
     * its length and stores it in {@code letters}:
     * <ul>
     * <li>sheva and the three hataf vowels are VERY_SHORT;</li>
     * <li>tsere, qamats, holam haser and holam are LONG;</li>
     * <li>LONG is also assigned when the METEG test below succeeds;</li>
     * <li>everything else is SHORT.</li>
     * </ul>
     *
     * @param letters  the input, strongly typed
     * @param input    the input as a string
     * @param position our current position
     */
    private static void processHebrewVowel(final HebrewLetter[] letters, final char[] input,
                                           final int position) {
        final char c = input[position];
        final HebrewLetter vowel = new HebrewLetter(c);
        vowel.setHebrewLetterType(HebrewLetterType.VOWEL);

        final VowelLengthType length;
        if (isAny(c, SHEVA, HATAF_SEGOL, HATAF_PATAH, HATAF_QAMATS)) {
            length = VowelLengthType.VERY_SHORT;
        } else if (isAny(c, TSERE, QAMATS, HOLAM_HASER, HOLAM)) {
            length = VowelLengthType.LONG;
        } else if ((c == HIRIQ && hasAnyPointing(input, position, true, METEG))
                || hasAnyPointing(input, position, false, METEG)) {
            // NOTE(review): the parentheses only spell out Java's precedence. As grouped, the second
            // METEG lookup applies to every remaining vowel, not just HIRIQ - confirm this is intended.
            length = VowelLengthType.LONG;
        } else {
            length = VowelLengthType.SHORT;
        }

        vowel.setVowelLengthType(length);
        letters[position] = vowel;
    }

    /**
     * Pre-parse handling of a character that is neither a consonant, nor a vowel, nor a dagesh. The
     * character is always stored as a {@link HebrewLetter}; it is additionally typed as an accent
     * when it lies in the range ETNAHTA..ZINOR or is a METEG.
     *
     * @param letters the set of letters found so far. 0->ii-1 have already been processed. ii-end are yet
     *                to be processed
     * @param input   the input, ii indicating how far through we are
     * @param ii      ii the index of how far through we are
     */
    private static void processNonDagesh(final HebrewLetter[] letters, final char[] input, final int ii) {
        final char c = input[ii];
        final boolean isAccent = (c >= ETNAHTA && c <= ZINOR) || c == METEG;

        final HebrewLetter letter = new HebrewLetter(c);
        letters[ii] = letter;
        if (isAccent) {
            letter.setHebrewLetterType(HebrewLetterType.ACCENT);
        }
    }

    /**
     * Pre-parse handling of the consonant at {@code input[ii]}. The Yod handler (tried only from the
     * third character onwards) and then the Vav handler get the first chance to deal with the
     * character; when neither claims it, the character is stored as a plain consonant, and flagged
     * as a shin when it is the SIN letter carrying a SHIN_DOT.
     *
     * @param letters the set of letters found so far. 0->ii-1 have already been processed. ii-end are yet
     *                to be processed
     * @param input   the input, ii indicating how far through we are
     * @param ii      ii the index of how far through we are
     */
    private static void processHebrewConsonant(final HebrewLetter[] letters, final char[] input, final int ii) {
        // CHECKSTYLE:OFF
        if (ii >= 2 && processYod(input, letters, ii)) {
            return;
        }
        if (processVav(input, letters, ii)) {
            return;
        }

        final HebrewLetter consonant = new HebrewLetter(input[ii]);
        consonant.setHebrewLetterType(HebrewLetterType.CONSONANT);
        letters[ii] = consonant;

        final boolean carriesShinDot = input[ii] == SIN && hasAnyPointing(input, ii, true, SHIN_DOT);
        if (carriesShinDot) {
            consonant.setIsShin(true);
        }
        // CHECKSTYLE:ON
    }

    /**
     * Writes, at trace level, the input, its transliteration and one line per analysed letter with
     * the attributes assigned to it.
     *
     * @param letters         the list of letters
     * @param inputString     the string to be transliterated
     * @param transliteration the transliteration of these letters
     */
    private static void outputAnalysis(final HebrewLetter[] letters, final String inputString, final String transliteration) {
        LOGGER.trace("**********************************");
        LOGGER.trace("ANALYSIS FOR: [{}] => [{}]", inputString, transliteration);
        for (int pos = 0; pos < letters.length; pos++) {
            final HebrewLetter analysed = letters[pos];
            LOGGER.trace(
                    "char=[{}],xchar=[0x{}]\tletter=[{}]\tconsonant=[{}]\tvLength[{}]\tvStress[{}]\tsounding[{}]",
                    analysed.getC(),
                    Integer.toString(analysed.getC(), 16),
                    analysed.getHebrewLetterType(),
                    analysed.getConsonantType(),
                    analysed.getVowelLengthType(),
                    analysed.getVowelStressType(),
                    analysed.getSoundingType());
        }
        LOGGER.trace("**********************************");
    }

    /**
     * Stresses vowels and shortens vavs that carry a dagesh. For every accent, the vowel returned by
     * getCloseVowel is marked as stressed, unless the accent is a geresh (or geresh muqdam) and
     * previousConsonant reports position 0 for it. Shin dots are skipped entirely.
     *
     * @param letters the set of strongly typed letters
     * @param input   the actual characters
     * @return word has a stress
     */
    private static boolean firstPass(final HebrewLetter[] letters, final char[] input) {
        boolean stressFound = false;

        for (int pos = 0; pos < letters.length; pos++) {
            final HebrewLetter current = letters[pos];
            if (current.getC() == SHIN_DOT) {
                continue;
            }

            final boolean isAccent = HebrewLetterType.ACCENT.equals(current.getHebrewLetterType());
            if (!isAccent) {
                if (current.getC() == VAV && hasCloseDagesh(input, pos)) {
                    current.setVowelLengthType(VowelLengthType.SHORT);
                }
                continue;
            }

            if (isNotGeresh(input, pos) || previousConsonant(letters, pos) != 0) {
                final HebrewLetter stressedVowel = getCloseVowel(letters, pos);
                stressedVowel.setVowelStressType(VowelStressType.STRESSED);
                stressFound = true;
            }
        }
        return stressFound;
    }

    /**
     * Tells whether the character at the given position is something other than a geresh accent.
     *
     * @param input    our input
     * @param position the current position
     * @return true if the letter is not a GERESH MUQDAM and not a GERESH character
     */
    private static boolean isNotGeresh(final char[] input, final int position) {
        final char c = input[position];
        return !(c == GERESH_MUQDAM || c == GERESH);
    }

    /**
     * Walks backwards from just before the given position until a consonant is met.
     *
     * @param letters         the set of strongly typed letters
     * @param currentPosition the position to start searching before
     * @return position of previous consonant, or -1 when there is none
     */
    private static int previousConsonant(final HebrewLetter[] letters, final int currentPosition) {
        int candidate = currentPosition - 1;
        while (candidate >= 0 && !letters[candidate].isConsonant()) {
            candidate--;
        }
        // either the index of a consonant, or -1 once the start has been passed
        return candidate;
    }

    /**
     * Marks every sheva as either sounding or silent.
     * <p>
     * While scanning left to right the method remembers the positions of the two most recent
     * consonants. A sheva is marked SOUNDING when all three of the following hold, and SILENT
     * otherwise:
     * <ol>
     * <li>isLastHebrewConsonantInWordWithoutVowel is false for the sheva's position;</li>
     * <li>the most recent consonant is at the start of a word, or has a dagesh, or
     * isAfterLongUnstressedVowel holds for it, or hasAnyPointing reports a SHEVA for the consonant
     * before it;</li>
     * <li>it is not the case that isAfterShortUnstressedVowel holds for the most recent consonant
     * while the sheva letter itself reports hasNoDagesh.</li>
     * </ol>
     *
     * @param letters the set of strongly typed letters
     * @param input   the actual characters
     */
    private static void secondPass(final HebrewLetter[] letters, final char[] input) {
        // -1 means "no consonant seen yet"
        int previousConsonantPosition = -1;
        int currentConsonantPosition = -1;
        for (int ii = 0; ii < letters.length; ii++) {
            if (letters[ii].isConsonant()) {
                // shift the window of the two most recent consonants
                previousConsonantPosition = currentConsonantPosition;
                currentConsonantPosition = ii;
            } else if (letters[ii].getC() == SHEVA) {
                // NOTE(review): the positions used below are still -1 when a sheva precedes the first
                // consonant (or follows only one) - confirm the helpers tolerate that or that such
                // input cannot occur. The condition relies on short-circuit evaluation order.
                if (!isLastHebrewConsonantInWordWithoutVowel(letters, ii)
                        && (isStartOfWord(letters, currentConsonantPosition) ||
                        !letters[currentConsonantPosition].hasNoDagesh() ||
                        isAfterLongUnstressedVowel(letters, currentConsonantPosition) ||
                        hasAnyPointing(input, previousConsonantPosition, true, SHEVA))
                        && !(isAfterShortUnstressedVowel(letters, currentConsonantPosition)
                        && letters[ii].hasNoDagesh())) {
                    letters[ii].setSoundingType(SoundingType.SOUNDING);

                } else {
                    letters[ii].setSoundingType(SoundingType.SILENT);
                }
            }
        }
    }

    /**
     * Renders the analysed letters one by one into a single string, then lets doEndings adjust the
     * result.
     *
     * @param input        the characters the letters were built from
     * @param letters      the list of letters
     * @param stressedWord true to indicate the word has at least one stress
     * @return the transliterated string
     */
    private static String transliterate(char[] input, final HebrewLetter[] letters,  final boolean stressedWord) {
        final StringBuilder rendered = new StringBuilder(letters.length + 16);

        int position = 0;
        while (position < letters.length) {
            transliterate(letters, position, rendered, stressedWord);
            position++;
        }

        doEndings(input, rendered);
        return rendered.toString();
    }

    /**
     * Renders a single letter into the output. Three steps run in order: a separator is written in
     * front of the letter where hyphenateSyllables decides one is due, the letter itself is mapped
     * to its Latin rendering, and that rendering is repeated after a separator when
     * doubleHyphenateIfApplicable finds the letter doubled.
     *
     * @param letter  the array of letters identified so far, after the parsing has occurred.
     * @param output  current output
     * @param current the current position in 'letter' to be processed
     * @param hasStress true to indicate a word has a stress
     */
    public static void transliterate(final HebrewLetter[] letter, final int current,
                                     final StringBuilder output, boolean hasStress) {
        final HebrewLetter hebrewLetter = letter[current];
        final char hebrewChar = hebrewLetter.getC();

        hyphenateSyllables(letter, current, output, hasStress);

        // everything appended from here on belongs to this letter's own rendering
        final int renderingStart = output.length();
        mapHebrewLetterToTransliteratedLetter(current, output, hebrewLetter, hebrewChar);

        doubleHyphenateIfApplicable(output, letter, current, renderingStart);
    }

    /**
     * Appends the Latin rendering of one Hebrew character to the output. Characters without a
     * rendering (aleph, ayin, and anything not listed below) leave the output untouched.
     *
     * @param current       the current position
     * @param output        the rendered output
     * @param currentLetter the current letter
     * @param c             the char value that we are examining
     */
    // CHECKSTYLE:OFF
    private static void mapHebrewLetterToTransliteratedLetter(final int current, final StringBuilder output,
                                                              final HebrewLetter currentLetter, final char c) {
        switch (c) {
            // ---- consonants that are never written
            case ALEPH:
            case AYIN:
                break;

            // ---- consonants with a single, fixed rendering
            case GIMEL:
                output.append('g');
                break;
            case DALET:
                output.append('d');
                break;
            case HE:
                output.append('h');
                break;
            case ZAYIN:
                output.append('z');
                break;
            case HET:
                output.append("ch");
                break;
            case TET:
            case TAV:
                output.append('t');
                break;
            case LAMED:
                output.append('l');
                break;
            case FINAL_MEM:
            case MEM:
                output.append('m');
                break;
            case FINAL_NUN:
            case NUN:
                output.append('n');
                break;
            case SAMEKH:
                output.append('s');
                break;
            case FINAL_TSADI:
            case TSADI:
                output.append("ts");
                break;
            case QOF:
                output.append('q');
                break;
            case RESH:
                output.append('r');
                break;

            // ---- consonants that soften without a dagesh, except at position 0
            case BET: {
                final boolean soft = currentLetter.hasNoDagesh() && current != 0;
                output.append(soft ? 'v' : 'b');
                break;
            }
            case FINAL_KAF:
            case KAF: {
                final boolean soft = currentLetter.hasNoDagesh() && current != 0;
                output.append(soft ? "kh" : "k");
                break;
            }
            case FINAL_PE:
            case PE: {
                final boolean soft = currentLetter.hasNoDagesh() && current != 0;
                output.append(soft ? "ph" : "p");
                break;
            }

            // ---- letters whose rendering depends on how they were classified
            case VAV:
                if (!currentLetter.isVowel()) {
                    output.append('v');
                } else if (currentLetter.isShureq()) {
                    output.append('u');
                }
                break;
            case YOD:
                if (currentLetter.isConsonant()) {
                    output.append('y');
                }
                break;
            case SIN:
                output.append(currentLetter.isShin() ? "sh" : "s");
                break;

            // ---- vowels
            case SHEVA:
                if (!currentLetter.isSilent()) {
                    output.append('e');
                }
                break;
            case HATAF_SEGOL:
            case TSERE:
            case SEGOL:
                output.append('e');
                break;
            case HATAF_PATAH:
            case PATAH:
            case QAMATS:
                output.append('a');
                break;
            case HATAF_QAMATS:
            case HOLAM_HASER:
            case QAMATS_QATAN:
                output.append('o');
                break;
            case HIRIQ:
                output.append('i');
                break;
            case QUBUTS:
                output.append('u');
                break;
            case HOLAM: {
                // when the output currently ends in 'w', the 'o' goes in front of that 'w'
                final int lastIndex = output.length() - 1;
                if (lastIndex >= 0 && output.charAt(lastIndex) == 'w') {
                    output.insert(lastIndex, 'o');
                } else {
                    output.append('o');
                }
                break;
            }

            default:
                break;
        }
    }

    // CHECKSTYLE:ON

    /**
     * Writes a doubled letter a second time. When the current letter is doubled and does not start a
     * word, a separator is appended followed by a copy of whatever was appended for this letter,
     * i.e. everything in the output from {@code sizeBeforeAppending} onwards.
     *
     * @param output              the output rendered so far
     * @param letters             the letters under examination
     * @param currentPosition     position
     * @param sizeBeforeAppending the size of the output, prior to process the currentLetter.
     */
    private static void doubleHyphenateIfApplicable(final StringBuilder output, HebrewLetter[] letters,
                                                    int currentPosition, final int sizeBeforeAppending) {
        if (!letters[currentPosition].isDoubled() || isStartOfWord(letters, currentPosition)) {
            return;
        }

        // capture the letter's rendering before the separator is added, then repeat it
        final String rendering = output.substring(sizeBeforeAppending);
        output.append(HYPHEN).append(rendering);
    }

    /**
     * Writes the separator, if any, that belongs in front of the letter at {@code current}.
     * <p>
     * A maqaf is written as MAQAF_HYPHEN and a space as a space. For any other letter a syllable
     * marker (HYPHEN) is written only in front of a consonant that is not at the start of a word, is
     * not the last vowel-less consonant of the word, and does not directly follow a maqaf, a space
     * or a separator that is already at the end of the output. The remaining checks below decide
     * whether such a consonant really opens a new syllable.
     *
     * @param letters set of letters
     * @param current the current position
     * @param output  the current output
     * @param hasStress true to indicate a word has a stress
     */
    private static void hyphenateSyllables(final HebrewLetter[] letters, final int current,
                                           final StringBuilder output, boolean hasStress) {

        // word separators are copied through and nothing else is done for them
        if (letters[current].getC() == MAQAF) {
            output.append(MAQAF_HYPHEN);
            return;
        }

        if (letters[current].getC() == ' ') {
            output.append(' ');
            return;
        }


        //if previous was a maqaf or a space, then we're not going to hyphenate
        if (current - 1 >= 0 && (letters[current - 1].getC() == MAQAF || letters[current - 1].getC() == ' ')) {
            return;
        }

        // only consonants inside a word can open a new syllable
        if (isStartOfWord(letters, current) || !letters[current].isConsonant()
                || isLastHebrewConsonantInWordWithoutVowel(letters, current)) {
            return;
        }

        //if the previous output was a syllable marker, then we're not going to do anything
        if (output.length() > 0 && (
                output.charAt(output.length() - 1) == HYPHEN ||
                        output.charAt(output.length() - 1) == MAQAF_HYPHEN ||
                        output.charAt(output.length() - 1) == ' ')
                ) {
            //then don't output
            return;
        }

        // look backwards over the non-consonants that sit between the previous consonant and this one
        // NOTE(review): the scan stops before index 0 (ii > 0) - presumably because index 0 is
        // expected to hold a consonant; confirm
        boolean foundLongVowel = false;
        boolean foundStressedVowel = false;
        for (int ii = current - 1; ii > 0 && !letters[ii].isConsonant(); ii--) {
            if (letters[ii].isVowel()) {
                // a hataf vowel or a sheva right before this consonant always closes the syllable
                if (letters[ii].getC() == HATAF_PATAH || letters[ii].getC() == HATAF_QAMATS
                        || letters[ii].getC() == HATAF_SEGOL || letters[ii].getC() == SHEVA) {
                    output.append(HYPHEN);
                    return;
                }

                if (letters[ii].isLong()) {
                    foundLongVowel = true;
                }

                if (letters[ii].isStressed()) {
                    foundStressedVowel = true;
                }

                // in a word that has a stress, a long vowel seen with no stressed vowel so far
                // closes the syllable
                if (hasStress && foundLongVowel && !foundStressedVowel) {
                    output.append(HYPHEN);
                    return;
                }
            }
        }

        // a doubled consonant gets its separator from doubleHyphenateIfApplicable instead
        if (letters[current].isDoubled()) {
            return;
        }
        // a consonant followed (before the next consonant) by a silent sheva gets no marker
        for (int ii = current + 1; untilEndOfWord(letters, ii) && !letters[ii].isConsonant(); ii++) {
            if (letters[ii].getC() == SHEVA && letters[ii].isSilent()) {
                return;
            }
        }

        // silent consonants get no marker either
        if (letters[current].isConsonant() && letters[current].isSilent()) {
            return;
        }

        output.append(HYPHEN);
    }

    /**
     * Tells whether the given position holds the last consonant of its word and no non-silent vowel
     * follows it before the word ends. The search does not go past a maqaf or a space.
     *
     * @param letters  the set of letters
     * @param position our current position
     * @return true if it last without vowel, false if it's last with vowel OR not last consonant
     */
    private static boolean isLastHebrewConsonantInWordWithoutVowel(final HebrewLetter[] letters, final int position) {
        if (!isLastHebrewConsonantInWord(letters, position)) {
            return false;
        }

        // last consonant: any sounded vowel in the rest of the word disqualifies it
        for (int next = position + 1; untilEndOfWord(letters, next); next++) {
            final HebrewLetter following = letters[next];
            if (following.isVowel() && !following.isSilent()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Loop-continuation test used when scanning a word: true while {@code ii} is still inside the
     * array and the letter found there is neither a MAQAF (hebrew hyphen) nor a space.
     *
     * @param letters the hebrew letters
     * @param ii      the current position in the (usually) loop
     * @return true if we should continue
     */
    private static boolean untilEndOfWord(final HebrewLetter[] letters, final int ii) {
        // ran off the end of the input
        if (ii >= letters.length) {
            return false;
        }
        // a word separator ends the scan
        return !(letters[ii].getC() == MAQAF || letters[ii].getC() == ' ');
    }

    /**
     * Tells whether {@code ii} is the first position of a word.
     *
     * @param letters our hebrew letters
     * @param ii      current position
     * @return true if ii is 0 or the preceding character is a spacing/maqaf
     */
    private static boolean isStartOfWord(final HebrewLetter[] letters, final int ii) {
        if (ii == 0) {
            return true;
        }

        // the word starts here when the character just before us is a separator
        final boolean previousBelongsToWord = isNotMaqafOrSpacing(letters, ii - 1);
        return !previousBelongsToWord;
    }

    /**
     * Tests that a position holds an ordinary letter rather than a word separator. Positions at or
     * beyond the end of the array are reported as false.
     *
     * @param letters the current set of letters
     * @param ii      our current position
     * @return true if ii is before the end of the array and the letter is not a maqaf or a space
     */
    private static boolean isNotMaqafOrSpacing(final HebrewLetter[] letters, final int ii) {
        if (ii >= letters.length) {
            return false;
        }

        final boolean isMaqaf = letters[ii].getC() == MAQAF;
        return !isMaqaf && letters[ii].getC() != ' ';
    }

    /**
     * Checks whether it is the last hebrew consonant up to a MAQAF or space. An ALEPH or AYIN
     * that follows does not count as a further consonant unless a vowel appears somewhere after it
     * within the same word.
     *
     * @param letters  the set of letters
     * @param position our current position
     * @return true if no other consonants are found after the position
     */
    private static boolean isLastHebrewConsonantInWord(final HebrewLetter[] letters, final int position) {
        // set once an aleph/ayin has been passed; any later vowel then means we are not last
        boolean passedAlephOrAyin = false;

        int index = position + 1;
        while (untilEndOfWord(letters, index)) {
            final HebrewLetter letter = letters[index];
            index++;

            if (passedAlephOrAyin && letter.isVowel()) {
                return false;
            }

            if (!letter.isConsonant()) {
                continue;
            }

            // any consonant other than aleph/ayin settles the question straight away
            final boolean alephOrAyin = letter.getC() == AYIN || letter.getC() == ALEPH;
            if (!alephOrAyin) {
                return false;
            }

            passedAlephOrAyin = true;
        }

        return true;
    }

    /**
     * Re-orders the tail of the transliteration when the Hebrew word's last consonant is a HE or a
     * HET followed by a PATAH, so that the vowel is written before the final consonant:
     * <p/>
     * <pre>
     *  ha => ah,
     *  cha => ach,
     *  (a => a(,
     * </pre>
     * Nothing is changed when the word has no consonant, when its last consonant is not a HE/HET,
     * when no PATAH follows that consonant, when the output is shorter than two characters, or
     * when the output does not end in 'a'.
     *
     * @param letters the Hebrew characters of the word
     * @param output  the output which may need letters swapped; modified in place
     */
    private static void doEndings(final char[] letters, final StringBuilder output) {
        // find last consonant
        final int lastConsonant = getLastConsonantPosition(letters);
        if (lastConsonant == -1 || (letters[lastConsonant] != HET && letters[lastConsonant] != HE)) {
            return;
        }

        // we've got a he or a het; only a patah after it triggers a swap
        if (!hasAnyPointing(letters, lastConsonant, true, PATAH)) {
            return;
        }

        final int last = output.length() - 1;
        final int secondLast = last - 1;
        if (secondLast < 0) {
            return;
        }

        // only endings that finish in 'a' are candidates
        if (output.charAt(last) != 'a') {
            return;
        }

        final char secondChar = output.charAt(secondLast);
        if (secondChar == 'h' || secondChar == H_WITH_DOT) {
            if (secondLast > 0 && output.charAt(secondLast - 1) == 'c') {
                // cha => ach
                output.setCharAt(secondLast - 1, 'a');
                output.setCharAt(secondLast, 'c');
                output.setCharAt(last, 'h');
            } else {
                // ha => ah, keeping whichever form of 'h' was written
                output.setCharAt(secondLast, 'a');
                output.setCharAt(last, secondChar);
            }
        } else if (secondChar == OPEN_QUOTE) {
            // (a => a( : the vowel moves in front of the quote
            output.setCharAt(secondLast, 'a');
            output.setCharAt(last, OPEN_QUOTE);
        }
    }

    /**
     * Scans the characters from the end towards the start and reports where the first Hebrew
     * consonant (per {@link #isHebrewConsonant}) is found.
     *
     * @param letters the letters in the word
     * @return the index of the last consonant, or -1 if there is none
     */
    private static int getLastConsonantPosition(final char[] letters) {
        int index = letters.length - 1;
        // step backwards over everything that is not a consonant; ends at -1 when nothing matches
        while (index >= 0 && !isHebrewConsonant(letters[index])) {
            index--;
        }
        return index;
    }

    /**
     * Looks at the non-consonant letters immediately before the given position and works out
     * whether one of them is a long, unstressed vowel.
     *
     * @param letters           the set of letters
     * @param consonantPosition the current position
     * @return true if after a long unstressed vowel
     */
    private static boolean isAfterLongUnstressedVowel(final HebrewLetter[] letters,
                                                      final int consonantPosition) {
        final boolean lookingForLong = true;
        return isAfterAnUnstressedVowel(letters, consonantPosition, lookingForLong);
    }

    /**
     * Looks at the non-consonant letters immediately before the given position and works out
     * whether one of them is a short, unstressed vowel.
     *
     * @param letters           the set of letters
     * @param consonantPosition the current position
     * @return true if after a short unstressed vowel
     */
    private static boolean isAfterShortUnstressedVowel(final HebrewLetter[] letters, final int consonantPosition) {
        final boolean lookingForLong = false;
        return isAfterAnUnstressedVowel(letters, consonantPosition, lookingForLong);
    }


    /**
     * Walks backwards from the letter before {@code consonantPosition}, stopping at the first
     * consonant or the start of the array, and reports whether any letter passed on the way is an
     * unstressed vowel of the requested length.
     *
     * @param letters           the set of letters
     * @param consonantPosition the current position
     * @param lookingForLong    true to match vowels for which {@code isLong()} holds, false to match
     *                          vowels whose length type is {@link VowelLengthType#SHORT}
     * @return true if after an unstressed vowel of the requested length
     */
    private static boolean isAfterAnUnstressedVowel(final HebrewLetter[] letters,
                                                    final int consonantPosition, boolean lookingForLong) {
        for (int ii = consonantPosition - 1; ii >= 0 && !letters[ii].isConsonant(); ii--) {
            final HebrewLetter candidate = letters[ii];
            final boolean isCorrectLength = lookingForLong
                    ? candidate.isLong()
                    : candidate.getVowelLengthType() == VowelLengthType.SHORT;

            if (candidate.isVowel() && isCorrectLength && !candidate.isStressed()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Looks forward from the given position, via {@link #hasAnyPointing}, for a DAGESH.
     *
     * @param input           input string
     * @param currentPosition the current position
     * @return True if the glyph contains a DAGESH after the VAV or other consonant - only looks forward
     */
    private static boolean hasCloseDagesh(final char[] input, final int currentPosition) {
        final boolean lookAfter = true;
        return hasAnyPointing(input, currentPosition, lookAfter, DAGESH);
    }

    /**
     * Finds a vowel near the given position. Three searches are tried in order and the first hit
     * wins: the directional search backwards, the directional search forwards, and finally a plain
     * backwards scan from the current position down to index 0 that ignores consonants and word
     * boundaries.
     *
     * @param letters         the set of letters
     * @param currentPosition our current position
     * @return the closest vowel found in the sequence of hebrew letters, or null if there is none
     */
    private static HebrewLetter getCloseVowel(final HebrewLetter[] letters, final int currentPosition) {
        final HebrewLetter before = getCloseVowel(letters, currentPosition, false);
        if (before != null) {
            return before;
        }

        final HebrewLetter after = getCloseVowel(letters, currentPosition, true);
        if (after != null) {
            return after;
        }

        // last resort: take the nearest vowel at or before the current position
        int index = currentPosition;
        while (index >= 0) {
            if (letters[index].isVowel()) {
                return letters[index];
            }
            index--;
        }

        return null;
    }

    /**
     * Returns the closest vowel, looking forwards or backwards depending on the parameters passed
     * in. The search starts one step away from the current position and gives up at the first
     * consonant, at a MAQAF or space, or at the end of the array.
     *
     * @param letters         the input
     * @param currentPosition our current position in the input
     * @param forwards        true for looking ahead, false for looking backwards
     * @return the Hebrew vowel, or null if not found
     */
    private static HebrewLetter getCloseVowel(final HebrewLetter[] letters, final int currentPosition,
                                              final boolean forwards) {
        final int step = forwards ? 1 : -1;

        // NOTE(review): the "index > 0" bound means index 0 is never inspected - confirm intended
        int index = currentPosition + step;
        while (index > 0 && untilEndOfWord(letters, index)) {
            final HebrewLetter candidate = letters[index];
            if (candidate.isVowel()) {
                return candidate;
            }
            if (candidate.isConsonant()) {
                // a consonant blocks the search in this direction
                return null;
            }
            index += step;
        }

        // not found
        return null;
    }

    /**
     * Treats every character from SHEVA up to and including QAMATS_QATAN as a vowel, except for
     * DAGESH and SHIN_DOT.
     *
     * @param c the character
     * @return true to indicate a vowel
     */
    private static boolean isHebrewVowel(final char c) {
        final boolean inPointingRange = c >= SHEVA && c <= QAMATS_QATAN;
        return inPointingRange && !(c == DAGESH || c == SHIN_DOT);
    }

    /**
     * Dagesh processing: classifies the consonant at {@code currentPosition} as
     * {@link ConsonantType#NO_DAGESH}, {@link ConsonantType#SINGLE} or {@link ConsonantType#DOUBLE}.
     * Letters that are not consonants are left untouched.
     * <ul>
     * <li>no DAGESH after the consonant: NO_DAGESH</li>
     * <li>consonant starts a word or is the last letter in a word: SINGLE</li>
     * <li>BET, GIMEL, DALET, KAF, PE or TAV preceded by a SHEVA or a HATAF vowel: SINGLE</li>
     * <li>anything else: DOUBLE</li>
     * </ul>
     *
     * @param input           input string
     * @param currentPosition the current position
     * @param letters         the set of letters; the entry at {@code currentPosition} is updated
     */
    private static void processForteDagesh(final char[] input, final int currentPosition,
                                           final HebrewLetter[] letters) {
        final HebrewLetter current = letters[currentPosition];
        if (!HebrewLetterType.CONSONANT.equals(current.getHebrewLetterType())) {
            return;
        }

        if (!hasAnyPointing(input, currentPosition, true, DAGESH)) {
            current.setConsonantType(ConsonantType.NO_DAGESH);
            return;
        }

        // a consonant that opens or closes a word is always single; record that on this letter,
        // not on the first letter of the whole input
        if (isStartOfWord(letters, currentPosition) || isLastLetterInWord(input, currentPosition)) {
            current.setConsonantType(ConsonantType.SINGLE);
            return;
        }

        final char consonant = input[currentPosition];
        if (isAny(consonant, BET, GIMEL, DALET, KAF, PE, TAV)
                && hasAnyPointing(input, currentPosition, false, SHEVA, HATAF_SEGOL, HATAF_PATAH, HATAF_QAMATS)) {
            // not dagesh forte if any of those letters
            current.setConsonantType(ConsonantType.SINGLE);
            return;
        }
        current.setConsonantType(ConsonantType.DOUBLE);
    }

    /**
     * Decides whether the character at {@code currentPosition} is the last letter of its word.
     * The scan runs forward until a MAQAF, a space or the end of the input. If no consonant is
     * met the answer is true; if the first consonant met is anything but ALEPH/AYIN the answer is
     * false; if it is an ALEPH or AYIN the answer is true only when none of the listed vowel
     * points is found after {@code currentPosition}.
     *
     * @param input           input string
     * @param currentPosition the current position
     * @return true to indicate the last letter in the word
     */
    private static boolean isLastLetterInWord(final char[] input, final int currentPosition) {
        for (int ii = currentPosition + 1; ii < input.length; ii++) {
            final char next = input[ii];
            if (next == MAQAF || next == ' ') {
                // reached the end of the word
                break;
            }

            if (!isHebrewConsonant(next)) {
                continue;
            }

            if (next != ALEPH && next != AYIN) {
                return false;
            }

            // aleph or AYIN with no vowels
            // NOTE(review): the pointing is checked after currentPosition, not after the
            // aleph/ayin at ii - confirm this is the intended letter
            final boolean hasVowel = hasAnyPointing(input, currentPosition, true, QAMATS_QATAN, SHEVA,
                    HATAF_SEGOL, HATAF_PATAH, HATAF_QAMATS, HIRIQ, TSERE, SEGOL, PATAH,
                    QAMATS, HOLAM, HOLAM_HASER, QUBUTS);
            return !hasVowel;
        }

        return true;
    }

    /**
     * A character counts as a consonant when it lies between ALEPH and TAV, both inclusive.
     *
     * @param charAt our current char
     * @return true if it is a consonant
     */
    private static boolean isHebrewConsonant(final char charAt) {
        final boolean outsideRange = charAt < ALEPH || charAt > TAV;
        return !outsideRange;
    }

    /**
     * Checks whether a letter is one of the supplied candidates.
     *
     * @param letter          the one we are looking for
     * @param matchingLetters the possibilities
     * @return true if found in the list of letters provided, false otherwise (including when the
     *         list is empty)
     */
    private static boolean isAny(final char letter, final char... matchingLetters) {
        for (final char candidate : matchingLetters) {
            if (candidate == letter) {
                return true;
            }
        }
        return false;
    }

    /**
     * If the character at the current position is a VAV, builds the matching letter and stores it
     * in {@code letters}. A consonantal VAV is simply marked as a consonant. A vocalic VAV is
     * marked as a vowel, flagged as a shureq when a DAGESH follows it, and given a SHORT length
     * when the next Hebrew consonant carries a DAGESH, LONG otherwise.
     *
     * @param inputString     input string
     * @param letters         the letters found in the word so far
     * @param currentPosition the current position
     * @return true if the character was a VAV and has been stored, false otherwise
     */
    private static boolean processVav(final char[] inputString, final HebrewLetter[] letters,
                                      final int currentPosition) {
        if (inputString[currentPosition] != VAV) {
            return false;
        }

        final boolean consonant = isVavConsonant(inputString, currentPosition, letters);
        final HebrewLetter vav = new HebrewLetter(VAV);

        if (consonant) {
            vav.setHebrewLetterType(HebrewLetterType.CONSONANT);
        } else {
            vav.setHebrewLetterType(HebrewLetterType.VOWEL);

            if (hasAnyPointing(inputString, currentPosition, true, DAGESH)) {
                vav.setShureq(true);
            }

            // next consonant has a dagesh?
            final int nextConsonant = nextHebrewConsonant(inputString, currentPosition);
            final boolean nextHasDagesh = nextConsonant != -1
                    && hasAnyPointing(inputString, nextConsonant, true, DAGESH);
            vav.setVowelLengthType(nextHasDagesh ? VowelLengthType.SHORT : VowelLengthType.LONG);
        }

        letters[currentPosition] = vav;
        return true;
    }

    /**
     * Finds where the next Hebrew consonant is, starting with the character after the current
     * position.
     *
     * @param inputString     the input
     * @param currentPosition our current position in the input
     * @return the position of the next hebrew consonant, or -1 when the end of the input is
     *         reached without finding one
     */
    private static int nextHebrewConsonant(final char[] inputString, final int currentPosition) {
        int index = currentPosition;
        // advance at least once, then keep going while we are on non-consonants
        do {
            index++;
        } while (index < inputString.length && !isHebrewConsonant(inputString[index]));

        return index == inputString.length ? -1 : index;
    }

    /**
     * Searches for any one of the marks provided. Delegates to {@link #hasPointing} without
     * requiring every mark and without the VAV option.
     *
     * @param inputString the input string
     * @param position    the current position in the string
     * @param after       true to indicate to look after, false for before
     * @param otherMarks  the unicode characters we are looking for
     * @return true if at least one of the marks was matched
     */
    public static boolean hasAnyPointing(final char[] inputString, final int position, final boolean after,
                                         final char... otherMarks) {
        final boolean requireAllMarks = false;
        final boolean includeVav = false;
        return hasPointing(inputString, position, after, requireAllMarks, includeVav, otherMarks);
    }

    /**
     * Searches for all of the marks provided. Delegates to {@link #hasPointing} requiring every
     * mark and without the VAV option.
     *
     * @param inputString the input string
     * @param position    the current position in the string
     * @param after       true to indicate to look after, false for before
     * @param otherMarks  the unicode characters we are looking for
     * @return true if all marks were matched
     */
    public static boolean hasAllPointing(final char[] inputString, final int position, final boolean after,
                                         final char... otherMarks) {
        final boolean requireAllMarks = true;
        final boolean includeVav = false;
        return hasPointing(inputString, position, after, requireAllMarks, includeVav, otherMarks);
    }

    /**
     * Searches for all of the marks provided, with the VAV option of {@link #hasPointing} switched
     * on. Delegates to {@link #hasPointing} requiring every mark.
     *
     * @param inputString the input string
     * @param position    the current position in the string
     * @param after       true to indicate to look after, false for before
     * @param otherMarks  the unicode characters we are looking for
     * @return true if all marks were matched
     */
    public static boolean hasAllPointingIncludingVav(final char[] inputString, final int position,
                                                     final boolean after, final char... otherMarks) {
        final boolean requireAllMarks = true;
        final boolean includeVav = true;
        return hasPointing(inputString, position, after, requireAllMarks, includeVav, otherMarks);
    }

    /**
     * Scans the characters next to {@code position}, in one direction, for the marks provided.
     * The scan starts one step away from {@code position} and ends at either edge of the input or
     * at the first character at or above ALEPH. When {@code includeVav} is true a VAV does not end
     * the scan, so marks that sit on a following VAV (and the VAV itself) can be matched too.
     *
     * @param inputString   the input string
     * @param position      the current position in the string
     * @param after         true to indicate to look after, false for before
     * @param hasAllLetters true to require every mark to be found, false to stop at the first match
     * @param includeVav    true to include searching past the Vav letter
     * @param otherMarks    the unicode characters we are looking for
     * @return when {@code hasAllLetters} is false, true as soon as any mark is found; otherwise
     *         true only if every mark was found (which is trivially the case for an empty list)
     */
    public static boolean hasPointing(final char[] inputString, final int position, final boolean after,
                                      final boolean hasAllLetters, final boolean includeVav, final char... otherMarks) {
        final boolean[] foundAll = new boolean[otherMarks.length];
        final int increment = after ? 1 : -1;

        // bounded on both sides: a backwards scan must stop at index 0 instead of reading index -1
        for (int ii = position + increment; ii >= 0 && ii < inputString.length; ii = ii + increment) {
            final char newChar = inputString[ii];

            // a consonant ends the scan, unless it is a VAV that we were asked to look past
            final boolean isSkippableVav = includeVav && newChar == VAV;
            if (newChar >= ALEPH && !isSkippableVav) {
                break;
            }

            for (int jj = 0; jj < otherMarks.length; jj++) {
                if (newChar == otherMarks[jj]) {
                    // has any letters? and we found one, so no need to go further
                    if (!hasAllLetters) {
                        return true;
                    }

                    foundAll[jj] = true;
                }
            }
        }

        return areAllTrue(foundAll);
    }

    /**
     * Decides whether the VAV at the current position acts as a consonant. The rules are tried in
     * this order:
     * <ol>
     * <li>at the start of a word: consonant unless it has a DAGESH</li>
     * <li>as the last letter in a word: consonant unless it has a DAGESH or a HOLAM</li>
     * <li>with a DAGESH: consonant if one of the listed vowel points follows it, or if the
     * VAV-inclusive search finds both a VAV and a HOLAM</li>
     * <li>otherwise: consonant if both QAMATS and HOLAM follow it, or if it follows a vowel</li>
     * </ol>
     *
     * @param inputString     the input string
     * @param currentPosition the current position
     * @param letters         the letters processed so far
     * @return true if vav is a consonant
     */
    private static boolean isVavConsonant(final char[] inputString, final int currentPosition, final HebrewLetter[] letters) {
        final boolean hasDagesh = hasAnyPointing(inputString, currentPosition, true, DAGESH);

        if (isStartOfWord(letters, currentPosition)) {
            return !hasDagesh;
        }

        if (isLastLetterInWord(inputString, currentPosition)) {
            return !(hasDagesh || hasAnyPointing(inputString, currentPosition, true, HOLAM));
        }

        if (hasDagesh) {
            return hasAnyPointing(inputString, currentPosition, true, HIRIQ, TSERE, SEGOL, SHEVA, PATAH, QAMATS,
                    QUBUTS, QAMATS_QATAN)
                    || hasAllPointingIncludingVav(inputString, currentPosition, true, VAV, HOLAM);
        }

        // if we follow a vowel, then we want to be a consonant
        return hasAllPointing(inputString, currentPosition, true, QAMATS, HOLAM)
                || isFollowingVowel(currentPosition, letters);
    }

    /**
     * True to indicate we are following a vowel. The search walks backwards from the letter before
     * the current position and gives up at the first consonant, at a letter that starts a word, or
     * on reaching index 0 (which is not inspected).
     *
     * @param currentPosition the current position
     * @param letters         the letters
     * @return true if following a vowel
     */
    private static boolean isFollowingVowel(final int currentPosition, final HebrewLetter[] letters) {
        int index = currentPosition - 1;
        while (index > 0) {
            if (letters[index].isConsonant() || isStartOfWord(letters, index)) {
                return false;
            }
            if (letters[index].isVowel()) {
                return true;
            }
            index--;
        }
        return false;
    }

    /**
     * If it's a yod, adds the letter to the array. A yod that {@link #isYodVowel} recognises as
     * vocalic is stored as a LONG vowel; any other yod is stored as a consonant.
     *
     * @param inputString     the input string
     * @param letters         the set of letters
     * @param currentPosition our current position in the input string
     * @return true if a yod was found
     */
    private static boolean processYod(final char[] inputString, final HebrewLetter[] letters,
                                      final int currentPosition) {
        if (inputString[currentPosition] != YOD) {
            return false;
        }

        final HebrewLetter yod = new HebrewLetter(YOD);
        if (isYodVowel(inputString, currentPosition, letters)) {
            yod.setHebrewLetterType(HebrewLetterType.VOWEL);
            yod.setVowelLengthType(VowelLengthType.LONG);
        } else {
            yod.setHebrewLetterType(HebrewLetterType.CONSONANT);
        }

        letters[currentPosition] = yod;
        return true;
    }

    /**
     * A yod is treated as a vowel when one of HIRIQ, TSERE, SEGOL, QAMATS or HOLAM_HASER is found
     * before it and none of the listed vowel points or a DAGESH is found after it.
     *
     * @param inputString     the input string
     * @param currentPosition the current position in the string
     * @param letters         the letters processed so far (not used by this check)
     * @return true if yod is a vowel
     */
    private static boolean isYodVowel(final char[] inputString, final int currentPosition, final HebrewLetter[] letters) {
        final boolean vowelBefore = hasAnyPointing(inputString, currentPosition, false,
                HIRIQ, TSERE, SEGOL, QAMATS, HOLAM_HASER);
        if (!vowelBefore) {
            return false;
        }

        // a yod carrying its own pointing is being pronounced, so it is not a vowel letter
        final boolean pointedItself = hasAnyPointing(inputString, currentPosition, true, QAMATS_QATAN, SHEVA,
                HATAF_SEGOL, HATAF_PATAH, HATAF_QAMATS, HIRIQ, TSERE, SEGOL, PATAH, QAMATS, HOLAM, HOLAM_HASER,
                QUBUTS, DAGESH);
        return !pointedItself;
    }

    /**
     * True if all are true; an empty array therefore yields true.
     *
     * @param foundAll the list of boolean flags
     * @return true if all booleans passed are true
     */
    private static boolean areAllTrue(final boolean[] foundAll) {
        for (final boolean flag : foundAll) {
            if (!flag) {
                return false;
            }
        }
        return true;
    }
}