package com.tyndalehouse.step.core.data.processors;
import static com.tyndalehouse.step.core.utils.StringConversionUtils.adaptForTransliterationForIndexing;
import static com.tyndalehouse.step.core.utils.StringConversionUtils.transliterate;
import org.apache.lucene.document.Document;
import com.tyndalehouse.step.core.data.EntityConfiguration;
import com.tyndalehouse.step.core.data.create.PostProcessor;
import com.tyndalehouse.step.core.utils.language.HebrewUtils;
import org.apache.lucene.document.Field;
/**
 * Post-processor that derives transliteration fields from the "accentedUnicode" value of an entity
 * document and appends them to that same document. Per the original author's note, this affects
 * both the "definition" and "specificForm" entities.
 *
 * @author chrisburrell
 *
 */
public class TransliteratorProcessor implements PostProcessor {
    /** Name of the field receiving the full transliteration. */
    private static final String STEP_TRANSLITERATION = "stepTransliteration";
    /** Name of the field receiving the transliteration adapted for indexing. */
    private static final String STEP_SIMPLIFIED_TRANSLITERATION = "simplifiedStepTransliteration";

    /**
     * Reads "accentedUnicode" from the document and, when it holds a non-empty value, adds the two
     * generated transliteration fields. Documents without that value are left untouched.
     *
     * @param config the entity configuration, used to build the new fields
     * @param doc the document to read from and add the generated fields to
     */
    @Override
    public void process(final EntityConfiguration config, final Document doc) {
        final String source = doc.get("accentedUnicode");
        final boolean hasSource = source != null && !source.isEmpty();
        if (hasSource) {
            addGeneratedFields(config, doc, source);
        }
    }

    /**
     * Transliterates the given text and adds both the full and the simplified transliteration
     * fields to the document, in that order.
     *
     * @param config the entity configuration, used to build the new fields
     * @param doc the document receiving the fields
     * @param source the non-empty accented unicode text
     */
    private void addGeneratedFields(final EntityConfiguration config, final Document doc,
            final String source) {
        // The Hebrew check runs on the raw unicode text, before it is transliterated.
        final boolean hebrew = HebrewUtils.isHebrewText(source);
        final String stepForm = transliterate(source);

        // Full transliteration goes in first.
        doc.add(config.getField(STEP_TRANSLITERATION, stepForm));

        // The adaptation helper is handed the negated Hebrew flag as its second argument.
        final boolean notHebrew = !hebrew;
        final String simplifiedForm = adaptForTransliterationForIndexing(stepForm, notHebrew);
        doc.add(config.getField(STEP_SIMPLIFIED_TRANSLITERATION, simplifiedForm));
    }
}