package com.tyndalehouse.step.core.service.helpers;
import com.tyndalehouse.step.core.models.search.SuggestionType;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
import org.apache.lucene.search.PrefixFilter;
import com.tyndalehouse.step.core.data.EntityDoc;
import com.tyndalehouse.step.core.models.LexiconSuggestion;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.TermQuery;
/**
* Static helper methods used by various services
*
* @author chrisburrell
*
*/
public final class OriginalWordUtils {
    /** strong number field */
    public static final String STRONG_NUMBER_FIELD = "strongNumber";

    /** Shared filter for entries whose strong number starts with "G", wrapped in a {@link CachingWrapperFilter}. */
    private static final Filter GREEK_FILTER = new CachingWrapperFilter(getStrongFilter("G"));

    /** Shared filter for entries whose strong number starts with "H", wrapped in a {@link CachingWrapperFilter}. */
    private static final Filter HEBREW_FILTER = new CachingWrapperFilter(getStrongFilter("H"));

    /** no implementation */
    private OriginalWordUtils() {
        // no implementation
    }

    /**
     * Builds a filter that accepts documents whose {@link #STRONG_NUMBER_FIELD} starts with the given
     * prefix and that are not flagged with {@code stopWord=true}.
     *
     * @param prefix the leading characters of the strong number, e.g. "G" or "H"
     * @return the (uncached) filter
     */
    private static Filter getStrongFilter(final String prefix) {
        final BooleanQuery query = new BooleanQuery();
        query.add(new PrefixQuery(new Term(STRONG_NUMBER_FIELD, prefix)), BooleanClause.Occur.MUST);
        query.add(new TermQuery(new Term("stopWord", "true")), BooleanClause.Occur.MUST_NOT);
        return new QueryWrapperFilter(query);
    }

    /**
     * converts a definition to a suggested form
     *
     * @param def the definition
     * @return the suggestion, populated with the gloss, accented unicode form, transliteration and
     *         strong number read from the definition
     */
    public static LexiconSuggestion convertToSuggestion(final EntityDoc def) {
        final LexiconSuggestion suggestion = new LexiconSuggestion();
        suggestion.setGloss(def.get("stepGloss"));
        suggestion.setMatchingForm(def.get("accentedUnicode"));
        suggestion.setStepTransliteration(def.get("stepTransliteration"));
        suggestion.setStrongNumber(def.get(STRONG_NUMBER_FIELD));
        return suggestion;
    }

    /**
     * Filters the query by strong number
     *
     * @param isGreek true for greek, false for hebrew
     * @return the filter for greek or hebrew; the same shared instance is returned on every call
     */
    public static Filter getFilter(final boolean isGreek) {
        return isGreek ? GREEK_FILTER : HEBREW_FILTER;
    }

    /**
     * Strong numbers in the lexicon may carry a one-letter extension, e.g. {@code H0001a}. This method
     * removes those extensions, so that {@code "H0001a, G0002b"} becomes {@code "H0001, G0002"}.
     * All other characters, including the separators, are copied through unchanged.
     *
     * @param relatedNumbers the separated list of strong numbers, may be null
     * @return the list with the extensions removed, or an empty string if {@code relatedNumbers} is null
     */
    public static String stripExtensions(final String relatedNumbers) {
        if (relatedNumbers == null) {
            return "";
        }

        final int length = relatedNumbers.length();
        final StringBuilder sb = new StringBuilder(length);
        for (int ii = 0; ii < length; ii++) {
            if (!isExtension(relatedNumbers, ii)) {
                sb.append(relatedNumbers.charAt(ii));
            }
        }
        return sb.toString();
    }

    /**
     * Decides whether the character at the given position is a strong number extension: a letter
     * that directly follows a digit and is itself followed by the end of the text, a comma or whitespace.
     *
     * @param text  the list of strong numbers being scanned
     * @param index the position of the character to examine
     * @return true if the character should be dropped
     */
    private static boolean isExtension(final String text, final int index) {
        if (!Character.isAlphabetic(text.charAt(index))) {
            return false;
        }

        // An extension is a suffix to the numeric part. Requiring a preceding digit keeps the
        // leading G/H prefix (and any stray single-letter token) from being removed.
        if (index == 0 || !Character.isDigit(text.charAt(index - 1))) {
            return false;
        }

        final int next = index + 1;
        if (next >= text.length()) {
            return true;
        }

        // Any whitespace ends a number, not just a plain space, so tab/newline separated lists work too.
        final char following = text.charAt(next);
        return following == ',' || Character.isWhitespace(following);
    }
}