/*******************************************************************************
* Copyright (c) 2012, Directors of the Tyndale STEP Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* Neither the name of the Tyndale House, Cambridge (www.TyndaleHouse.com)
* nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
package com.tyndalehouse.step.core.service.impl;
import com.tyndalehouse.step.core.data.EntityDoc;
import com.tyndalehouse.step.core.data.EntityIndexReader;
import com.tyndalehouse.step.core.data.EntityManager;
import com.tyndalehouse.step.core.exceptions.UserExceptionType;
import com.tyndalehouse.step.core.models.LexiconSuggestion;
import com.tyndalehouse.step.core.models.VocabResponse;
import com.tyndalehouse.step.core.service.StrongAugmentationService;
import com.tyndalehouse.step.core.service.VocabularyService;
import com.tyndalehouse.step.core.service.helpers.OriginalWordUtils;
import com.tyndalehouse.step.core.utils.SortingUtils;
import com.tyndalehouse.step.core.utils.StringConversionUtils;
import com.tyndalehouse.step.core.utils.StringUtils;
import org.codehaus.jackson.map.util.LRUMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedSet;
import java.util.TreeSet;
import static com.tyndalehouse.step.core.utils.StringUtils.isBlank;
import static com.tyndalehouse.step.core.utils.StringUtils.split;
import static com.tyndalehouse.step.core.utils.ValidateUtils.notBlank;
/**
 * Defines all vocab related queries: full and quick lexicon definitions, English/Greek
 * glosses, transliterations and related-word lookups, backed by the "definition"
 * entity index.
 *
 * @author chrisburrell
 */
@Singleton
public class VocabularyServiceImpl implements VocabularyService {
    private static final String MULTI_WORD_SEPARATOR = " | ";
    private static final Logger LOGGER = LoggerFactory.getLogger(VocabularyServiceImpl.class);
    /** regex matching the separators between strong numbers in a compound identifier */
    private static final String STRONG_SEPARATORS = "[ ,]+";
    private static final String HIGHER_STRONG = "STRONG:";
    private static final String LOWER_STRONG = "strong:";
    /** offset at which the strong number proper starts, after the "strong:" prefix */
    private static final int START_STRONG_KEY = HIGHER_STRONG.length();
    // NOTE: Jackson's LRUMap extends an access-ordered LinkedHashMap, whose get()
    // structurally modifies the map — it is NOT thread-safe. This service is a
    // singleton serving concurrent requests, so every read/write of this cache
    // must be guarded by synchronizing on the map itself (see getLexiconDefinitions).
    private static final LRUMap<String, EntityDoc[]> DEFINITION_CACHE = new LRUMap<>(128, 256);
    private final EntityIndexReader definitions;
    // extraction strategies used by the single-field convenience lookups below
    /** extracts the transliterated form of the word */
    private final LexiconDataProvider transliterationProvider = new LexiconDataProvider() {
        @Override
        public String getData(final EntityDoc l) {
            return l.get("stepTransliteration");
        }
    };
    /** extracts the English gloss of the word */
    private final LexiconDataProvider englishVocabProvider = new LexiconDataProvider() {
        @Override
        public String getData(final EntityDoc l) {
            return l.get("stepGloss");
        }
    };
    /** extracts the accented original-language form of the word */
    private final LexiconDataProvider greekVocabProvider = new LexiconDataProvider() {
        @Override
        public String getData(final EntityDoc l) {
            return l.get("accentedUnicode");
        }
    };
    private final StrongAugmentationService strongAugmentationService;

    /**
     * @param manager the entity manager, giving access to the "definition" index
     * @param strongAugmentationService resolves/augments strong numbers for a given
     *            version and reference
     */
    @Inject
    public VocabularyServiceImpl(final EntityManager manager,
                                 final StrongAugmentationService strongAugmentationService) {
        this.strongAugmentationService = strongAugmentationService;
        this.definitions = manager.getReader("definition");
    }

    /**
     * Pads a strong number with the correct number of 0s, e.g. "G12" becomes "G0012".
     * A single trailing letter suffix (e.g. "G12a") is preserved in lower case.
     *
     * @param strongNumber the strong number
     * @param prefix true to indicate the strongNumber is preceded with "strong:"
     * @return the padded strong number, "err" if the digits cannot be parsed at all,
     *         or the input unchanged if only the first four digits parse
     */
    public static String padStrongNumber(final String strongNumber, final boolean prefix) {
        final int baseIndex = prefix ? START_STRONG_KEY : 0;
        String subStrong = null;
        try {
            subStrong = strongNumber.substring(baseIndex + 1);
            return String.format("%c%04d", strongNumber.charAt(baseIndex), Integer.parseInt(subStrong));
        } catch (final NumberFormatException e) {
            LOGGER.trace("Unable to parse strong number.", e);
            // deals with dodgy modules: perhaps someone added some random
            // information at the end, so retry with just the first four digits
            if (subStrong != null && subStrong.length() > 3) {
                final String first4Chars = subStrong.substring(0, 4);
                try {
                    // keep a single alphabetic suffix (e.g. "G5185a"), lower-cased
                    String suffix = subStrong.length() > 4 && Character.isAlphabetic(subStrong.charAt(4)) ? subStrong.substring(4, 5).toLowerCase() : "";
                    return String.format("%c%04d%s", strongNumber.charAt(baseIndex),
                            Integer.parseInt(first4Chars), suffix);
                } catch (final NumberFormatException ex) {
                    // couldn't convert to a padded number
                    LOGGER.trace("Unable to convert [{}] to a padded number.", first4Chars);
                    return strongNumber;
                }
            }
            return "err";
        }
    }

    /**
     * Looks up full definitions for the given vocab identifiers, including the
     * related words of each definition found. Results keep the caller's order.
     *
     * @param version the version anchoring the lookup
     * @param reference the reference that anchors the strong number
     * @param vocabIdentifiers comma/space-separated strong numbers, optionally "strong:"-prefixed
     * @return the definitions and their related words, or an empty response if none resolve
     */
    @Override
    public VocabResponse getDefinitions(final String version, final String reference, final String vocabIdentifiers) {
        notBlank(vocabIdentifiers, "Vocab identifiers was null", UserExceptionType.SERVICE_VALIDATION_ERROR);
        final String[] strongList = this.strongAugmentationService.augment(version, reference, getKeys(vocabIdentifiers)).getStrongList();
        if (strongList.length == 0) {
            return new VocabResponse();
        }
        final EntityDoc[] strongDefs = this.definitions.searchUniqueBySingleField("strongNumber",
                strongList);
        // put the docs back into the order the caller asked for
        final EntityDoc[] orderedDefs = reOrder(strongList, strongDefs);
        final Map<String, List<LexiconSuggestion>> relatedWords = readRelatedWords(orderedDefs);
        return new VocabResponse(orderedDefs, relatedWords);
    }

    /**
     * Read related words, i.e. all the words that are in the related numbers fields.
     *
     * @param defs the definitions that have been looked up.
     * @return a map of source strong number to its related-word suggestions, sorted
     */
    private Map<String, List<LexiconSuggestion>> readRelatedWords(final EntityDoc[] defs) {
        // this map keys the original word strong number to all the related codes
        final Map<String, SortedSet<LexiconSuggestion>> relatedWords = new HashMap<String, SortedSet<LexiconSuggestion>>(
                defs.length * 2);
        // to avoid doing lookups twice, we key each short definition by its code as well
        final Map<String, LexiconSuggestion> lookedUpWords = new HashMap<>(
                defs.length * 2);
        for (final EntityDoc doc : defs) {
            final String sourceNumber = doc.get("strongNumber");
            final String relatedWordNumbers = doc.get("relatedNumbers");
            final String[] allRelatedWords = split(relatedWordNumbers, STRONG_SEPARATORS);
            for (final String relatedWord : allRelatedWords) {
                LexiconSuggestion shortLexiconDefinition = lookedUpWords.get(relatedWord);
                // look up related word from index, but only once per code
                if (shortLexiconDefinition == null) {
                    final EntityDoc[] relatedDoc = this.definitions.searchUniqueBySingleField("strongNumber",
                            relatedWord);
                    // assume first doc
                    if (relatedDoc.length > 0) {
                        shortLexiconDefinition = OriginalWordUtils.convertToSuggestion(relatedDoc[0]);
                        lookedUpWords.put(relatedWord, shortLexiconDefinition);
                    }
                }
                // store as a link to its source number
                if (shortLexiconDefinition != null) {
                    SortedSet<LexiconSuggestion> associatedNumbersSoFar = relatedWords.get(sourceNumber);
                    if (associatedNumbersSoFar == null) {
                        associatedNumbersSoFar = new TreeSet<>(
                                SortingUtils.LEXICON_SUGGESTION_COMPARATOR);
                        relatedWords.put(sourceNumber, associatedNumbersSoFar);
                    }
                    associatedNumbersSoFar.add(shortLexiconDefinition);
                }
            }
        }
        return convertToListMap(relatedWords);
    }

    /**
     * Convert to list map, from a map of sets to a map of lists. This also orders the definitions.
     *
     * @param relatedWords the related words
     * @return the map
     */
    private Map<String, List<LexiconSuggestion>> convertToListMap(
            final Map<String, SortedSet<LexiconSuggestion>> relatedWords) {
        final Map<String, List<LexiconSuggestion>> results = new HashMap<>();
        for (final Entry<String, SortedSet<LexiconSuggestion>> relatedWordSet : relatedWords.entrySet()) {
            results.put(relatedWordSet.getKey(), new ArrayList<LexiconSuggestion>(relatedWordSet.getValue()));
        }
        return results;
    }

    /**
     * Re-orders the found definitions to match the order of the input strong list.
     * Strong numbers with no matching definition are simply skipped, so the result
     * never contains nulls and duplicates in the input cannot overflow the result.
     *
     * @param strongList the ordered list of strongs
     * @param strongDefs the definitions that have been found
     * @return the definitions, in the order of {@code strongList}
     */
    private EntityDoc[] reOrder(final String[] strongList, final EntityDoc[] strongDefs) {
        final Map<String, EntityDoc> entitiesByStrong = new HashMap<>(strongDefs.length * 2);
        for (final EntityDoc def : strongDefs) {
            entitiesByStrong.put(def.get("strongNumber"), def);
        }
        // collect into a list: a fixed-size array risks trailing nulls (unmatched
        // strongs) and index overflow (duplicate strongs in the input)
        final List<EntityDoc> results = new ArrayList<>(strongDefs.length);
        for (final String strong : strongList) {
            final EntityDoc entityDoc = entitiesByStrong.get(strong);
            if (entityDoc != null) {
                results.add(entityDoc);
            }
        }
        return results.toArray(new EntityDoc[results.size()]);
    }

    /**
     * Looks up the definitions without resolving any related words.
     *
     * @param version the version anchoring the lookup
     * @param reference the reference that anchors the strong number
     * @param vocabIdentifiers the identifiers
     * @return the definitions, or an empty response if none resolve
     */
    @Override
    public VocabResponse getQuickDefinitions(final String version, final String reference, final String vocabIdentifiers) {
        notBlank(vocabIdentifiers, "Vocab identifiers was null", UserExceptionType.SERVICE_VALIDATION_ERROR);
        final String[] strongList = this.strongAugmentationService.augment(version, reference, getKeys(vocabIdentifiers)).getStrongList();
        if (strongList.length == 0) {
            return new VocabResponse();
        }
        final EntityDoc[] strongNumbers = this.definitions.searchUniqueBySingleField("strongNumber", strongList);
        return new VocabResponse(strongNumbers);
    }

    @Override
    public String getTransliteration(final String originalText) {
        return StringConversionUtils.transliterate(originalText);
    }

    @Override
    public String getEnglishVocab(final String version, final String reference, final String vocabIdentifiers) {
        return getDataFromLexiconDefinition(version, reference, vocabIdentifiers, this.englishVocabProvider);
    }

    @Override
    public String getGreekVocab(final String version, final String reference, final String vocabIdentifiers) {
        return getDataFromLexiconDefinition(version, reference, vocabIdentifiers, this.greekVocabProvider);
    }

    @Override
    public String getDefaultTransliteration(final String version, final String reference, final String vocabIdentifiers) {
        return getDataFromLexiconDefinition(version, reference, vocabIdentifiers, this.transliterationProvider);
    }

    /**
     * Gets data from the matched lexicon definitions. A single match yields the bare
     * value; multiple matches are concatenated as "[a | b | c]".
     *
     * @param version the version anchoring the lookup
     * @param reference the reference that anchors the strong number
     * @param vocabIdentifiers the identifiers
     * @param provider the provider used to extract the field of interest
     * @return the data in String form, or the raw identifiers if nothing matched
     */
    private String getDataFromLexiconDefinition(final String version, final String reference, final String vocabIdentifiers,
                                                final LexiconDataProvider provider) {
        // else we lookup and concatenate
        final EntityDoc[] lds = getLexiconDefinitions(vocabIdentifiers, version, reference);
        if (lds.length == 0) {
            return vocabIdentifiers;
        }
        if (lds.length == 1) {
            return provider.getData(lds[0]);
        }
        // otherwise, we need to resort to concatenating the fields
        final StringBuilder sb = new StringBuilder(lds.length * 32);
        sb.append('[');
        for (int ii = 0; ii < lds.length; ii++) {
            sb.append(provider.getData(lds[ii]));
            if (ii + 1 < lds.length) {
                sb.append(MULTI_WORD_SEPARATOR);
            }
        }
        sb.append(']');
        return sb.toString();
    }

    /**
     * Looks up the definition documents for the given identifiers, caching results
     * per (version, reference, identifiers) key.
     *
     * @param vocabIdentifiers the identifiers
     * @param version the version anchoring the lookup
     * @param reference the reference that anchors the strong number
     * @return the matching documents, empty if the identifiers resolve to nothing
     */
    @Override
    public EntityDoc[] getLexiconDefinitions(final String vocabIdentifiers, final String version, final String reference) {
        final String[] keys = this.strongAugmentationService.augment(version, reference, getKeys(vocabIdentifiers)).getStrongList();
        if (keys.length == 0) {
            return new EntityDoc[0];
        }
        final String cacheKey = getCacheKey(version, reference, vocabIdentifiers);
        // LRUMap.get() mutates access order, so reads need the lock too
        synchronized (DEFINITION_CACHE) {
            final EntityDoc[] entityDocs = DEFINITION_CACHE.get(cacheKey);
            if (entityDocs != null) {
                return entityDocs;
            }
        }
        final EntityDoc[] strongNumbers = this.definitions.searchUniqueBySingleField("strongNumber", keys);
        synchronized (DEFINITION_CACHE) {
            DEFINITION_CACHE.put(cacheKey, strongNumbers);
        }
        return strongNumbers;
    }

    /**
     * Simple cache key that concatenates all the variables passed in
     *
     * @param version the version
     * @param reference the reference
     * @param vocabIdentifiers the vocabulary identifiers
     * @return the cache key
     */
    private String getCacheKey(final String version, final String reference, final String vocabIdentifiers) {
        return new StringBuilder(32).append(version).append('-').append(reference).append('-').append(vocabIdentifiers).toString();
    }

    /**
     * Extracts a compound key into several keys, padding each strong number.
     * Tokens that are neither "G"/"H"-prefixed nor "strong:"-prefixed pass through
     * unchanged; empty tokens (e.g. from a leading separator) are skipped.
     *
     * @param vocabIdentifiers the vocabulary identifiers
     * @return the list of all keys to lookup
     */
    String[] getKeys(final String vocabIdentifiers) {
        if (isBlank(vocabIdentifiers)) {
            return new String[0];
        }
        final String[] ids = vocabIdentifiers.split(STRONG_SEPARATORS);
        final List<String> keys = new ArrayList<>(ids.length);
        for (final String id : ids) {
            // a leading separator yields an empty first token: skip it rather
            // than blow up on charAt(0)
            if (id.isEmpty()) {
                continue;
            }
            final char firstChar = id.charAt(0);
            if (firstChar == 'G' || firstChar == 'H') {
                keys.add(padStrongNumber(id, false));
            } else if ((id.startsWith(HIGHER_STRONG) || id.startsWith(LOWER_STRONG))
                    && id.length() > START_STRONG_KEY) {
                keys.add(padStrongNumber(id.substring(START_STRONG_KEY), false));
            } else {
                keys.add(id);
            }
        }
        return keys.toArray(new String[keys.size()]);
    }
}