/*******************************************************************************
* Copyright (c) 2012, Directors of the Tyndale STEP Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* Neither the name of the Tyndale House, Cambridge (www.TyndaleHouse.com)
* nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
package com.tyndalehouse.step.core.service.jsword.impl;
import static com.tyndalehouse.step.core.utils.StringUtils.split;
import javax.inject.Inject;
import javax.inject.Named;
import com.tyndalehouse.step.core.models.stats.ScopeType;
import com.tyndalehouse.step.core.service.StrongAugmentationService;
import com.tyndalehouse.step.core.service.jsword.JSwordPassageService;
import com.tyndalehouse.step.core.service.jsword.JSwordVersificationService;
import com.tyndalehouse.step.core.utils.StringConversionUtils;
import com.tyndalehouse.step.core.utils.StringUtils;
import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.book.BookData;
import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.book.OSISUtil;
import org.crosswire.jsword.passage.*;
import com.tyndalehouse.step.core.exceptions.StepInternalException;
import com.tyndalehouse.step.core.models.stats.PassageStat;
import com.tyndalehouse.step.core.service.jsword.JSwordAnalysisService;
import org.crosswire.jsword.versification.Versification;
import java.util.*;
/**
* The Class JSwordAnalysisServiceImpl.
*
* @author chrisburrell
*/
public class JSwordAnalysisServiceImpl implements JSwordAnalysisService {
static final String WORD_SPLIT = "[,./<>?!;:'\\[\\]\\{\\}!\"\\-\u2013 ()]+";
private static final String LANGUAGE_STOP_LIST = "analysis.stopWords.%s";
private final JSwordVersificationService versification;
private final Map<String, Set<String>> stopWords = new HashMap<String, Set<String>>(32);
private final Set<String> stopStrongs;
private final Versification strongsV11n;
private final Book strongsBook;
private final Properties stopWordsProperties;
private StrongAugmentationService strongAugmentationService;
/**
* Instantiates a new jsword analysis service impl.
*
* @param versification the versification
*/
@Inject
public JSwordAnalysisServiceImpl(final JSwordVersificationService versification,
@Named("StepCoreProperties") final Properties stopWordsProperties,
@Named("analysis.stopStrongs") final String configuredStopStrongs,
final StrongAugmentationService strongAugmentationService) {
this.versification = versification;
this.stopWordsProperties = stopWordsProperties;
this.strongAugmentationService = strongAugmentationService;
stopStrongs = StringUtils.createSet(configuredStopStrongs);
strongsBook = this.versification.getBookFromVersion(JSwordPassageService.REFERENCE_BOOK);
strongsV11n = this.versification.getVersificationForVersion(strongsBook);
}
@Override
public PassageStat getWordStats(final Key reference, final ScopeType scopeType) {
try {
//change the reference to match what we need
final BookData expandedBook = getExpandedBookData(reference, scopeType, strongsV11n, strongsBook);
return getStatsFromStrongArray(expandedBook.getFirstBook().getInitials(), expandedBook.getKey(), split(OSISUtil.getStrongsNumbers(expandedBook.getOsisFragment())));
} catch (final BookException e) {
throw new StepInternalException("Unable to read passage text", e);
}
}
@Override
public PassageStat getTextStats(final String version, final Key reference, final ScopeType scopeType) {
try {
final Book book = this.versification.getBookFromVersion(version);
final Versification av11n = this.versification.getVersificationForVersion(book);
final BookData bookData = getExpandedBookData(reference, scopeType, av11n, book);
final String canonicalText = OSISUtil.getCanonicalText(bookData.getOsisFragment());
final String[] words = split(canonicalText, WORD_SPLIT);
Set<String> languageStopWords = getLanguageStopList(book);
final PassageStat stat = new PassageStat();
for (final String word : words) {
//only add word if not in STOP list
if (!languageStopWords.contains(StringConversionUtils.unAccent(word.toUpperCase(), true))) {
stat.addWord(word);
}
}
return stat;
} catch (final BookException e) {
throw new StepInternalException("Unable to read passage text", e);
}
}
/**
* Lazily obtains the stop list for the specific language of a book
*
* @param book the book that the viewer is looking at
* @return the set of words that form part of the stop list
*/
private Set<String> getLanguageStopList(final Book book) {
String code = book.getLanguage().getCode();
Set<String> languageStopList = this.stopWords.get(code);
if (languageStopList == null) {
//only one language gets loaded at any one time
synchronized (this) {
languageStopList = this.stopWords.get(code);
if (languageStopList == null) {
languageStopList = StringUtils.createSet(this.stopWordsProperties.getProperty(String.format(LANGUAGE_STOP_LIST, code.toLowerCase())), true);
this.stopWords.put(code, languageStopList);
}
}
}
return languageStopList;
}
/**
* Expands the key to the correct part, depending on whether we want the single chapter,
* the surrounding chapters, or the whole book.
*
* @param scopeType the scope type
* @param v11n the v11n for the book we are looking up
* @param bookFromVersion the book containing the text/key
* @return the correct key.
*/
BookData getExpandedBookData(final Key key, final ScopeType scopeType, final Versification v11n, final Book bookFromVersion) {
if (scopeType == ScopeType.PASSAGE) {
return new BookData(bookFromVersion, key);
}
//validate the key is a verse key
if (!(key instanceof VerseKey)) {
throw new StepInternalException("Unable to identify verses in this passage");
}
//if we have no data, then no point in continuing
if (!key.iterator().hasNext()) {
//there is no data
return new BookData(bookFromVersion, new RangedPassage(v11n));
}
Verse firstVerse = KeyUtil.getVerse(key);
Verse lastVerse;
if (key instanceof AbstractPassage) {
final AbstractPassage abstractPassage = (AbstractPassage) key;
lastVerse = (Verse) abstractPassage.get(abstractPassage.getCardinality() - 1);
} else if (key instanceof VerseRange) {
lastVerse = ((VerseRange) key).getEnd();
} else {
lastVerse = firstVerse;
}
Verse start = null;
Verse end = null;
switch (scopeType) {
case CHAPTER:
start = new Verse(v11n, firstVerse.getBook(), firstVerse.getChapter(), 0);
end = new Verse(v11n, lastVerse.getBook(), lastVerse.getChapter(), v11n.getLastVerse(lastVerse.getBook(), lastVerse.getChapter()));
break;
case NEAR_BY_CHAPTER:
final int lastChapInBook = v11n.getLastChapter(firstVerse.getBook());
int previousChapter = firstVerse.getChapter() - 1;
if (previousChapter < 1) {
previousChapter = 1;
}
int nextChapter = lastVerse.getChapter() + 1;
int lastChapter = lastChapInBook;
if (nextChapter > lastChapInBook) {
nextChapter = lastChapter;
}
//book (n-1):0
start = new Verse(v11n, firstVerse.getBook(), previousChapter, 0);
//book (n+1):last
end = new Verse(v11n, lastVerse.getBook(), nextChapter, v11n.getLastVerse(lastVerse.getBook(), nextChapter));
break;
case BOOK:
final int lastChapterInBook = v11n.getLastChapter(firstVerse.getBook());
//book 1:0
start = new Verse(v11n, firstVerse.getBook(), 1, 0);
//book end:verse
end = new Verse(v11n, lastVerse.getBook(), lastChapterInBook, v11n.getLastVerse(lastVerse.getBook(), lastChapterInBook));
break;
default:
throw new StepInternalException("Unable to recognise passed-in scope type.");
}
return new BookData(bookFromVersion, new VerseRange(v11n, start, end));
}
/**
* Gets the stats from word array, counting words one by one and using the {@link PassageStat} to do the
* incrementing word by word
*
* @param words the words
* @return the stats from word array
*/
private PassageStat getStatsFromStrongArray(final String version, final Key reference, final String[] words) {
final PassageStat stat = new PassageStat();
//slight annoyance that we are deserializing the key to re-serialise later
final String ref = reference.getOsisRef();
for (final String unaugmentedWord : words) {
StrongAugmentationService.AugmentedStrongs strongs = this.strongAugmentationService.augment(version, ref, unaugmentedWord);
for(String word : strongs.getStrongList()) {
final String paddedStrongNumber = StringConversionUtils.getStrongPaddedKey(word);
if (!this.stopStrongs.contains(paddedStrongNumber.toUpperCase())) {
stat.addWord(paddedStrongNumber);
}
}
}
return stat;
}
/**
* @return access tot he default v11n for analysis
*/
Versification getStrongsV11n() {
return this.strongsV11n;
}
/**
* @return the default versification
*/
Book getStrongsBook() {
return this.strongsBook;
}
}