/******************************************************************************* * Copyright (c) 2012, Directors of the Tyndale STEP Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * Neither the name of the Tyndale House, Cambridge (www.TyndaleHouse.com) * nor the names of its contributors may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ package com.tyndalehouse.step.core.service.jsword.impl; import java.awt.image.LookupOp; import java.util.*; import java.util.Map.Entry; import java.util.regex.Pattern; import javax.inject.Inject; import javax.inject.Singleton; import com.tyndalehouse.step.core.exceptions.LuceneSearchException; import com.tyndalehouse.step.core.models.InterlinearMode; import com.tyndalehouse.step.core.service.jsword.JSwordMetadataService; import org.apache.lucene.search.IndexSearcher; import org.crosswire.jsword.book.Book; import org.crosswire.jsword.book.BookException; import org.crosswire.jsword.index.Index; import org.crosswire.jsword.index.IndexManager; import org.crosswire.jsword.index.IndexManagerFactory; import org.crosswire.jsword.index.IndexStatus; import org.crosswire.jsword.index.lucene.LuceneIndex; import org.crosswire.jsword.index.search.DefaultSearchModifier; import org.crosswire.jsword.index.search.DefaultSearchRequest; import org.crosswire.jsword.passage.*; import org.crosswire.jsword.passage.PassageTally.Order; import org.crosswire.jsword.versification.Versification; import org.crosswire.jsword.versification.VersificationsMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.tyndalehouse.step.core.exceptions.StepInternalException; import com.tyndalehouse.step.core.models.LookupOption; import com.tyndalehouse.step.core.models.OsisWrapper; import com.tyndalehouse.step.core.models.search.SearchEntry; import com.tyndalehouse.step.core.models.search.SearchResult; import com.tyndalehouse.step.core.models.search.VerseSearchEntry; import com.tyndalehouse.step.core.service.impl.IndividualSearch; import com.tyndalehouse.step.core.service.impl.SearchQuery; import com.tyndalehouse.step.core.service.jsword.JSwordPassageService; import com.tyndalehouse.step.core.service.jsword.JSwordSearchService; import com.tyndalehouse.step.core.service.jsword.JSwordVersificationService; /** * API to search across the data * * @author chrisburrell */ @Singleton public class JSwordSearchServiceImpl implements JSwordSearchService { private static final Logger LOGGER = LoggerFactory.getLogger(JSwordSearchServiceImpl.class); private static final int MAX_RESULTS = 50000; private static final Pattern GEN_REV_RANGE = Pattern.compile("(\\+)\\[Gen-Rev\\]"); private final JSwordVersificationService av11nService; private final JSwordMetadataService metadataService; private final JSwordPassageService jsword; /** * @param av11nService the versification service * @param jsword the jsword lookup service to retrieve the references */ @Inject public JSwordSearchServiceImpl(final JSwordVersificationService av11nService, final JSwordMetadataService metadataService, final JSwordPassageService jsword) { this.av11nService = av11nService; this.metadataService = metadataService; this.jsword = jsword; } @Override public int estimateSearchResults(final SearchQuery sq) { final long start = System.currentTimeMillis(); final Key k = searchKeys(sq); LOGGER.trace("Took [{}]ms", System.currentTimeMillis() - start); return k.getCardinality(); } @Override public Key searchKeys(final SearchQuery sq) { final DefaultSearchModifier modifier = new DefaultSearchModifier(); // we have a linked hashmap, because we want to preserve the order of the versions we're looking up // this was we end up with the results in the correct versification final Map<String, Key> resultsPerVersion = new LinkedHashMap<String, Key>(); modifier.setRanked(sq.isRanked()); // need to set to something sensible, other we may experience a // "Requested array size exceeds VM limit" modifier.setMaxResults(MAX_RESULTS); final IndividualSearch currentSearch = sq.getCurrentSearch(); final boolean searchOnTaggedText = currentSearch.getType().isOriginalSearch(); boolean searchExecuted = false; for (final String version : currentSearch.getVersions()) { // now for each version, we do the search and store it in a map final Book bible = this.av11nService.getBookFromVersion(version); //if we're looking at only running searches on tagged texts, then we skip those //that don't have strong numbers (admitedly it could be faster to do it and have nothing //TODO: improvement investigate which is faster if(searchOnTaggedText) { //then we only do the search if the bible is tagged if(!this.metadataService.supportsStrongs(bible)) { continue; } } doSearch(modifier, resultsPerVersion, currentSearch, bible); searchExecuted = true; } if(searchOnTaggedText && !searchExecuted) { Book bible = this.av11nService.getBookFromVersion(JSwordPassageService.REFERENCE_BOOK); doSearch(modifier, resultsPerVersion, currentSearch, bible); } // we then need to merge the keys together // otherwise, we are into the realm of searching across multiple versions // no need to rank, since it won't be possible to rank accurately across versions return mergeSearches(resultsPerVersion); } private void doSearch(final DefaultSearchModifier modifier, final Map<String, Key> resultsPerVersion, final IndividualSearch currentSearch, final Book bible) { String version = bible.getInitials(); if (bible.getIndexStatus().equals(IndexStatus.DONE)) { final Key luceneSearchResults; try { String query = currentSearch.getQuery(); //small optimization and cater for versions that don't support Gen-Rev as a range: query = GEN_REV_RANGE.matcher(query).replaceAll(""); luceneSearchResults = bible.find(new DefaultSearchRequest(query, modifier)); } catch (final BookException e) { throw new LuceneSearchException("Unable to search for " + currentSearch.getQuery() + " with Bible " + version, e); } resultsPerVersion.put(version, luceneSearchResults); } else { LOGGER.error("Module [{}] is not indexed.", version); resultsPerVersion.put(version, PassageKeyFactory.instance().createEmptyKeyList(av11nService.getVersificationForVersion(bible))); } } /** * merges all search results together * * @param resultsPerVersion the results per version * @return the list of results */ private Key mergeSearches(final Map<String, Key> resultsPerVersion) { Key all = null; Versification allVersification = null; for (final Entry<String, Key> entry : resultsPerVersion.entrySet()) { final Key value = entry.getValue(); LOGGER.debug("Sub-result-set [{}] has [{}] entries", entry.getKey(), value.getCardinality()); if (all == null) { all = value; if(all instanceof VerseKey) { allVersification = ((VerseKey) all).getVersification(); } } else { boolean valueIsVerseKey = value instanceof VerseKey; if(valueIsVerseKey && allVersification == null) { throw new StepInternalException("Trying to combine versified key with non-versified key."); } //i.e. and allVersification != null Key convertedKey = value; if(valueIsVerseKey) { final VerseKey versifiedResults = (VerseKey) value; final Passage versifiedPassageResults = KeyUtil.getPassage(versifiedResults); convertedKey = VersificationsMapper.instance().map(versifiedPassageResults, allVersification); } all.addAll(convertedKey); } LOGGER.debug("Combined result-set has [{}] entries", all.getCardinality()); } return all; } @Override public SearchResult search(final SearchQuery sq, final String version, final LookupOption... options) { return retrieveResultsFromKeys(sq, searchKeys(sq), version, options); } @Override public SearchResult retrieveResultsFromKeys(final SearchQuery sq, final Key results, final String version, final LookupOption... options) { final int total = getTotal(results); LOGGER.debug("Total of [{}] results.", total); final Key newResults = rankAndTrimResults(sq, results); if(LOGGER.isDebugEnabled()) { LOGGER.debug("Trimmed down to [{}].", newResults.getCardinality()); } return getResultsFromTrimmedKeys(sq, new String[]{version}, total, newResults, options); } /** * Assumes the page size logic has already been run, retrieves results from the actual book in quest * * @param sq the search criteria * @param versions the versions * @param total the total number of items * @param newResults the page of keys to retrieve * @param options the options to retrieve the passage with * @return the search result passages */ @Override public SearchResult getResultsFromTrimmedKeys(final SearchQuery sq, final String[] versions, final int total, final Key newResults, final LookupOption... options) { final long startRefRetrieval = System.currentTimeMillis(); // if context > 0, then we need to add verse numbers: final List<LookupOption> lookupOptions = new ArrayList<LookupOption>(); Collections.addAll(lookupOptions, options); lookupOptions.add(LookupOption.CHAPTER_BOOK_VERSE_NUMBER); lookupOptions.add(LookupOption.HEBREW_VOWELS); lookupOptions.add(LookupOption.GREEK_ACCENTS); lookupOptions.add(LookupOption.HEBREW_ACCENTS); final SearchResult r = new SearchResult(); getPassagesForResults(r, versions, newResults, sq.getContext(), lookupOptions, sq.getInterlinearMode()); return getSearchResult(r, total, System.currentTimeMillis() - startRefRetrieval); } /** * returns the total or -1 if not available * * @param results the key to set of results * @return the results */ @Override public int getTotal(final Key results) { return results.getCardinality(); } /** * Constructs the search result object * * @param total the total number of hits * @param retrievalTime the time taken to retrieve the references attached to the search results * @return the search result to be returned to the service caller */ private SearchResult getSearchResult(SearchResult r, final int total, final long retrievalTime) { // set stats: r.setTimeTookToRetrieveScripture(retrievalTime); r.setTotal(total); return r; } /** * Looks up all passages represented by the key * * * @param result the results that we will be returning * @param versions the bibles under examination * @param results the list of results * @param context amount of context to add * @param options to use to lookup the right parameterization of the text * @return the list of entries found */ private void getPassagesForResults(SearchResult result, String[] versions, final Key results, final int context, final List<LookupOption> options, String interlinearMode) { final List<SearchEntry> resultPassages = new ArrayList<SearchEntry>(); final Iterator<Key> iterator = ((Passage) results).iterator(); boolean interlinearModeCaptured = false; int count = 0; while (iterator.hasNext()) { final Key verse = iterator.next(); final Key lookupKey; if (verse instanceof Verse) { // then we need to make it into a verse range final Verse verseAsVerse = (Verse) verse; final VerseRange vr = new VerseRange(verseAsVerse.getVersification(), verseAsVerse); vr.blur(context, RestrictionType.NONE); lookupKey = vr; } else { // assume blur is supported verse.blur(context, RestrictionType.NONE); lookupKey = verse; } if(count == 1) { options.add(LookupOption.HIDE_COMPARE_HEADERS); } // TODO this is not very efficient so requires refactoring final OsisWrapper peakOsisText = this.jsword.peakOsisText(versions, lookupKey, options, interlinearMode); resultPassages.add(new VerseSearchEntry(peakOsisText.getReference(), peakOsisText.getValue(), peakOsisText.getOsisId())); if(!interlinearModeCaptured) { result.setInterlinearMode(peakOsisText.getInterlinearMode()); interlinearModeCaptured = true; } count++; } result.setResults(resultPassages); } /** * @param sq search query * @param results the result to be trimmed * @return the results */ @Override public Key rankAndTrimResults(final SearchQuery sq, final Key results) { rankResults(sq.isRanked(), results); final Passage passage = (Passage) results; if (!sq.isAllKeys()) { // we need the first pageNumber*PAGE_SIZE results, so remove anything beyond that. return getPage(sq.getPageNumber(), sq.getPageSize(), passage); } return results; } /** * @param pageNumber the page number, 1 indexed * @param pageSize the size of the page * @param passage the passage we want to trim down * @return the new results */ private Passage getPage(final int pageNumber, final int pageSize, final Passage passage) { Passage results = (Passage) PassageKeyFactory.instance().createEmptyKeyList(passage.getVersification()); Iterator<Key> verses = passage.iterator(); //page 1 = 1-60 //page 2 = 61-120 int from = (pageNumber-1) * pageSize + 1; int to = from + pageSize - 1; int currentCount = 1; while(verses.hasNext()) { final Key verse = verses.next(); if(currentCount >= from && currentCount <= to) { results.addAll(verse); } currentCount++; } return results; } /** * Retrieves the index from JSword * @param bookName the book name * @return the index searcher responsible for carrying out operations on JSword data. */ public IndexSearcher getIndexSearcher(String bookName) { final IndexManager indexManager = IndexManagerFactory.getIndexManager(); Index index; try { index = indexManager.getIndex(this.av11nService.getBookFromVersion(bookName)); } catch (BookException e) { throw new StepInternalException(e.getMessage(), e); } if (!(index instanceof LuceneIndex)) { LOGGER.warn("Unsupported Lucene Index type [{}]", index.getClass()); throw new StepInternalException("Unable to obtain index"); } @SuppressWarnings("resource") final LuceneIndex li = (LuceneIndex) index; return (IndexSearcher) li.getSearcher(); } /** * Sets up the passage tally to rank the results * * @param ranked true to indicate ranking occurs * @param results the results, amended to reflect what is desired */ private void rankResults(final boolean ranked, final Key results) { if (ranked) { if (!(results instanceof PassageTally)) { throw new StepInternalException("Unable to retrieve in ranked order..."); } ((PassageTally) results).setOrdering(Order.TALLY); } } }