package com.tyndalehouse.step.core.utils; import com.tyndalehouse.step.core.data.common.TermsAndMaxCount; import com.tyndalehouse.step.core.exceptions.StepInternalException; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermEnum; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PrefixTermEnum; import org.apache.lucene.search.SingleTermEnum; import java.io.IOException; import java.util.HashSet; import java.util.Set; /** * Utilities to help with index reading * * @author chrisburrell */ public final class LuceneUtils { private static final int MAX_TRACK = 55; /** * no op */ private LuceneUtils() { // no op } public static String safeEscape(final String userTerm) { if (userTerm == null) { return null; } final String term = QueryParser.escape(userTerm); if (term.indexOf(' ') != -1) { return "\"" + term.replace("\"", "\\\"") + "\""; } return term; } /** * Returns all terms starting with a particular prefix * * @param exact indicates we want 'exact' matches only * @param fieldName the name of the fields * @param searchTerm the search term * @return the list of terms matching searchTerm as a prefix */ public static TermsAndMaxCount getAllTermsPrefixedWith(final boolean exact, final boolean trackMax, IndexSearcher searcher, final String fieldName, final String searchTerm, final int max) { final String lastTerm = getLastTerm(searchTerm); if (StringUtils.isBlank(lastTerm)) { return getBlankTermsAndMaxCount(); } TermEnum termEnum = null; try { final Term term = new Term(fieldName, QueryParser.escape(lastTerm.toLowerCase().trim())); termEnum = exact ? new SingleTermEnum(searcher.getIndexReader(), term) : new PrefixTermEnum(searcher.getIndexReader(), term); int count = 0; if (termEnum.term() == null) { return getBlankTermsAndMaxCount(); } final Set<String> terms = new HashSet<String>(); do { if (count < max) { //when inexact, don't include exact terms final String termValue = termEnum.term().text(); if (!exact && termValue.equalsIgnoreCase(searchTerm)) { // we didn't really find a term after all, since it's the exact same term count--; } else { terms.add(termValue); } } count++; //we continue round the loop until we've got enough, or in case we're wanting to keep track of the total number } while (termEnum.next() && ((count < max) || trackMax || count < MAX_TRACK)); //finalise and return TermsAndMaxCount termsAndMaxCount = new TermsAndMaxCount(); termsAndMaxCount.setTotalCount(count); termsAndMaxCount.setTerms(terms); return termsAndMaxCount; } catch (IOException ex) { throw new StepInternalException(ex.getMessage(), ex); } finally { IOUtils.closeQuietly(termEnum); } } private static TermsAndMaxCount getBlankTermsAndMaxCount() { TermsAndMaxCount termsAndMaxCount = new TermsAndMaxCount(); termsAndMaxCount.setTerms(new HashSet<String>()); return termsAndMaxCount; } /** * Obtains the last word in the list * * @param fullTerm the full term as entered by the user * @return the last term in the input string */ private static String getLastTerm(String fullTerm) { final String trimmedUserEntry = fullTerm.toLowerCase(); int lastWordStart = trimmedUserEntry.lastIndexOf(' '); return lastWordStart != -1 ? trimmedUserEntry.substring(lastWordStart + 1) : trimmedUserEntry; } }