package ca.uhn.fhir.jpa.dao; /* * #%L * HAPI FHIR JPA Server * %% * Copyright (C) 2014 - 2017 University Health Network * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import static org.apache.commons.lang3.StringUtils.isNotBlank; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; import javax.persistence.PersistenceContextType; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.Validate; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.Query; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.Scorer; import org.apache.lucene.search.highlight.TokenGroup; import org.hibernate.search.jpa.FullTextEntityManager; import org.hibernate.search.jpa.FullTextQuery; import org.hibernate.search.query.dsl.BooleanJunction; import org.hibernate.search.query.dsl.QueryBuilder; import org.hl7.fhir.instance.model.api.IBaseResource; import org.springframework.transaction.annotation.Transactional; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import ca.uhn.fhir.jpa.entity.ResourceTable; import ca.uhn.fhir.model.api.IQueryParameterType; import ca.uhn.fhir.model.dstu.resource.BaseResource; import ca.uhn.fhir.rest.param.StringParam; import ca.uhn.fhir.rest.server.Constants; import ca.uhn.fhir.rest.server.exceptions.InternalErrorException; import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException; public class FulltextSearchSvcImpl extends BaseHapiFhirDao<IBaseResource> implements IFulltextSearchSvc { private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(FulltextSearchSvcImpl.class); @PersistenceContext(type = PersistenceContextType.TRANSACTION) private EntityManager myEntityManager; /** * Constructor */ public FulltextSearchSvcImpl() { super(); } private void addTextSearch(QueryBuilder theQueryBuilder, BooleanJunction<?> theBoolean, List<List<? extends IQueryParameterType>> theTerms, String theFieldName, String theFieldNameEdgeNGram, String theFieldNameNGram) { if (theTerms == null) { return; } for (List<? extends IQueryParameterType> nextAnd : theTerms) { Set<String> terms = new HashSet<String>(); for (IQueryParameterType nextOr : nextAnd) { StringParam nextOrString = (StringParam) nextOr; String nextValueTrimmed = StringUtils.defaultString(nextOrString.getValue()).trim(); if (isNotBlank(nextValueTrimmed)) { terms.add(nextValueTrimmed); } } if (terms.isEmpty() == false) { if (terms.size() == 1) { //@formatter:off Query textQuery = theQueryBuilder .phrase() .withSlop(2) .onField(theFieldName).boostedTo(4.0f) // .andField(theFieldNameEdgeNGram).boostedTo(2.0f) // .andField(theFieldNameNGram).boostedTo(1.0f) .sentence(terms.iterator().next().toLowerCase()).createQuery(); //@formatter:on theBoolean.must(textQuery); } else { String joinedTerms = StringUtils.join(terms, ' '); theBoolean.must(theQueryBuilder.keyword().onField(theFieldName).matching(joinedTerms).createQuery()); } } } } private List<Long> doSearch(String theResourceName, SearchParameterMap theParams, Long theReferencingPid) { FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager); List<Long> pids = null; /* * Handle textual params */ /* for (String nextParamName : theParams.keySet()) { for (List<? extends IQueryParameterType> nextAndList : theParams.get(nextParamName)) { for (Iterator<? extends IQueryParameterType> orIterator = nextAndList.iterator(); orIterator.hasNext();) { IQueryParameterType nextParam = orIterator.next(); if (nextParam instanceof TokenParam) { TokenParam nextTokenParam = (TokenParam) nextParam; if (nextTokenParam.isText()) { orIterator.remove(); QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceIndexedSearchParamString.class).get(); BooleanJunction<?> bool = qb.bool(); bool.must(qb.keyword().onField("myParamName").matching(nextParamName).createQuery()); if (isNotBlank(theResourceName)) { bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery()); } // //@formatter:off String value = nextTokenParam.getValue().toLowerCase(); bool.must(qb.keyword().onField("myValueTextEdgeNGram").matching(value).createQuery()); //@formatter:on FullTextQuery ftq = em.createFullTextQuery(bool.createQuery(), ResourceIndexedSearchParamString.class); List<?> resultList = ftq.getResultList(); pids = new ArrayList<Long>(); for (Object next : resultList) { ResourceIndexedSearchParamString nextAsArray = (ResourceIndexedSearchParamString) next; pids.add(nextAsArray.getResourcePid()); } } } } } } if (pids != null && pids.isEmpty()) { return pids; } */ QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get(); BooleanJunction<?> bool = qb.bool(); /* * Handle _content parameter (resource body content) */ List<List<? extends IQueryParameterType>> contentAndTerms = theParams.remove(Constants.PARAM_CONTENT); addTextSearch(qb, bool, contentAndTerms, "myContentText", "myContentTextEdgeNGram", "myContentTextNGram"); /* * Handle _text parameter (resource narrative content) */ List<List<? extends IQueryParameterType>> textAndTerms = theParams.remove(Constants.PARAM_TEXT); addTextSearch(qb, bool, textAndTerms, "myNarrativeText", "myNarrativeTextEdgeNGram", "myNarrativeTextNGram"); if (theReferencingPid != null) { bool.must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(theReferencingPid).createQuery()); } if (bool.isEmpty()) { return pids; } if (isNotBlank(theResourceName)) { bool.must(qb.keyword().onField("myResourceType").matching(theResourceName).createQuery()); } Query luceneQuery = bool.createQuery(); // wrap Lucene query in a javax.persistence.Query FullTextQuery jpaQuery = em.createFullTextQuery(luceneQuery, ResourceTable.class); jpaQuery.setProjection("myId"); // execute search List<?> result = jpaQuery.getResultList(); HashSet<Long> pidsSet = pids != null ? new HashSet<Long>(pids) : null; ArrayList<Long> retVal = new ArrayList<Long>(); for (Object object : result) { Object[] nextArray = (Object[]) object; Long next = (Long) nextArray[0]; if (next != null && (pidsSet == null || pidsSet.contains(next))) { retVal.add(next); } } return retVal; } @Override public List<Long> everything(String theResourceName, SearchParameterMap theParams) { Long pid = null; if (theParams.get(BaseResource.SP_RES_ID) != null) { StringParam idParm = (StringParam) theParams.get(BaseResource.SP_RES_ID).get(0).get(0); pid = BaseHapiFhirDao.translateForcedIdToPid(theResourceName, idParm.getValue(), myForcedIdDao); } Long referencingPid = pid; List<Long> retVal = doSearch(null, theParams, referencingPid); if (referencingPid != null) { retVal.add(referencingPid); } return retVal; } @Transactional() @Override public List<Long> search(String theResourceName, SearchParameterMap theParams) { return doSearch(theResourceName, theParams, null); } @Override public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText) { Validate.notBlank(theContext, "theContext must be provided"); Validate.notBlank(theSearchParam, "theSearchParam must be provided"); Validate.notBlank(theText, "theSearchParam must be provided"); long start = System.currentTimeMillis(); String[] contextParts = StringUtils.split(theContext, '/'); if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) { throw new InvalidRequestException("Invalid context: " + theContext); } Long pid = BaseHapiFhirDao.translateForcedIdToPid(contextParts[0], contextParts[1], myForcedIdDao); FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager); QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get(); //@formatter:off Query textQuery = qb .phrase() .withSlop(2) .onField("myContentText").boostedTo(4.0f) .andField("myContentTextEdgeNGram").boostedTo(2.0f) .andField("myContentTextNGram").boostedTo(1.0f) .andField("myContentTextPhonetic").boostedTo(0.5f) .sentence(theText.toLowerCase()).createQuery(); Query query = qb.bool() .must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery()) .must(textQuery) .createQuery(); //@formatter:on FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class); ftq.setProjection("myContentText"); ftq.setMaxResults(20); List<?> resultList = ftq.getResultList(); List<Suggestion> suggestions = Lists.newArrayList(); for (Object next : resultList) { Object[] nextAsArray = (Object[]) next; String nextValue = (String) nextAsArray[0]; try { MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions); Scorer scorer = new QueryScorer(textQuery); Highlighter highlighter = new Highlighter(formatter, scorer); Analyzer analyzer = em.getSearchFactory().getAnalyzer(ResourceTable.class); formatter.setAnalyzer("myContentTextPhonetic"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); formatter.setAnalyzer("myContentTextNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); formatter.setFindPhrasesWith(); formatter.setAnalyzer("myContentTextEdgeNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentText"); // highlighter.getBestFragments(analyzer.tokenStream("myContentText", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextEdgeNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextPhonetic"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); } catch (Exception e) { throw new InternalErrorException(e); } } Collections.sort(suggestions); Set<String> terms = Sets.newHashSet(); for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext();) { String nextTerm = iter.next().getTerm().toLowerCase(); if (!terms.add(nextTerm)) { iter.remove(); } } long delay = System.currentTimeMillis() - start; ourLog.info("Provided {} suggestions for term {} in {} ms", new Object[] { terms.size(), theText, delay }); return suggestions; } public static class Suggestion implements Comparable<Suggestion> { public Suggestion(String theTerm, float theScore) { myTerm = theTerm; myScore = theScore; } public String getTerm() { return myTerm; } public float getScore() { return myScore; } private String myTerm; private float myScore; @Override public int compareTo(Suggestion theO) { return Float.compare(theO.myScore, myScore); } @Override public String toString() { return "Suggestion[myTerm=" + myTerm + ", myScore=" + myScore + "]"; } } public class MySuggestionFormatter implements Formatter { private List<Suggestion> mySuggestions; private String myAnalyzer; private ArrayList<String> myPartialMatchPhrases; private ArrayList<Float> myPartialMatchScores; private String myOriginalSearch; public MySuggestionFormatter(String theOriginalSearch, List<Suggestion> theSuggestions) { myOriginalSearch = theOriginalSearch; mySuggestions = theSuggestions; } public void setFindPhrasesWith() { myPartialMatchPhrases = new ArrayList<String>(); myPartialMatchScores = new ArrayList<Float>(); for (Suggestion next : mySuggestions) { myPartialMatchPhrases.add(' ' + next.myTerm); myPartialMatchScores.add(next.myScore); } myPartialMatchPhrases.add(myOriginalSearch); myPartialMatchScores.add(1.0f); } public void setAnalyzer(String theString) { myAnalyzer = theString; } @Override public String highlightTerm(String theOriginalText, TokenGroup theTokenGroup) { ourLog.debug("{} Found {} with score {}", new Object[] { myAnalyzer, theOriginalText, theTokenGroup.getTotalScore() }); if (theTokenGroup.getTotalScore() > 0) { float score = theTokenGroup.getTotalScore(); if (theOriginalText.equalsIgnoreCase(myOriginalSearch)) { score = score + 1.0f; } mySuggestions.add(new Suggestion(theOriginalText, score)); } else if (myPartialMatchPhrases != null) { if (theOriginalText.length() < 100) { for (int i = 0; i < myPartialMatchPhrases.size(); i++) { if (theOriginalText.contains(myPartialMatchPhrases.get(i))) { mySuggestions.add(new Suggestion(theOriginalText, myPartialMatchScores.get(i) - 0.5f)); } } } } return null; } } @Override public boolean isDisabled() { try { FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager); em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get(); } catch (Exception e) { ourLog.trace("FullText test failed", e); ourLog.debug("Hibernate Search (Lucene) appears to be disabled on this server, fulltext will be disabled"); return true; } return false; } }