/** * Licensed to The Apereo Foundation under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * * The Apereo Foundation licenses this file to you under the Educational * Community License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of the License * at: * * http://opensource.org/licenses/ecl2.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. * */ package org.opencastproject.search.impl.solr; import static org.opencastproject.security.api.Permissions.Action.READ; import static org.opencastproject.security.api.Permissions.Action.WRITE; import static org.opencastproject.util.data.Collections.filter; import static org.opencastproject.util.data.Collections.head; import static org.opencastproject.util.data.Option.option; import org.opencastproject.mediapackage.MediaPackage; import org.opencastproject.mediapackage.MediaPackageBuilder; import org.opencastproject.mediapackage.MediaPackageBuilderFactory; import org.opencastproject.mediapackage.MediaPackageSerializer; import org.opencastproject.search.api.MediaSegment; import org.opencastproject.search.api.MediaSegmentImpl; import org.opencastproject.search.api.SearchQuery; import org.opencastproject.search.api.SearchResult; import org.opencastproject.search.api.SearchResultImpl; import org.opencastproject.search.api.SearchResultItem; import org.opencastproject.search.api.SearchResultItem.SearchResultItemType; import org.opencastproject.search.api.SearchResultItemImpl; import org.opencastproject.security.api.Role; import org.opencastproject.security.api.SecurityService; import 
org.opencastproject.security.api.User; import org.opencastproject.util.SolrUtils; import org.opencastproject.util.data.Function; import org.opencastproject.util.data.Function0; import org.opencastproject.util.data.Option; import org.opencastproject.util.data.Predicate; import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery.ORDER; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map.Entry; import java.util.Properties; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Class implementing <code>LookupRequester</code> to provide connection to solr indexing facility. */ public class SolrRequester { /** * Logging facility */ private static Logger logger = LoggerFactory.getLogger(SolrRequester.class); /** * The connection to the solr database */ private SolrServer solrServer = null; /** * The security service */ private SecurityService securityService; /** * The optional serializer */ private MediaPackageSerializer serializer = null; /** * Creates a new requester for solr that will be using the given connection object to query the search index. * * @param connection * the solr connection * @param securityService * the security service */ public SolrRequester(SolrServer connection, SecurityService securityService) { this(connection, securityService, null); } /** * Creates a new requester for solr that will be using the given connection object to query the search index. 
* * @param connection * the solr connection * @param securityService * the security service * @param serializer * the optional mediapackage serializer */ public SolrRequester(SolrServer connection, SecurityService securityService, MediaPackageSerializer serializer) { if (connection == null) throw new IllegalStateException("Unable to run queries on null connection"); this.solrServer = connection; this.securityService = securityService; this.serializer = serializer; } /** * Returns the search results for a solr query string with read access for the current user. * * @param q * the query * @param limit * the limit * @param offset * the offset * @return the search results * @throws SolrServerException */ public SearchResult getByQuery(String q, int limit, int offset) throws SolrServerException { SearchQuery q1 = new SearchQuery(); q1.withQuery(q).withLimit(limit).withOffset(offset); return getForRead(q1); } /** * Creates a search result from a given solr response. * * @param query * The solr query. * @return The search result. 
* @throws SolrServerException * if the solr server is not working as expected */ private SearchResult createSearchResult(final SolrQuery query) throws SolrServerException { // Execute the query and try to get hold of a query response QueryResponse solrResponse = null; try { solrResponse = solrServer.query(query); } catch (Exception e) { throw new SolrServerException(e); } // Create and configure the query result final SearchResultImpl result = new SearchResultImpl(query.getQuery()); result.setSearchTime(solrResponse.getQTime()); result.setOffset(solrResponse.getResults().getStart()); result.setLimit(solrResponse.getResults().size()); result.setTotal(solrResponse.getResults().getNumFound()); // Walk through response and create new items with title, creator, etc: for (final SolrDocument doc : solrResponse.getResults()) { final SearchResultItemImpl item = SearchResultItemImpl.fill(new SearchResultItem() { private final String dfltString = null; @Override public String getId() { return Schema.getId(doc); } /** * {@inheritDoc} * * @see org.opencastproject.search.api.SearchResultItem#getOrganization() */ @Override public String getOrganization() { return Schema.getOrganization(doc); } @Override public MediaPackage getMediaPackage() { MediaPackageBuilder builder = MediaPackageBuilderFactory.newInstance().newMediaPackageBuilder(); if (serializer != null) builder.setSerializer(serializer); String mediaPackageFieldValue = Schema.getOcMediapackage(doc); if (mediaPackageFieldValue != null) { try { return builder.loadFromXml(mediaPackageFieldValue); } catch (Exception e) { logger.warn("Unable to read media package from search result", e); } } return null; } @Override public long getDcExtent() { if (getType().equals(SearchResultItemType.AudioVisual)) { Long extent = Schema.getDcExtent(doc); if (extent != null) return extent; } return -1; } @Override public String getDcTitle() { final List<DField<String>> titles = Schema.getDcTitle(doc); // try to return the first title without 
any language information first... return head(filter(titles, new Predicate<DField<String>>() { @Override public Boolean apply(DField<String> f) { return f.getSuffix().equals(Schema.LANGUAGE_UNDEFINED); } })).map(new Function<DField<String>, String>() { @Override public String apply(DField<String> f) { return f.getValue(); } }).getOrElse(new Function0<String>() { @Override public String apply() { // ... since none is present return the first arbitrary title return Schema.getFirst(titles, dfltString); } }); } @Override public String getDcSubject() { return Schema.getFirst(Schema.getDcSubject(doc), dfltString); } @Override public String getDcDescription() { return Schema.getFirst(Schema.getDcDescription(doc), dfltString); } @Override public String getDcCreator() { return Schema.getFirst(Schema.getDcCreator(doc), dfltString); } @Override public String getDcPublisher() { return Schema.getFirst(Schema.getDcPublisher(doc), dfltString); } @Override public String getDcContributor() { return Schema.getFirst(Schema.getDcContributor(doc), dfltString); } @Override public String getDcAbstract() { return null; } @Override public Date getDcCreated() { return Schema.getDcCreated(doc); } @Override public Date getDcAvailableFrom() { return Schema.getDcAvailableFrom(doc); } @Override public Date getDcAvailableTo() { return Schema.getDcAvailableTo(doc); } @Override public String getDcLanguage() { return Schema.getDcLanguage(doc); } @Override public String getDcRightsHolder() { return Schema.getFirst(Schema.getDcRightsHolder(doc), dfltString); } @Override public String getDcSpatial() { return Schema.getFirst(Schema.getDcSpatial(doc), dfltString); } @Override public String getDcTemporal() { return null; } @Override public String getDcIsPartOf() { return Schema.getDcIsPartOf(doc); } @Override public String getDcReplaces() { return Schema.getDcReplaces(doc); } @Override public String getDcType() { return Schema.getDcType(doc); } @Override public String getDcAccessRights() { return 
Schema.getFirst(Schema.getDcAccessRights(doc), dfltString); } @Override public String getDcLicense() { return Schema.getFirst(Schema.getDcLicense(doc), dfltString); } @Override public String getOcMediapackage() { return Schema.getOcMediapackage(doc); } @Override public SearchResultItemType getType() { String t = Schema.getOcMediatype(doc); return t != null ? SearchResultItemType.valueOf(t) : null; } @Override public String[] getKeywords() { if (getType().equals(SearchResultItemType.AudioVisual)) { String k = Schema.getOcKeywords(doc); return k != null ? k.split(" ") : new String[0]; } else return new String[0]; } @Override public String getCover() { return Schema.getOcCover(doc); } @Override public Date getModified() { return Schema.getOcModified(doc); } @Override public double getScore() { return Schema.getScore(doc); } @Override public MediaSegment[] getSegments() { if (SearchResultItemType.AudioVisual.equals(getType())) return createSearchResultSegments(doc, query).toArray(new MediaSegmentImpl[0]); else return new MediaSegmentImpl[0]; } }); // Add the item to the result set result.addItem(item); } return result; } /** * Creates a list of <code>MediaSegment</code>s from the given result document. 
* * @param doc * the result document * @param query * the original query */ private List<MediaSegmentImpl> createSearchResultSegments(SolrDocument doc, SolrQuery query) { List<MediaSegmentImpl> segments = new ArrayList<MediaSegmentImpl>(); // The maximum number of hits in a segment int maxHits = 0; // Loop over every segment for (String fieldName : doc.getFieldNames()) { if (!fieldName.startsWith(Schema.SEGMENT_TEXT_PREFIX)) continue; // Ceate a new segment int segmentId = Integer.parseInt(fieldName.substring(Schema.SEGMENT_TEXT_PREFIX.length())); MediaSegmentImpl segment = new MediaSegmentImpl(segmentId); segment.setText(mkString(doc.getFieldValue(fieldName))); // Read the hints for this segment Properties segmentHints = new Properties(); try { String hintFieldName = Schema.SEGMENT_HINT_PREFIX + segment.getIndex(); Object hintFieldValue = doc.getFieldValue(hintFieldName); segmentHints.load(new ByteArrayInputStream(hintFieldValue.toString().getBytes())); } catch (IOException e) { logger.warn("Cannot load hint properties."); } // get segment time String segmentTime = segmentHints.getProperty("time"); if (segmentTime == null) throw new IllegalStateException("Found segment without time hint"); segment.setTime(Long.parseLong(segmentTime)); // get segment duration String segmentDuration = segmentHints.getProperty("duration"); if (segmentDuration == null) throw new IllegalStateException("Found segment without duration hint"); segment.setDuration(Long.parseLong(segmentDuration)); // get preview urls for (Entry<Object, Object> entry : segmentHints.entrySet()) { if (entry.getKey().toString().startsWith("preview.")) { String[] parts = entry.getKey().toString().split("\\."); segment.addPreview(entry.getValue().toString(), parts[1]); } } // calculate the segment's relevance with respect to the query String queryText = query.getQuery(); String segmentText = segment.getText(); if (!StringUtils.isBlank(queryText) && !StringUtils.isBlank(segmentText)) { segmentText = 
segmentText.toLowerCase(); Pattern p = Pattern.compile(".*fulltext:\\(([^)]*)\\).*"); Matcher m = p.matcher(queryText); if (m.matches()) { String[] queryTerms = StringUtils.split(m.group(1).toLowerCase()); int segmentHits = 0; int textLength = segmentText.length(); for (String t : queryTerms) { String strippedTerm = StringUtils.strip(t, "*"); if (StringUtils.isBlank(strippedTerm)) continue; int startIndex = 0; while (startIndex < textLength - 1) { int foundAt = segmentText.indexOf(strippedTerm, startIndex); if (foundAt < 0) break; segmentHits++; startIndex = foundAt + strippedTerm.length(); } } // for now, just store the number of hits, but keep track of the maximum hit count if (segmentHits > 0) { segment.setHit(true); segment.setRelevance(segmentHits); } if (segmentHits > maxHits) maxHits = segmentHits; } } segments.add(segment); } for (MediaSegmentImpl segment : segments) { int hitsInSegment = segment.getRelevance(); if (hitsInSegment > 0) segment.setRelevance((int) ((100 * hitsInSegment) / maxHits)); } return segments; } /** * Modifies the query such that certain fields are being boosted (meaning they gain some weight). * * @param query * The user query. 
* @return The boosted query */ public StringBuffer boost(String query) { String uq = SolrUtils.clean(query); StringBuffer sb = new StringBuffer(); sb.append("("); sb.append(Schema.DC_TITLE_PREFIX); sb.append(":("); sb.append(uq); sb.append(")^"); sb.append(Schema.DC_TITLE_BOOST); sb.append(" "); sb.append(Schema.DC_CREATOR_PREFIX); sb.append(":("); sb.append(uq); sb.append(")^"); sb.append(Schema.DC_CREATOR_BOOST); sb.append(" "); sb.append(Schema.DC_SUBJECT_PREFIX); sb.append(":("); sb.append(uq); sb.append(")^"); sb.append(Schema.DC_SUBJECT_BOOST); sb.append(" "); sb.append(Schema.DC_PUBLISHER_PREFIX); sb.append(":("); sb.append(uq); sb.append(")^"); sb.append(Schema.DC_PUBLISHER_BOOST); sb.append(" "); sb.append(Schema.DC_CONTRIBUTOR_PREFIX); sb.append(":("); sb.append(uq); sb.append(")^"); sb.append(Schema.DC_CONTRIBUTOR_BOOST); sb.append(" "); sb.append(Schema.DC_ABSTRACT_PREFIX); sb.append(":("); sb.append(uq); sb.append(")^"); sb.append(Schema.DC_ABSTRACT_BOOST); sb.append(" "); sb.append(Schema.DC_DESCRIPTION_PREFIX); sb.append(":("); sb.append(uq); sb.append(")^"); sb.append(Schema.DC_DESCRIPTION_BOOST); sb.append(" "); sb.append(Schema.FULLTEXT); sb.append(":("); sb.append(uq); sb.append(") "); // see http://wiki.apache.org/lucene-java/LuceneFAQ#Are_Wildcard.2C_Prefix.2C_and_Fuzzy_queries_case_sensitive.3F // for an explanation why .toLowerCase() is used here. This behaviour is tracked in SOLR-219. // It's also important not to stem when using wildcard queries. Please adjust the schema.xml accordingly. sb.append(Schema.FULLTEXT); sb.append(":(*"); sb.append(uq.toLowerCase()); sb.append("*) "); sb.append(")"); return sb; } /** * Simple helper method to avoid null strings. * * @param f * object which implements <code>toString()</code> method. * @return The input object or empty string. */ private static String mkString(Object f) { if (f != null) return f.toString(); else return ""; } /** * Converts the query object into a solr query and returns the results. 
* * @param q * the query * @param action * one of {@link org.opencastproject.search.api.SearchService#READ_PERMISSION}, * {@link org.opencastproject.search.api.SearchService#WRITE_PERMISSION} * @param applyPermissions * whether to apply the permissions to the query. Set to false for administrative queries. * @return the search results */ private SolrQuery getForAction(SearchQuery q, String action, boolean applyPermissions) throws SolrServerException { StringBuilder sb = new StringBuilder(); if (StringUtils.isNotBlank(q.getQuery())) sb.append(q.getQuery()); String solrIdRequest = StringUtils.trimToNull(q.getId()); if (solrIdRequest != null) { String cleanSolrIdRequest = SolrUtils.clean(solrIdRequest); if (sb.length() > 0) sb.append(" AND "); sb.append("("); sb.append(Schema.ID); sb.append(":"); sb.append(cleanSolrIdRequest); if (q.isIncludeEpisodes() && q.isIncludeSeries()) { sb.append(" OR "); sb.append(Schema.DC_IS_PART_OF); sb.append(":"); sb.append(cleanSolrIdRequest); } sb.append(")"); } String solrSeriesIdRequest = StringUtils.trimToNull(q.getSeriesId()); if (solrSeriesIdRequest != null) { String cleanSolrSeriesIdRequest = SolrUtils.clean(solrSeriesIdRequest); if (sb.length() > 0) { sb.append(" AND "); } sb.append("("); sb.append(Schema.DC_IS_PART_OF); sb.append(":"); sb.append(cleanSolrSeriesIdRequest); sb.append(")"); } String solrTextRequest = StringUtils.trimToNull(q.getText()); if (solrTextRequest != null) { String cleanSolrTextRequest = SolrUtils.clean(q.getText()); if (StringUtils.isNotEmpty(cleanSolrTextRequest)) { if (sb.length() > 0) sb.append(" AND "); sb.append("( *:"); sb.append(boost(cleanSolrTextRequest)); sb.append(" OR ("); sb.append(Schema.ID); sb.append(":"); sb.append(cleanSolrTextRequest); sb.append(") )"); } } if (q.getElementTags() != null && q.getElementTags().length > 0) { if (sb.length() > 0) sb.append(" AND "); StringBuilder tagBuilder = new StringBuilder(); for (int i = 0; i < q.getElementTags().length; i++) { String tag = 
SolrUtils.clean(q.getElementTags()[i]); if (StringUtils.isEmpty(tag)) continue; if (tagBuilder.length() == 0) { tagBuilder.append("("); } else { tagBuilder.append(" OR "); } tagBuilder.append(Schema.OC_ELEMENTTAGS); tagBuilder.append(":"); tagBuilder.append(tag); } if (tagBuilder.length() > 0) { tagBuilder.append(") "); sb.append(tagBuilder); } } if (q.getElementFlavors() != null && q.getElementFlavors().length > 0) { if (sb.length() > 0) sb.append(" AND "); StringBuilder flavorBuilder = new StringBuilder(); for (int i = 0; i < q.getElementFlavors().length; i++) { String flavor = SolrUtils.clean(q.getElementFlavors()[i].toString()); if (StringUtils.isEmpty(flavor)) continue; if (flavorBuilder.length() == 0) { flavorBuilder.append("("); } else { flavorBuilder.append(" OR "); } flavorBuilder.append(Schema.OC_ELEMENTFLAVORS); flavorBuilder.append(":"); flavorBuilder.append(flavor); } if (flavorBuilder.length() > 0) { flavorBuilder.append(") "); sb.append(flavorBuilder); } } if (q.getDeletedDate() != null) { if (sb.length() > 0) sb.append(" AND "); sb.append(Schema.OC_DELETED + ":" + SolrUtils.serializeDateRange(option(q.getDeletedDate()), Option.<Date> none())); } if (sb.length() == 0) sb.append("*:*"); if (applyPermissions) { sb.append(" AND ").append(Schema.OC_ORGANIZATION).append(":") .append(SolrUtils.clean(securityService.getOrganization().getId())); User user = securityService.getUser(); Set<Role> roles = user.getRoles(); boolean userHasAnonymousRole = false; if (roles.size() > 0) { sb.append(" AND ("); StringBuilder roleList = new StringBuilder(); for (Role role : roles) { if (roleList.length() > 0) roleList.append(" OR "); roleList.append(Schema.OC_ACL_PREFIX).append(action).append(":").append(SolrUtils.clean(role.getName())); if (role.getName().equalsIgnoreCase(securityService.getOrganization().getAnonymousRole())) { userHasAnonymousRole = true; } } if (!userHasAnonymousRole) { if (roleList.length() > 0) roleList.append(" OR "); 
roleList.append(Schema.OC_ACL_PREFIX).append(action).append(":") .append(SolrUtils.clean(securityService.getOrganization().getAnonymousRole())); } sb.append(roleList.toString()); sb.append(")"); } } if (!q.isIncludeEpisodes()) { if (sb.length() > 0) sb.append(" AND "); sb.append("-" + Schema.OC_MEDIATYPE + ":" + SearchResultItemType.AudioVisual); } if (!q.isIncludeSeries()) { if (sb.length() > 0) sb.append(" AND "); sb.append("-" + Schema.OC_MEDIATYPE + ":" + SearchResultItemType.Series); } if (q.getDeletedDate() == null) { if (sb.length() > 0) sb.append(" AND "); sb.append("-" + Schema.OC_DELETED + ":[* TO *]"); } SolrQuery query = new SolrQuery(sb.toString()); if (q.getLimit() > 0) { query.setRows(q.getLimit()); } else { query.setRows(Integer.MAX_VALUE); } if (q.getOffset() > 0) query.setStart(q.getOffset()); if (q.getSort() != null) { ORDER order = q.isSortAscending() ? ORDER.asc : ORDER.desc; query.addSortField(getSortField(q.getSort()), order); } if (!SearchQuery.Sort.DATE_CREATED.equals(q.getSort())) { query.addSortField(getSortField(SearchQuery.Sort.DATE_CREATED), ORDER.desc); } query.setFields("* score"); return query; } /** * Returns the search results, regardless of permissions. This should be used for maintenance purposes only. * * @param q * the search query * @return the readable search result * @throws SolrServerException */ public SearchResult getForAdministrativeRead(SearchQuery q) throws SolrServerException { SolrQuery query = getForAction(q, READ.toString(), false); return createSearchResult(query); } /** * Returns the search results that are accessible for read by the current user. * * @param q * the search query * @return the readable search result * @throws SolrServerException */ public SearchResult getForRead(SearchQuery q) throws SolrServerException { SolrQuery query = getForAction(q, READ.toString(), true); return createSearchResult(query); } /** * Returns the search results that are accessible for write by the current user. 
*
   * @param q
   *          the search query
   * @return the writable search result
   * @throws SolrServerException
   */
  public SearchResult getForWrite(SearchQuery q) throws SolrServerException {
    return createSearchResult(getForAction(q, WRITE.toString(), true));
  }

  /**
   * Replaces the security service used by this requester.
   *
   * @param securityService
   *          the securityService to set
   */
  public void setSecurityService(SecurityService securityService) {
    this.securityService = securityService;
  }

  /**
   * Replaces the optional mediapackage serializer used by this requester.
   *
   * @param serializer
   *          the serializer
   */
  public void setMediaPackageSerializer(MediaPackageSerializer serializer) {
    this.serializer = serializer;
  }

  /**
   * Maps a sort criterion to the name of the search index field to sort by.
   *
   * @param sort
   *          the sort field
   * @return the field name in the search index
   * @throws IllegalArgumentException
   *           if there is no index field for the sort criterion
   */
  protected String getSortField(SearchQuery.Sort sort) {
    final String indexField;
    switch (sort) {
      case TITLE:
        indexField = Schema.DC_TITLE_SORT;
        break;
      case CONTRIBUTOR:
        indexField = Schema.DC_CONTRIBUTOR_SORT;
        break;
      case DATE_CREATED:
        indexField = Schema.DC_CREATED;
        break;
      case DATE_PUBLISHED:
        indexField = Schema.OC_MODIFIED;
        break;
      case CREATOR:
        indexField = Schema.DC_CREATOR_SORT;
        break;
      case LANGUAGE:
        indexField = Schema.DC_LANGUAGE;
        break;
      case LICENSE:
        indexField = Schema.DC_LICENSE_SORT;
        break;
      case MEDIA_PACKAGE_ID:
        indexField = Schema.ID;
        break;
      case SERIES_ID:
        indexField = Schema.DC_IS_PART_OF;
        break;
      case SUBJECT:
        indexField = Schema.DC_SUBJECT_SORT;
        break;
      case DESCRIPTION:
        indexField = Schema.DC_DESCRIPTION_SORT;
        break;
      case PUBLISHER:
        indexField = Schema.DC_PUBLISHER_SORT;
        break;
      default:
        throw new IllegalArgumentException("No mapping found between sort field and index");
    }
    return indexField;
  }

}