/** * Licensed to The Apereo Foundation under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * * The Apereo Foundation licenses this file to you under the Educational * Community License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of the License * at: * * http://opensource.org/licenses/ecl2.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. * */ package org.opencastproject.search.impl.solr; import static org.opencastproject.security.api.Permissions.Action.READ; import static org.opencastproject.security.api.Permissions.Action.WRITE; import static org.opencastproject.util.RequireUtil.notNull; import static org.opencastproject.util.data.Collections.flatMap; import static org.opencastproject.util.data.Collections.head; import static org.opencastproject.util.data.Collections.map; import static org.opencastproject.util.data.Option.option; import org.opencastproject.mediapackage.Attachment; import org.opencastproject.mediapackage.Catalog; import org.opencastproject.mediapackage.MediaPackage; import org.opencastproject.mediapackage.MediaPackageElement; import org.opencastproject.mediapackage.MediaPackageElements; import org.opencastproject.mediapackage.MediaPackageException; import org.opencastproject.mediapackage.MediaPackageParser; import org.opencastproject.mediapackage.MediaPackageReference; import org.opencastproject.metadata.api.MetadataValue; import org.opencastproject.metadata.api.StaticMetadata; import org.opencastproject.metadata.api.StaticMetadataService; import org.opencastproject.metadata.api.util.Interval; import org.opencastproject.metadata.dublincore.DCMIPeriod; import org.opencastproject.metadata.dublincore.DublinCore; import org.opencastproject.metadata.dublincore.DublinCoreCatalog; import org.opencastproject.metadata.dublincore.DublinCoreValue; import org.opencastproject.metadata.dublincore.EncodingSchemeUtils; import org.opencastproject.metadata.dublincore.Temporal; import org.opencastproject.metadata.mpeg7.AudioVisual; import org.opencastproject.metadata.mpeg7.FreeTextAnnotation; import org.opencastproject.metadata.mpeg7.KeywordAnnotation; import org.opencastproject.metadata.mpeg7.MediaDuration; import org.opencastproject.metadata.mpeg7.MediaTime; import org.opencastproject.metadata.mpeg7.MediaTimePoint; import org.opencastproject.metadata.mpeg7.Mpeg7Catalog; import org.opencastproject.metadata.mpeg7.Mpeg7CatalogService; import org.opencastproject.metadata.mpeg7.MultimediaContent; import org.opencastproject.metadata.mpeg7.MultimediaContentType; import org.opencastproject.metadata.mpeg7.SpatioTemporalDecomposition; import org.opencastproject.metadata.mpeg7.TextAnnotation; import org.opencastproject.metadata.mpeg7.Video; import org.opencastproject.metadata.mpeg7.VideoSegment; import org.opencastproject.metadata.mpeg7.VideoText; import org.opencastproject.search.api.SearchResultItem.SearchResultItemType; import org.opencastproject.search.impl.persistence.SearchServiceDatabaseException; import org.opencastproject.security.api.AccessControlEntry; import org.opencastproject.security.api.AccessControlList; import org.opencastproject.security.api.SecurityService; import org.opencastproject.security.api.UnauthorizedException; import org.opencastproject.series.api.SeriesException; import org.opencastproject.series.api.SeriesService; import org.opencastproject.util.NotFoundException; import org.opencastproject.util.SolrUtils; import org.opencastproject.util.data.Function; import org.opencastproject.util.data.Option; import org.opencastproject.workspace.api.Workspace; import org.apache.commons.io.IOUtils; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.SolrParams; import org.apache.solr.servlet.SolrRequestParsers; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.SortedSet; import java.util.TreeSet; /** * Utility class used to manage the search index. */ public class SolrIndexManager { /** Logging facility */ private static final Logger logger = LoggerFactory.getLogger(SolrIndexManager.class); /** Connection to the database */ private SolrServer solrServer = null; /** * Factor multiplied to fine tune relevance and confidence impact on important keyword decision. importance = * RELEVANCE_BOOST * relevance + confidence */ private static final double RELEVANCE_BOOST = 2.0; /** Number of characters an important should have at least. */ private static final int MAX_CHAR = 3; /** Maximum number of important keywords to detect. */ private static final int MAX_IMPORTANT_COUNT = 10; /** List of metadata services sorted by priority in reverse order. */ private List<StaticMetadataService> mdServices; private SeriesService seriesService; private Mpeg7CatalogService mpeg7CatalogService; private Workspace workspace; private SecurityService securityService; /** Convert a DublinCoreValue into a date. */ private static Function<DublinCoreValue, Option<Date>> toDateF = new Function<DublinCoreValue, Option<Date>>() { @Override public Option<Date> apply(DublinCoreValue v) { return EncodingSchemeUtils.decodeTemporal(v).fold(new Temporal.Match<Option<Date>>() { @Override public Option<Date> period(DCMIPeriod period) { return option(period.getStart()); } @Override public Option<Date> instant(Date instant) { return Option.some(instant); } @Override public Option<Date> duration(long duration) { return Option.none(); } }); } }; /** Convert a DublinCoreValue into a duration (long). */ private static Function<DublinCoreValue, Option<Long>> toDurationF = new Function<DublinCoreValue, Option<Long>>() { @Override public Option<Long> apply(DublinCoreValue dublinCoreValue) { return option(EncodingSchemeUtils.decodeDuration(dublinCoreValue)); } }; /** Dynamic reference. */ public void setStaticMetadataServices(List<StaticMetadataService> mdServices) { this.mdServices = new ArrayList<StaticMetadataService>(mdServices); Collections.sort(this.mdServices, new Comparator<StaticMetadataService>() { @Override public int compare(StaticMetadataService a, StaticMetadataService b) { return b.getPriority() - a.getPriority(); } }); } /** * Creates a new management instance for the search index. * * @param connection * connection to the database */ public SolrIndexManager(SolrServer connection, Workspace workspace, List<StaticMetadataService> mdServices, SeriesService seriesService, Mpeg7CatalogService mpeg7CatalogService, SecurityService securityService) { this.solrServer = notNull(connection, "solr connection"); this.workspace = notNull(workspace, "workspace"); this.seriesService = notNull(seriesService, "series service"); this.mpeg7CatalogService = notNull(mpeg7CatalogService, "mpeg7 service"); this.securityService = notNull(securityService, "security service"); setStaticMetadataServices(notNull(mdServices, "metadata service")); } /** * Clears the search index. Make sure you know what you are doing. * * @throws SolrServerException * if an errors occurs while talking to solr */ public void clear() throws SolrServerException { try { solrServer.deleteByQuery("*:*"); solrServer.commit(); } catch (IOException e) { throw new SolrServerException(e); } } /** * Removes the entry with the given <code>id</code> from the database. The entry can either be a series or an episode. * * @param id * identifier of the series or episode to delete * @param deletionDate * the deletion date * @throws SolrServerException * if an errors occurs while talking to solr */ public boolean delete(String id, Date deletionDate) throws SolrServerException { try { // Load the existing episode QueryResponse solrResponse = null; try { SolrQuery query = new SolrQuery(Schema.ID + ":" + ClientUtils.escapeQueryChars(id) + " AND -" + Schema.OC_DELETED + ":[* TO *]"); solrResponse = solrServer.query(query); } catch (Exception e1) { throw new SolrServerException(e1); } // Did we find the episode? if (solrResponse.getResults().size() == 0) { logger.warn("Trying to delete non-existing media package {} from the search index", id); return false; } // Use all existing fields SolrDocument doc = solrResponse.getResults().get(0); SolrInputDocument inputDocument = new SolrInputDocument(); for (String field : doc.getFieldNames()) { inputDocument.setField(field, doc.get(field)); } // Set the oc_deleted field to the current date, then update Schema.setOcDeleted(inputDocument, deletionDate); solrServer.add(inputDocument); solrServer.commit(); return true; } catch (IOException e) { throw new SolrServerException(e); } } /** * Posts the media package to solr. Depending on what is referenced in the media package, the method might create one * or two entries: one for the episode and one for the series that the episode belongs to. * * This implementation of the search service removes all references to non "engage/download" media tracks * * @param sourceMediaPackage * the media package to post * @param acl * the access control list for this mediapackage * @param now * current date * @throws SolrServerException * if an errors occurs while talking to solr */ public boolean add(MediaPackage sourceMediaPackage, AccessControlList acl, Date now) throws SolrServerException, UnauthorizedException { try { SolrInputDocument episodeDocument = createEpisodeInputDocument(sourceMediaPackage, acl); Schema.setOcModified(episodeDocument, now); SolrInputDocument seriesDocument = createSeriesInputDocument(sourceMediaPackage.getSeries(), acl); if (seriesDocument != null) Schema.enrich(episodeDocument, seriesDocument); // If neither an episode nor a series was contained, there is no point in trying to update if (episodeDocument == null && seriesDocument == null) { logger.warn("Neither episode nor series metadata found"); return false; } // Post everything to the search index if (episodeDocument != null) solrServer.add(episodeDocument); if (seriesDocument != null) solrServer.add(seriesDocument); solrServer.commit(); return true; } catch (Exception e) { throw new SolrServerException(e); } } /** * Posts the media package to solr. Depending on what is referenced in the media package, the method might create one * or two entries: one for the episode and one for the series that the episode belongs to. * * This implementation of the search service removes all references to non "engage/download" media tracks * * @param sourceMediaPackage * the media package to post * @param acl * the access control list for this mediapackage * @param deletionDate * the deletion date * @param modificationDate * the modification date * @return <code>true</code> if successfully added * @throws SolrServerException * if an errors occurs while talking to solr */ public boolean add(MediaPackage sourceMediaPackage, AccessControlList acl, Date deletionDate, Date modificationDate) throws SolrServerException { try { SolrInputDocument episodeDocument = createEpisodeInputDocument(sourceMediaPackage, acl); SolrInputDocument seriesDocument = createSeriesInputDocument(sourceMediaPackage.getSeries(), acl); if (seriesDocument != null) Schema.enrich(episodeDocument, seriesDocument); Schema.setOcModified(episodeDocument, modificationDate); if (deletionDate != null) Schema.setOcDeleted(episodeDocument, deletionDate); solrServer.add(episodeDocument); solrServer.add(seriesDocument); solrServer.commit(); return true; } catch (Exception e) { try { solrServer.rollback(); } catch (IOException e1) { throw new SolrServerException(e1); } throw new SolrServerException(e); } } /** * Creates a solr input document for the episode metadata of the media package. * * @param mediaPackage * the media package * @param acl * the access control list for this mediapackage * @return an input document ready to be posted to solr * @throws MediaPackageException * if serialization of the media package fails */ private SolrInputDocument createEpisodeInputDocument(MediaPackage mediaPackage, AccessControlList acl) throws MediaPackageException, IOException { SolrInputDocument doc = new SolrInputDocument(); String mediaPackageId = mediaPackage.getIdentifier().toString(); // Fill the input document Schema.setId(doc, mediaPackageId); // / // OC specific fields Schema.setOcMediatype(doc, SearchResultItemType.AudioVisual.toString()); Schema.setOrganization(doc, securityService.getOrganization().getId()); Schema.setOcMediapackage(doc, MediaPackageParser.getAsXml(mediaPackage)); Schema.setOcElementtags(doc, tags(mediaPackage)); Schema.setOcElementflavors(doc, flavors(mediaPackage)); // Add cover Attachment[] cover = mediaPackage.getAttachments(MediaPackageElements.MEDIAPACKAGE_COVER_FLAVOR); if (cover != null && cover.length > 0) { Schema.setOcCover(doc, cover[0].getURI().toString()); } // / // Add standard dublin core fields // naive approach. works as long as only setters, not adders are available in the schema for (StaticMetadata md : getMetadata(mdServices, mediaPackage)) addEpisodeMetadata(doc, md); // / // Add mpeg7 logger.debug("Looking for mpeg-7 catalogs containing segment texts"); Catalog[] mpeg7Catalogs = mediaPackage.getCatalogs(MediaPackageElements.TEXTS); if (mpeg7Catalogs.length == 0) { logger.debug("No text catalogs found, trying segments only"); mpeg7Catalogs = mediaPackage.getCatalogs(MediaPackageElements.SEGMENTS); } // TODO: merge the segments from each mpeg7 if there is more than one mpeg7 catalog if (mpeg7Catalogs.length > 0) { try { Mpeg7Catalog mpeg7Catalog = loadMpeg7Catalog(mpeg7Catalogs[0]); addMpeg7Metadata(doc, mediaPackage, mpeg7Catalog); } catch (IOException e) { logger.error("Error loading mpeg7 catalog. Skipping catalog: {}", e.getMessage()); } } else { logger.debug("No segmentation catalog found"); } // / // Add authorization setAuthorization(doc, securityService, acl); return doc; } static void addEpisodeMetadata(final SolrInputDocument doc, final StaticMetadata md) { Schema.fill(doc, new Schema.FieldCollector() { @Override public Option<String> getId() { return Option.none(); } @Override public Option<String> getOrganization() { return Option.none(); } @Override public Option<Date> getDcCreated() { return md.getCreated(); } @Override public Option<Long> getDcExtent() { return md.getExtent(); } @Override public Option<String> getDcLanguage() { return md.getLanguage(); } @Override public Option<String> getDcIsPartOf() { return md.getIsPartOf(); } @Override public Option<String> getDcReplaces() { return md.getReplaces(); } @Override public Option<String> getDcType() { return md.getType(); } @Override public Option<Date> getDcAvailableFrom() { return md.getAvailable().flatMap(new Function<Interval, Option<Date>>() { @Override public Option<Date> apply(Interval interval) { return interval.fold(new Interval.Match<Option<Date>>() { @Override public Option<Date> bounded(Date leftBound, Date rightBound) { return Option.some(leftBound); } @Override public Option<Date> leftInfinite(Date rightBound) { return Option.none(); } @Override public Option<Date> rightInfinite(Date leftBound) { return Option.some(leftBound); } }); } }); } @Override public Option<Date> getDcAvailableTo() { return md.getAvailable().flatMap(new Function<Interval, Option<Date>>() { @Override public Option<Date> apply(Interval interval) { return interval.fold(new Interval.Match<Option<Date>>() { @Override public Option<Date> bounded(Date leftBound, Date rightBound) { return Option.some(rightBound); } @Override public Option<Date> leftInfinite(Date rightBound) { return Option.some(rightBound); } @Override public Option<Date> rightInfinite(Date leftBound) { return Option.none(); } }); } }); } @Override public List<DField<String>> getDcTitle() { return fromMValue(md.getTitles()); } @Override public List<DField<String>> getDcSubject() { return fromMValue(md.getSubjects()); } @Override public List<DField<String>> getDcCreator() { return fromMValue(md.getCreators()); } @Override public List<DField<String>> getDcPublisher() { return fromMValue(md.getPublishers()); } @Override public List<DField<String>> getDcContributor() { return fromMValue(md.getContributors()); } @Override public List<DField<String>> getDcDescription() { return fromMValue(md.getDescription()); } @Override public List<DField<String>> getDcRightsHolder() { return fromMValue(md.getRightsHolders()); } @Override public List<DField<String>> getDcSpatial() { return fromMValue(md.getSpatials()); } @Override public List<DField<String>> getDcAccessRights() { return fromMValue(md.getAccessRights()); } @Override public List<DField<String>> getDcLicense() { return fromMValue(md.getLicenses()); } @Override public Option<String> getOcMediatype() { return Option.none(); // set elsewhere } @Override public Option<String> getOcMediapackage() { return Option.none(); // set elsewhere } @Override public Option<String> getOcKeywords() { return Option.none(); // set elsewhere } @Override public Option<String> getOcCover() { return Option.none(); // set elsewhere } @Override public Option<Date> getOcModified() { return Option.none(); // set elsewhere } @Override public Option<Date> getOcDeleted() { return Option.none(); // set elsewhere } @Override public Option<String> getOcElementtags() { return Option.none(); // set elsewhere } @Override public Option<String> getOcElementflavors() { return Option.none(); // set elsewhere } @Override public List<DField<String>> getOcAcl() { return Collections.EMPTY_LIST; // set elsewhere } @Override public List<DField<String>> getSegmentText() { return Collections.EMPTY_LIST; // set elsewhere } @Override public List<DField<String>> getSegmentHint() { return Collections.EMPTY_LIST; // set elsewhere } }); } static List<DField<String>> fromMValue(List<MetadataValue<String>> as) { return map(as, new ArrayList<DField<String>>(), new Function<MetadataValue<String>, DField<String>>() { @Override public DField<String> apply(MetadataValue<String> v) { return new DField<String>(v.getValue(), v.getLanguage()); } }); } static List<DField<String>> fromDCValue(List<DublinCoreValue> as) { return map(as, new ArrayList<DField<String>>(), new Function<DublinCoreValue, DField<String>>() { @Override public DField<String> apply(DublinCoreValue v) { return new DField<String>(v.getValue(), v.getLanguage()); } }); } /** * Adds authorization fields to the solr document. * * @param doc * the solr document * @param acl * the access control list */ static void setAuthorization(SolrInputDocument doc, SecurityService securityService, AccessControlList acl) { Map<String, List<String>> permissions = new HashMap<String, List<String>>(); // Define containers for common permissions List<String> reads = new ArrayList<String>(); permissions.put(READ.toString(), reads); List<String> writes = new ArrayList<String>(); permissions.put(WRITE.toString(), writes); String adminRole = securityService.getOrganization().getAdminRole(); // The admin user can read and write if (adminRole != null) { reads.add(adminRole); writes.add(adminRole); } for (AccessControlEntry entry : acl.getEntries()) { if (!entry.isAllow()) { logger.warn("Search service does not support denial via ACL, ignoring {}", entry); continue; } List<String> actionPermissions = permissions.get(entry.getAction()); /* * MH-8353 a series could have a permission defined we don't know how to handle -DH */ if (actionPermissions == null) { logger.warn("Search service doesn't know how to handle action: " + entry.getAction()); continue; } if (acl == null) { actionPermissions = new ArrayList<String>(); permissions.put(entry.getAction(), actionPermissions); } actionPermissions.add(entry.getRole()); } // Write the permissions to the solr document for (Map.Entry<String, List<String>> entry : permissions.entrySet()) { Schema.setOcAcl(doc, new DField<String>(mkString(entry.getValue(), " "), entry.getKey())); } } static String mkString(Collection<?> as, String sep) { StringBuffer b = new StringBuffer(); for (Object a : as) { b.append(a).append(sep); } return b.substring(0, b.length() - sep.length()); } private Mpeg7Catalog loadMpeg7Catalog(Catalog cat) throws IOException { InputStream in = null; try { File f = workspace.get(cat.getURI()); in = new FileInputStream(f); return mpeg7CatalogService.load(in); } catch (NotFoundException e) { throw new IOException("Unable to load metadata from mpeg7 catalog " + cat); } finally { IOUtils.closeQuietly(in); } } /** * Creates a solr input document for the series metadata of the media package. * * @param seriesId * the id of the series * @param acl * the access control list for this mediapackage * @return an input document ready to be posted to solr or null */ private SolrInputDocument createSeriesInputDocument(String seriesId, AccessControlList acl) throws IOException, UnauthorizedException { if (seriesId == null) return null; DublinCoreCatalog dc = null; try { dc = seriesService.getSeries(seriesId); } catch (SeriesException e) { logger.debug("No series dublincore found for series id " + seriesId); return null; } catch (NotFoundException e) { logger.debug("No series dublincore found for series id " + seriesId); return null; } SolrInputDocument doc = new SolrInputDocument(); // Populate document with existing data try { StringBuffer query = new StringBuffer("q="); query = query.append(Schema.ID).append(":").append(SolrUtils.clean(seriesId)); SolrParams params = SolrRequestParsers.parseQueryString(query.toString()); QueryResponse solrResponse = solrServer.query(params); if (solrResponse.getResults().size() > 0) { SolrDocument existingSolrDocument = solrResponse.getResults().get(0); for (String fieldName : existingSolrDocument.getFieldNames()) { doc.addField(fieldName, existingSolrDocument.getFieldValue(fieldName)); } } } catch (Exception e) { logger.error("Error trying to load series " + seriesId, e); } // Fill document Schema.setId(doc, seriesId); // OC specific fields Schema.setOrganization(doc, securityService.getOrganization().getId()); Schema.setOcMediatype(doc, SearchResultItemType.Series.toString()); Schema.setOcModified(doc, new Date()); // DC fields addSeriesMetadata(doc, dc); // Authorization setAuthorization(doc, securityService, acl); return doc; } /** * Add the standard dublin core fields to a series document. * * @param doc * the solr document to fill * @param dc * the dublin core catalog to get the data from */ static void addSeriesMetadata(final SolrInputDocument doc, final DublinCoreCatalog dc) throws IOException { Schema.fill(doc, new Schema.FieldCollector() { @Override public Option<String> getId() { return Option.some(dc.getFirst(DublinCore.PROPERTY_IDENTIFIER)); } @Override public Option<String> getOrganization() { return Option.none(); } @Override public Option<Date> getDcCreated() { return head(dc.get(DublinCore.PROPERTY_CREATED)).flatMap(toDateF); } @Override public Option<Long> getDcExtent() { return head(dc.get(DublinCore.PROPERTY_EXTENT)).flatMap(toDurationF); } @Override public Option<String> getDcLanguage() { return option(dc.getFirst(DublinCore.PROPERTY_LANGUAGE)); } @Override public Option<String> getDcIsPartOf() { return option(dc.getFirst(DublinCore.PROPERTY_IS_PART_OF)); } @Override public Option<String> getDcReplaces() { return option(dc.getFirst(DublinCore.PROPERTY_REPLACES)); } @Override public Option<String> getDcType() { return option(dc.getFirst(DublinCore.PROPERTY_TYPE)); } @Override public Option<Date> getDcAvailableFrom() { return option(dc.getFirst(DublinCore.PROPERTY_AVAILABLE)).flatMap(new Function<String, Option<Date>>() { @Override public Option<Date> apply(String s) { return option(EncodingSchemeUtils.decodePeriod(s).getStart()); } }); } @Override public Option<Date> getDcAvailableTo() { return option(dc.getFirst(DublinCore.PROPERTY_AVAILABLE)).flatMap(new Function<String, Option<Date>>() { @Override public Option<Date> apply(String s) { return option(EncodingSchemeUtils.decodePeriod(s).getEnd()); } }); } @Override public List<DField<String>> getDcTitle() { return fromDCValue(dc.get(DublinCore.PROPERTY_TITLE)); } @Override public List<DField<String>> getDcSubject() { return fromDCValue(dc.get(DublinCore.PROPERTY_SUBJECT)); } @Override public List<DField<String>> getDcCreator() { return fromDCValue(dc.get(DublinCore.PROPERTY_CREATOR)); } @Override public List<DField<String>> getDcPublisher() { return fromDCValue(dc.get(DublinCore.PROPERTY_PUBLISHER)); } @Override public List<DField<String>> getDcContributor() { return fromDCValue(dc.get(DublinCore.PROPERTY_CONTRIBUTOR)); } @Override public List<DField<String>> getDcDescription() { return fromDCValue(dc.get(DublinCore.PROPERTY_DESCRIPTION)); } @Override public List<DField<String>> getDcRightsHolder() { return fromDCValue(dc.get(DublinCore.PROPERTY_RIGHTS_HOLDER)); } @Override public List<DField<String>> getDcSpatial() { return fromDCValue(dc.get(DublinCore.PROPERTY_SPATIAL)); } @Override public List<DField<String>> getDcAccessRights() { return fromDCValue(dc.get(DublinCore.PROPERTY_ACCESS_RIGHTS)); } @Override public List<DField<String>> getDcLicense() { return fromDCValue(dc.get(DublinCore.PROPERTY_LICENSE)); } @Override public Option<String> getOcMediatype() { return Option.none(); } @Override public Option<String> getOcMediapackage() { return Option.none(); } @Override public Option<String> getOcKeywords() { return Option.none(); } @Override public Option<String> getOcCover() { return Option.none(); } @Override public Option<Date> getOcModified() { return Option.none(); } @Override public Option<Date> getOcDeleted() { return Option.none(); } @Override public Option<String> getOcElementtags() { return Option.none(); } @Override public Option<String> getOcElementflavors() { return Option.none(); } @Override public List<DField<String>> getOcAcl() { return Collections.EMPTY_LIST; } @Override public List<DField<String>> getSegmentText() { return Collections.EMPTY_LIST; } @Override public List<DField<String>> getSegmentHint() { return Collections.EMPTY_LIST; } }); } /** * Add the mpeg 7 catalog data to the solr document. * * @param doc * the input document to the solr index * @param mpeg7 * the mpeg7 catalog */ @SuppressWarnings("unchecked") static void addMpeg7Metadata(SolrInputDocument doc, MediaPackage mediaPackage, Mpeg7Catalog mpeg7) { // Check for multimedia content if (!mpeg7.multimediaContent().hasNext()) { logger.warn("Mpeg-7 doesn't contain multimedia content"); return; } // Get the content duration by looking at the first content track. This // of course assumes that all tracks are equally long. MultimediaContent<? extends MultimediaContentType> mc = mpeg7.multimediaContent().next(); MultimediaContentType mct = mc.elements().next(); MediaTime mediaTime = mct.getMediaTime(); Schema.setDcExtent(doc, mediaTime.getMediaDuration().getDurationInMilliseconds()); // Check if the keywords have been filled by (manually) added dublin // core data. If not, look for the most relevant fields in mpeg-7. SortedSet<TextAnnotation> sortedAnnotations = null; if (!"".equals(Schema.getOcKeywords(doc))) { sortedAnnotations = new TreeSet<TextAnnotation>(new Comparator<TextAnnotation>() { @Override public int compare(TextAnnotation a1, TextAnnotation a2) { if ((RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence()) > (RELEVANCE_BOOST * a2.getRelevance() + a2 .getConfidence())) return -1; else if ((RELEVANCE_BOOST * a1.getRelevance() + a1.getConfidence()) < (RELEVANCE_BOOST * a2.getRelevance() + a2 .getConfidence())) return 1; return 0; } }); } // Iterate over the tracks and extract keywords and hints Iterator<MultimediaContent<? extends MultimediaContentType>> mmIter = mpeg7.multimediaContent(); int segmentCount = 0; while (mmIter.hasNext()) { MultimediaContent<?> multimediaContent = mmIter.next(); // We need to process visual segments first, due to the way they are handled in the ui. for (Iterator<?> iterator = multimediaContent.elements(); iterator.hasNext();) { MultimediaContentType type = (MultimediaContentType) iterator.next(); if (!(type instanceof Video) && !(type instanceof AudioVisual)) continue; // for every segment in the current multimedia content track Video video = (Video) type; Iterator<VideoSegment> vsegments = (Iterator<VideoSegment>) video.getTemporalDecomposition().segments(); while (vsegments.hasNext()) { VideoSegment segment = vsegments.next(); StringBuffer segmentText = new StringBuffer(); StringBuffer hintField = new StringBuffer(); // Collect the video text elements to a segment text SpatioTemporalDecomposition spt = segment.getSpatioTemporalDecomposition(); if (spt != null) { for (VideoText videoText : spt.getVideoText()) { if (segmentText.length() > 0) segmentText.append(" "); segmentText.append(videoText.getText().getText()); // TODO: Add hint on bounding box } } // Add keyword annotations Iterator<TextAnnotation> textAnnotations = segment.textAnnotations(); while (textAnnotations.hasNext()) { TextAnnotation textAnnotation = textAnnotations.next(); Iterator<?> kwIter = textAnnotation.keywordAnnotations(); while (kwIter.hasNext()) { KeywordAnnotation keywordAnnotation = (KeywordAnnotation) kwIter.next(); if (segmentText.length() > 0) segmentText.append(" "); segmentText.append(keywordAnnotation.getKeyword()); } } // Add free text annotations Iterator<TextAnnotation> freeIter = segment.textAnnotations(); if (freeIter.hasNext()) { Iterator<FreeTextAnnotation> freeTextIter = freeIter.next().freeTextAnnotations(); while (freeTextIter.hasNext()) { FreeTextAnnotation freeTextAnnotation = freeTextIter.next(); if (segmentText.length() > 0) segmentText.append(" "); segmentText.append(freeTextAnnotation.getText()); } } // add segment text to solr document Schema.setSegmentText(doc, new DField<String>(segmentText.toString(), Integer.toString(segmentCount))); // get the segments time properties MediaTimePoint timepoint = segment.getMediaTime().getMediaTimePoint(); MediaDuration duration = segment.getMediaTime().getMediaDuration(); // TODO: define a class with hint field constants hintField.append("time=" + timepoint.getTimeInMilliseconds() + "\n"); hintField.append("duration=" + duration.getDurationInMilliseconds() + "\n"); // Look for preview images. Their characteristics are that they are // attached as attachments with a flavor of preview/<something>. String time = timepoint.toString(); for (Attachment slide : mediaPackage.getAttachments(MediaPackageElements.PRESENTATION_SEGMENT_PREVIEW)) { MediaPackageReference ref = slide.getReference(); if (ref != null && time.equals(ref.getProperty("time"))) { hintField.append("preview"); hintField.append("."); hintField.append(ref.getIdentifier()); hintField.append("="); hintField.append(slide.getURI().toString()); hintField.append("\n"); } } logger.trace("Adding segment: " + timepoint.toString()); Schema.setSegmentHint(doc, new DField<String>(hintField.toString(), Integer.toString(segmentCount))); // increase segment counter segmentCount++; } } } // Put the most important keywords into a special solr field if (sortedAnnotations != null) { Schema.setOcKeywords(doc, importantKeywordsString(sortedAnnotations).toString()); } } /** * Generates a string with the most important kewords from the text annotation. * * @param sortedAnnotations * @return The keyword string. */ static StringBuffer importantKeywordsString(SortedSet<TextAnnotation> sortedAnnotations) { // important keyword: // - high relevance // - high confidence // - occur often // - more than MAX_CHAR chars // calculate keyword occurences (histogram) and importance ArrayList<String> list = new ArrayList<String>(); Iterator<TextAnnotation> textAnnotations = sortedAnnotations.iterator(); TextAnnotation textAnnotation = null; String keyword = null; HashMap<String, Integer> histogram = new HashMap<String, Integer>(); HashMap<String, Double> importance = new HashMap<String, Double>(); int occ = 0; double imp; while (textAnnotations.hasNext()) { textAnnotation = textAnnotations.next(); Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations(); while (keywordAnnotations.hasNext()) { KeywordAnnotation annotation = keywordAnnotations.next(); keyword = annotation.getKeyword().toLowerCase(); if (keyword.length() > MAX_CHAR) { occ = 0; if (histogram.keySet().contains(keyword)) { occ = histogram.get(keyword); } histogram.put(keyword, occ + 1); // here the importance value is calculated // from relevance, confidence and frequency of occurence. imp = (RELEVANCE_BOOST * getMaxRelevance(keyword, sortedAnnotations) + getMaxConfidence(keyword, sortedAnnotations)) * (occ + 1); importance.put(keyword, imp); } } } // get the MAX_IMPORTANT_COUNT most important keywords StringBuffer buf = new StringBuffer(); while (list.size() < MAX_IMPORTANT_COUNT && importance.size() > 0) { double max = 0.0; String maxKeyword = null; // get maximum from importance list for (Entry<String, Double> entry : importance.entrySet()) { keyword = entry.getKey(); if (max < entry.getValue()) { max = entry.getValue(); maxKeyword = keyword; } } // pop maximum importance.remove(maxKeyword); // append keyword to string if (buf.length() > 0) buf.append(" "); buf.append(maxKeyword); } return buf; } /** * Gets the maximum confidence for a given keyword in the text annotation. * * @param keyword * @param sortedAnnotations * @return The maximum confidence value. */ static double getMaxConfidence(String keyword, SortedSet<TextAnnotation> sortedAnnotations) { double max = 0.0; String needle = null; TextAnnotation textAnnotation = null; Iterator<TextAnnotation> textAnnotations = sortedAnnotations.iterator(); while (textAnnotations.hasNext()) { textAnnotation = textAnnotations.next(); Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations(); while (keywordAnnotations.hasNext()) { KeywordAnnotation ann = keywordAnnotations.next(); needle = ann.getKeyword().toLowerCase(); if (keyword.equals(needle)) { if (max < textAnnotation.getConfidence()) { max = textAnnotation.getConfidence(); } } } } return max; } /** * Gets the maximum relevance for a given keyword in the text annotation. * * @param keyword * @param sortedAnnotations * @return The maximum relevance value. */ static double getMaxRelevance(String keyword, SortedSet<TextAnnotation> sortedAnnotations) { double max = 0.0; String needle = null; TextAnnotation textAnnotation = null; Iterator<TextAnnotation> textAnnotations = sortedAnnotations.iterator(); while (textAnnotations.hasNext()) { textAnnotation = textAnnotations.next(); Iterator<KeywordAnnotation> keywordAnnotations = textAnnotation.keywordAnnotations(); while (keywordAnnotations.hasNext()) { KeywordAnnotation ann = keywordAnnotations.next(); needle = ann.getKeyword().toLowerCase(); if (keyword.equals(needle)) { if (max < textAnnotation.getRelevance()) { max = textAnnotation.getRelevance(); } } } } return max; } /** * Get metadata from all registered metadata services. */ static List<StaticMetadata> getMetadata(final List<StaticMetadataService> mdServices, final MediaPackage mp) { return flatMap(mdServices, new ArrayList<StaticMetadata>(), new Function<StaticMetadataService, Collection<StaticMetadata>>() { @Override public Collection<StaticMetadata> apply(StaticMetadataService s) { StaticMetadata md = s.getMetadata(mp); return md != null ? Arrays.asList(md) : Collections.<StaticMetadata> emptyList(); } }); } /** * Return all media package tags as a space separated string. */ static String tags(MediaPackage mp) { StringBuilder sb = new StringBuilder(); for (MediaPackageElement element : mp.getElements()) { for (String tag : element.getTags()) { sb.append(tag); sb.append(" "); } } return sb.toString(); } /** * Return all media package flavors as a space separated string. */ static String flavors(MediaPackage mp) { StringBuilder sb = new StringBuilder(); for (MediaPackageElement element : mp.getElements()) { if (element.getFlavor() != null) { sb.append(element.getFlavor().toString()); sb.append(" "); } } return sb.toString(); } /** * Returns number of entries in search index, across all organizations. * * @return number of entries in search index * @throws SearchServiceDatabaseException * if count cannot be retrieved */ public long count() throws SearchServiceDatabaseException { try { QueryResponse response = solrServer.query(new SolrQuery("*:*")); return response.getResults().getNumFound(); } catch (SolrServerException e) { throw new SearchServiceDatabaseException(e); } } }