/**********************************************************************************
* $URL: https://source.sakaiproject.org/svn/search/trunk/search-impl/impl/src/java/org/sakaiproject/search/component/adapter/contenthosting/ContentHostingContentProducer.java $
* $Id: ContentHostingContentProducer.java 107465 2012-04-23 02:28:00Z steve.swinsburg@gmail.com $
***********************************************************************************
*
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008 The Sakai Foundation
*
* Licensed under the Educational Community License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.opensource.org/licenses/ECL-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**********************************************************************************/
package org.sakaiproject.search.component.adapter.contenthosting;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.sakaiproject.component.api.ServerConfigurationService;
import org.sakaiproject.content.api.ContentHostingService;
import org.sakaiproject.content.api.ContentResource;
import org.sakaiproject.entity.api.EntityManager;
import org.sakaiproject.entity.api.EntityProducer;
import org.sakaiproject.entity.api.Reference;
import org.sakaiproject.entity.api.ResourceProperties;
import org.sakaiproject.event.api.Event;
import org.sakaiproject.exception.IdUnusedException;
import org.sakaiproject.exception.PermissionException;
import org.sakaiproject.exception.TypeException;
import org.sakaiproject.search.api.EntityContentProducer;
import org.sakaiproject.search.api.SearchIndexBuilder;
import org.sakaiproject.search.api.SearchService;
import org.sakaiproject.search.api.StoredDigestContentProducer;
import org.sakaiproject.search.model.SearchBuilderItem;
import org.sakaiproject.site.api.SiteService;
public class ContentHostingContentProducer implements EntityContentProducer, StoredDigestContentProducer
{
/** Logger shared by every instance of this producer. */
private static final Log log = LogFactory.getLog(ContentHostingContentProducer.class);

/**
 * Search service; {@link #init()} registers the content add/write/remove
 * events with it. Supplied through {@link #setSearchService(SearchService)}.
 */
private SearchService searchService = null;

/**
 * Content hosting service used to load resources, list site content and
 * test dropbox membership. Supplied through
 * {@link #setContentHostingService(ContentHostingService)}.
 */
private ContentHostingService contentHostingService = null;

/**
 * Index builder this producer registers itself with in {@link #init()}.
 * Supplied through {@link #setSearchIndexBuilder(SearchIndexBuilder)}.
 */
private SearchIndexBuilder searchIndexBuilder = null;

/**
 * Entity manager used to turn reference strings into {@link Reference}
 * objects. Supplied through {@link #setEntityManager(EntityManager)}.
 */
private EntityManager entityManager = null;

/**
 * Site service. Only stored and exposed through its accessor pair; no
 * other method of this class reads it.
 */
private SiteService siteService = null;

/**
 * Digesters registered at runtime through
 * {@link #addDigester(ContentDigester)}. {@link #getDigester(ContentResource)}
 * walks this list in registration order and uses the first digester that
 * accepts the resource's mime type.
 * NOTE(review): the list is not synchronized; confirm that digesters are
 * only added/removed during start-up and shut-down.
 */
private final List<ContentDigester> digesters = new ArrayList<ContentDigester>();

/**
 * Fallback digester, used when no registered digester accepts a resource,
 * when a resource exceeds {@link #digesterSizeLimit}, or when the selected
 * digester fails. Supplied through
 * {@link #setDefaultDigester(ContentDigester)}.
 */
private ContentDigester defaultDigester;

/**
 * Content length above which {@link #isContentFromReader(String)} answers
 * {@code true}. Defaults to 2M.
 */
private int readerSizeLimit = 1024 * 1024 * 2; // (2M)

/**
 * Content length above which {@link #getDigester(ContentResource)} skips
 * the registered digesters and returns the default one. Defaults to 5M.
 */
private int digesterSizeLimit = 1024 * 1024 * 5; // (5M)

/**
 * Custom property mappings, one entry per mapping, each of the form
 * {@code indexkey.entitykey}. The entry is split at the first dot only, so
 * the entity key may itself contain dots (for example a URL).
 */
private List<String> customProperties = null;

/**
 * Server configuration; {@link #init()} reads the {@code search.enable}
 * setting from it.
 */
private ServerConfigurationService serverConfigurationService;
public ContentHostingContentProducer() {
customProperties = new ArrayList<String>();
customProperties.add("dc_created.http://purl.org/dc/terms/created");
customProperties.add("Tdc_publisher.http://purl.org/dc/elements/1.1/publisher");
customProperties.add("Tdc_audience.http://purl.org/dc/terms/audience");
customProperties.add("Tdc_subject.http://purl.org/dc/elements/1.1/subject");
customProperties.add("Tdc_creator.http://purl.org/dc/elements/1.1/creator");
customProperties.add("Tdc_educationlevel.http://purl.org/dc/terms/educationLevel");
customProperties.add("Tdc_alternative.http://purl.org/dc/elements/1.1/alternative");
customProperties.add("dc_issued.http://purl.org/dc/terms/issued");
customProperties.add("Tdc_abstract.http://purl.org/dc/terms/abstract");
customProperties.add("Tdc_contributor.http://purl.org/dc/elements/1.1/contributor");
customProperties.add("tag.tag");
//LOM fields
customProperties.add("Tlom_role.http://ltsc.ieee.org/xsd/lomv1.0/role");
customProperties.add("Tlom_coverage.http://ltsc.ieee.org/xsd/lomv1.0/coverage");
customProperties.add("Tlom_status.http://ltsc.ieee.org/xsd/lomv1.0/status");
//customProperties.add("lom_duration.http://ltsc.ieee.org/xsd/lomv1.0/duration");
customProperties.add("Tlom_engagement.http://ltsc.ieee.org/xsd/lomv1.0/engagement");
customProperties.add("Tlom_learning_resource_type.http://ltsc.ieee.org/xsd/lomv1.0/learning_resource_type");
customProperties.add("Tlom_interactivity_level.http://ltsc.ieee.org/xsd/lomv1.0/interactivity_level");
customProperties.add("Tlom_context_level.http://ltsc.ieee.org/xsd/lomv1.0/context_level");
customProperties.add("Tlom_difficulty.http://ltsc.ieee.org/xsd/lomv1.0/difficulty");
//customProperties.add("lom_learning_time.http://ltsc.ieee.org/xsd/lomv1.0/learning_time");
customProperties.add("Tlom_assumed_knowledge.http://ltsc.ieee.org/xsd/lomv1.0/assumed_knowledge");
customProperties.add("Tlom_technical_requirements.http://ltsc.ieee.org/xsd/lomv1.0/technical_requirements");
customProperties.add("Tlom_install_remarks.http://ltsc.ieee.org/xsd/lomv1.0/install_remarks");
customProperties.add("Tlom_other_requirements.http://ltsc.ieee.org/xsd/lomv1.0/other_requirements");
customProperties.add("Tlom_level.http://ltsc.ieee.org/xsd/lomv1.0/granularity_level");
customProperties.add("Tlom_structure.http://ltsc.ieee.org/xsd/lomv1.0/structure");
customProperties.add("Tlom_relation.http://ltsc.ieee.org/xsd/lomv1.0/relation");
customProperties.add("Tlom_reviewer.http://ltsc.ieee.org/xsd/lomv1.0/reviewer");
//customProperties.add("lom_review_date.http://ltsc.ieee.org/xsd/lomv1.0/review_date");
customProperties.add("Tlom_review_comments.http://ltsc.ieee.org/xsd/lomv1.0/review_comments");
}
/**
 * Start-up hook. When the {@code search.enable} configuration value is
 * exactly {@code "true"} (default {@code "false"}), registers the resource
 * add, write and remove events with the search service and registers this
 * producer with the index builder. Any failure is logged at error level and
 * not rethrown.
 */
public void init()
{
	try
	{
		String searchEnabled = serverConfigurationService.getString("search.enable",
				"false");
		if (!"true".equals(searchEnabled))
		{
			// search is switched off: nothing to register
			return;
		}
		searchService.registerFunction(ContentHostingService.EVENT_RESOURCE_ADD);
		searchService.registerFunction(ContentHostingService.EVENT_RESOURCE_WRITE);
		searchService.registerFunction(ContentHostingService.EVENT_RESOURCE_REMOVE);
		searchIndexBuilder.registerEntityContentProducer(this);
	}
	catch (Throwable failure)
	{
		log.error("Failed to init Service ", failure);
	}
}
/**
 * Registers a digester. It is appended to the list that
 * {@link #getDigester(ContentResource)} searches in order.
 *
 * @param digester the digester to register
 */
public void addDigester(ContentDigester digester)
{
	this.digesters.add(digester);
}
/**
 * Unregisters a digester previously passed to
 * {@link #addDigester(ContentDigester)}. Does nothing if it is not
 * registered.
 *
 * @param digester the digester to unregister
 */
public void removeDigester(ContentDigester digester)
{
	this.digesters.remove(digester);
}
/**
 * Tells whether the content of a resource is larger than
 * {@link #getReaderSizeLimit()}.
 *
 * @param ref the entity reference of the resource
 * @return {@code true} when the resource's content length exceeds the
 *         reader size limit, {@code false} otherwise
 * @throws RuntimeException if the reference cannot be resolved to a
 *         resource (the original exception is attached as the cause)
 */
public boolean isContentFromReader(String ref)
{
	ContentResource contentResource;
	try
	{
		Reference reference = entityManager.newReference(ref);
		contentResource = contentHostingService.getResource(reference.getId());
	}
	catch (Exception e)
	{
		throw new RuntimeException("Failed to resolve resource " + ref, e);
	}

	boolean fromReader = contentResource.getContentLength() > readerSizeLimit;
	if (log.isDebugEnabled())
	{
		// the negative case used to be logged as ":yes" as well
		log.debug("ContentHosting.isContentFromReader" + ref
				+ (fromReader ? ":yes" : ":no"));
	}
	return fromReader;
}
/**
 * Returns a reader over the digested text of a resource.
 * <p>
 * An empty resource (content length &lt;= 0) yields an empty reader. Otherwise
 * the digester chosen by {@link #getDigester(ContentResource)} is used; if
 * it fails and it is not the default digester, the default digester is
 * tried once.
 *
 * @param ref the entity reference of the resource
 * @return a reader over the digested content
 * @throws RuntimeException if the reference cannot be resolved or if no
 *         digester could extract the content; when the fallback also fails
 *         the cause is the failure of the first digester
 */
public Reader getContentReader(String ref)
{
	final boolean debug = log.isDebugEnabled();

	ContentResource resource;
	try
	{
		resource = contentHostingService.getResource(entityManager.newReference(ref).getId());
	}
	catch (Exception resolveFailure)
	{
		throw new RuntimeException("Failed to resolve resource " + ref, resolveFailure);
	}

	if (resource.getContentLength() <= 0)
	{
		if (debug)
		{
			log.debug("ContentHosting.getContentReader" + ref + ": empty");
		}
		return new StringReader("");
	}

	final ContentDigester chosen = getDigester(resource);
	Reader result = null;
	try
	{
		result = chosen.getContentReader(resource);
	}
	catch (Exception primaryFailure)
	{
		log.debug("Failed to digest "+ref+" with " + chosen, primaryFailure);
		log.warn("Failed to digest "+ref+" with " + chosen + " cause: " + primaryFailure.getMessage());

		if (chosen.equals(defaultDigester))
		{
			// the default digester was already the one that failed
			throw new RuntimeException("Failed to extract content from "
					+ ref + " using " + chosen, primaryFailure);
		}

		try
		{
			result = defaultDigester.getContentReader(resource);
			log.info("Digested "+ref+" into a Reader with Default Digester ");
		}
		catch (Exception fallbackFailure)
		{
			log.debug("Failed to extract content from " + resource
					+ " using " + defaultDigester, fallbackFailure);
			throw new RuntimeException("Failed to extract content from "
					+ resource + " using " + defaultDigester + " and "
					+ chosen, primaryFailure);
		}
	}

	if (debug)
	{
		log.debug("ContentHosting.getContentReader" + ref + ":" + result);
	}
	return result;
}
/**
 * Returns the digested text of a resource by delegating to
 * {@link #getContent(String, int)} with a minimum word length of 3.
 *
 * @param ref the entity reference of the resource
 * @return the digested content, or an empty string for an empty resource
 */
public String getContent(String ref)
{
return getContent(ref, 3);
}
/**
 * Returns the digested text of a resource.
 * <p>
 * An empty resource (content length &lt;= 0) yields an empty string.
 * Otherwise the digester chosen by {@link #getDigester(ContentResource)} is
 * used; if it fails and it is not the default digester, the default
 * digester is tried once.
 *
 * @param ref the entity reference of the resource
 * @param minWordLenght not used by this implementation
 * @return the digested content
 * @throws RuntimeException if the reference cannot be resolved or if no
 *         digester could extract the content; when the fallback also fails
 *         the cause is the failure of the first digester
 */
public String getContent(String ref, int minWordLenght)
{
	log.info("Digesting " + ref);
	boolean debug = log.isDebugEnabled();
	ContentResource contentResource;
	try
	{
		Reference reference = entityManager.newReference(ref);
		contentResource = contentHostingService.getResource(reference.getId());
	}
	catch (Exception e)
	{
		if (debug)
		{
			log.debug("Failed To resolve Resource", e);
		}
		throw new RuntimeException("Failed to resolve resource ", e);
	}
	if (contentResource.getContentLength() <= 0)
	{
		if (debug)
		{
			log.debug("ContentHosting.getContent" + ref + ":empty");
		}
		return "";
	}

	ContentDigester digester = getDigester(contentResource);
	String content = null;
	try
	{
		content = digester.getContent(contentResource);
	}
	catch (Exception ex)
	{
		// The debug entry already carries the full stack trace, so nothing
		// is printed to stderr here.
		log.debug("Failed to digest "+ref+" with " + digester, ex);
		log.warn("Failed to digest "+ref+" with " + digester + " cause: " + ex);
		if (!digester.equals(defaultDigester))
		{
			try
			{
				content = defaultDigester.getContent(contentResource);
				log.info("Digested "+ref+" into "+content.length()+" characters with Default Digester ");
			}
			catch (Exception ex2)
			{
				log.debug("Failed to extract content from " + ref
						+ " using " + defaultDigester, ex2);
				throw new RuntimeException("Failed to extract content from "
						+ ref + " using " + defaultDigester + " and "
						+ digester, ex);
			}
		}
		else
		{
			if (debug)
			{
				log.debug("Failed To extract content");
			}
			throw new RuntimeException("Failed to extract content from "
					+ ref + " using " + digester, ex);
		}
	}
	if (debug)
	{
		log.debug("ContentHosting.getContent" + ref + ":" + content);
	}
	return content;
}
/**
 * Selects the digester for a resource.
 * <p>
 * Resources whose content length exceeds {@link #getDigesterSizeLimit()}
 * always get the default digester. Otherwise the first registered digester
 * that accepts the resource's content type is returned, and the default
 * digester is used when none accepts it.
 *
 * @param cr the resource to digest
 * @return the digester to use; the default digester when nothing else
 *         applies
 */
public ContentDigester getDigester(ContentResource cr)
{
	if (cr.getContentLength() <= digesterSizeLimit)
	{
		final String contentType = cr.getContentType();
		for (ContentDigester candidate : digesters)
		{
			if (candidate.accept(contentType))
			{
				return candidate;
			}
		}
	}
	return defaultDigester;
}
/**
 * Returns the display name of a resource, read from the resource
 * property whose key is given by
 * {@code ResourceProperties.getNamePropDisplayName()}.
 *
 * @param ref the entity reference of the resource
 * @return the value of the display-name property
 * @throws RuntimeException if the reference cannot be resolved to a
 *         resource
 */
public String getTitle(String ref)
{
	final boolean debug = log.isDebugEnabled();

	ContentResource resource;
	try
	{
		resource = contentHostingService.getResource(entityManager.newReference(ref).getId());
	}
	catch (Exception resolveFailure)
	{
		if (debug)
		{
			log.debug("Failed To resolve Resource", resolveFailure);
		}
		throw new RuntimeException("Failed to resolve resource ", resolveFailure);
	}

	ResourceProperties properties = resource.getProperties();
	String displayName = properties.getProperty(properties.getNamePropDisplayName());
	if (debug)
	{
		log.debug("ContentHosting.getTitle" + ref + ":" + displayName);
	}
	return displayName;
}
/**
 * Tells whether a reference belongs to content hosting, i.e. whether the
 * entity producer of the reference is a {@link ContentHostingService}.
 *
 * @param ref the entity reference to test
 * @return {@code true} when the reference's entity producer is a content
 *         hosting service; {@code false} otherwise, including when the
 *         reference cannot be examined
 */
public boolean matches(String ref)
{
	final boolean debug = log.isDebugEnabled();
	try
	{
		EntityProducer producer = entityManager.newReference(ref).getEntityProducer();
		boolean isContentHosting = producer instanceof ContentHostingService;
		if (debug)
		{
			log.debug("ContentHosting.matches" + ref + ":" + isContentHosting);
		}
		return isContentHosting;
	}
	catch (Exception lookupFailure)
	{
		// any failure while examining the reference counts as "no match"
		if (debug)
		{
			log.debug("ContentHosting.matches" + ref + ":fail-no-match");
		}
		return false;
	}
}
/**
 * Maps an event to an index action.
 * <p>
 * Resource add and write events map to {@code ACTION_ADD} when
 * {@link #isForIndex(String)} accepts the event's resource; resource remove
 * events map to {@code ACTION_DELETE} when the resource is not in a
 * dropbox. Everything else maps to {@code ACTION_UNKNOWN}.
 *
 * @param event the event to classify
 * @return one of the {@link SearchBuilderItem} action constants
 */
public Integer getAction(Event event)
{
	final boolean debug = log.isDebugEnabled();
	final String name = event.getEvent();

	final boolean addOrWrite = ContentHostingService.EVENT_RESOURCE_ADD.equals(name)
			|| ContentHostingService.EVENT_RESOURCE_WRITE.equals(name);
	if (addOrWrite)
	{
		if (debug)
		{
			log.debug("ContentHosting.getAction" + event + ":add");
		}
		if (isForIndex(event.getResource()))
		{
			return SearchBuilderItem.ACTION_ADD;
		}
	}

	final boolean removal = ContentHostingService.EVENT_RESOURCE_REMOVE.equals(name);
	if (removal)
	{
		if (debug)
		{
			log.debug("ContentHosting.getAction" + event + ":delete");
		}
		if (isForIndexDelete(event.getResource()))
		{
			return SearchBuilderItem.ACTION_DELETE;
		}
	}

	// not one of our events, or the resource was filtered out
	if (debug)
	{
		log.debug("ContentHosting.getAction" + event + ":unknown");
	}
	return SearchBuilderItem.ACTION_UNKNOWN;
}
/**
 * Tells whether this producer handles an event, i.e. whether
 * {@link #getAction(Event)} yields anything other than
 * {@code ACTION_UNKNOWN}.
 *
 * @param event the event to test
 * @return {@code true} when the event maps to a known index action
 */
public boolean matches(Event event)
{
	final Integer action = getAction(event);
	final boolean handled = !SearchBuilderItem.ACTION_UNKNOWN.equals(action);
	if (log.isDebugEnabled())
	{
		log.debug("ContentHosting.matches" + event + ":" + handled);
	}
	return handled;
}
/**
 * Returns the tool name reported by this producer.
 *
 * @return the constant string {@code "content"}
 */
public String getTool()
{
return "content";
}
/**
 * Returns the URL of the entity a reference points to.
 *
 * @param ref the entity reference
 * @return the URL reported by the resolved {@link Reference}
 */
public String getUrl(String ref)
{
	Reference reference = entityManager.newReference(ref);
	String url = reference.getUrl();
	if (log.isDebugEnabled())
	{
		// label used to read "getAction", which pointed at the wrong method
		log.debug("ContentHosting.getUrl" + ref + ":" + url);
	}
	return url;
}
/**
 * Returns the context of a reference, which this producer uses as the
 * site id.
 *
 * @param ref the resolved reference
 * @return the reference's context
 */
private String getSiteId(Reference ref)
{
	final String context = ref.getContext();
	if (log.isDebugEnabled())
	{
		log.debug("ContentHosting.getSiteId" + ref + ":" + context);
	}
	return context;
}
/**
 * Returns the site id of a resource, taken from the context of its
 * reference.
 *
 * @param resourceName the entity reference of the resource
 * @return the context of the resolved reference
 */
public String getSiteId(String resourceName)
{
	final Reference resolved = entityManager.newReference(resourceName);
	final String siteId = getSiteId(resolved);
	if (log.isDebugEnabled())
	{
		log.debug("ContentHosting.getSiteId" + resourceName + ":" + siteId);
	}
	return siteId;
}
/**
 * Iterates over the references of all resources in a site's collection.
 * <p>
 * When the site collection resolves to the root collection {@code "/"} the
 * iterator is empty, so the whole repository is never walked.
 *
 * @param context the site context
 * @return a read-only iterator of resource references; {@code remove()}
 *         is not supported
 */
public Iterator<String> getSiteContentIterator(String context)
{
	final String collectionId = contentHostingService.getSiteCollection(context);
	if (log.isDebugEnabled())
	{
		log.debug("Getting content for site info " + collectionId);
	}

	List<ContentResource> resources = null;
	if (!"/".equals(collectionId))
	{
		resources = contentHostingService.getAllResources(collectionId);
	}
	else
	{
		resources = new ArrayList<ContentResource>();
	}

	final Iterator<ContentResource> source = resources.iterator();
	return new Iterator<String>()
	{
		public boolean hasNext()
		{
			return source.hasNext();
		}

		public String next()
		{
			ContentResource current = (ContentResource) source.next();
			return current.getReference();
		}

		public void remove()
		{
			throw new UnsupportedOperationException("Remove is not implimented ");
		}
	};
}
/**
 * @return the content length above which
 *         {@link #isContentFromReader(String)} answers {@code true}
 */
public int getReaderSizeLimit()
{
	return this.readerSizeLimit;
}
/**
 * Sets the content length above which
 * {@link #isContentFromReader(String)} answers {@code true}.
 *
 * @param readerSizeLimit
 *        the new limit, compared against the resource's content length
 */
public void setReaderSizeLimit(int readerSizeLimit)
{
this.readerSizeLimit = readerSizeLimit;
}
/**
 * @return the fallback digester used when no registered digester applies
 *         or the selected one fails
 */
public ContentDigester getDefaultDigester()
{
	return this.defaultDigester;
}
/**
 * Sets the fallback digester returned by
 * {@link #getDigester(ContentResource)} when no registered digester accepts
 * a resource or the resource exceeds the digester size limit.
 *
 * @param defaultDigester
 *        the digester to fall back to
 */
public void setDefaultDigester(ContentDigester defaultDigester)
{
this.defaultDigester = defaultDigester;
}
/**
 * Decides whether a removed resource should produce an index delete.
 * Dropbox content is excluded; the check works on the reference string
 * alone (with the leading {@code /content} stripped), so no entity is
 * loaded.
 *
 * @param ref the entity reference of the removed resource
 * @return {@code false} when the resource is in a dropbox, {@code true}
 *         otherwise
 */
private boolean isForIndexDelete(String ref)
{
	final int prefixLength = "/content".length();
	if (ref.length() <= prefixLength)
	{
		// nothing follows the prefix, so it cannot be a dropbox resource
		return true;
	}
	return !contentHostingService.isInDropbox(ref.substring(prefixLength));
}
/**
 * Decides whether a resource should be indexed.
 * <p>
 * Rejected are: dropbox content, attachments whose fifth path segment is
 * {@code Assignments}, collections, and references that do not resolve to
 * an existing resource.
 *
 * @param ref the entity reference of the resource
 * @return {@code true} when the resource exists, is not a collection and
 *         is not filtered out
 * @throws RuntimeException if resolving the resource fails for any reason
 *         other than the id being unused
 */
public boolean isForIndex(String ref)
{
	try
	{
		// Dropbox content is recognised from the reference string alone
		// (leading "/content" stripped), so no entity has to be loaded.
		final int prefixLength = "/content".length();
		if (ref.length() > prefixLength
				&& contentHostingService.isInDropbox(ref.substring(prefixLength)))
		{
			return false;
		}

		// Skip assignment attachments.
		final String[] segments = ref.split("/");
		if (segments.length > 4)
		{
			final String topCollection = "/"+segments[2]+"/";
			if (ContentHostingService.ATTACHMENTS_COLLECTION.equals(topCollection)
					&& "Assignments".equals(segments[4]))
			{
				return false;
			}
		}

		final String resourceId = entityManager.newReference(ref).getId();
		final ContentResource resource = contentHostingService.getResource(resourceId);
		return resource != null && !resource.isCollection();
	}
	catch (IdUnusedException unknownId)
	{
		if (log.isDebugEnabled())
		{
			log.debug("Resource Not present in CHS " + ref);
		}
		// a collection or an unknown resource: cannot be indexed
		return false;
	}
	catch (Exception resolveFailure)
	{
		if (log.isDebugEnabled())
		{
			log.debug("Failed To resolve Resource", resolveFailure);
		}
		throw new RuntimeException("Failed to resolve resource ", resolveFailure);
	}
}
/**
 * Tells whether the resource behind a reference passes
 * {@code ContentHostingService.checkResource}. Any exception raised by the
 * check is treated as "cannot read".
 *
 * @param ref the entity reference of the resource
 * @return {@code true} when the check completes without an exception
 */
public boolean canRead(String ref)
{
	final boolean debug = log.isDebugEnabled();
	if (debug)
	{
		log.debug("canRead(" + ref);
	}

	boolean readable;
	try
	{
		String resourceId = entityManager.newReference(ref).getId();
		contentHostingService.checkResource(resourceId);
		readable = true;
	}
	catch (Exception denied)
	{
		if (log.isDebugEnabled())
		{
			log.debug("Current user cannot read ref: " + ref, denied);
		}
		readable = false;
	}
	return readable;
}
/**
 * Collects the configured custom properties of a resource.
 * <p>
 * Every configured mapping has the form {@code indexkey.entitykey} and is
 * split at the first dot. For each mapping whose entity key has a property
 * list on the resource, the values are stored under the index key.
 * Mappings without a dot are skipped.
 *
 * @param ref the entity reference of the resource
 * @return a map from index key to property values, or {@code null} when
 *         the resource cannot be loaded (permission, unused id or type
 *         problem)
 */
public Map<String, String[]> getCustomProperties(String ref)
{
	try
	{
		Reference reference = entityManager.newReference(ref);
		ContentResource contentResource = contentHostingService.getResource(reference.getId());
		// fetched once; it does not change between mappings
		ResourceProperties properties = contentResource.getProperties();
		boolean debug = log.isDebugEnabled();

		Map<String, String[]> cp = new HashMap<String, String[]>();
		for (String propname : customProperties)
		{
			String[] propKey = propname.split("\\.", 2);
			if (propKey.length != 2)
			{
				// Malformed mapping (no dot). Check before touching
				// propKey[1]: logging it first used to throw
				// ArrayIndexOutOfBoundsException when debug was enabled.
				if (debug)
				{
					log.debug("Ignoring malformed custom property mapping: " + propname);
				}
				continue;
			}
			if (debug)
			{
				log.debug("prop: " + propKey[0] + "=" + propKey[1]);
			}
			List<?> prop = properties.getPropertyList(propKey[1]);
			if (prop != null)
			{
				cp.put(propKey[0], prop.toArray(new String[0]));
			}
		}
		return cp;
	}
	catch (PermissionException e)
	{
		log.debug("Failed to read custom properties of " + ref, e);
	}
	catch (IdUnusedException e)
	{
		log.debug("Failed to read custom properties of " + ref, e);
	}
	catch (TypeException e)
	{
		log.debug("Failed to read custom properties of " + ref, e);
	}
	return null;
}
/**
 * Returns custom RDF for a resource. This implementation supplies none.
 *
 * @param ref the entity reference of the resource (ignored)
 * @return always {@code null}
 */
public String getCustomRDF(String ref)
{
return null;
}
/**
 * @return the content length above which
 *         {@link #getDigester(ContentResource)} always returns the default
 *         digester
 */
public int getDigesterSizeLimit()
{
	return this.digesterSizeLimit;
}
/**
 * Sets the content length above which
 * {@link #getDigester(ContentResource)} always returns the default
 * digester.
 *
 * @param digesterSizeLimit
 *        the new limit, compared against the resource's content length
 */
public void setDigesterSizeLimit(int digesterSizeLimit)
{
this.digesterSizeLimit = digesterSizeLimit;
}
/**
 * Resolves a reference string without letting failures escape.
 *
 * @param reference the entity reference string
 * @return the resolved {@link Reference}, or {@code null} when the entity
 *         manager throws (the failure is logged at debug level)
 */
private Reference getReference(String reference)
{
	Reference resolved = null;
	try
	{
		resolved = entityManager.newReference(reference);
	}
	catch (Exception failure)
	{
		if (log.isDebugEnabled())
		{
			log.debug("Failed To resolve Resource", failure);
		}
	}
	return resolved;
}
/**
 * Returns the id part of an entity reference.
 *
 * @param reference the entity reference string
 * @return the id of the resolved reference, or an empty string when the
 *         reference cannot be resolved or reading the id fails
 * @see org.sakaiproject.search.api.EntityContentProducer#getId(java.lang.String)
 */
public String getId(String reference)
{
	Reference resolved = getReference(reference);
	if (resolved == null)
	{
		// getReference has already logged the failure; test explicitly
		// instead of relying on a caught NullPointerException
		return "";
	}
	try
	{
		return resolved.getId();
	}
	catch (Exception ex)
	{
		if (log.isDebugEnabled())
		{
			log.debug("Failed To resolve Resource", ex);
		}
		return "";
	}
}
/**
 * Returns the sub type of an entity reference.
 *
 * @param reference the entity reference string
 * @return the sub type of the resolved reference, or an empty string when
 *         the reference cannot be resolved or reading the sub type fails
 * @see org.sakaiproject.search.api.EntityContentProducer#getSubType(java.lang.String)
 */
public String getSubType(String reference)
{
	boolean debug = log.isDebugEnabled();
	Reference resolved = getReference(reference);
	if (resolved == null)
	{
		// getReference has already logged the failure; test explicitly
		// instead of relying on a caught NullPointerException
		return "";
	}
	try
	{
		String r = resolved.getSubType();
		if (debug)
		{
			log.debug("ContentHosting.getSubType" + reference + ":" + r);
		}
		return r;
	}
	catch (Exception ex)
	{
		// best effort: report the failure instead of dropping it silently
		if (debug)
		{
			log.debug("Failed to read sub type of " + reference, ex);
		}
		return "";
	}
}
/**
 * Returns the type of an entity reference.
 *
 * @param reference the entity reference string
 * @return the type of the resolved reference, or an empty string when the
 *         reference cannot be resolved or reading the type fails
 * @see org.sakaiproject.search.api.EntityContentProducer#getType(java.lang.String)
 */
public String getType(String reference)
{
	boolean debug = log.isDebugEnabled();
	Reference resolved = getReference(reference);
	if (resolved == null)
	{
		// getReference has already logged the failure; test explicitly
		// instead of relying on a caught NullPointerException
		return "";
	}
	try
	{
		String r = resolved.getType();
		if (debug)
		{
			log.debug("ContentHosting.getType" + reference + ":" + r);
		}
		return r;
	}
	catch (Exception ex)
	{
		// best effort: report the failure instead of dropping it silently
		if (debug)
		{
			log.debug("Failed to read type of " + reference, ex);
		}
		return "";
	}
}
/**
 * Returns the container of an entity reference.
 *
 * @param reference the entity reference string
 * @return the container of the resolved reference, or an empty string when
 *         the reference cannot be resolved or reading the container fails
 * @see org.sakaiproject.search.api.EntityContentProducer#getContainer(java.lang.String)
 */
public String getContainer(String reference)
{
	boolean debug = log.isDebugEnabled();
	Reference resolved = getReference(reference);
	if (resolved == null)
	{
		// getReference has already logged the failure; test explicitly
		// instead of relying on a caught NullPointerException
		return "";
	}
	try
	{
		String r = resolved.getContainer();
		if (debug)
		{
			log.debug("ContentHosting.getContainer" + reference + ":" + r);
		}
		return r;
	}
	catch (Exception ex)
	{
		// best effort: report the failure instead of dropping it silently
		if (debug)
		{
			log.debug("Failed to read container of " + reference, ex);
		}
		return "";
	}
}
/**
 * @return the configured custom property mappings, each of the form
 *         {@code indexkey.entitykey}; this is the internal list itself,
 *         not a copy
 */
public List<String> getCustomProperties()
{
	return this.customProperties;
}
/**
 * Replaces the custom property mappings installed by the constructor.
 *
 * @param customProperties the mappings to use, each of the form
 *        {@code indexkey.entitykey}; the list is stored as given, not copied
 */
public void setCustomProperties(List<String> customProperties)
{
this.customProperties = customProperties;
}
/**
 * @return the content hosting service this producer loads resources from
 */
public ContentHostingService getContentHostingService()
{
	return this.contentHostingService;
}
/**
 * Sets the content hosting service used to load resources, list site
 * content and test dropbox membership.
 *
 * @param contentHostingService the contentHostingService to set
 */
public void setContentHostingService(ContentHostingService contentHostingService)
{
this.contentHostingService = contentHostingService;
}
/**
 * @return the entity manager used to resolve reference strings
 */
public EntityManager getEntityManager()
{
	return this.entityManager;
}
/**
 * Sets the entity manager used to turn reference strings into
 * {@link Reference} objects.
 *
 * @param entityManager the entityManager to set
 */
public void setEntityManager(EntityManager entityManager)
{
this.entityManager = entityManager;
}
/**
 * @return the index builder this producer registers itself with
 */
public SearchIndexBuilder getSearchIndexBuilder()
{
	return this.searchIndexBuilder;
}
/**
 * Sets the index builder that {@link #init()} registers this producer
 * with.
 *
 * @param searchIndexBuilder the searchIndexBuilder to set
 */
public void setSearchIndexBuilder(SearchIndexBuilder searchIndexBuilder)
{
this.searchIndexBuilder = searchIndexBuilder;
}
/**
 * @return the search service the content events are registered with
 */
public SearchService getSearchService()
{
	return this.searchService;
}
/**
 * Sets the search service that {@link #init()} registers the content
 * events with.
 *
 * @param searchService the searchService to set
 */
public void setSearchService(SearchService searchService)
{
this.searchService = searchService;
}
/**
 * @return the server configuration service consulted by {@link #init()}
 */
public ServerConfigurationService getServerConfigurationService()
{
	return this.serverConfigurationService;
}
/**
 * Sets the server configuration service from which {@link #init()} reads
 * the {@code search.enable} setting.
 *
 * @param serverConfigurationService the serverConfigurationService to set
 */
public void setServerConfigurationService(
ServerConfigurationService serverConfigurationService)
{
this.serverConfigurationService = serverConfigurationService;
}
/**
 * @return the site service held by this producer
 */
public SiteService getSiteService()
{
	return this.siteService;
}
/**
 * Sets the site service. No other method of this class reads the value;
 * it is only stored and returned by {@link #getSiteService()}.
 *
 * @param siteService the siteService to set
 */
public void setSiteService(SiteService siteService)
{
this.siteService = siteService;
}
}