/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.search.solr.internal.metadata;
import java.util.Locale;
import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Singleton;
import org.apache.solr.common.SolrInputDocument;
import org.xwiki.component.annotation.Component;
import org.xwiki.model.reference.AttachmentReference;
import org.xwiki.model.reference.EntityReference;
import org.xwiki.model.reference.EntityReferenceSerializer;
import org.xwiki.search.solr.internal.api.FieldUtils;
import com.xpn.xwiki.XWikiContext;
import com.xpn.xwiki.doc.XWikiAttachment;
import com.xpn.xwiki.doc.XWikiDocument;
/**
 * Extract the metadata to be indexed from attachments.
 *
 * @version $Id: 408497b13fcc3f675e50c0e7d75fa1ae7c120488 $
 * @since 4.3M2
 */
@Component
@Named("attachment")
@Singleton
public class AttachmentSolrMetadataExtractor extends AbstractSolrMetadataExtractor
{
    /**
     * Serializes the attachment author reference into the string value stored in the index.
     */
    @Inject
    private EntityReferenceSerializer<String> entityReferenceSerializer;

    /**
     * Fill the passed Solr document with the fields describing the referenced attachment: file name, mime type,
     * date, size, version, author, and finally the locales and the textual content.
     *
     * @param solrDocument the Solr document to fill.
     * @param entityReference the reference of the attachment to index.
     * @return {@code false} when the owning document has no attachment with the referenced name (nothing is set in
     *         that case), {@code true} once all the fields have been set.
     * @throws Exception if problems occur.
     */
    @Override
    public boolean setFieldsInternal(LengthSolrInputDocument solrDocument, EntityReference entityReference)
        throws Exception
    {
        AttachmentReference reference = new AttachmentReference(entityReference);

        XWikiDocument ownerDocument = getDocument(reference.getDocumentReference());
        XWikiAttachment attachment = ownerDocument.getAttachment(reference.getName());
        if (attachment == null) {
            // Nothing to index: the document does not hold such an attachment.
            return false;
        }

        XWikiContext xcontext = this.xcontextProvider.get();

        String filename = attachment.getFilename();
        solrDocument.setField(FieldUtils.FILENAME, filename);
        solrDocument.setField(FieldUtils.FILENAME_SORT, filename);

        solrDocument.setField(FieldUtils.MIME_TYPE, attachment.getMimeType(xcontext));

        solrDocument.setField(FieldUtils.ATTACHMENT_DATE, attachment.getDate());
        // The date field is multiValued (it is 'reused' on document rows, and a document can have several
        // attachments), so it cannot be used for sorting. That is why a dedicated sort field is filled as well.
        solrDocument.setField(FieldUtils.ATTACHMENT_DATE_SORT, attachment.getDate());

        long size = attachment.getLongSize();
        solrDocument.setField(FieldUtils.ATTACHMENT_SIZE, size);
        solrDocument.setField(FieldUtils.ATTACHMENT_SIZE_SORT, size);

        // The attachment version (revision) is indexed so that an out of date search index (not in sync with the
        // database) can be detected.
        solrDocument.setField(FieldUtils.ATTACHMENT_VERSION, attachment.getVersion());

        setAuthorFields(attachment, solrDocument, xcontext);

        setLocaleAndContentFields(attachment, solrDocument);

        return true;
    }

    /**
     * Index the author of the attachment: the serialized reference and, when it can be computed, the display name.
     * A failure to compute the display name is logged and does not interrupt the indexing.
     *
     * @param attachment the attachment whose author is indexed.
     * @param solrDocument the Solr document where to add the fields.
     * @param xcontext the XWiki context used to compute the author display name.
     */
    private void setAuthorFields(XWikiAttachment attachment, SolrInputDocument solrDocument, XWikiContext xcontext)
    {
        // The full author reference is used for exact matching (faceting).
        String serializedAuthor = this.entityReferenceSerializer.serialize(attachment.getAuthorReference());
        solrDocument.setField(FieldUtils.ATTACHMENT_AUTHOR, serializedAuthor);

        try {
            // The author display name is used for free text search and results sorting.
            String displayName = xcontext.getWiki().getPlainUserName(attachment.getAuthorReference(), xcontext);
            solrDocument.setField(FieldUtils.ATTACHMENT_AUTHOR_DISPLAY, displayName);
            solrDocument.setField(FieldUtils.ATTACHMENT_AUTHOR_DISPLAY_SORT, displayName);
        } catch (Exception e) {
            this.logger.error("Failed to get author display name for attachment [{}]", attachment.getReference(), e);
        }
    }

    /**
     * Add to the Solr document every locale returned for the owning document, so that this entity is found for all
     * the translations of a document and not just the original one.
     * <p>
     * The text content of the attachment is also indexed once per locale, so that the right analyzer is used, and
     * once more in the field obtained for a {@code null} locale.
     *
     * @param attachment the attachment.
     * @param solrDocument the Solr document where to add the fields.
     * @throws Exception if problems occur.
     */
    protected void setLocaleAndContentFields(XWikiAttachment attachment, SolrInputDocument solrDocument)
        throws Exception
    {
        String textContent = getContentAsText(attachment);

        // One LOCALES value and one localized content field per locale.
        for (Locale locale : getLocales(attachment.getDoc(), null)) {
            solrDocument.addField(FieldUtils.LOCALES, locale.toString());

            String localizedContentField = FieldUtils.getFieldName(FieldUtils.ATTACHMENT_CONTENT, locale);
            solrDocument.setField(localizedContentField, textContent);
        }

        // The schema's copyField cannot be relied upon here because it would be triggered for each language, so the
        // copy to the text_general field is done manually.
        String genericContentField = FieldUtils.getFieldName(FieldUtils.ATTACHMENT_CONTENT, null);
        solrDocument.setField(genericContentField, textContent);
    }
}