/* * Copyright (c) 2006-2011 Nuxeo SA (http://nuxeo.com/) and others. * * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Florent Guillaume * Stephane Lacoin */ package org.eclipse.ecr.core.storage.sql.coremodel; import java.io.Serializable; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.eclipse.ecr.convert.api.ConversionService; import org.eclipse.ecr.core.api.Blob; import org.eclipse.ecr.core.api.ClientException; import org.eclipse.ecr.core.api.CoreSession; import org.eclipse.ecr.core.api.DocumentException; import org.eclipse.ecr.core.api.DocumentModel; import org.eclipse.ecr.core.api.DocumentModelList; import org.eclipse.ecr.core.api.DocumentRef; import org.eclipse.ecr.core.api.IdRef; import org.eclipse.ecr.core.api.blobholder.BlobHolder; import org.eclipse.ecr.core.api.blobholder.SimpleBlobHolder; import org.eclipse.ecr.core.event.Event; import org.eclipse.ecr.core.event.EventBundle; import org.eclipse.ecr.core.event.EventContext; import org.eclipse.ecr.core.event.PostCommitEventListener; import org.eclipse.ecr.core.event.ReconnectedEventBundle; import org.eclipse.ecr.core.storage.sql.Model; import org.eclipse.ecr.core.storage.sql.ModelFulltext; import org.eclipse.ecr.core.utils.BlobsExtractor; import org.eclipse.ecr.runtime.api.Framework; import org.nuxeo.common.utils.StringUtils; /** * Listener that does fulltext extraction from the blobs of documents whose ids * have been recorded in the bundle's events. * * @author Florent Guillaume * @author Stephane Lacoin */ public class BinaryTextListener implements PostCommitEventListener { private static final Log log = LogFactory.getLog(BinaryTextListener.class); public static final String EVENT_NAME = "event_storage_binaries_doc"; private static final String ANY2TEXT = "any2text"; protected final ConversionService conversionService; public BinaryTextListener() throws ClientException { try { conversionService = Framework.getService(ConversionService.class); } catch (Exception e) { throw new ClientException(e); } if (conversionService == null) { throw new ClientException("No conversion service"); } } @Override public void handleEvent(EventBundle eventBundle) throws ClientException { if (! eventBundle.containsEventName(EVENT_NAME)) { return; } if (!(eventBundle instanceof ReconnectedEventBundle)) { log.error("Incorrect event bundle type: " + eventBundle); return; } CoreSession session = null; ModelFulltext fulltextInfo = null; Set<Serializable> ids = new HashSet<Serializable>(); for (Event event : eventBundle) { if (!event.getName().equals(EVENT_NAME)) { continue; } EventContext eventContext = event.getContext(); fulltextInfo = getFulltextInfoFromEventContext(eventContext); ids.addAll(getIdsFromEventContext(eventContext)); CoreSession s = eventContext.getCoreSession(); if (session == null) { session = s; } else if (session != s) { // cannot happen given current ReconnectedEventBundleImpl throw new ClientException( "Several CoreSessions in one EventBundle"); } } if (session == null) { if (ids.isEmpty()) { return; } throw new ClientException("Null CoreSession"); } // we have all the info from the bundle, now do the extraction boolean save = false; BlobsExtractor extractor = new BlobsExtractor(); for (Serializable id : ids) { IdRef docRef = new IdRef((String) id); // if the runtime has shutdown (normally because tests are finished) // this can happen, see NXP-4009 if (session.getPrincipal() == null) { continue; } if (!session.exists(docRef)) { // doc is gone continue; } DocumentModel indexedDoc = session.getDocument(docRef); if (indexedDoc.isProxy()) { // proxies don't have any fulltext attached, it's // the target document that carries it continue; } // Iterate on each index to set the binaryText column for (String indexName : fulltextInfo.indexNames) { if (!fulltextInfo.indexesAllBinary.contains(indexName) && fulltextInfo.propPathsByIndexBinary.get(indexName) == null) { // nothing to do: index not configured for blob continue; } extractor.setExtractorProperties( fulltextInfo.propPathsByIndexBinary.get(indexName), fulltextInfo.propPathsExcludedByIndexBinary.get(indexName), fulltextInfo.indexesAllBinary.contains(indexName)); List<Blob> blobs = extractor.getBlobs(indexedDoc); String text = blobsToText(blobs); String impactedQuery = String.format("SELECT * from Document where ecm:fulltextJobId = '%s'", indexedDoc.getId()); DocumentModelList impactedDocs = session.query(impactedQuery); for (DocumentModel impactedDoc : impactedDocs) { try { DocumentRef ref = impactedDoc.getRef(); session.setDocumentSystemProp(ref, SQLDocument.FULLTEXT_JOBID_SYS_PROP, null); session.setDocumentSystemProp(ref, SQLDocument.BINARY_TEXT_SYS_PROP + getFulltextIndexSuffix(indexName), text); } catch (DocumentException e) { log.error("Couldn't set fulltext on: " + id, e); continue; } } } save = true; } if (save) { session.save(); } } @SuppressWarnings("unchecked") protected Set<Serializable> getIdsFromEventContext(EventContext eventContext) { return (Set<Serializable>) eventContext.getArguments()[0]; } protected ModelFulltext getFulltextInfoFromEventContext( EventContext eventContext) { return (ModelFulltext) eventContext.getArguments()[1]; } protected String blobsToText(List<Blob> blobs) { List<String> strings = new LinkedList<String>(); for (Blob blob : blobs) { try { SimpleBlobHolder bh = new SimpleBlobHolder(blob); BlobHolder result = conversionService.convert(ANY2TEXT, bh, null); if (result == null) { continue; } blob = result.getBlob(); if (blob == null) { continue; } String string = new String(blob.getByteArray(), "UTF-8"); // strip '\0 chars from text if (string.indexOf('\0') >= 0) { string = string.replace("\0", " "); } strings.add(string); } catch (Exception e) { log.error(e.getMessage(), e); continue; } } return StringUtils.join(strings, " "); } public String getFulltextIndexSuffix(String indexName) { return indexName.equals(Model.FULLTEXT_DEFAULT_INDEX) ? "" : '_' + indexName; } }