/*
 * Hibernate Search, full-text search for your domain model
 *
 * License: GNU Lesser General Public License (LGPL), version 2.1 or later
 * See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
 */
package org.hibernate.search.engine.impl;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.hibernate.search.exception.AssertionFailure;
import org.hibernate.search.exception.SearchException;
import org.hibernate.search.backend.LuceneWork;
import org.hibernate.search.backend.PurgeAllLuceneWork;
import org.hibernate.search.backend.spi.DeleteByQueryLuceneWork;
import org.hibernate.search.backend.spi.DeleteByQueryWork;
import org.hibernate.search.backend.spi.DeletionQuery;
import org.hibernate.search.backend.spi.Work;
import org.hibernate.search.backend.spi.WorkType;
import org.hibernate.search.bridge.spi.ConversionContext;
import org.hibernate.search.bridge.util.impl.ContextualExceptionBridgeHelper;
import org.hibernate.search.engine.integration.impl.ExtendedSearchIntegrator;
import org.hibernate.search.engine.spi.AbstractDocumentBuilder;
import org.hibernate.search.engine.spi.ContainedInRecursionContext;
import org.hibernate.search.engine.spi.DocumentBuilderContainedEntity;
import org.hibernate.search.engine.spi.EntityIndexBinding;
import org.hibernate.search.indexes.interceptor.EntityIndexingInterceptor;
import org.hibernate.search.indexes.interceptor.IndexingOverride;
import org.hibernate.search.spi.InstanceInitializer;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;

/**
 * Represents the set of changes that are going to be applied to the index for the entities.
 * A stream of Work is fed as input, a list of LuceneWork is output, and in the process
 * we try to reduce the number of output operations to the minimum needed to reach the
 * same final state.
 *
 * @author Sanne Grinovero
 * @author Hardy Ferentschik
 * @author Martin Braun
 * @since 3.3
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public class WorkPlan {

	private static final Log log = LoggerFactory.make();

	private final HashMap<Class<?>, PerClassWork> byClass = new HashMap<Class<?>, PerClassWork>();

	private final ExtendedSearchIntegrator extendedIntegrator;

	private final InstanceInitializer instanceInitializer;

	/**
	 * Most work is split in two, some other works might cancel one or more existing works;
	 * we don't track the number accurately as that's not needed.
	 */
	private int approximateWorkQueueSize = 0;

	public WorkPlan(ExtendedSearchIntegrator extendedIntegrator) {
		this.extendedIntegrator = extendedIntegrator;
		this.instanceInitializer = extendedIntegrator.getInstanceInitializer();
	}

	/**
	 * Adds a work to be performed as part of the final plan.
	 *
	 * @param work The work instance to add to the work plan
	 */
	public void addWork(Work work) {
		approximateWorkQueueSize++;
		Class<?> entityClass = instanceInitializer.getClassFromWork( work );
		PerClassWork classWork = getClassWork( work.getTenantIdentifier(), entityClass );
		classWork.addWork( work );
	}

	/**
	 * Removes all scheduled work.
	 */
	public void clear() {
		byClass.clear();
		approximateWorkQueueSize = 0;
	}
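	// Illustrative sketch of the typical lifecycle of a WorkPlan, assuming a caller similar to the
	// transactional workers that own this plan (variable names below are hypothetical, derived only
	// from the javadoc above, not a normative API contract):
	//
	//   WorkPlan plan = new WorkPlan( extendedIntegrator );
	//   plan.addWork( work );                           // repeated for every change event collected
	//   plan.processContainedInAndPrepareExecution();   // once no more events will be collected
	//   List<LuceneWork> queue = plan.getPlannedLuceneWork();
	//   plan.clear();                                   // resets the plan for potential reuse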
	/**
	 * Returns an approximation of the amount of work in the queue.
	 * This is meant for resource control, to allow auto-flushing of large pending batches.
	 *
	 * @return the approximation
	 * @see org.hibernate.search.cfg.Environment#QUEUEINGPROCESSOR_BATCHSIZE
	 */
	public int size() {
		return approximateWorkQueueSize;
	}

	/**
	 * @param tenantId the tenant identifier
	 * @param entityClass The entity class for which to retrieve the work
	 *
	 * @return the {@code PerClassWork} from the {@link #byClass} map, creating it if needed
	 */
	private PerClassWork getClassWork(String tenantId, Class<?> entityClass) {
		PerClassWork classWork = byClass.get( entityClass );
		if ( classWork == null ) {
			classWork = new PerClassWork( tenantId, entityClass );
			byClass.put( entityClass, classWork );
		}
		return classWork;
	}

	/**
	 * Makes sure that all additional work needed because of containedIn
	 * is added to the work plan.
	 */
	public void processContainedInAndPrepareExecution() {
		PerClassWork[] worksFromEvents = byClass.values().toArray( new PerClassWork[byClass.size()] );

		// We need to iterate on a "frozen snapshot" of the byClass values
		// because of HSEARCH-647. This method is not recursive, invoked
		// only after the current unit of work is complete, and all additional
		// work we add through recursion is already complete, so we don't need
		// to process again new classes we add during the process.
		for ( PerClassWork perClassWork : worksFromEvents ) {
			perClassWork.processContainedInAndPrepareExecution();
		}
	}

	/**
	 * Used for recursive processing of containedIn.
	 *
	 * @param <T> the type of the entity
	 * @param value the entity to be processed
	 * @param context the validator for the depth constraints
	 * @param tenantId the tenant identifier. It can be null.
	 */
	public <T> void recurseContainedIn(T value, ContainedInRecursionContext context, String tenantId) {
		Class<T> entityClass = instanceInitializer.getClass( value );
		PerClassWork classWork = getClassWork( tenantId, entityClass );
		classWork.recurseContainedIn( value, context );
	}

	/**
	 * @return the current plan converted to a list of {@code LuceneWork}
	 */
	public List<LuceneWork> getPlannedLuceneWork() {
		List<LuceneWork> luceneQueue = new ArrayList<LuceneWork>();
		for ( PerClassWork perClassWork : byClass.values() ) {
			perClassWork.enqueueLuceneWork( luceneQueue );
		}
		return luceneQueue;
	}
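	// For orientation only (a summary of the structure below, not additional behaviour): the plan is
	// organized as WorkPlan -> PerClassWork (one per entity type, see #byClass) -> PerEntityWork
	// (one per entity identifier). For example, assuming a hypothetical indexed entity Order whose
	// identifier is 42, several UPDATE events for Order#42 within the same unit of work all map to
	// the same PerEntityWork, so they result in a single reindexing of that document in the final
	// queue (the exact LuceneWork types emitted are decided by the DocumentBuilder).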
	/**
	 * {@code PerClassWork} organizes work per entity type.
	 */
	class PerClassWork {

		/**
		 * We further organize work per entity identifier so that we can cancel or adapt work being done
		 * on the same entities.
		 * This map uses as key what we originally received as {@link Work#getId()} if the type
		 * is annotated with @ProvidedId, otherwise it uses the value pointed to by
		 * {@link org.hibernate.search.annotations.DocumentId} or, as a last attempt, {@code javax.persistence.Id}.
		 */
		private final Map<Serializable, PerEntityWork> entityById = new HashMap<Serializable, PerEntityWork>();

		/**
		 * When a PurgeAll operation is sent on the type, we can remove all previously scheduled work
		 * and remember that the first operation on the index is going to be a purge all.
		 */
		private boolean purgeAll = false;

		private List<DeletionQuery> deletionQueries = new ArrayList<>();

		/**
		 * The type of all classes being managed
		 */
		private final Class<?> entityClass;

		private final String tenantId;

		/**
		 * The DocumentBuilder relative to the type being managed
		 */
		private final AbstractDocumentBuilder documentBuilder;

		/**
		 * Whether the entity {@link #entityClass} has no index of its own and is only used in containedIn scenarios
		 */
		private final boolean containedInOnly;

		/**
		 * @param tenantId The tenant identifier. It can be null.
		 * @param clazz The type of entities being managed by this instance
		 */
		PerClassWork(String tenantId, Class<?> clazz) {
			this.entityClass = clazz;
			this.documentBuilder = getEntityBuilder( extendedIntegrator, clazz );
			this.containedInOnly = documentBuilder instanceof DocumentBuilderContainedEntity;
			this.tenantId = tenantId;
		}

		/**
		 * Adds a work to the current plan. The entityClass of the work must be of the
		 * type managed by this instance.
		 *
		 * @param work the {@code Work} instance to add to the plan
		 */
		public void addWork(Work work) {
			if ( work.getType() == WorkType.PURGE_ALL ) {
				entityById.clear();
				this.deletionQueries.clear();
				purgeAll = true;
			}
			else if ( work.getType() == WorkType.DELETE_BY_QUERY ) {
				DeleteByQueryWork delWork = (DeleteByQueryWork) work;
				this.deletionQueries.add( delWork.getDeleteByQuery() );
			}
			else {
				Serializable id = extractProperId( work );
				PerEntityWork entityWork = entityById.get( id );
				if ( entityWork == null ) {
					entityWork = new PerEntityWork( work );
					entityById.put( id, entityWork );
				}
				entityWork.addWork( work );
			}
		}

		/**
		 * We need to choose which value is used as identifier
		 * according to the use case and the mapping options.
		 *
		 * @param work The work instance from which to extract the id
		 *
		 * @return the appropriate id to use for this work
		 */
		private Serializable extractProperId(Work work) {
			// see HSEARCH-662
			if ( containedInOnly ) {
				return work.getId();
			}

			Object entity = work.getEntity();
			// 1) entity is null for purge operations, which requires trusting the work id
			// 2) types mapped as provided id require using the work id
			// 3) when Hibernate identifier rollback is used && this identifier is our same id source, we need to get the value from the work id
			if ( entity == null
					|| documentBuilder.requiresProvidedId()
					|| ( work.isIdentifierWasRolledBack() && documentBuilder.isIdMatchingJpaId() ) ) {
				return work.getId();
			}
			else {
				return documentBuilder.getId( entity );
			}
		}

		/**
		 * Enqueues all work needed to be performed according to the current state into
		 * the LuceneWork queue.
		 *
		 * @param luceneQueue work will be appended to this list
		 */
		public void enqueueLuceneWork(List<LuceneWork> luceneQueue) {
			final Set<Entry<Serializable, PerEntityWork>> entityInstances = entityById.entrySet();
			ConversionContext conversionContext = new ContextualExceptionBridgeHelper();
			if ( purgeAll ) {
				luceneQueue.add( new PurgeAllLuceneWork( tenantId, entityClass ) );
			}
			for ( DeletionQuery delQuery : this.deletionQueries ) {
				luceneQueue.add( new DeleteByQueryLuceneWork( tenantId, entityClass, delQuery ) );
			}
			for ( Entry<Serializable, PerEntityWork> entry : entityInstances ) {
				Serializable indexingId = entry.getKey();
				PerEntityWork perEntityWork = entry.getValue();
				String tenantIdentifier = perEntityWork.getTenantIdentifier();
				perEntityWork.enqueueLuceneWork( tenantIdentifier, entityClass, indexingId, documentBuilder, luceneQueue, conversionContext );
			}
		}
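		// Illustrative note on ordering (derived from enqueueLuceneWork above, not an extra guarantee):
		// for each entity type the queue is filled with an optional PurgeAllLuceneWork first, then any
		// DeleteByQueryLuceneWork, and finally the per-entity delete/add operations. So, for a
		// hypothetical plan containing a PURGE_ALL followed by an ADD on the same type, the resulting
		// queue would be [ PurgeAllLuceneWork, <add work for that entity> ].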
		/**
		 * Starts processing the {@code ContainedIn} annotation for all instances stored in
		 * {@link #entityById}.
		 *
		 * This processing must be performed when no more work is being collected by the event
		 * system. The processing might recursively add more work to the plan.
		 */
		public void processContainedInAndPrepareExecution() {
			Entry<Serializable, PerEntityWork>[] entityInstancesFrozenView = new Entry[entityById.size()];
			entityInstancesFrozenView = entityById.entrySet().toArray( entityInstancesFrozenView );
			for ( Entry<Serializable, PerEntityWork> entry : entityInstancesFrozenView ) {
				PerEntityWork perEntityWork = entry.getValue();
				perEntityWork.processContainedIn( documentBuilder, WorkPlan.this );
			}
		}

		/**
		 * Continues the recursion for ContainedIn processing, as started by {@link #processContainedInAndPrepareExecution()}.
		 * Additional work that needs to be processed will be added to this same WorkPlan.
		 *
		 * @param value the instance to be processed
		 * @param context the validator for the depth constraints
		 */
		void recurseContainedIn(Object value, ContainedInRecursionContext context) {
			if ( documentBuilder.requiresProvidedId() ) {
				log.containedInPointsToProvidedId( instanceInitializer.getClass( value ) );
			}
			else {
				Serializable extractedId = documentBuilder.getId( value );
				if ( extractedId != null ) {
					PerEntityWork entityWork = entityById.get( extractedId );
					if ( entityWork == null ) {
						EntityIndexingInterceptor entityInterceptor = getEntityInterceptor();
						IndexingOverride operation;
						if ( entityInterceptor != null ) {
							operation = entityInterceptor.onUpdate( value );
						}
						else {
							operation = IndexingOverride.APPLY_DEFAULT;
						}
						//TODO there is a small duplication with some of TransactionalWorker.interceptWork
						// but what would be a proper factored solution?
						switch ( operation ) {
							//we are planning an update by default
							case UPDATE:
							case APPLY_DEFAULT:
								entityWork = new PerEntityWork( tenantId, value );
								entityById.put( extractedId, entityWork );
								break;
							case SKIP:
								log.forceSkipIndexOperationViaInterception( entityClass, WorkType.UPDATE );
								break;
							case REMOVE:
								log.forceRemoveOnIndexOperationViaInterception( entityClass, WorkType.UPDATE );
								Work work = new Work( tenantId, value, extractedId, WorkType.DELETE );
								entityWork = new PerEntityWork( work );
								entityById.put( extractedId, entityWork );
								break;
							default:
								throw new AssertionFailure( "Unknown action type: " + operation );
						}
						// recursion starts
						documentBuilder.appendContainedInWorkForInstance( value, WorkPlan.this, context );
					}
					// else nothing to do as it's being processed already
				}
				else {
					// this branch is for @ContainedIn recursive work on non-indexed entities,
					// as they don't have an indexingId
					documentBuilder.appendContainedInWorkForInstance( value, WorkPlan.this, context );
				}
			}
		}

		private EntityIndexingInterceptor getEntityInterceptor() {
			EntityIndexBinding indexBindingForEntity = extendedIntegrator.getIndexBinding( entityClass );
			return indexBindingForEntity != null ? indexBindingForEntity.getEntityIndexingInterceptor() : null;
		}

		public String getTenantId() {
			return tenantId;
		}
	}
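	// Note on interception during containedIn recursion (an illustration of the switch statement in
	// PerClassWork#recurseContainedIn above, not additional behaviour): when an EntityIndexingInterceptor
	// is attached to the containing type, its onUpdate() result decides what gets planned. For a
	// hypothetical interceptor returning SKIP, the containing entity is left untouched in the index;
	// REMOVE plans a DELETE-only work; UPDATE and APPLY_DEFAULT both plan the usual delete+add reindexing.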
	/**
	 * Keeps track of what needs to be done Lucene-wise for each entity.
	 * Each entity might need to be deleted from the index, added to the index,
	 * or both; in this case the delete will be performed first.
	 */
	private static class PerEntityWork {

		private Object entity;

		/**
		 * When true, the Lucene Document representing this entity will be deleted
		 * from the index.
		 */
		private boolean delete = false;

		/**
		 * When true, the entity will be converted to a Lucene Document and added
		 * to the index.
		 */
		private boolean add = false;

		/**
		 * Needed to stop recursion when processing ContainedIn
		 * of already processed instances.
		 */
		private boolean containedInProcessed = false;

		private final String tenantId;

		/**
		 * Constructor to force an update of the entity even without
		 * having a specific Work instance for it.
		 *
		 * @param tenantId the tenant identifier
		 * @param entity the instance which needs to be updated in the index
		 */
		private PerEntityWork(String tenantId, Object entity) {
			// for updates only
			this.entity = entity;
			this.delete = true;
			this.add = true;
			this.containedInProcessed = true;
			this.tenantId = tenantId;
		}

		/**
		 * Prepares the initial state of planned changes according
		 * to the type of work being fired.
		 *
		 * @param work the work instance
		 */
		private PerEntityWork(Work work) {
			entity = work.getEntity();
			tenantId = work.getTenantIdentifier();
			WorkType type = work.getType();
			// sets the initial state:
			switch ( type ) {
				case ADD:
					add = true;
					break;
				case DELETE:
				case PURGE:
					delete = true;
					break;
				case COLLECTION:
				case UPDATE:
					delete = true;
					add = true;
					break;
				case INDEX:
					add = true;
					delete = true;
					break;
				case PURGE_ALL:
					// not breaking intentionally: PURGE_ALL should not reach this class
				case DELETE_BY_QUERY:
					// not breaking intentionally: DELETE_BY_QUERY should not reach this class
				default:
					throw new SearchException( "unexpected state:" + type );
			}
		}

		/**
		 * Has different effects depending on the new type of work needed
		 * and the previously scheduled work.
		 * This way we never store more than one plan for each entity; the order
		 * of final execution is irrelevant, what matters is the order in which the
		 * work is added to the plan.
		 *
		 * @param work the work instance to add
		 */
		public void addWork(Work work) {
			entity = work.getEntity();
			WorkType type = work.getType();
			switch ( type ) {
				case INDEX:
				case UPDATE:
					if ( add && !delete ) {
						// noop: the entity was newly created in this same unit of work,
						// so it only needs to be added; there is no need to delete
					}
					else {
						add = true;
						delete = true;
					}
					break;
				case ADD:
					// ADD is the only operation which doesn't imply a delete-before-add
					add = true;
					// leave the delete flag as-is
					break;
				case DELETE:
				case PURGE:
					if ( add && !delete ) {
						// the entity was newly created in this same unit of work,
						// so the two operations cancel each other out
						add = false;
					}
					else {
						add = false;
						delete = true;
					}
					break;
				case COLLECTION:
					if ( !add && !delete ) {
						add = true;
						delete = true;
					}
					// else: nothing to do, as a more specific operation was already planned
					break;
				case PURGE_ALL:
				case DELETE_BY_QUERY:
				default:
					throw new SearchException( "unexpected state:" + type );
			}
		}

		/**
		 * Adds the needed LuceneWork to the queue for this entity instance.
		 *
		 * @param tenantIdentifier the tenant identifier
		 * @param entityClass the type
		 * @param indexingId identifier of the instance
		 * @param entityBuilder the DocumentBuilder for this type
		 * @param luceneQueue the queue collecting all changes
		 * @param conversionContext the conversion context used while building the documents
		 */
		public void enqueueLuceneWork(String tenantIdentifier, Class entityClass, Serializable indexingId, AbstractDocumentBuilder entityBuilder,
				List<LuceneWork> luceneQueue, ConversionContext conversionContext) {
			if ( add || delete ) {
				entityBuilder.addWorkToQueue( tenantIdentifier, entityClass, entity, indexingId, delete, add, luceneQueue, conversionContext );
			}
		}
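		// A few worked examples of the merging rules above (illustrative only; derived from the
		// constructor and addWork switch statements, not an extra contract):
		//   ADD then UPDATE    -> add only (the document never existed, so there is nothing to delete)
		//   ADD then DELETE    -> neither flag remains set, so no LuceneWork is emitted at all
		//   UPDATE then DELETE -> delete only
		//   DELETE then ADD    -> delete followed by add, i.e. a full replacement of the document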
		/**
		 * Works via recursion passing the WorkPlan over, so that additional work can be planned
		 * according to the needs of ContainedIn processing.
		 *
		 * @param entityBuilder the DocumentBuilder for this type
		 * @param workplan the current WorkPlan, used for recursion
		 *
		 * @see org.hibernate.search.annotations.ContainedIn
		 */
		public void processContainedIn(AbstractDocumentBuilder entityBuilder, WorkPlan workplan) {
			if ( entity != null && !containedInProcessed ) {
				containedInProcessed = true;
				if ( add || delete ) {
					entityBuilder.appendContainedInWorkForInstance( entity, workplan, null, getTenantIdentifier() );
				}
			}
		}

		public String getTenantIdentifier() {
			return tenantId;
		}
	}

	/**
	 * Gets the DocumentBuilder for this type. Since this is a per-class work,
	 * it can be fetched once and reused.
	 *
	 * @param extendedIntegrator the search factory (implementor)
	 * @param entityClass the entity type for which to retrieve the document builder
	 *
	 * @return the DocumentBuilder for this type
	 */
	private static AbstractDocumentBuilder getEntityBuilder(ExtendedSearchIntegrator extendedIntegrator, Class<?> entityClass) {
		EntityIndexBinding entityIndexBinding = extendedIntegrator.getIndexBinding( entityClass );
		if ( entityIndexBinding == null ) {
			DocumentBuilderContainedEntity entityBuilder = extendedIntegrator.getDocumentBuilderContainedEntity( entityClass );
			if ( entityBuilder == null ) {
				// should never happen but better be safe than sorry
				throw new SearchException( "Unable to perform work. Entity Class is not @Indexed nor hosts @ContainedIn: " + entityClass );
			}
			else {
				return entityBuilder;
			}
		}
		else {
			return entityIndexBinding.getDocumentBuilder();
		}
	}
}