//
// ERXFetchSpecificationBatchIterator.java
// ERExtensions
//
// Created by Max Muller on Mon Oct 21 2002.
//
package er.extensions.eof;
import java.util.Enumeration;
import java.util.Iterator;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.webobjects.eoaccess.EOAttribute;
import com.webobjects.eoaccess.EOEntity;
import com.webobjects.eoaccess.EOUtilities;
import com.webobjects.eocontrol.EOEditingContext;
import com.webobjects.eocontrol.EOFetchSpecification;
import com.webobjects.eocontrol.EOQualifier;
import com.webobjects.foundation.NSArray;
import com.webobjects.foundation.NSMutableArray;
import com.webobjects.foundation.NSRange;
import er.extensions.eof.qualifiers.ERXInQualifier;
import er.extensions.foundation.ERXArrayUtilities;
import er.extensions.jdbc.ERXSQLHelper;
/**
* The goal of the fetch specification batch iterator is to have the ability to
* iterate through a fetch specification that might fetch one million enterprise
* objects. Fetching all of the objects into a single editing context is
* prohibitive in the amount of memory needed and in the time taken to process
* all of the rows. <br>
* The iterator allows one to iterate through the fetched objects, hydrating only
* those objects needed, in small bite-size pieces. The iterator also allows you to
* swap out editing contexts between calls to <b>nextBatch()</b>, which will
* allow the garbage collector to collect the old editing context and the
* previous batch of enterprise objects.<br>
* For your convenience, this class also implements Iterator and Enumeration, so
* you can use it as such.<br>
* Be aware that the batch size is primarily intended to govern the number of
* objects requested from the database at once, and may differ from the number
* of objects returned by <b>nextBatch()</b>, for instance if the batch size is
* changed after fetching, or if <b>filtersBatches()</b> is set to true.
*
* @param <E> the type of elements returned by this iterator
*/
public class ERXFetchSpecificationBatchIterator<E> implements Iterator<E>, Enumeration<E> {

    /** holds the default batch size; per the original author, any bigger than this and Oracle has a fit */
    public static final int DefaultBatchSize = 250;

    private static final Logger log = LoggerFactory.getLogger(ERXFetchSpecificationBatchIterator.class);

    /** holds the selected batch size */
    protected int batchSize;
    /** holds a reference to the selected editing context */
    protected EOEditingContext editingContext;
    /** holds a reference to the (cloned) fetch spec to iterate over */
    protected EOFetchSpecification fetchSpecification;
    /** holds the name of the primary key attribute corresponding to the entity being iterated over */
    protected String primaryKeyAttributeName;
    /** holds an array of primary key values to iterate through; null until supplied or lazily fetched */
    protected NSArray primaryKeys;
    /** holds array of fetched but not-yet-returned objects; used by the Iterator and Enumeration interfaces */
    protected NSMutableArray<E> cachedBatch;
    /** holds the number of primary keys consumed so far, i.e. the offset of the next batch */
    protected int currentObjectFetchCount;
    /** determines whether we should re-apply the original qualifier to each batch of objects fetched */
    protected boolean shouldFilterBatches;

    /**
     * Constructs a fetch specification iterator for a given fetch
     * specification with the default batch size. Because no editing
     * context is supplied, a new one is created; use
     * {@link #setEditingContext(EOEditingContext)} to fetch against
     * a different one.
     * @param fetchSpecification to iterate through
     */
    public ERXFetchSpecificationBatchIterator(EOFetchSpecification fetchSpecification) {
        this(fetchSpecification, null);
    }

    /**
     * Constructs a fetch specification iterator for a given fetch
     * specification with the default batch size. All objects will be
     * fetched from the given editing context. Note that you can switch
     * out different editing contexts between calls to <b>nextBatch</b>
     * @param fetchSpecification to iterate through
     * @param ec editing context to fetch against
     */
    public ERXFetchSpecificationBatchIterator(EOFetchSpecification fetchSpecification, EOEditingContext ec) {
        this(fetchSpecification, ec, DefaultBatchSize);
    }

    /**
     * Constructs a fetch specification iterator for a given fetch
     * specification and a batch size. All objects will be
     * fetched from the given editing context. Note that you can switch
     * out different editing contexts between calls to <b>nextBatch</b>
     * @param fetchSpecification to iterate through
     * @param ec editing context to fetch against
     * @param batchSize number of objects to fetch in a given batch
     */
    public ERXFetchSpecificationBatchIterator(EOFetchSpecification fetchSpecification, EOEditingContext ec, int batchSize) {
        this(fetchSpecification, null, ec, batchSize);
    }

    /**
     * Constructs a fetch specification iterator for a fetch specification,
     * an optional set of pre-fetched primary keys
     * and a batch size. All objects will be
     * fetched from the given editing context. Note that you can switch
     * out different editing contexts between calls to <b>nextBatch</b>.
     * <p>Note: if no ec is supplied a new one is initialized.</p>
     * @param fetchSpecification to iterate through; it is cloned, so the caller's instance is not modified
     * @param pkeys primary keys to iterate through, or null to have them fetched on first use
     * @param ec editing context to fetch against
     * @param batchSize number of objects to fetch in a given batch
     * @throws RuntimeException if the entity does not have exactly one primary key attribute
     * @throws IllegalArgumentException if batchSize is not positive
     */
    public ERXFetchSpecificationBatchIterator(EOFetchSpecification fetchSpecification, NSArray pkeys, EOEditingContext ec, int batchSize) {
        super();

        EOEntity entity = ERXEOAccessUtilities.entityNamed(ec, fetchSpecification.entityName());
        NSArray<EOAttribute> primaryKeyAttributes = entity.primaryKeyAttributes();
        // Exactly one primary key attribute is required: compound keys are unsupported, and
        // with no key attribute at all there is no name to build the per-batch qualifier from.
        if (primaryKeyAttributes.count() != 1) {
            throw new RuntimeException("ERXFetchSpecificationBatchIterator: Currently only single primary key entities are supported.");
        }
        primaryKeyAttributeName = primaryKeyAttributes.lastObject().name();

        // Work on a private copy; the qualifier may be replaced below.
        this.fetchSpecification = (EOFetchSpecification) fetchSpecification.clone();
        primaryKeys = pkeys;
        setEditingContext(ec != null ? ec : ERXEC.newEditingContext());
        setBatchSize(batchSize);
        setFiltersBatches(false);

        EOQualifier qualifier = this.fetchSpecification.qualifier();
        if (qualifier != null) {
            // The schema-based conversion is done while holding the root object store lock.
            editingContext().rootObjectStore().lock();
            try {
                this.fetchSpecification.setQualifier(entity.schemaBasedQualifier(qualifier));
            } finally {
                editingContext().rootObjectStore().unlock();
            }
        }
    }

    /**
     * Gets the batch size.
     * @return number of enterprise objects to fetch per batch.
     */
    public int batchSize() {
        return batchSize;
    }

    /**
     * Gets the current batch index.
     * @return number of batches fetched thus far (objects consumed / batch size, rounded up)
     */
    public int currentBatchIndex() {
        return (int) Math.ceil((double) currentObjectFetchCount() / (double) batchSize());
    }

    /**
     * Gets the number of batches for a given iterator.
     * @return number of objects / batch size rounded up
     */
    public int batchCount() {
        return (int) Math.ceil((double) count() / (double) batchSize());
    }

    /**
     * Gets the number of objects. This triggers the primary key fetch
     * if the keys have not been supplied or fetched yet.
     * @return number of objects
     */
    public int count() {
        return primaryKeys().count();
    }

    /**
     * Gets the current number of objects
     * fetched thus far.
     * @return current number of objects fetched.
     */
    public int currentObjectFetchCount() {
        return currentObjectFetchCount;
    }

    /**
     * Sets the batch size. A warning is logged when the size exceeds
     * {@link #DefaultBatchSize}.
     * @param batchSize to be set; must be greater than zero.
     * @throws IllegalArgumentException if batchSize is zero or negative
     */
    public void setBatchSize(int batchSize) {
        if (batchSize <= 0) {
            throw new IllegalArgumentException("Attempting to set a non-positive batch size: " + batchSize);
        }
        if (batchSize > DefaultBatchSize) {
            log.warn("Batches larger than the default batch size of {} might cause JDBC issues.", DefaultBatchSize);
        }
        this.batchSize = batchSize;
    }

    /**
     * If true, each batch will be filtered based on the original qualifier.
     * @see #setFiltersBatches
     * @return whether batches will be re-filtered
     */
    public boolean filtersBatches() {
        return shouldFilterBatches;
    }

    /**
     * If set to true, each batch fetched will be filtered based on the qualifier attached
     * to the original fetch specification. This is useful to cover the case in which the
     * objects may have changed in important ways between the time their primary keys
     * were retrieved and the time they were fetched. Note that when filtering is on,
     * empty arrays may be returned from {@link #nextBatch()}, and null may be returned
     * from {@link #next()} and {@link #nextElement()}.
     *
     * Note that not all qualifiers can be applied in-memory, so this should not be set
     * to true if such a qualifier is being used.
     *
     * Defaults to false.
     *
     * @param newValue whether batches should be re-filtered
     */
    public void setFiltersBatches(boolean newValue) {
        if (!newValue && shouldFilterBatches && cachedBatch != null) {
            //NOTE: This could be made to work "as expected", if we cached un-filtered batches, and only filtered when we're about to return something; but, probably not worth it
            log.warn("Setting filtersBatches from true to false while there is a cached batch--some objects may already have been discarded!");
        }
        shouldFilterBatches = newValue;
    }

    /**
     * Gets the currently set editing context.
     * @return editing context used to fetch against
     */
    public EOEditingContext editingContext() {
        return editingContext;
    }

    /**
     * Sets the editing context used to fetch objects
     * against. It is perfectly fine to change editing
     * contexts between fetching the next batch.
     * @param ec editing context used to fetch against
     */
    public void setEditingContext(EOEditingContext ec) {
        editingContext = ec;
    }

    /**
     * Determines if the iterator has another batch: either a cached,
     * partially consumed batch or more primary keys left to fetch.
     * @return if ok to call {@link #nextBatch()}
     */
    public boolean hasNextBatch() {
        return (cachedBatch != null) || _hasMoreToFetch();
    }

    /**
     * Determines whether there are primary keys that have not been consumed yet.
     * @return true if the fetch offset is still below {@link #count()}
     */
    protected boolean _hasMoreToFetch() {
        return currentObjectFetchCount() < count();
    }

    /**
     * Gets the next batch of enterprise objects for the
     * given fetch specification. Note that the editing
     * context that is set will be used to fetch against.
     * You can swap out a different editing context before
     * calling this method to reduce memory consumption.
     * (However, if you are mixing calls to this method
     * with calls to {@link #next()} or {@link #nextElement()},
     * this method may return a partial batch of already-cached
     * objects, in the editing context which was in place at the
     * time they were fetched.)
     * @return batch of enterprise objects
     * @throws IllegalStateException if there is no cached batch and nothing left to fetch
     */
    public NSArray<E> nextBatch() {
        if (cachedBatch != null) {
            // Hand out what is left of the batch that next() started consuming.
            NSArray<E> remainder = cachedBatch;
            cachedBatch = null;
            return remainder;
        }
        return _fetchNextBatch();
    }

    /**
     * Fetches the next batch unconditionally. Subclasses can
     * override this rather than {@link #nextBatch()}, to get
     * automatic support for the Iterator and Enumeration interfaces.
     * @return next batch
     * @throws IllegalStateException if {@link #hasNextBatch()} is false
     */
    protected NSArray<E> _fetchNextBatch() {
        if (hasNextBatch()) {
            NSRange range = _rangeForOffset(currentObjectFetchCount);
            NSArray<E> nextBatch = batchWithRange(range);
            // Advance by the number of keys requested, not by the number of objects
            // returned, since missing or filtered objects still consume their keys.
            currentObjectFetchCount += range.length();
            return nextBatch;
        }
        throw new IllegalStateException("Iterator is exhausted");
    }

    /** Computes the key range of the batch starting at batchSize() * index. */
    private NSRange _rangeForBatchIndex(int index) {
        int start = batchSize() * index;
        return _rangeForOffset(start);
    }

    /** Computes the key range starting at the given offset, at most one batch long and never negative in length. */
    private NSRange _rangeForOffset(int start) {
        int remaining = count() - start;
        int length = Math.max(0, Math.min(batchSize(), remaining));
        return new NSRange(start, length);
    }

    /**
     * Returns the batch corresponding to the given index, that is, the
     * batch beginning at {@link #batchSize()} * index.
     * Note that if the batch size has been changed after fetching, the
     * batches returned by {@link #nextBatch()} may not line up with the
     * batches returned by this method.
     *
     * Calling this method does not affect the position of the iterator.
     * @param index index of batch to retrieve
     * @return batch of enterprise objects
     */
    public NSArray<E> batchWithIndex(int index) {
        NSRange range = _rangeForBatchIndex(index);
        return batchWithRange(range);
    }

    /**
     * Returns the batch corresponding to the given range.
     *
     * If the supplied range does not fall within the available range,
     * the results returned correspond to the intersection of the two.
     *
     * If no items are found, the supplied range does not intersect the
     * available range, or the supplied range has length zero, then an
     * empty array is returned.
     *
     * Calling this method does not affect the position of the iterator.
     * @param requestedRange range of batch to retrieve
     * @return batch of enterprise objects
     * @throws IllegalStateException if no editing context is set
     */
    public NSArray<E> batchWithRange(NSRange requestedRange) {
        EOEditingContext ec = editingContext();
        if (ec == null) {
            throw new IllegalStateException("ERXFetchSpecificationBatchIterator: Calling nextBatch with a null editing context!");
        }

        NSArray<E> nextBatch = null;
        NSRange range = requestedRange.rangeByIntersectingRange(new NSRange(0, count())); //intersect with legal range

        if (range.length() > 0) {
            NSArray primaryKeys = primaryKeys();
            NSArray primaryKeysToFetch = primaryKeys.subarrayWithRange(range);

            log.debug("Of primaryKey count: {} fetching range: {} which is: {}", primaryKeys.count(), range, primaryKeysToFetch.count());

            // Fetch exactly this slice of keys, carrying over the sort orderings, prefetch
            // paths, refresh flag and raw row key paths of the original fetch specification.
            ERXInQualifier qual = new ERXInQualifier(primaryKeyAttributeName, primaryKeysToFetch);
            EOFetchSpecification batchFS = new EOFetchSpecification(fetchSpecification.entityName(), qual, fetchSpecification.sortOrderings());
            if (fetchSpecification.prefetchingRelationshipKeyPaths() != null) {
                batchFS.setPrefetchingRelationshipKeyPaths(fetchSpecification.prefetchingRelationshipKeyPaths());
            }
            batchFS.setRefreshesRefetchedObjects(fetchSpecification.refreshesRefetchedObjects());
            batchFS.setRawRowKeyPaths(fetchSpecification.rawRowKeyPaths());
            nextBatch = ec.objectsWithFetchSpecification(batchFS);

            if (log.isDebugEnabled()) {
                log.debug("Actually fetched: {} with fetch specification: {}", nextBatch.count(), batchFS);
                if (primaryKeysToFetch.count() > nextBatch.count()) {
                    NSArray missedKeys = ERXArrayUtilities.arrayMinusArray(primaryKeysToFetch, (NSArray)nextBatch.valueForKey(primaryKeyAttributeName));
                    log.debug("Primary Keys that were not found for this batch: {}", missedKeys);
                }
            }

            if (shouldFilterBatches) {
                EOQualifier originalQualifier = fetchSpecification.qualifier();
                if (originalQualifier != null) {
                    nextBatch = EOQualifier.filteredArrayWithQualifier(nextBatch, originalQualifier);
                    log.debug("Filtered batch to: {}", nextBatch.count());
                }
            }
        }

        return nextBatch != null ? nextBatch : NSArray.EmptyArray;
    }

    /**
     * Builds a copy of the iterator's fetch specification that uses the
     * given qualifier, with binding-variable requirement, object locking
     * and prompting after the fetch limit all switched off.
     * @param qualifier qualifier to set on the copy
     * @return the configured copy
     */
    protected EOFetchSpecification batchFetchSpecificationForQualifier(EOQualifier qualifier) {
        EOFetchSpecification fetchSpec = (EOFetchSpecification)fetchSpecification.clone();
        fetchSpec.setQualifier(qualifier);
        fetchSpec.setRequiresAllQualifierBindingVariables(false);
        fetchSpec.setLocksObjects(false);
        fetchSpec.setPromptsAfterFetchLimit(false);
        return fetchSpec;
    }

    /**
     * Method used to fetch the primary keys of the objects
     * for the given fetch specification. Note the sort
     * orderings for the fetch specification are respected.
     * The keys are fetched once and then cached until
     * {@link #reset()} is called.
     *
     * @return array of primary keys to iterate over
     * @throws RuntimeException if no editing context is set, or if the entity
     *         does not have exactly one primary key attribute
     */
    protected NSArray primaryKeys() {
        if (primaryKeys == null) {
            if (editingContext() == null) {
                throw new RuntimeException("Attempting to fetch the primary keys for a null editingContext");
            }

            EOEntity entity = EOUtilities.entityNamed(editingContext(), fetchSpecification.entityName());

            // Same requirement as in the constructor: exactly one primary key attribute.
            if (entity.primaryKeyAttributes().count() != 1) {
                throw new RuntimeException("ERXFetchSpecificationBatchIterator: Currently only single primary key entities are supported.");
            }

            EOFetchSpecification pkFetchSpec = ERXEOControlUtilities.primaryKeyFetchSpecificationForEntity(editingContext(),
                                                                                                           fetchSpecification.entityName(),
                                                                                                           fetchSpecification.qualifier(),
                                                                                                           fetchSpecification.sortOrderings(),
                                                                                                           null);
            pkFetchSpec.setFetchLimit(fetchSpecification.fetchLimit());
            pkFetchSpec.setUsesDistinct(fetchSpecification.usesDistinct());

            // When the SQL helper asks for it, DISTINCT is dropped from the fetch
            // and duplicates are removed from the fetched keys afterwards instead.
            boolean performDistinctInMemory = ERXSQLHelper.newSQLHelper(entity).shouldPerformDistinctInMemory(pkFetchSpec);
            if (performDistinctInMemory) {
                pkFetchSpec.setUsesDistinct(false);
            }

            log.debug("Fetching primary keys.");
            NSArray primaryKeyDictionaries = editingContext().objectsWithFetchSpecification(pkFetchSpec);

            String pkAttributeName = entity.primaryKeyAttributes().lastObject().name();
            primaryKeys = (NSArray)primaryKeyDictionaries.valueForKey(pkAttributeName);
            if (performDistinctInMemory) {
                primaryKeys = ERXArrayUtilities.arrayWithoutDuplicates(primaryKeys);
            }
        }
        return primaryKeys;
    }

    /**
     * Resets the batch iterator so it will refetch its primary keys again
     * and start over from the first batch. Any cached, partially consumed
     * batch is discarded.
     */
    public void reset() {
        primaryKeys = null;
        // Rewind as well; otherwise the stale offset would be applied to the refetched keys.
        currentObjectFetchCount = 0;
        cachedBatch = null;
    }

    /**
     * Implementation of the Iterator interface
     * @return true if there is a cached batch or more batches to fetch
     */
    public boolean hasNext() {
        return hasNextBatch(); //either there are more batches to fetch, or there is a cached batch already
    }

    /**
     * Implementation of the Iterator interface. Objects are served from a
     * cached batch, which is fetched on demand.
     * @return the next object, or null if batch filtering discarded every remaining object
     * @throws IllegalStateException if the iterator is exhausted
     */
    public E next() {
        if (cachedBatch == null) {
            NSArray<E> nextBatch = _fetchNextBatch(); //will raise if no more batches, which is expected behavior if next() is called w/o first checking hasNext()

            while (nextBatch.count() == 0 && hasNextBatch()) { //if filtersBatches, we can get empty batches, so repeat until we get something, or run out
                nextBatch = _fetchNextBatch();
            }

            cachedBatch = nextBatch.mutableClone();
        }

        E nextObject = null;
        if (cachedBatch.count() > 0) {
            nextObject = cachedBatch.removeObjectAtIndex(0);
        }
        // Drop the cache once drained so the next call fetches a fresh batch.
        if (cachedBatch.count() == 0) {
            cachedBatch = null;
        }
        return nextObject;
    }

    /**
     * Implementation of the Iterator interface
     * @throws UnsupportedOperationException always; removal is not implemented
     */
    public void remove() {
        throw new UnsupportedOperationException("Can't remove, not implemented");
    }

    /**
     * Implementation of the Enumeration interface
     * @return same as {@link #hasNext()}
     */
    public boolean hasMoreElements() {
        return hasNext();
    }

    /**
     * Implementation of the Enumeration interface
     * @return same as {@link #next()}
     */
    public E nextElement() {
        return next();
    }

    /*
     * Return useful debug info including fetchspec info.
     *
     * (non-Javadoc)
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        ToStringBuilder b = new ToStringBuilder(this);
        // Read the field directly: calling primaryKeys() here would trigger a fetch.
        b.append("PKs Initialized", primaryKeys == null ? "No" : "Yes");
        if (primaryKeys != null) {
            b.append("Count", primaryKeys.count());
        }
        b.append("entityName", fetchSpecification.entityName());
        b.append("qualifier", fetchSpecification.qualifier());
        b.append("isDeep", fetchSpecification.isDeep());
        b.append("usesDistinct", fetchSpecification.usesDistinct());
        b.append("sortOrderings", fetchSpecification.sortOrderings());
        b.append("hints", fetchSpecification.hints());
        b.append("prefetchingRelationshipKeyPaths", fetchSpecification.prefetchingRelationshipKeyPaths());
        return b.toString();
    }
}