package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
/**
* Abstract class for enumerating a subset of all terms.
*
* <p>Term enumerations are always ordered by
* {@link #getComparator}. Each term in the enumeration is
* greater than all that precede it.</p>
* <p><em>Please note:</em> Consumers of this enum cannot
* call {@code seek()}: the enum is forward-only and throws
* {@link UnsupportedOperationException} when a seeking method
* is called.</p>
*/
public abstract class FilteredTermsEnum extends TermsEnum {

  /**
   * Verdict returned by {@link #accept(BytesRef)} for a single term.
   * {@code YES} and {@code YES_AND_SEEK} make {@link #next} return the term,
   * {@code NO} and {@code NO_AND_SEEK} make it skip the term, and {@code END}
   * makes it return {@code null}. The {@code *_AND_SEEK} values additionally
   * cause {@link #nextSeekTerm} to be consulted before the following term is
   * examined.
   * @see #accept(BytesRef)
   */
  protected static enum AcceptStatus {
    YES,
    YES_AND_SEEK,
    NO,
    NO_AND_SEEK,
    END
  }

  /** The wrapped enum; {@code null} means this enum enumerates nothing. */
  private final TermsEnum delegate;

  /** Term handed out once by the default {@link #nextSeekTerm}, then cleared. */
  private BytesRef pendingInitialSeek = null;

  /** When {@code true}, the next step asks {@link #nextSeekTerm} for a seek target. */
  private boolean seekPending = true;

  /** Term most recently obtained from the delegate, or {@code null} before the first step. */
  private BytesRef current = null;

  /** Passed through to the delegate's {@code seek(BytesRef, boolean)}. */
  private boolean termsCacheEnabled = false;

  /**
   * Decides what happens with the given term: accept it, reject it, or end
   * the enumeration, optionally requesting a seek afterwards.
   *
   * @param term the term the underlying enum is currently positioned at
   * @return the verdict for {@code term}
   */
  protected abstract AcceptStatus accept(BytesRef term) throws IOException;

  /**
   * Creates a filtered {@link TermsEnum} over the terms that
   * {@link MultiFields#getTerms} reports for the given reader and field.
   * If no terms are reported, this enum is empty.
   */
  public FilteredTermsEnum(final IndexReader reader, final String field) throws IOException {
    final Terms fieldTerms = MultiFields.getTerms(reader, field);
    this.delegate = (fieldTerms == null) ? null : fieldTerms.iterator();
  }

  /**
   * Creates a filtered {@link TermsEnum} on top of an existing terms enum.
   * @param tenum the terms enumeration to filter; if {@code null}, this enum is empty.
   */
  public FilteredTermsEnum(final TermsEnum tenum) {
    this.delegate = tenum;
  }

  /**
   * Sets the {@link BytesRef} that the default {@link #nextSeekTerm}
   * implementation returns on its first invocation, i.e. the term to seek
   * to before iteration starts. When it is left at {@code null} (the
   * default), the default {@link #nextSeekTerm} returns {@code null} right
   * away and the enum is empty.
   * <p>This only has an effect as long as {@link #nextSeekTerm} is not
   * overridden.
   */
  protected final void setInitialSeekTerm(BytesRef term) throws IOException {
    pendingInitialSeek = term;
  }

  /**
   * Supplies the term the underlying enum should be repositioned to.
   * {@link #next} calls this on its first invocation and again after
   * {@link #accept} has returned {@link AcceptStatus#YES_AND_SEEK} or
   * {@link AcceptStatus#NO_AND_SEEK}. Returning {@code null} ends the
   * enumeration.
   * <p>The default implementation returns the initial seek term exactly
   * once and {@code null} from then on, so it never repositions the
   * iterator after the initial seek.
   * <p>Override this to reposition the iterator during enumeration.
   * <p><em>Please note:</em> The returned term should always be greater than
   * the last enumerated term, otherwise this enum violates the ordering
   * contract of TermsEnums.
   *
   * @param currentTerm {@code null} on the first call, afterwards the term
   *        the underlying enum was last positioned at
   */
  protected BytesRef nextSeekTerm(final BytesRef currentTerm) throws IOException {
    final BytesRef seekTarget = pendingInitialSeek;
    pendingInitialSeek = null;
    return seekTarget;
  }

  /** Expert: enable or disable the terms cache when seeking. */
  protected final void setUseTermsCache(boolean useTermsCache) {
    termsCacheEnabled = useTermsCache;
  }

  /** Expert: returns whether the terms cache is used when seeking. */
  protected final boolean getUseTermsCache() {
    return termsCacheEnabled;
  }

  /**
   * Returns the related attributes. When a delegate {@code TermsEnum} is
   * present, its {@link AttributeSource} is returned so that both share
   * the same attributes.
   */
  @Override
  public AttributeSource attributes() {
    // With a delegate, share its attributes. Without one, fall back to the
    // superclass instance so subclasses using attributes do not hit an NPE.
    if (delegate != null) {
      return delegate.attributes();
    }
    return super.attributes();
  }

  @Override
  public BytesRef term() throws IOException {
    return requireDelegate().term();
  }

  /** Returns the delegate's comparator, or {@code null} when there is no delegate. */
  @Override
  public Comparator<BytesRef> getComparator() throws IOException {
    if (delegate == null) {
      return null;
    }
    return delegate.getComparator();
  }

  @Override
  public int docFreq() {
    return requireDelegate().docFreq();
  }

  /** This enum does not support seeking!
   * @throws UnsupportedOperationException
   */
  @Override
  public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
    final String owner = getClass().getName();
    throw new UnsupportedOperationException(owner + " does not support seeking");
  }

  /** This enum does not support seeking!
   * @throws UnsupportedOperationException
   */
  @Override
  public SeekStatus seek(long ord) throws IOException {
    final String owner = getClass().getName();
    throw new UnsupportedOperationException(owner + " does not support seeking");
  }

  @Override
  public long ord() throws IOException {
    return requireDelegate().ord();
  }

  @Override
  public DocsEnum docs(Bits bits, DocsEnum reuse) throws IOException {
    return requireDelegate().docs(bits, reuse);
  }

  @Override
  public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException {
    return requireDelegate().docsAndPositions(bits, reuse);
  }

  /**
   * Steps to the next term for which {@link #accept} answers
   * {@link AcceptStatus#YES} or {@link AcceptStatus#YES_AND_SEEK}.
   *
   * @return the accepted term, or {@code null} when there is no delegate,
   *         no further seek target, the delegate is exhausted, or
   *         {@link #accept} answered {@link AcceptStatus#END}
   */
  @Override
  public BytesRef next() throws IOException {
    if (delegate == null) {
      return null;
    }
    while (true) {
      if (!advance()) {
        // no more terms to seek to, or the delegate is exhausted
        return null;
      }
      final AcceptStatus verdict = accept(current);
      switch (verdict) {
        case YES:
          return current;
        case YES_AND_SEEK:
          // hand out the term now, reposition on the following call
          seekPending = true;
          return current;
        case NO_AND_SEEK:
          // skip the term and reposition before looking at the next one
          seekPending = true;
          break;
        case END:
          // the subclass asked to stop enumerating
          return null;
        case NO:
        default:
          // skip the term and simply step forward
          break;
      }
    }
  }

  /**
   * Moves the delegate to the next candidate term, either by seeking to the
   * target supplied by {@link #nextSeekTerm} or by stepping forward, and
   * records that term in {@code current}.
   *
   * @return {@code false} if there is no seek target, the seek hit the end,
   *         or the delegate has no further term; {@code true} otherwise
   */
  private boolean advance() throws IOException {
    if (!seekPending) {
      current = delegate.next();
      return current != null;
    }
    seekPending = false;
    final BytesRef target = nextSeekTerm(current);
    if (target == null) {
      return false;
    }
    if (delegate.seek(target, termsCacheEnabled) == SeekStatus.END) {
      return false;
    }
    current = delegate.term();
    return true;
  }

  /** Returns the delegate after asserting that it is present. */
  private TermsEnum requireDelegate() {
    assert delegate != null;
    return delegate;
  }
}