package com.alipay.tiansuan.solrplugin;
import java.io.IOException;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Set;
import org.apache.lucene.document.NumericField.DataType;
import org.apache.lucene.search.FilteredTermEnum;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import com.alipay.tiansuan.solrplugin.HdfsToSet.TransType;
/**
 * A {@link MultiTermQuery} over a numeric field whose candidate values are not
 * given inline but are obtained from {@code HdfsToSet.toset(file, trans)}.
 *
 * <p>For every value in that set the query asks {@link NumericUtils} to split
 * the degenerate range {@code [value, value]} into prefix-coded term bounds
 * and then enumerates the index terms that equal one of the recorded upper
 * bounds.
 *
 * <p>The rewrite method is chosen in the constructor from the data type and
 * the precision step.
 *
 * @param <T> numeric type of the values in the loaded set
 */
public final class NumericSelectInQuery<T extends Number> extends
        MultiTermQuery {
    private static final long serialVersionUID = 1L;

    /** Interned field name; not final because readObject() re-interns it. */
    private String field;
    private final int precisionStep;
    private final DataType dataType;
    /** Location handed to {@code HdfsToSet.toset} when an enumerator is created. */
    private final String file;
    /** Converter handed to {@code HdfsToSet.toset} together with {@link #file}. */
    private final TransType<T> trans;
    /** Term on the query field, used as a factory for terms with other texts. */
    private final Term termTemplate;

    /**
     * Creates the query.
     *
     * @param field         name of the numeric field to search
     * @param precisionStep precision step used when splitting ranges; {@code 0}
     *                      is replaced by {@link Integer#MAX_VALUE}
     * @param dataType      numeric type of the field (LONG, DOUBLE, INT or FLOAT)
     * @param trans         converter passed to {@code HdfsToSet.toset}
     * @param file          location passed to {@code HdfsToSet.toset}
     * @throws IllegalArgumentException if the (normalized) precision step is
     *                                  smaller than 1 or the data type is not
     *                                  one of the four supported ones
     */
    public NumericSelectInQuery(final String field, int precisionStep,
            final DataType dataType, TransType<T> trans, String file) {
        // 0 is accepted as a shorthand for "largest possible step".
        if (precisionStep == 0) {
            precisionStep = Integer.MAX_VALUE;
        }
        if (precisionStep < 1) {
            throw new IllegalArgumentException("precisionStep must be >=1");
        }
        this.field = StringHelper.intern(field);
        this.termTemplate = new Term(field);
        this.precisionStep = precisionStep;
        this.dataType = dataType;
        this.file = file;
        this.trans = trans;

        // Steps above the per-type threshold always use the filter rewrite;
        // smaller steps use the automatic constant-score rewrite.
        switch (dataType) {
        case LONG:
        case DOUBLE:
            setRewriteMethod((precisionStep > 6)
                    ? CONSTANT_SCORE_FILTER_REWRITE
                    : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
            break;
        case INT:
        case FLOAT:
            setRewriteMethod((precisionStep > 8)
                    ? CONSTANT_SCORE_FILTER_REWRITE
                    : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
            break;
        default:
            // should never happen
            throw new IllegalArgumentException("Invalid numeric DataType");
        }
    }

    /**
     * Creates a new term enumerator for the given reader. The enumerator loads
     * the value set during construction, so the set is loaded once per call.
     *
     * @param reader index reader whose terms are enumerated
     * @return an enumerator already positioned on the first matching term, if any
     * @throws IOException if loading the value set or reading terms fails
     */
    @Override
    protected FilteredTermEnum getEnum(final IndexReader reader)
            throws IOException {
        return new NumericRangeTermEnum(reader);
    }

    /** Returns the field name for this query */
    public String getField() {
        return field;
    }

    /** Returns the precision step. */
    public int getPrecisionStep() {
        return precisionStep;
    }

    /**
     * Restores the object from a stream and re-interns the field name, since
     * a deserialized string is not the interned instance.
     */
    private void readObject(java.io.ObjectInputStream in)
            throws java.io.IOException, ClassNotFoundException {
        in.defaultReadObject();
        field = StringHelper.intern(field);
    }

    /**
     * The upper bounds recorded for one lower bound: the set of accepted
     * prefix-coded texts plus the greatest of them as a {@link Term}, which
     * marks where scanning for this lower bound may stop.
     */
    public final class Bounds {
        /** Prefix-coded upper-bound texts accepted for this lower bound. */
        HashSet<String> bound = new HashSet<String>();
        /** Greatest upper bound seen so far, or {@code null} if none yet. */
        Term max = null;

        /**
         * Raises {@link #max} to the term for {@code s} unless the current
         * maximum is already strictly greater.
         *
         * @param s prefix-coded upper-bound text
         */
        public void setmax(String s) {
            final Term candidate = termTemplate.createTerm(s);
            if (max == null || max.compareTo(candidate) <= 0) {
                this.max = candidate;
            }
        }
    }

    /**
     * Term enumerator that visits, one lower bound after another, the index
     * terms whose text is among the upper bounds recorded for that lower bound.
     */
    private final class NumericRangeTermEnum extends FilteredTermEnum {
        private final IndexReader reader;
        /** Lower bound text -> upper bounds recorded for it. */
        private final LinkedHashMap<String, Bounds> rangeBounds =
                new LinkedHashMap<String, Bounds>();
        /** Lower bounds still to be visited, in first-seen order. */
        private final LinkedList<String> lowBounds = new LinkedList<String>();
        /** Values returned by {@code HdfsToSet.toset(file, trans)}. */
        private Set<T> values;
        /** Bounds of the sub-range currently being scanned. */
        private Bounds maxset;
        private boolean isinit = false;

        /**
         * Builds the enumerator and positions it on the first matching term.
         *
         * @param reader index reader to take terms from
         * @throws IOException if initialization fails
         */
        NumericRangeTermEnum(final IndexReader reader) throws IOException {
            this.reader = reader;
            this.init();
        }

        /**
         * Stores one (lower, upper) bound pair reported by a range builder.
         * A lower bound is queued for visiting only the first time it is seen.
         */
        private void recordRange(String minPrefixCoded, String maxPrefixCoded) {
            Bounds bounds = rangeBounds.get(minPrefixCoded);
            if (bounds == null) {
                bounds = new Bounds();
                rangeBounds.put(minPrefixCoded, bounds);
                lowBounds.add(minPrefixCoded);
            }
            bounds.bound.add(maxPrefixCoded);
            bounds.setmax(maxPrefixCoded);
        }

        /** Records the bounds of every value for LONG and DOUBLE fields. */
        private void loopDouble() {
            // The builder is stateless, so a single instance serves all values.
            final NumericUtils.LongRangeBuilder builder =
                    new NumericUtils.LongRangeBuilder() {
                        @Override
                        public final void addRange(String minPrefixCoded,
                                String maxPrefixCoded) {
                            recordRange(minPrefixCoded, maxPrefixCoded);
                        }
                    };
            for (T value : values) {
                final long bound;
                if (dataType == DataType.LONG) {
                    bound = value.longValue();
                } else {
                    assert dataType == DataType.DOUBLE;
                    bound = NumericUtils.doubleToSortableLong(value.doubleValue());
                }
                // Same value as lower and upper limit: a single-point range.
                NumericUtils.splitLongRange(builder, precisionStep, bound, bound);
            }
        }

        /** Records the bounds of every value for INT and FLOAT fields. */
        private void loopfloat() {
            // The builder is stateless, so a single instance serves all values.
            final NumericUtils.IntRangeBuilder builder =
                    new NumericUtils.IntRangeBuilder() {
                        @Override
                        public final void addRange(String minPrefixCoded,
                                String maxPrefixCoded) {
                            recordRange(minPrefixCoded, maxPrefixCoded);
                        }
                    };
            for (T value : values) {
                final int bound;
                if (dataType == DataType.INT) {
                    bound = value.intValue();
                } else {
                    assert dataType == DataType.FLOAT;
                    bound = NumericUtils.floatToSortableInt(value.floatValue());
                }
                // Same value as lower and upper limit: a single-point range.
                NumericUtils.splitIntRange(builder, precisionStep, bound, bound);
            }
        }

        /**
         * Loads the value set, records the bounds of every value and advances
         * to the first matching term. Subsequent calls do nothing.
         *
         * @throws IOException if loading the set or reading terms fails
         * @throws IllegalArgumentException if the data type is unsupported
         */
        public void init() throws IOException {
            if (this.isinit) {
                return;
            }
            isinit = true;

            final HdfsToSet<T> loader = new HdfsToSet<T>();
            this.values = loader.toset(file, trans);

            switch (dataType) {
            case LONG:
            case DOUBLE:
                this.loopDouble();
                break;
            case INT:
            case FLOAT:
                this.loopfloat();
                break;
            default:
                // should never happen
                throw new IllegalArgumentException("Invalid numeric DataType");
            }

            // Position on the first match so term() is valid right away.
            this.next();
        }

        @Override
        public float difference() {
            return 1.0f;
        }

        /** this is a dummy, it is not used by this class. */
        @Override
        protected boolean endEnum() {
            throw new UnsupportedOperationException("not implemented");
        }

        /** this is a dummy, it is not used by this class. */
        @Override
        protected void setEnum(TermEnum tenum) {
            throw new UnsupportedOperationException("not implemented");
        }

        /**
         * Accepts a term only if it belongs to the query field and its text is
         * one of the upper bounds recorded for the sub-range currently being
         * scanned. A <code>false</code> result does not by itself end the
         * sub-range; that decision is made by {@link #isEndTerm(Term)}.
         */
        @Override
        protected boolean termCompare(Term term) {
            if (!term.field().equals(field)) {
                return false;
            }
            return isContainsTerm(term);
        }

        /** Tells whether the term text is an accepted upper bound of the current sub-range. */
        private boolean isContainsTerm(Term term) {
            return maxset != null && maxset.bound.contains(term.text());
        }

        /**
         * Tells whether scanning of the current sub-range is finished: the term
         * is on another field or sorts after the greatest recorded upper bound.
         */
        private boolean isEndTerm(Term term) {
            if (!term.field().equals(field)) {
                return true;
            }
            return maxset != null && term.compareTo(maxset.max) > 0;
        }

        @Override
        public Term term() {
            return currentTerm;
        }

        /**
         * Advances to the next matching term. The current sub-range is scanned
         * first; once it is exhausted the remaining lower bounds are visited in
         * order, each with a fresh underlying term enumeration.
         *
         * @return {@code true} if positioned on a matching term, {@code false}
         *         if no lower bounds are left
         * @throws IOException if reading terms fails
         */
        @Override
        public boolean next() throws IOException {
            // Continue inside the sub-range we are already in, if any.
            if (this.currentTerm != null && this.getterms()) {
                return true;
            }
            currentTerm = null;

            while (lowBounds.size() >= 1) {
                // Release the enumeration of the sub-range just finished.
                if (actualEnum != null) {
                    actualEnum.close();
                    actualEnum = null;
                }

                final String lowerBound = this.lowBounds.removeFirst();
                maxset = rangeBounds.get(lowerBound);

                // Seek to the first index term at or after the lower bound.
                actualEnum = reader.terms(termTemplate.createTerm(lowerBound));
                currentTerm = actualEnum.term();
                if (currentTerm != null
                        && (termCompare(currentTerm) || this.getterms())) {
                    return true;
                }

                // Nothing matched in this sub-range; try the next lower bound.
                currentTerm = null;
            }
            return false;
        }

        /**
         * Steps the underlying enumeration forward until a term accepted by
         * {@link #isContainsTerm(Term)} is found or the current sub-range ends.
         *
         * @return {@code true} if {@code currentTerm} now holds an accepted term
         * @throws IOException if reading terms fails
         */
        public boolean getterms() throws IOException {
            while (actualEnum.next()) {
                currentTerm = actualEnum.term();
                if (isEndTerm(currentTerm)) {
                    break;
                }
                if (isContainsTerm(currentTerm)) {
                    return true;
                }
            }
            return false;
        }

        /** Closes the enumeration to further activity, freeing resources. */
        @Override
        public void close() throws IOException {
            rangeBounds.clear();
            lowBounds.clear();
            super.close();
        }
    }

    /**
     * Returns {@link #toString()} followed by {@code ",field="} and the given
     * argument.
     */
    @Override
    public String toString(String field) {
        return toString() + ",field=" + field;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = super.hashCode();
        result = prime * result
                + ((dataType == null) ? 0 : dataType.hashCode());
        result = prime * result + ((field == null) ? 0 : field.hashCode());
        result = prime * result + ((file == null) ? 0 : file.hashCode());
        result = prime * result + precisionStep;
        result = prime * result + ((trans == null) ? 0 : trans.hashCode());
        return result;
    }

    /**
     * Two queries are equal when the superclass considers them equal, they are
     * of the same class, and data type, field, file, precision step and
     * converter all match.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!super.equals(obj)) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        // Wildcard instead of a raw type: the type argument is irrelevant here.
        final NumericSelectInQuery<?> other = (NumericSelectInQuery<?>) obj;
        return dataType == other.dataType
                && nullSafeEquals(field, other.field)
                && nullSafeEquals(file, other.file)
                && precisionStep == other.precisionStep
                && nullSafeEquals(trans, other.trans);
    }

    /** Null-tolerant {@code equals}: two nulls are equal, null never equals non-null. */
    private static boolean nullSafeEquals(Object left, Object right) {
        return (left == null) ? (right == null) : left.equals(right);
    }

    @Override
    public String toString() {
        return "NumericSelectInQuery [field=" + field + ", precisionStep="
                + precisionStep + ", dataType=" + dataType + ", file=" + file
                + ", trans=" + trans + "]";
    }
}