/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.noggit.CharArr;
import org.apache.solr.analysis.*;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.response.XMLWriter;
import org.apache.solr.search.MutableValueDate;
import org.apache.solr.search.MutableValueLong;
import org.apache.solr.search.QParser;
import org.apache.solr.search.function.*;
import java.io.IOException;
import java.util.Locale;
import java.util.Map;
import java.util.Date;
/**
* Provides field types that support Lucene's Trie Range Queries.
* See {@link org.apache.lucene.search.NumericRangeQuery} for more details.
* It supports integer, float, long, double and date types.
* <p/>
* For each number being added to this field, multiple terms are generated as per the algorithm described in the above
* link. The possible number of terms increases dramatically with higher precision steps (factor 2^precisionStep). For
* the fast range search to work, trie fields must be indexed.
* <p/>
* Trie fields are sortable in numerical order and can be used in function queries.
* <p/>
* Note that if you use a precisionStep of 32 for int/float and 64 for long/double, then multiple terms will not be
* generated, range search will be no faster than any other number field, but sorting will still be possible.
*
* @version $Id: TrieField.java 987690 2010-08-21 02:36:09Z yonik $
* @see org.apache.lucene.search.NumericRangeQuery
* @since solr 1.4
*/
public class TrieField extends FieldType {
/** Precision step used when the schema arguments contain no "precisionStep" entry. */
public static final int DEFAULT_PRECISION_STEP = 8;
/** Precision step as configured: init() overwrites the default with the parsed "precisionStep" argument when present. */
protected int precisionStepArg = TrieField.DEFAULT_PRECISION_STEP; // the one passed in or defaulted
/** Effective precision step: init() copies precisionStepArg and replaces values &lt;= 0 or &gt;= 64 with Integer.MAX_VALUE. */
protected int precisionStep; // normalized
/** Numeric flavour of this field; init() assigns it from the "type" argument when that argument is given. */
protected TrieTypes type;
/**
* Used for handling date types following the same semantics as DateField
*/
static final DateField dateField = new DateField();
/**
 * Configures this field type from its schema arguments.
 * <p/>
 * The optional <code>precisionStep</code> and <code>type</code> entries are removed from
 * <code>args</code> and consumed here. The configured step is kept as-is in
 * {@link #precisionStepArg}; the normalized copy in {@link #precisionStep} becomes
 * <code>Integer.MAX_VALUE</code> whenever the configured value is &lt;= 0 or &gt;= 64.
 * Finally the index-time and query-time analyzers are built around a
 * <code>TrieTokenizerFactory</code>.
 *
 * @param schema the schema this field type belongs to (not used by this method)
 * @param args   the field type arguments; "precisionStep" and "type" are removed from it
 * @throws SolrException         if "type" does not name a {@link TrieTypes} constant (case-insensitive)
 * @throws NumberFormatException if "precisionStep" is present but is not a parsable integer
 */
@Override
protected void init(IndexSchema schema, Map<String, String> args) {
  String p = args.remove("precisionStep");
  if (p != null) {
    precisionStepArg = Integer.parseInt(p);
  }
  // normalize the precisionStep
  precisionStep = precisionStepArg;
  if (precisionStep<=0 || precisionStep>=64) precisionStep=Integer.MAX_VALUE;
  String t = args.remove("type");
  if (t != null) {
    try {
      // Locale.ENGLISH keeps the upper-casing independent of the JVM default locale
      type = TrieTypes.valueOf(t.toUpperCase(Locale.ENGLISH));
    } catch (IllegalArgumentException e) {
      // report the rejected value itself so the schema mistake can be located
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
          "Invalid type specified in schema.xml for field: " + args.get("name") + " (type=" + t + ")", e);
    }
  }
  CharFilterFactory[] filterFactories = new CharFilterFactory[0];
  TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
  analyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, precisionStep), tokenFilterFactories);
  // for query time we only need one token, so we use the biggest possible precisionStep:
  queryAnalyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, Integer.MAX_VALUE), tokenFilterFactories);
}
/**
 * Decodes the stored binary value of a field into a Java object.
 *
 * @param f the stored field to decode
 * @return an Integer, Float, Long, Double or Date matching the configured type, or the
 *         schema-mismatch marker string when the field has no binary value
 * @throws SolrException if the configured type is not handled
 */
@Override
public Object toObject(Fieldable f) {
  final byte[] stored = f.getBinaryValue();
  if (stored == null) {
    return badFieldString(f);
  }
  final Object decoded;
  switch (type) {
    case INTEGER:
      decoded = toInt(stored);
      break;
    case FLOAT:
      decoded = toFloat(stored);
      break;
    case LONG:
      decoded = toLong(stored);
      break;
    case DOUBLE:
      decoded = toDouble(stored);
      break;
    case DATE:
      // dates are stored as a long that is handed to the Date constructor
      decoded = new Date(toLong(stored));
      break;
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
  }
  return decoded;
}
/**
 * Builds the sort specification for a field of this type, choosing the
 * NumericUtils-based FieldCache parser that matches the configured type.
 *
 * @param field the schema field to sort on
 * @param top   passed through as the third SortField constructor argument
 * @return the SortField for the given field
 * @throws SolrException if the configured type is not handled
 */
public SortField getSortField(SchemaField field, boolean top) {
  final SortField sort;
  switch (type) {
    case INTEGER:
      sort = new SortField(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER, top);
      break;
    case FLOAT:
      sort = new SortField(field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER, top);
      break;
    case LONG:
    case DATE: // dates share the long parser
      sort = new SortField(field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER, top);
      break;
    case DOUBLE:
      sort = new SortField(field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, top);
      break;
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name);
  }
  return sort;
}
/**
 * Creates the function-query value source for a field of this type.
 * Each numeric type gets its matching field source; dates get a
 * TrieDateFieldSource built on the long parser.
 *
 * @param field the schema field whose values are exposed
 * @return the value source for the given field
 * @throws SolrException if the configured type is not handled
 */
public ValueSource getValueSource(SchemaField field) {
  final ValueSource source;
  switch (type) {
    case INTEGER:
      source = new IntFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_INT_PARSER);
      break;
    case FLOAT:
      source = new FloatFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_FLOAT_PARSER);
      break;
    case DATE:
      source = new TrieDateFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER);
      break;
    case LONG:
      source = new LongFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_LONG_PARSER);
      break;
    case DOUBLE:
      source = new DoubleFieldSource(field.getName(), FieldCache.NUMERIC_UTILS_DOUBLE_PARSER);
      break;
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name);
  }
  return source;
}
/**
 * Writes the stored value of a field to an XML response, using the writer
 * method that matches the configured type. A field without a binary value
 * is written as the schema-mismatch marker string instead.
 *
 * @param xmlWriter the XML writer to emit to
 * @param name      the name to write the value under
 * @param f         the stored field
 * @throws IOException   declared for the underlying writer calls
 * @throws SolrException if the configured type is not handled
 */
public void write(XMLWriter xmlWriter, String name, Fieldable f) throws IOException {
  final byte[] stored = f.getBinaryValue();
  if (stored == null) {
    xmlWriter.writeStr(name, badFieldString(f));
    return;
  }
  switch (type) {
    case INTEGER:
      xmlWriter.writeInt(name, toInt(stored));
      return;
    case FLOAT:
      xmlWriter.writeFloat(name, toFloat(stored));
      return;
    case LONG:
      xmlWriter.writeLong(name, toLong(stored));
      return;
    case DOUBLE:
      xmlWriter.writeDouble(name, toDouble(stored));
      return;
    case DATE:
      xmlWriter.writeDate(name, new Date(toLong(stored)));
      return;
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
  }
}
/**
 * Writes the stored value of a field to a text response, using the writer
 * method that matches the configured type. A field without a binary value
 * is written as the schema-mismatch marker string instead.
 *
 * @param writer the text response writer to emit to
 * @param name   the name to write the value under
 * @param f      the stored field
 * @throws IOException   declared for the underlying writer calls
 * @throws SolrException if the configured type is not handled
 */
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
  final byte[] stored = f.getBinaryValue();
  if (stored == null) {
    writer.writeStr(name, badFieldString(f), true);
    return;
  }
  switch (type) {
    case INTEGER:
      writer.writeInt(name, toInt(stored));
      return;
    case FLOAT:
      writer.writeFloat(name, toFloat(stored));
      return;
    case LONG:
      writer.writeLong(name, toLong(stored));
      return;
    case DOUBLE:
      writer.writeDouble(name, toDouble(stored));
      return;
    case DATE:
      writer.writeDate(name, new Date(toLong(stored)));
      return;
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
  }
}
/**
* Always reports this field type as tokenized.
*
* @return <code>true</code>, unconditionally
*/
@Override
public boolean isTokenized() {
return true;
}
/**
* Always answers <code>false</code>.
* NOTE(review): presumably this tells callers that the field cache for this type holds at most one
* value per document - confirm against the FieldType contract.
*
* @return <code>false</code>, unconditionally
*/
@Override
public boolean multiValuedFieldCache() {
return false;
}
/**
* Returns the precision step exactly as configured (the "precisionStep" argument, or
* DEFAULT_PRECISION_STEP when none was given). This is the raw value: the normalized step that
* createField and getRangeQuery actually use is Integer.MAX_VALUE whenever the configured value
* is &lt;= 0 or &gt;= 64.
*
* @return the configured, non-normalized precisionStep of this field type
*/
public int getPrecisionStep() {
return precisionStepArg;
}
/**
* @return the type of this field, i.e. the current value of the <code>type</code> member;
* <code>null</code> if it was never assigned
*/
public TrieTypes getType() {
return type;
}
/**
 * Builds a numeric range query over the given field using the normalized precision step.
 * A <code>null</code> bound is passed on as <code>null</code>; non-null bounds are parsed
 * according to the configured type (dates go through DateField date math and are queried
 * as their long time value).
 *
 * @param parser       the query parser (not used by this method)
 * @param field        the schema field to query
 * @param min          lower bound in external form, or <code>null</code>
 * @param max          upper bound in external form, or <code>null</code>
 * @param minInclusive whether the lower bound is inclusive
 * @param maxInclusive whether the upper bound is inclusive
 * @return the range query
 * @throws SolrException if the configured type is not handled
 */
@Override
public Query getRangeQuery(QParser parser, SchemaField field, String min, String max, boolean minInclusive, boolean maxInclusive) {
  final String fieldName = field.getName();
  switch (type) {
    case INTEGER: {
      final Integer lower = (min != null) ? Integer.parseInt(min) : null;
      final Integer upper = (max != null) ? Integer.parseInt(max) : null;
      return NumericRangeQuery.newIntRange(fieldName, precisionStep, lower, upper, minInclusive, maxInclusive);
    }
    case FLOAT: {
      final Float lower = (min != null) ? Float.parseFloat(min) : null;
      final Float upper = (max != null) ? Float.parseFloat(max) : null;
      return NumericRangeQuery.newFloatRange(fieldName, precisionStep, lower, upper, minInclusive, maxInclusive);
    }
    case LONG: {
      final Long lower = (min != null) ? Long.parseLong(min) : null;
      final Long upper = (max != null) ? Long.parseLong(max) : null;
      return NumericRangeQuery.newLongRange(fieldName, precisionStep, lower, upper, minInclusive, maxInclusive);
    }
    case DOUBLE: {
      final Double lower = (min != null) ? Double.parseDouble(min) : null;
      final Double upper = (max != null) ? Double.parseDouble(max) : null;
      return NumericRangeQuery.newDoubleRange(fieldName, precisionStep, lower, upper, minInclusive, maxInclusive);
    }
    case DATE: {
      // date bounds are resolved via date math and queried as long time values
      final Long lower = (min != null) ? dateField.parseMath(null, min).getTime() : null;
      final Long upper = (max != null) ? dateField.parseMath(null, max).getTime() : null;
      return NumericRangeQuery.newLongRange(fieldName, precisionStep, lower, upper, minInclusive, maxInclusive);
    }
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field");
  }
}
/**
 * Assembles the first four bytes of the array into an int, most significant byte first.
 *
 * @param arr at least four bytes
 * @return the big-endian int value of arr[0..3]
 */
static int toInt(byte[] arr) {
  int value = 0;
  for (int i = 0; i < 4; i++) {
    // mask each byte so that its sign bit is not smeared over the higher bits
    value = (value << 8) | (arr[i] & 0xff);
  }
  return value;
}
/**
 * Assembles the first eight bytes of the array into a long, most significant byte first.
 *
 * @param arr at least eight bytes
 * @return the big-endian long value of arr[0..7]
 */
static long toLong(byte[] arr) {
  long value = 0L;
  for (int i = 0; i < 8; i++) {
    // mask each byte so that its sign bit is not smeared over the higher bits
    value = (value << 8) | (arr[i] & 0xffL);
  }
  return value;
}
/**
 * Decodes four bytes into a float by reinterpreting their int value as IEEE 754 bits.
 *
 * @param arr at least four bytes
 * @return the float whose bit pattern is toInt(arr)
 */
static float toFloat(byte[] arr) {
  final int bits = toInt(arr);
  return Float.intBitsToFloat(bits);
}
/**
 * Decodes eight bytes into a double by reinterpreting their long value as IEEE 754 bits.
 *
 * @param arr at least eight bytes
 * @return the double whose bit pattern is toLong(arr)
 */
static double toDouble(byte[] arr) {
  final long bits = toLong(arr);
  return Double.longBitsToDouble(bits);
}
/**
 * Encodes an int as four bytes, most significant byte first.
 *
 * @param val the value to encode
 * @return a new four-byte array holding val in big-endian order
 */
static byte[] toArr(int val) {
  final byte[] out = new byte[4];
  for (int i = 0, shift = 24; i < out.length; i++, shift -= 8) {
    out[i] = (byte) (val >>> shift);
  }
  return out;
}
/**
 * Encodes a long as eight bytes, most significant byte first.
 *
 * @param val the value to encode
 * @return a new eight-byte array holding val in big-endian order
 */
static byte[] toArr(long val) {
  final byte[] out = new byte[8];
  for (int i = 0, shift = 56; i < out.length; i++, shift -= 8) {
    out[i] = (byte) (val >>> shift);
  }
  return out;
}
/**
 * Encodes a float as the four big-endian bytes of its raw IEEE 754 bit pattern.
 *
 * @param val the value to encode
 * @return a new four-byte array
 */
static byte[] toArr(float val) {
  final int rawBits = Float.floatToRawIntBits(val);
  return toArr(rawBits);
}
/**
 * Encodes a double as the eight big-endian bytes of its raw IEEE 754 bit pattern.
 *
 * @param val the value to encode
 * @return a new eight-byte array
 */
static byte[] toArr(double val) {
  final long rawBits = Double.doubleToRawLongBits(val);
  return toArr(rawBits);
}
/**
 * Converts a stored field into its human-readable form; for this type that is
 * exactly the external form produced by {@link #toExternal(Fieldable)}.
 *
 * @param f the stored field
 * @return the readable string
 */
@Override
public String storedToReadable(Fieldable f) {
  final String readable = toExternal(f);
  return readable;
}
/**
 * Converts a readable value into its indexed term text, returned as a String.
 * The term bytes are produced by the BytesRef-based overload and then decoded as UTF-8.
 *
 * @param val the readable value
 * @return the indexed form as a String
 */
@Override
public String readableToIndexed(String val) {
  // TODO: Numeric should never be handled as String, that may break in future lucene versions! Change to use BytesRef for term texts!
  final BytesRef termBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
  readableToIndexed(val, termBytes);
  final String indexed = termBytes.utf8ToString();
  return indexed;
}
/**
 * Parses a readable value according to the configured type and writes its
 * prefix-coded form (shift 0) into <code>result</code>. Floats and doubles are first
 * mapped to sortable ints/longs; dates are resolved through DateField date math.
 *
 * @param val    the readable value
 * @param result receives the prefix-coded term bytes
 * @throws SolrException if the configured type is not handled
 */
@Override
public void readableToIndexed(CharSequence val, BytesRef result) {
  final String text = val.toString();
  switch (type) {
    case INTEGER: {
      final int parsed = Integer.parseInt(text);
      NumericUtils.intToPrefixCoded(parsed, 0, result);
      return;
    }
    case FLOAT: {
      final float parsed = Float.parseFloat(text);
      NumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(parsed), 0, result);
      return;
    }
    case LONG: {
      final long parsed = Long.parseLong(text);
      NumericUtils.longToPrefixCoded(parsed, 0, result);
      return;
    }
    case DOUBLE: {
      final double parsed = Double.parseDouble(text);
      NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(parsed), 0, result);
      return;
    }
    case DATE: {
      final long millis = dateField.parseMath(null, text).getTime();
      NumericUtils.longToPrefixCoded(millis, 0, result);
      return;
    }
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
  }
}
/**
 * Converts an external value to the internal form; for this type that is the
 * indexed form produced by {@link #readableToIndexed(String)}.
 *
 * @param val the external value
 * @return the internal (indexed) form
 */
@Override
public String toInternal(String val) {
  final String internal = readableToIndexed(val);
  return internal;
}
/**
 * Builds the marker text emitted when a field has no binary value:
 * a fixed error prefix followed by the field's string value.
 *
 * @param f the offending field
 * @return the marker string
 */
static String badFieldString(Fieldable f) {
  return "ERROR:SCHEMA-INDEX-MISMATCH,stringValue=" + f.stringValue();
}
/**
 * Renders the stored binary value of a field as its external string form.
 * Dates are formatted through DateField; a field without a binary value
 * yields the schema-mismatch marker string.
 *
 * @param f the stored field
 * @return the external string
 * @throws SolrException if the configured type is not handled
 */
@Override
public String toExternal(Fieldable f) {
  final byte[] stored = f.getBinaryValue();
  if (stored == null) {
    return badFieldString(f);
  }
  final String external;
  switch (type) {
    case INTEGER:
      external = Integer.toString(toInt(stored));
      break;
    case FLOAT:
      external = Float.toString(toFloat(stored));
      break;
    case LONG:
      external = Long.toString(toLong(stored));
      break;
    case DOUBLE:
      external = Double.toString(toDouble(stored));
      break;
    case DATE:
      external = dateField.formatDate(new Date(toLong(stored)));
      break;
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
  }
  return external;
}
/**
 * Decodes a prefix-coded indexed term, given as a String, back into its readable form.
 *
 * @param _indexedForm the indexed term text
 * @return the readable value
 * @throws SolrException if the configured type is not handled
 */
@Override
public String indexedToReadable(String _indexedForm) {
  final BytesRef term = new BytesRef(_indexedForm);
  final String readable;
  switch (type) {
    case INTEGER:
      readable = Integer.toString(NumericUtils.prefixCodedToInt(term));
      break;
    case FLOAT:
      readable = Float.toString(NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term)));
      break;
    case LONG:
      readable = Long.toString(NumericUtils.prefixCodedToLong(term));
      break;
    case DOUBLE:
      readable = Double.toString(NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term)));
      break;
    case DATE:
      readable = dateField.formatDate(new Date(NumericUtils.prefixCodedToLong(term)));
      break;
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
  }
  return readable;
}
/**
 * Decodes a prefix-coded indexed term and writes its readable form to <code>out</code>.
 *
 * @param input the indexed term bytes
 * @param out   receives the readable characters
 * @throws SolrException if the configured type is not handled
 */
@Override
public void indexedToReadable(BytesRef input, CharArr out) {
  final String readable;
  switch (type) {
    case INTEGER: {
      final int decoded = NumericUtils.prefixCodedToInt(input);
      readable = Integer.toString(decoded);
      break;
    }
    case FLOAT: {
      final int sortableBits = NumericUtils.prefixCodedToInt(input);
      readable = Float.toString(NumericUtils.sortableIntToFloat(sortableBits));
      break;
    }
    case LONG: {
      final long decoded = NumericUtils.prefixCodedToLong(input);
      readable = Long.toString(decoded);
      break;
    }
    case DOUBLE: {
      final long sortableBits = NumericUtils.prefixCodedToLong(input);
      readable = Double.toString(NumericUtils.sortableLongToDouble(sortableBits));
      break;
    }
    case DATE: {
      final long millis = NumericUtils.prefixCodedToLong(input);
      readable = dateField.formatDate(new Date(millis));
      break;
    }
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
  }
  out.write(readable);
}
/**
 * Converts a stored field to its indexed form by going through the readable string.
 *
 * @param f the stored field
 * @return the indexed form
 */
@Override
public String storedToIndexed(Fieldable f) {
  // TODO: optimize to remove redundant string conversion
  final String readable = storedToReadable(f);
  return readableToIndexed(readable);
}
/**
 * Creates the Lucene field for an external value.
 * <p/>
 * When the schema field is stored, the value is kept as a big-endian byte array; when it is
 * indexed, a NumericTokenStream with the normalized precision step supplies the terms.
 * A field that is neither indexed nor stored produces no Lucene field at all.
 *
 * @param field       the schema field being populated
 * @param externalVal the value in external form, parsed according to the configured type
 * @param boost       the boost to set on the created field
 * @return the new field, or <code>null</code> if the schema field is neither indexed nor stored
 * @throws SolrException if the configured type is not handled
 */
@Override
public Field createField(SchemaField field, String externalVal, float boost) {
  final boolean isIndexed = field.indexed();
  final boolean isStored = field.stored();
  if (!(isIndexed || isStored)) {
    if (log.isTraceEnabled()) {
      log.trace("Ignoring unindexed/unstored field: " + field);
    }
    return null;
  }
  final int step = precisionStep;
  byte[] storedBytes = null;
  TokenStream tokens = null;
  switch (type) {
    case INTEGER: {
      final int intVal = Integer.parseInt(externalVal);
      if (isStored) storedBytes = toArr(intVal);
      if (isIndexed) tokens = new NumericTokenStream(step).setIntValue(intVal);
      break;
    }
    case FLOAT: {
      final float floatVal = Float.parseFloat(externalVal);
      if (isStored) storedBytes = toArr(floatVal);
      if (isIndexed) tokens = new NumericTokenStream(step).setFloatValue(floatVal);
      break;
    }
    case LONG: {
      final long longVal = Long.parseLong(externalVal);
      if (isStored) storedBytes = toArr(longVal);
      if (isIndexed) tokens = new NumericTokenStream(step).setLongValue(longVal);
      break;
    }
    case DOUBLE: {
      final double doubleVal = Double.parseDouble(externalVal);
      if (isStored) storedBytes = toArr(doubleVal);
      if (isIndexed) tokens = new NumericTokenStream(step).setDoubleValue(doubleVal);
      break;
    }
    case DATE: {
      // dates are resolved through date math and handled as their long time value
      final long millis = dateField.parseMath(null, externalVal).getTime();
      if (isStored) storedBytes = toArr(millis);
      if (isIndexed) tokens = new NumericTokenStream(step).setLongValue(millis);
      break;
    }
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
  }
  final Field created;
  if (!isStored) {
    // index-only: the field is built directly from the token stream
    created = new Field(field.getName(), tokens);
  } else {
    created = new Field(field.getName(), storedBytes, Field.Store.YES);
    if (isIndexed) {
      created.setTokenStream(tokens);
    }
  }
  // term vectors aren't supported
  created.setOmitNorms(field.omitNorms());
  created.setOmitTermFreqAndPositions(field.omitTf());
  created.setBoost(boost);
  return created;
}
/**
* The kinds of value a trie field can hold. init() selects one by matching the upper-cased
* "type" schema argument against these constant names.
*/
public enum TrieTypes {
/** Values parsed with Integer.parseInt and stored as four bytes. */
INTEGER,
/** Values parsed with Long.parseLong and stored as eight bytes. */
LONG,
/** Values parsed with Float.parseFloat and stored as the four bytes of their raw bit pattern. */
FLOAT,
/** Values parsed with Double.parseDouble and stored as the eight bytes of their raw bit pattern. */
DOUBLE,
/** Values parsed through DateField date math and stored as the eight bytes of their long time value. */
DATE
}
/** One-character string made of NumericUtils.SHIFT_START_INT; getMainValuePrefix returns it for INTEGER and FLOAT fields. */
static final String INT_PREFIX = new String(new char[]{NumericUtils.SHIFT_START_INT});
/** One-character string made of NumericUtils.SHIFT_START_LONG; getMainValuePrefix returns it for LONG, DOUBLE and DATE fields. */
static final String LONG_PREFIX = new String(new char[]{NumericUtils.SHIFT_START_LONG});
/** expert internal use, subject to change.
 * Returns null if no prefix or prefix not needed, or the prefix of the main value of a trie field
 * that indexes multiple precisions per value.
 *
 * @param ft the field type to inspect; anything other than a TrieDateField or TrieField yields null
 * @return INT_PREFIX, LONG_PREFIX or null
 */
public static String getMainValuePrefix(FieldType ft) {
  if (ft instanceof TrieDateField) {
    // TrieDateField reports the configured (non-normalized) step, so range-check it here
    final int configuredStep = ((TrieDateField) ft).getPrecisionStep();
    final boolean multiplePrecisions = configuredStep > 0 && configuredStep < 64;
    return multiplePrecisions ? LONG_PREFIX : null;
  }
  if (!(ft instanceof TrieField)) {
    return null;
  }
  final TrieField trieField = (TrieField) ft;
  if (trieField.precisionStep == Integer.MAX_VALUE) {
    // normalized "single term" step: no prefix needed
    return null;
  }
  switch (trieField.type) {
    case INTEGER:
    case FLOAT:
      return INT_PREFIX;
    case LONG:
    case DOUBLE:
    case DATE:
      return LONG_PREFIX;
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + trieField.type);
  }
}
}
/**
 * A LongFieldSource variant for trie date fields: it describes itself as
 * <code>date(field)</code>, hands out MutableValueDate instances and converts
 * external values through TrieField's shared DateField date math.
 */
class TrieDateFieldSource extends LongFieldSource {

  /**
   * @param field  the name of the field to read
   * @param parser the long parser passed on to LongFieldSource
   */
  public TrieDateFieldSource(String field, FieldCache.LongParser parser) {
    super(field, parser);
  }

  /**
   * @param field the name of the field to read
   */
  public TrieDateFieldSource(String field) {
    super(field);
  }

  /**
   * @return the text <code>date(</code> + field + <code>)</code>
   */
  public String description() {
    final StringBuilder text = new StringBuilder();
    text.append("date(").append(field).append(')');
    return text.toString();
  }

  /**
   * @return a fresh MutableValueDate
   */
  @Override
  protected MutableValueLong newMutableValueLong() {
    final MutableValueLong value = new MutableValueDate();
    return value;
  }

  /**
   * Resolves an external date string (date math allowed) to its long time value.
   *
   * @param extVal the external date value
   * @return the result of getTime() on the parsed date
   */
  @Override
  public long externalToLong(String extVal) {
    final long millis = TrieField.dateField.parseMath(null, extVal).getTime();
    return millis;
  }
}