/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
/**
 * Base class for any type of facet request that generates a variable number of buckets and
 * supports sorting by those generated buckets.
 */
abstract class FacetRequestSorted extends FacetRequest {
  long offset;
  long limit;

  /**
   * Number of buckets to request beyond the limit to do internally during distributed search.
   * -1 means default.
   */
  int overrequest = -1;

  long mincount;
  String sortVariable;
  SortDirection sortDirection;

  /** One of null, NONE, or SIMPLE. */
  RefineMethod refine;

  /** Returns the {@link #refine} setting of this request as-is (it may be null). */
  @Override
  public RefineMethod getRefineMethod() {
    return this.refine;
  }

  /** Returns {@code true} exactly when {@link #limit} is greater than zero. */
  @Override
  public boolean returnsPartial() {
    final boolean positiveLimit = 0 < limit;
    return positiveLimit;
  }
}
/**
 * A sorted facet request over the values of a single schema field.
 *
 * <p>Instances start with {@code mincount = 1} and {@code limit = 10}; see the instance
 * initializer below.
 */
public class FacetField extends FacetRequestSorted {
  String field;
  boolean missing;

  /**
   * Show cumulative stats across all buckets (this can be different than non-bucketed stats
   * across all docs because of multi-valued docs).
   */
  boolean allBuckets;

  boolean numBuckets;
  String prefix;
  FacetMethod method;

  /** 0 means "default", -1 means "never cache". */
  int cacheDf;

  /**
   * Experimental - force perSeg collection when using dv method, currently for testing purposes
   * only.
   */
  Boolean perSeg;

  {
    // defaults for the fields inherited from FacetRequestSorted
    limit = 10;
    mincount = 1;
  }

  /** The collection strategies that can be requested for a field facet. */
  public enum FacetMethod {
    /** DocValues, collect into ordinal array. */
    DV,
    /** UnInvertedField, collect into ordinal array. */
    UIF,
    /** DocValues, collect into hash. */
    DVHASH,
    /** TermsEnum then intersect DocSet (stream-able). */
    ENUM,
    /** Presently equivalent to ENUM. */
    STREAM,
    SMART,
    ;

    static FacetMethod DEFAULT_METHOD = SMART; // non-final for tests to vary

    /**
     * Maps a lower-case method name to its constant.
     *
     * @param method the requested method name; null or empty selects {@link #DEFAULT_METHOD}
     * @return the matching constant
     * @throws SolrException with BAD_REQUEST when the name is not one of the known methods
     */
    public static FacetMethod fromString(String method) {
      if (method == null || method.isEmpty()) {
        return DEFAULT_METHOD;
      }

      final FacetMethod parsed;
      switch (method) {
        case "dv":
          parsed = DV;
          break;
        case "uif":
          parsed = UIF;
          break;
        case "dvhash":
          parsed = DVHASH;
          break;
        case "enum":
          parsed = ENUM;
          break;
        case "stream": // TODO replace with enum?
          parsed = STREAM;
          break;
        case "smart":
          parsed = SMART;
          break;
        default:
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown FacetField method " + method);
      }
      return parsed;
    }
  }

  /**
   * Chooses the processor implementation for this request based on the schema definition of
   * {@link #field} and on the requested options.
   *
   * <p>Selection order: numeric option validation; the streaming terms processor (STREAM/ENUM on
   * an indexed field sorted by "index" ascending); then, for multi-valued fields, the array
   * DocValues processor or the UIF processor; and for single-valued fields, the hash DocValues
   * processor or the array DocValues processor.
   *
   * <p>Side effect: a {@link #method} of ENUM is rewritten to STREAM on this request.
   *
   * @param fcontext the facet context whose searcher supplies the schema
   * @return the selected processor
   * @throws SolrException BAD_REQUEST for a numeric field combined with a prefix or with
   *     mincount=0; SERVER_ERROR when no algorithm fits a single-valued numeric field
   */
  @Override
  public FacetProcessor createFacetProcessor(FacetContext fcontext) {
    final SchemaField schemaField = fcontext.searcher.getSchema().getField(field);
    final FieldType fieldType = schemaField.getType();
    final boolean multiValued = schemaField.multiValued() || fieldType.multiValuedFieldCache();
    final NumberType numberType = fieldType.getNumberType();
    final boolean numeric = numberType != null;

    // ensure we can support the requested options for numeric faceting:
    if (numeric) {
      checkNumericOptions();
    }

    // TODO auto-pick ENUM/STREAM SOLR-9351 when index asc and DocSet cardinality is *not* much smaller than term cardinality
    if (FacetMethod.ENUM == method) {
      // at the moment these two are the same
      method = FacetMethod.STREAM;
    }

    final boolean sortedByIndexAscending =
        "index".equals(sortVariable) && SortDirection.asc == sortDirection;
    if (FacetMethod.STREAM == method && schemaField.indexed() && sortedByIndexAscending) {
      return new FacetFieldProcessorByEnumTermsStream(fcontext, this, schemaField);
    }

    // TODO if method=UIF and not single-valued numerics then simply choose that now? TODO add FieldType.getDocValuesType()

    if (multiValued) {
      if (schemaField.hasDocValues() || FacetMethod.DV == method) {
        // single and multi-valued string docValues
        return new FacetFieldProcessorByArrayDV(fcontext, this, schemaField);
      }
      // Top-level multi-valued field cache (UIF)
      return new FacetFieldProcessorByArrayUIF(fcontext, this, schemaField);
    }

    // single-valued from here on
    final boolean hashCollectable =
        mincount > 0 && prefix == null && (numeric || FacetMethod.DVHASH == method);
    if (hashCollectable) {
      // TODO can we auto-pick for strings when term cardinality is much greater than DocSet cardinality?
      //   or if we don't know cardinality but DocSet size is very small
      return new FacetFieldProcessorByHashDV(fcontext, this, schemaField);
    }
    if (numeric) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
          "Couldn't pick facet algorithm for field " + schemaField);
    }
    // single valued string...
    return new FacetFieldProcessorByArrayDV(fcontext, this, schemaField);
  }

  /** Rejects the options that are not supported when faceting on a numeric field. */
  private void checkNumericOptions() {
    if (prefix != null) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "Doesn't make sense to set facet prefix on a numeric field");
    }
    if (mincount == 0) {
      // TODO if indexed=true then we could add support
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "Numeric fields do not support facet mincount=0; try indexing as terms");
    }
  }

  /** Creates a {@code FacetFieldMerger} for this request; {@code prototype} is not consulted. */
  @Override
  public FacetMerger createFacetMerger(Object prototype) {
    return new FacetFieldMerger(this);
  }

  /** Describes this facet as a map holding its {@code "field"} and {@code "limit"} values. */
  @Override
  public Map<String, Object> getFacetDescription() {
    final Map<String, Object> description = new HashMap<>();
    description.put("field", this.field);
    description.put("limit", this.limit);
    return description;
  }
}