/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.search.aggregations.bucket.significant;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.BucketStreamContext;
import org.elasticsearch.search.aggregations.bucket.BucketStreams;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicStreams;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Result of running the significant_terms aggregation on a String field.
 */
public class SignificantStringTerms extends InternalSignificantTerms<SignificantStringTerms, SignificantStringTerms.Bucket> {

    public static final InternalAggregation.Type TYPE = new Type("significant_terms", "sigsterms");

    public static final AggregationStreams.Stream STREAM = new AggregationStreams.Stream() {
        @Override
        public SignificantStringTerms readResult(StreamInput in) throws IOException {
            SignificantStringTerms buckets = new SignificantStringTerms();
            buckets.readFrom(in);
            return buckets;
        }
    };

    private final static BucketStreams.Stream<Bucket> BUCKET_STREAM = new BucketStreams.Stream<Bucket>() {
        @Override
        public Bucket readResult(StreamInput in, BucketStreamContext context) throws IOException {
            // subsetSize and supersetSize are not serialized per bucket, so they are carried via the stream context attributes
            Bucket buckets = new Bucket((long) context.attributes().get("subsetSize"),
                    (long) context.attributes().get("supersetSize"));
            buckets.readFrom(in);
            return buckets;
        }

        @Override
        public BucketStreamContext getBucketStreamContext(Bucket bucket) {
            BucketStreamContext context = new BucketStreamContext();
            Map<String, Object> attributes = new HashMap<>();
            attributes.put("subsetSize", bucket.subsetSize);
            attributes.put("supersetSize", bucket.supersetSize);
            context.attributes(attributes);
            return context;
        }
    };

    public static void registerStreams() {
        AggregationStreams.registerStream(STREAM, TYPE.stream());
        BucketStreams.registerStream(BUCKET_STREAM, TYPE.stream());
    }

    public static class Bucket extends InternalSignificantTerms.Bucket {

        BytesRef termBytes;

        public Bucket(long subsetSize, long supersetSize) {
            // for serialization
            super(subsetSize, supersetSize);
        }

        public Bucket(BytesRef term, long subsetDf, long subsetSize, long supersetDf, long supersetSize,
                InternalAggregations aggregations) {
            super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations);
            this.termBytes = term;
        }

        public Bucket(BytesRef term, long subsetDf, long subsetSize, long supersetDf, long supersetSize,
                InternalAggregations aggregations, double score) {
            this(term, subsetDf, subsetSize, supersetDf, supersetSize, aggregations);
            this.score = score;
        }

        @Override
        public Number getKeyAsNumber() {
            // this method is needed for scripted numeric aggregations
            return Double.parseDouble(termBytes.utf8ToString());
        }

        @Override
        int compareTerm(SignificantTerms.Bucket other) {
            return BytesRef.getUTF8SortedAsUnicodeComparator().compare(termBytes, ((Bucket) other).termBytes);
        }

        @Override
        public String getKeyAsString() {
            return termBytes.utf8ToString();
        }

        @Override
        public String getKey() {
            return getKeyAsString();
        }

        @Override
        Bucket newBucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, InternalAggregations aggregations) {
            return new Bucket(termBytes, subsetDf, subsetSize, supersetDf, supersetSize, aggregations);
        }

        @Override
        public void readFrom(StreamInput in) throws IOException {
            termBytes = in.readBytesRef();
            subsetDf = in.readVLong();
            supersetDf = in.readVLong();
            score = in.readDouble();
            aggregations = InternalAggregations.readAggregations(in);
        }

        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeBytesRef(termBytes);
            out.writeVLong(subsetDf);
            out.writeVLong(supersetDf);
            out.writeDouble(getSignificanceScore());
            aggregations.writeTo(out);
        }

        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            builder.startObject();
            builder.utf8Field(CommonFields.KEY, termBytes);
            builder.field(CommonFields.DOC_COUNT, getDocCount());
            builder.field("score", score);
            builder.field("bg_count", supersetDf);
            aggregations.toXContentInternal(builder, params);
            builder.endObject();
            return builder;
        }
    }

    SignificantStringTerms() {} // for serialization

    public SignificantStringTerms(long subsetSize, long supersetSize, String name, int requiredSize, long minDocCount,
            SignificanceHeuristic significanceHeuristic, List<? extends InternalSignificantTerms.Bucket> buckets,
            List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
        super(subsetSize, supersetSize, name, requiredSize, minDocCount, significanceHeuristic, buckets, pipelineAggregators, metaData);
    }

    @Override
    public Type type() {
        return TYPE;
    }

    @Override
    public SignificantStringTerms create(List<SignificantStringTerms.Bucket> buckets) {
        return new SignificantStringTerms(this.subsetSize, this.supersetSize, this.name, this.requiredSize, this.minDocCount,
                this.significanceHeuristic, buckets, this.pipelineAggregators(), this.metaData);
    }

    @Override
    public Bucket createBucket(InternalAggregations aggregations, SignificantStringTerms.Bucket prototype) {
        return new Bucket(prototype.termBytes, prototype.subsetDf, prototype.subsetSize, prototype.supersetDf, prototype.supersetSize,
                aggregations);
    }

    @Override
    protected SignificantStringTerms create(long subsetSize, long supersetSize, List<InternalSignificantTerms.Bucket> buckets,
            InternalSignificantTerms prototype) {
        return new SignificantStringTerms(subsetSize, supersetSize, prototype.getName(), prototype.requiredSize, prototype.minDocCount,
                prototype.significanceHeuristic, buckets, prototype.pipelineAggregators(), prototype.getMetaData());
    }

    @Override
    protected void doReadFrom(StreamInput in) throws IOException {
        this.requiredSize = readSize(in);
        this.minDocCount = in.readVLong();
        this.subsetSize = in.readVLong();
        this.supersetSize = in.readVLong();
        significanceHeuristic = SignificanceHeuristicStreams.read(in);
        int size = in.readVInt();
        List<InternalSignificantTerms.Bucket> buckets = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            Bucket bucket = new Bucket(subsetSize, supersetSize);
            bucket.readFrom(in);
            buckets.add(bucket);
        }
        this.buckets = buckets;
        this.bucketMap = null;
    }

    @Override
    protected void doWriteTo(StreamOutput out) throws IOException {
        writeSize(requiredSize, out);
        out.writeVLong(minDocCount);
        out.writeVLong(subsetSize);
        out.writeVLong(supersetSize);
        significanceHeuristic.writeTo(out);
        out.writeVInt(buckets.size());
        for (InternalSignificantTerms.Bucket bucket : buckets) {
            bucket.writeTo(out);
        }
    }

    @Override
    public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
        builder.field("doc_count", subsetSize);
        builder.startArray(CommonFields.BUCKETS);
        for (InternalSignificantTerms.Bucket bucket : buckets) {
            // There is a condition (presumably when only one shard has a bucket?) where reduce is not called
            // and I end up with buckets that contravene the user's min_doc_count criteria in my reducer
            if (bucket.subsetDf >= minDocCount) {
                bucket.toXContent(builder, params);
            }
        }
        builder.endArray();
        return builder;
    }

}