/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.search.aggregations.bucket.significant; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.junit.Before; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import static org.elasticsearch.search.aggregations.InternalAggregations.EMPTY; public class SignificantStringTermsTests extends InternalSignificantTermsTestCase { private SignificanceHeuristic significanceHeuristic; @Before public void setUpSignificanceHeuristic() { significanceHeuristic = randomSignificanceHeuristic(); } @Override protected InternalSignificantTerms createTestInstance(String name, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) { DocValueFormat format = DocValueFormat.RAW; int requiredSize = randomIntBetween(1, 5); int shardSize = requiredSize + 2; final int numBuckets = randomInt(shardSize); long globalSubsetSize = 0; long globalSupersetSize = 0; List<SignificantStringTerms.Bucket> buckets = new ArrayList<>(numBuckets); Set<BytesRef> terms = new HashSet<>(); for (int i = 0; i < numBuckets; ++i) { BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAlphaOfLength(10))); int subsetDf = randomIntBetween(1, 10); int supersetDf = randomIntBetween(subsetDf, 20); int supersetSize = randomIntBetween(supersetDf, 30); globalSubsetSize += subsetDf; globalSupersetSize += supersetSize; buckets.add(new SignificantStringTerms.Bucket(term, subsetDf, subsetDf, supersetDf, supersetSize, EMPTY, format)); } return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize, globalSupersetSize, significanceHeuristic, buckets); } @Override protected Writeable.Reader<InternalSignificantTerms<?, ?>> instanceReader() { return SignificantStringTerms::new; } private static SignificanceHeuristic randomSignificanceHeuristic() { return randomFrom( new JLHScore(), new MutualInformation(randomBoolean(), randomBoolean()), new GND(randomBoolean()), new ChiSquare(randomBoolean(), randomBoolean())); } }