/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.significant.heuristics;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
public class ChiSquare extends NXYSignificanceHeuristic {
public static final String NAME = "chi_square";
public ChiSquare(boolean includeNegatives, boolean backgroundIsSuperset) {
super(includeNegatives, backgroundIsSuperset);
}
/**
* Read from a stream.
*/
public ChiSquare(StreamInput in) throws IOException {
super(in);
}
@Override
public boolean equals(Object other) {
if (!(other instanceof ChiSquare)) {
return false;
}
return super.equals(other);
}
@Override
public int hashCode() {
int result = NAME.hashCode();
result = 31 * result + super.hashCode();
return result;
}
/**
* Calculates Chi^2
* see "Information Retrieval", Manning et al., Eq. 13.19
*/
@Override
public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) {
Frequencies frequencies = computeNxys(subsetFreq, subsetSize, supersetFreq, supersetSize, "ChiSquare");
// here we check if the term appears more often in subset than in background without subset.
if (!includeNegatives && frequencies.N11 / frequencies.N_1 < frequencies.N10 / frequencies.N_0) {
return Double.NEGATIVE_INFINITY;
}
return (frequencies.N * Math.pow((frequencies.N11 * frequencies.N00 - frequencies.N01 * frequencies.N10), 2.0) /
((frequencies.N_1) * (frequencies.N1_) * (frequencies.N0_) * (frequencies.N_0)));
}
@Override
public String getWriteableName() {
return NAME;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
super.build(builder);
builder.endObject();
return builder;
}
public static final SignificanceHeuristicParser PARSER = new NXYParser() {
@Override
protected SignificanceHeuristic newHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) {
return new ChiSquare(includeNegatives, backgroundIsSuperset);
}
};
public static class ChiSquareBuilder extends NXYSignificanceHeuristic.NXYBuilder {
public ChiSquareBuilder(boolean includeNegatives, boolean backgroundIsSuperset) {
super(includeNegatives, backgroundIsSuperset);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
super.build(builder);
builder.endObject();
return builder;
}
}
}