package com.linkedin.thirdeye.dashboard.views.diffsummary;
import com.linkedin.thirdeye.client.diffsummary.DimNameValueCostEntry;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.jfree.util.Log;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.linkedin.thirdeye.client.diffsummary.CostFunction;
import com.linkedin.thirdeye.client.diffsummary.Cube;
import com.linkedin.thirdeye.client.diffsummary.HierarchyNode;
public class Summary {
/** Shared comparator that orders nodes by their dimension values. */
static final NodeDimensionValuesComparator NODE_COMPARATOR = new NodeDimensionValuesComparator();
/** The cube being summarized; assigned once in the constructor. */
private final Cube cube;
/** Number of dimensions of the cube; the upper bound for the number of levels a summary may use. */
private final int maxLevelCount;
/** Number of levels used by the most recent computeSummary call; initially equal to maxLevelCount. */
private int levelCount;
/** One DPArray per level; rebuilt on every computeSummary call. The final answer is read from index 0. */
private List<DPArray> dpArrays;
/** Sum of the cube's top baseline value and top current value; passed to the cost function. */
private final double topValue;
/** Inserter that scores a node with the cost function and updates the slots of a DPArray. */
private final RowInserter basicRowInserter = new BasicRowInserter();
/**
 * Inserter used while merging when a node's parent is not in the answer. It is the basic inserter unless
 * computeSummary is asked to do one-side error.
 */
private RowInserter oneSideErrorRowInserter = basicRowInserter;
/**
 * Inserter used for the children of the lowest internal level. It is the basic inserter unless computeSummary is
 * asked to do one-side error on a single-level summary.
 */
private RowInserter leafRowInserter = basicRowInserter;
/** Cost entries taken from the cube; handed to the response when it is built. */
private final List<DimNameValueCostEntry> costSet;
public Summary(Cube cube) {
this.cube = cube;
this.maxLevelCount = cube.getDimensions().size();
this.topValue = cube.getTopBaselineValue() + cube.getTopCurrentValue();
this.levelCount = this.maxLevelCount;
this.costSet = cube.getCostSet();
}
/**
 * Computes a summary over all levels of the cube without one-side error.
 *
 * @param answerSize the requested answer size, forwarded unchanged
 * @return the response produced by the three-argument overload
 */
public SummaryResponse computeSummary(int answerSize) {
  final boolean doOneSideError = false;
  return computeSummary(answerSize, doOneSideError, this.maxLevelCount);
}
/**
 * Computes a summary over all levels of the cube.
 *
 * @param answerSize the requested answer size, forwarded unchanged
 * @param doOneSideError whether one-side error handling is requested, forwarded unchanged
 * @return the response produced by the three-argument overload
 */
public SummaryResponse computeSummary(int answerSize, boolean doOneSideError) {
  final int allLevels = this.maxLevelCount;
  return computeSummary(answerSize, doOneSideError, allLevels);
}
/**
 * Computes a summary limited to the given number of levels, without one-side error.
 *
 * @param answerSize the requested answer size, forwarded unchanged
 * @param levelCount the requested number of levels, forwarded unchanged
 * @return the response produced by the three-argument overload
 */
public SummaryResponse computeSummary(int answerSize, int levelCount) {
  final boolean doOneSideError = false;
  return computeSummary(answerSize, doOneSideError, levelCount);
}
/**
 * Computes the summary of the cube.
 *
 * @param answerSize the requested answer size; values less than or equal to 0 are replaced by 1
 * @param doOneSideError whether to use the one-side error inserter
 * @param userLevelCount the number of levels to use; values outside (0, number of cube dimensions] are replaced by
 *                       the number of cube dimensions
 * @return a response built from the answer stored in the first DPArray, the level count and the cube's cost set
 */
public SummaryResponse computeSummary(int answerSize, boolean doOneSideError, int userLevelCount) {
  if (answerSize <= 0) {
    answerSize = 1;
  }
  if (userLevelCount <= 0 || userLevelCount > this.maxLevelCount) {
    userLevelCount = this.maxLevelCount;
  }
  this.levelCount = userLevelCount;

  dpArrays = new ArrayList<>(this.levelCount);
  for (int i = 0; i < this.levelCount; ++i) {
    dpArrays.add(new DPArray(answerSize));
  }

  HierarchyNode root = cube.getRoot();

  // Start every computation from the basic inserters. Without this reset, a one-side-error computation would leave
  // its inserters in place and silently affect a later call on the same instance that passes doOneSideError=false
  // (or that uses a different level count).
  oneSideErrorRowInserter = basicRowInserter;
  leafRowInserter = basicRowInserter;
  if (doOneSideError) {
    oneSideErrorRowInserter =
        new OneSideErrorRowInserter(basicRowInserter, Double.compare(1., root.targetRatio()) <= 0);
    // If this cube contains only one dimension, one side error is calculated starting at leaf (detailed) level;
    // otherwise, a row at different side is removed through internal nodes.
    if (this.levelCount == 1) {
      leafRowInserter = oneSideErrorRowInserter;
    }
  }

  computeChildDPArray(root);

  List<HierarchyNode> answer = new ArrayList<>(dpArrays.get(0).getAnswer());
  SummaryResponse response = new SummaryResponse();
  response.build(answer, this.levelCount, this.costSet);
  return response;
}
/**
 * Check correctness of the sum of wow values: the values of every answer node are added back to its closest
 * ancestor in the answer, and a warning is logged for each node whose baseline or current value then differs from
 * its original value. The check changes the wow values, so it should only be invoked after SummaryResponse is
 * generated.
 *
 * @throws IllegalStateException if no summary has been computed yet
 */
public void testCorrectnessOfWowValues() {
  // Fail with an explicit message instead of a bare NullPointerException when no answer exists yet.
  if (dpArrays == null || dpArrays.isEmpty()) {
    throw new IllegalStateException("computeSummary must be invoked before testCorrectnessOfWowValues");
  }
  Set<HierarchyNode> answer = dpArrays.get(0).getAnswer();
  List<HierarchyNode> nodeList = new ArrayList<>(answer);
  Collections.sort(nodeList, NODE_COMPARATOR); // Process lower level nodes first
  for (HierarchyNode node : nodeList) {
    HierarchyNode parent = findAncestor(node, null, answer);
    if (parent != null) {
      parent.addNodeValues(node);
    }
  }
  for (HierarchyNode node : nodeList) {
    if (Double.compare(node.getBaselineValue(), node.getOriginalBaselineValue()) != 0
        || Double.compare(node.getCurrentValue(), node.getOriginalCurrentValue()) != 0) {
      Log.warn("Wrong Wow values at node: " + node.getDimensionValues() + ". Expected: "
          + node.getOriginalBaselineValue() + "," + node.getOriginalCurrentValue() + ", actual: "
          + node.getBaselineValue() + "," + node.getCurrentValue());
    }
  }
}
/**
 * Orders hierarchy nodes by comparing their dimension values; the ordering itself is whatever the dimension
 * values' own compareTo defines.
 */
static class NodeDimensionValuesComparator implements Comparator<HierarchyNode> {
  /**
   * @param left the first node
   * @param right the second node
   * @return the result of comparing left's dimension values to right's
   */
  @Override
  public int compare(HierarchyNode left, HierarchyNode right) {
    return left.getDimensionValues().compareTo(right.getDimensionValues());
  }
}
/**
 * Build the summary recursively for the subtree rooted at the given node.
 * The calculated answer for each invocation is put at dpArrays[node.level], so the final answer (the one computed
 * for the root) is located at dpArrays[0].
 *
 * For a node at the lowest internal level (levelCount - 1), its children are inserted directly with the leaf row
 * inserter. For any other node, each child is processed recursively and the child's DPArray (at level + 1) is
 * merged into this node's DPArray. Afterwards a non-root node is itself offered to its DPArray, using its parent's
 * target ratio, unless it has been thinned out; the root node is simply added to the answer.
 *
 * @param node the root of the subtree to summarize
 */
private void computeChildDPArray(HierarchyNode node) {
HierarchyNode parent = node.getParent();
DPArray dpArray = dpArrays.get(node.getLevel());
// Start from a clean DPArray for this level; it is shared by all nodes of the same level.
dpArray.fullReset();
dpArray.targetRatio = node.targetRatio();
// Compute DPArray if the current node is the lowest internal node.
// Otherwise, merge DPArrays from its children.
if (node.getLevel() == levelCount - 1) {
// Shrink answer size for getting a higher level view, which gives larger picture of the dataset
if (node.childrenSize() < dpArray.size()) {
dpArray.setShrinkSize(Math.max(1, (node.childrenSize()+1)/2));
}
for (HierarchyNode child : node.getChildren()) {
leafRowInserter.insertRowToDPArray(dpArray, child, node.targetRatio());
// Recompute this node's values from the current answer after every insertion, because the next
// insertion uses the node's updated target ratio.
updateWowValues(node, dpArray.getAnswer());
dpArray.targetRatio = node.targetRatio(); // get updated ratio
}
} else {
// Nodes dropped from the answer while merging; needed to undo their effect if a rollback happens.
List<HierarchyNode> removedNodes = new ArrayList<>();
boolean doRollback = false;
do {
doRollback = false;
for (HierarchyNode child : node.getChildren()) {
// The recursive call leaves the child's answer in dpArrays[node.level + 1], which is merged right away
// because the next child reuses the same DPArray.
computeChildDPArray(child);
removedNodes.addAll(mergeDPArray(node, dpArray, dpArrays.get(node.getLevel() + 1)));
updateWowValues(node, dpArray.getAnswer());
dpArray.targetRatio = node.targetRatio(); // get updated ratio
}
// Aggregate the current node's answer if the node is thinned out because the user's answer size is too large.
// If the current node keeps being thinned out, it eventually aggregates all its children.
// NOTE(review): the second condition compares the answer size against dpArray.maxSize(); its exact meaning
// depends on DPArray, which is not visible here.
if ( nodeIsThinnedOut(node) && dpArray.getAnswer().size() < dpArray.maxSize()) {
doRollback = true;
rollbackInsertions(node, dpArray.getAnswer(), removedNodes);
removedNodes.clear();
// Retry the whole merge with an answer size of roughly two thirds of the current answer (at least 1).
dpArray.setShrinkSize(Math.max(1, (dpArray.getAnswer().size()*2)/3));
dpArray.reset();
dpArray.targetRatio = node.targetRatio();
}
} while (doRollback);
}
// Calculate the cost if the node (aggregated row) is put in the answer.
// We do not need to do this for the root node.
// Moreover, if a node is thinned out by its children, it won't be inserted to the answer.
if (node.getLevel() != 0) {
// Costs are now evaluated against the parent's target ratio, so refresh the parent's values first.
updateWowValues(parent, dpArray.getAnswer());
double targetRatio = parent.targetRatio();
recomputeCostAndRemoveSmallNodes(node, dpArray, targetRatio);
dpArray.targetRatio = targetRatio;
if ( !nodeIsThinnedOut(node) ) {
// dpArray actually takes (dpArray.size-1) nodes as the answer, so we set its size to 2
// in order to insert the aggregated node to the answer.
if (dpArray.size() == 1) dpArray.setShrinkSize(2);
// Snapshot the answer so the nodes pushed out by inserting this node can be identified afterwards.
Set<HierarchyNode> removedNode = new HashSet<>(dpArray.getAnswer());
basicRowInserter.insertRowToDPArray(dpArray, node, targetRatio);
removedNode.removeAll(dpArray.getAnswer());
if (removedNode.size() != 0) {
updateWowValuesDueToRemoval(node, dpArray.getAnswer(), removedNode);
updateWowValues(node, dpArray.getAnswer());
}
}
} else {
// The root node is always part of the final answer.
dpArray.getAnswer().add(node);
}
}
// TODO: Need a better definition for "a node is thinned out by its children."
// We also need to look into the case where parent node is much smaller than its children.
/**
 * Tells whether both the baseline value and the current value of the node compare equal to 0 according to
 * Double.compare (so a value of -0.0 does not count as zero).
 */
private static boolean nodeIsThinnedOut(HierarchyNode node) {
  final boolean baselineIsZero = Double.compare(0., node.getBaselineValue()) == 0;
  if (!baselineIsZero) {
    return false;
  }
  return Double.compare(0., node.getCurrentValue()) == 0;
}
/**
 * Undoes the value changes caused by nodes that were removed from the answer below the given node, then resets
 * the values of the node itself. Note that the given list of removed nodes is sorted in place.
 *
 * @param node the node whose subtree is rolled back; it is the ceiling of every ancestor lookup
 * @param answer the current answer
 * @param removedNodes the nodes that were removed; reordered by this method
 */
private static void rollbackInsertions(HierarchyNode node, Set<HierarchyNode> answer, List<HierarchyNode> removedNodes) {
  // Rollback from top to bottom nodes: ascending comparator order, then reversed.
  Collections.sort(removedNodes, NODE_COMPARATOR);
  Collections.reverse(removedNodes);

  // Ancestors are looked up among the answer nodes and the removed nodes together.
  Set<HierarchyNode> lookupSet = new HashSet<>(answer);
  lookupSet.addAll(removedNodes);

  for (HierarchyNode removed : removedNodes) {
    HierarchyNode closestAncestor = findAncestor(removed, node, lookupSet);
    if (closestAncestor == null) {
      continue;
    }
    closestAncestor.removeNodeValues(removed);
  }
  node.resetValues();
}
/**
 * Merges the answer of childArray into parentArray; the merged answer ends up in parentArray.
 * The target ratio used for the merge is the average of the two arrays' target ratios. Nodes that were in either
 * answer before the merge but are not in the merged answer get their baseline and current values added back to
 * their closest ancestor in the merged answer.
 *
 * @param parentNode the node that owns parentArray
 * @param parentArray the array that receives the merged answer
 * @param childArray the array whose answer is merged in
 * @return the nodes that were dropped by the merge
 */
private Set<HierarchyNode> mergeDPArray(HierarchyNode parentNode, DPArray parentArray, DPArray childArray) {
  // Everything present before the merge; whatever survives is subtracted at the end.
  Set<HierarchyNode> droppedNodes = new HashSet<>(parentArray.getAnswer());
  droppedNodes.addAll(childArray.getAnswer());

  // Compute the merged answer
  final double mergedRatio = (parentArray.targetRatio + childArray.targetRatio) / 2.;
  recomputeCostAndRemoveSmallNodes(parentNode, parentArray, mergedRatio);

  List<HierarchyNode> sortedChildAnswer = new ArrayList<>(childArray.getAnswer());
  Collections.sort(sortedChildAnswer, NODE_COMPARATOR);
  for (HierarchyNode candidate : sortedChildAnswer) {
    insertRowWithAdaptiveRatio(parentArray, candidate, mergedRatio);
  }

  // Update an internal node's baseline and current value if any of its children is removed due to the merge
  droppedNodes.removeAll(parentArray.getAnswer());
  updateWowValuesDueToRemoval(parentNode, parentArray.getAnswer(), droppedNodes);
  return droppedNodes;
}
/**
 * Recomputes the baseline and current values of the node: the node is reset and then the values of every node in
 * the answer, except the node itself, are removed from it.
 *
 * @param node the node to recompute
 * @param answer the chosen nodes; may contain the node itself, which is skipped
 */
private static void updateWowValues(HierarchyNode node, Set<HierarchyNode> answer) {
  node.resetValues();
  for (HierarchyNode chosen : answer) {
    if (chosen != node) {
      node.removeNodeValues(chosen);
    }
  }
}
/**
 * Adds the values of each removed node back to its closest ancestor that is still in the answer, looking only
 * below the given node.
 *
 * @param node The internal node that bounds the ancestor lookup.
 * @param answer The new answer.
 * @param removedNodes The nodes removed from the subtree of node.
 */
private static void updateWowValuesDueToRemoval(HierarchyNode node, Set<HierarchyNode> answer,
    Set<HierarchyNode> removedNodes) {
  // Process lower level nodes first
  List<HierarchyNode> ordered = new ArrayList<>(removedNodes);
  Collections.sort(ordered, NODE_COMPARATOR);
  for (HierarchyNode removed : ordered) {
    HierarchyNode closestAncestor = findAncestor(removed, node, answer);
    if (closestAncestor == null) {
      continue;
    }
    closestAncestor.addNodeValues(removed);
  }
}
/**
 * Find a node's closest ancestor, strictly between the given node and ceiling, that is contained in the target set
 * of HierarchyNode. The walk follows getParent() and also stops when it runs out of parents, so a ceiling that is
 * not on the node's parent chain yields null instead of a NullPointerException.
 *
 * @param node the node whose ancestors are examined; the node itself is never returned
 * @param ceiling the ancestor at which the search stops (exclusive); null searches all the way up
 * @param targets the set of candidate ancestors
 * @return the closest matching ancestor, or null if no ancestor exists in the target set
 */
private static HierarchyNode findAncestor(HierarchyNode node, HierarchyNode ceiling, Set<HierarchyNode> targets) {
  HierarchyNode ancestor = node.getParent();
  while (ancestor != null && ancestor != ceiling) {
    if (targets.contains(ancestor)) {
      return ancestor;
    }
    ancestor = ancestor.getParent();
  }
  return null;
}
/**
 * Recomputes the costs of the nodes in a DPArray: the array is reset and its previous answer is re-inserted, in
 * comparator order, using targetRatio for calculating the cost. Nodes of the previous answer that do not make it
 * back into the answer get their values added back to their closest ancestor in the answer, or to parentNode.
 *
 * @param parentNode the node that owns the DPArray
 * @param dp the DPArray to recompute
 * @param targetRatio the ratio used when a node's parent is not in the answer
 */
private void recomputeCostAndRemoveSmallNodes(HierarchyNode parentNode, DPArray dp, double targetRatio) {
  Set<HierarchyNode> dropped = new HashSet<>(dp.getAnswer());
  List<HierarchyNode> previousAnswer = new ArrayList<>(dp.getAnswer());
  Collections.sort(previousAnswer, NODE_COMPARATOR);

  dp.reset();
  for (HierarchyNode candidate : previousAnswer) {
    insertRowWithAdaptiveRatioNoOneSideError(dp, candidate, targetRatio);
  }

  dropped.removeAll(dp.getAnswer());
  if (dropped.isEmpty()) {
    return;
  }
  // Temporarily add parentNode to the answer so the baseline and current values of the removed small nodes can
  // be added back to parentNode by re-using the method updateWowValuesDueToRemoval.
  dp.getAnswer().add(parentNode);
  updateWowValuesDueToRemoval(parentNode.getParent(), dp.getAnswer(), dropped);
  dp.getAnswer().remove(parentNode);
}
/**
 * Inserts the node with the basic row inserter. If the node's parent is also in the DPArray's answer, the parent's
 * current ratio is used as the target ratio for calculating the cost of the node; otherwise, targetRatio is used.
 * The one-side error inserter is never involved here.
 */
private void insertRowWithAdaptiveRatioNoOneSideError(DPArray dp, HierarchyNode node, double targetRatio) {
  final boolean parentInAnswer = dp.getAnswer().contains(node.getParent());
  final double ratio = parentInAnswer ? node.getParent().targetRatio() : targetRatio;
  basicRowInserter.insertRowToDPArray(dp, node, ratio);
}
/**
 * Inserts the node into the DPArray. If the node's parent is also in the DPArray's answer, the node is inserted
 * with the basic row inserter using the parent's current ratio as the target ratio; otherwise it is inserted with
 * the one-side error inserter using targetRatio.
 */
private void insertRowWithAdaptiveRatio(DPArray dp, HierarchyNode node, double targetRatio) {
  if (!dp.getAnswer().contains(node.getParent())) {
    oneSideErrorRowInserter.insertRowToDPArray(dp, node, targetRatio);
    return;
  }
  // For one side error if node's parent is included in the solution, then its cost will be calculated normally.
  basicRowInserter.insertRowToDPArray(dp, node, node.getParent().targetRatio());
}
private static interface RowInserter {
public void insertRowToDPArray(DPArray dp, HierarchyNode node, double targetRatio);
}
/**
 * Default inserter: scores the node with CostFunction.errWithPercentageRemoval (using the enclosing summary's
 * topValue) and updates every slot of the DPArray from the last one down to the first.
 */
private class BasicRowInserter implements RowInserter {
  @Override
  public void insertRowToDPArray(DPArray dp, HierarchyNode node, double targetRatio) {
    final double nodeCost = CostFunction.errWithPercentageRemoval(node.getBaselineValue(), node.getCurrentValue(),
        targetRatio, Cube.PERCENTAGE_CONTRIBUTION_THRESHOLD, topValue);

    // Walk from the last slot to slot 1 so that slot - 1 still holds its value from before this insertion.
    int slot = dp.size() - 1;
    while (slot > 0) {
      final double costWithNode = dp.slotAt(slot - 1).cost;
      final double costWithoutNode = dp.slotAt(slot).cost + nodeCost; // fixed r per iteration
      if (Double.compare(costWithNode, costWithoutNode) >= 0) {
        dp.slotAt(slot).cost = costWithoutNode;
      } else {
        dp.slotAt(slot).cost = costWithNode;
        // Intended as dp[slot].ans = dp[slot-1].ans.
        // NOTE(review): retainAll only intersects, so this relies on dp[slot-1].ans being a subset of
        // dp[slot].ans - confirm.
        dp.slotAt(slot).ans.retainAll(dp.slotAt(slot - 1).ans);
        dp.slotAt(slot).ans.add(node);
      }
      --slot;
    }
    // Slot 0 always accumulates the node's cost.
    dp.slotAt(0).cost += nodeCost;
  }
}
/**
 * A wrapper class over another RowInserter (the basic one). This class provides the calculation for one side
 * error summary: rows on the configured side are always forwarded, rows on the other side only when an ancestor
 * on the configured side is already in the answer.
 */
private static class OneSideErrorRowInserter implements RowInserter {
  final RowInserter basicRowInserter;
  final boolean side;

  /**
   * @param basicRowInserter the inserter that performs the actual insertion
   * @param side the side of rows that this inserter accepts unconditionally
   */
  public OneSideErrorRowInserter(RowInserter basicRowInserter, boolean side) {
    this.basicRowInserter = basicRowInserter;
    this.side = side;
  }

  @Override
  public void insertRowToDPArray(DPArray dp, HierarchyNode node, double targetRatio) {
    if (side != node.side()) {
      // A row on the other side is inserted only if one of its ancestors is in the answer and that ancestor is on
      // this inserter's side.
      HierarchyNode ancestorInAnswer = findAncestor(node, null, dp.getAnswer());
      if (ancestorInAnswer != null && ancestorInAnswer.side() == side) {
        basicRowInserter.insertRowToDPArray(dp, node, targetRatio);
      }
      return;
    }

    // The row has the same change trend as the top row, so it is inserted.
    // When doing one side, we try to make the root's ratio close to 1 in order to see the major root causes.
    final int ratioVersusOne = Double.compare(targetRatio, 1d);
    final boolean clampToOne = side ? ratioVersusOne > 0 : ratioVersusOne < 0;
    if (clampToOne) {
      targetRatio = 1d;
    }
    basicRowInserter.insertRowToDPArray(dp, node, targetRatio);
  }
}
/**
 * Manual driver: restores a cube from "Cube.json", computes a one-side-error summary with answer size 10 and at
 * most 3 levels, prints the response as JSON and as a string, and finally runs the wow-value correctness check.
 * The process exits with status 1 if the cube cannot be read.
 */
public static void main (String[] argc) {
  final String cubeFileName = "Cube.json";
  final int requestedAnswerSize = 10;
  final boolean oneSideError = true;
  final int requestedLevelCount = 3;

  Cube restoredCube = null;
  try {
    restoredCube = Cube.fromJson(cubeFileName);
    System.out.println("Restored Cube:");
    System.out.println(restoredCube);
  } catch (IOException e) {
    e.printStackTrace();
    System.exit(1);
  }

  Summary summarizer = new Summary(restoredCube);
  try {
    SummaryResponse summaryResponse =
        summarizer.computeSummary(requestedAnswerSize, oneSideError, requestedLevelCount);
    System.out.print("JSon String: ");
    String json = new ObjectMapper().writeValueAsString(summaryResponse);
    System.out.println(json);
    System.out.println("Object String: ");
    System.out.println(summaryResponse.toString());
  } catch (JsonProcessingException e) {
    e.printStackTrace();
  }

  summarizer.testCorrectnessOfWowValues();
}
}