/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.tree.prop;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import com.addthis.bundle.value.ValueFactory;
import com.addthis.bundle.value.ValueObject;
import com.addthis.codec.annotations.FieldConfig;
import com.addthis.codec.codables.Codable;
import com.addthis.hydra.data.tree.DataTreeNode;
import com.addthis.hydra.data.tree.DataTreeNodeUpdater;
import com.addthis.hydra.data.tree.TreeDataParameters;
import com.addthis.hydra.data.tree.TreeNodeData;
import com.addthis.hydra.data.tree.TreeNodeDataDeferredOperation;
import com.addthis.hydra.data.util.KeyHistogram;
/**
* keep a histogram of the counts of nodes with bucketed # of hits
*/
public class DataHistogram extends TreeNodeData<DataHistogram.Config> implements Codable {
/**
* This data attachment <span class="hydra-summary">keeps a histogram of the counts of child nodes</span>.
* <p/>
* <p>As many buckets as needed are created for this histogram. The range of the buckets
* is determined by the {@link #scale} parameter. Each bucket represents an interval
* from <i>scale</i> <sup><i>i</i></sup> to <i>scale</i> <sup><i>i+1</i></sup> for an
* arbitrary value of <i>i</i>.</p>
* <p/>
* <p>Job Configuration Example:</p>
* <pre>
* {const:"pageUrlLengths"}
* {const:"by_date"}
* {field:"DATE_YMD", data.page_url_histo.histo.scale:10}
* {field:"PAGE_URL_LENGTH"}
* </pre>
*
* <p><b>Query Path Directives</b>
*
* <p>${attachment} returns the string representation of the histogram.
*
* <p>%{attachment}=tiers create a virtual node for each bucket in the histogram.
*
* <p>Query Path Examples:</p>
* <pre>
* /pageUrlLengths/by_date/130707$+page_url_histo
* /pageUrlLengths/by_date/130707/+%page_url_histo=tiers:+count
* </pre>
*
* @user-reference
*/
public static final class Config extends TreeDataParameters<DataHistogram> {
/**
* Base for the interval of the exponentially sized buckets.
* A positive integer greater than one.
* This field is required.
*/
@FieldConfig(codable = true)
private int scale;
@Override
public DataHistogram newInstance() {
DataHistogram dt = new DataHistogram();
dt.histo = new KeyHistogram().setScale(scale).init();
return dt;
}
}
@FieldConfig(codable = true)
private KeyHistogram histo;
@Override
public boolean updateChildData(DataTreeNodeUpdater state, DataTreeNode childNode, Config conf) {
return false;
}
@Override
public boolean updateParentData(DataTreeNodeUpdater state, DataTreeNode parentNode,
DataTreeNode childNode,
List<TreeNodeDataDeferredOperation> deferredOps) {
return histo.incrementTo(childNode.getCounter());
}
@Override
public ValueObject getValue(String key) {
return ValueFactory.create(histo.getHistogram().toString());
}
@Override
public List<String> getNodeTypes() {
return Arrays.asList(new String[]{"tiers"});
}
@Override
public List<DataTreeNode> getNodes(DataTreeNode parent, String key) {
if (key == null) {
return null;
}
if (key.equals("tiers")) {
Map<Long, Long> map = histo.getHistogram();
ArrayList<DataTreeNode> tiers = new ArrayList<>(map.size());
for (Entry<Long, Long> e : histo.getSortedHistogram().entrySet()) {
tiers.add(new VirtualTreeNode(e.getKey().toString(), e.getValue()));
}
return tiers;
}
return null;
}
}