/* * Copyright (C) 2014 Indeed Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing permissions and * limitations under the License. */ package com.indeed.imhotep.iql; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.indeed.imhotep.ez.EZImhotepSession; import com.indeed.imhotep.ez.GroupKey; import com.indeed.imhotep.ez.StatReference; import gnu.trove.TIntObjectHashMap; import org.apache.log4j.Logger; import java.text.DecimalFormat; import java.util.*; /** * @author jplaisance */ public final class GroupingFTGSCallback extends EZImhotepSession.FTGSCallback { private static final Logger log = Logger.getLogger(GroupingFTGSCallbackNoExplode.class); private final List<StatReference> statRefs; private final Map<Integer, GroupKey> groupKeys; private final List<Object> allTerms = Lists.newArrayList(); private final TIntObjectHashMap<Map<Object, double[]>> groupToTermsStats = new TIntObjectHashMap<Map<Object, double[]>>(); private final int termLimit; public GroupingFTGSCallback(int numStats, List<StatReference> statRefs, Map<Integer, GroupKey> groupKeys) { super(numStats); this.statRefs = statRefs; this.groupKeys = groupKeys; termLimit = EZImhotepSession.GROUP_LIMIT / Math.max(groupKeys.size(), 1); } protected void intTermGroup(final String field, final long term, final int group) { termGroup(term, group); } protected void stringTermGroup(final String field, final String term, final int group) { termGroup(term, group); } private void termGroup(Object term, int group) { final int allTermsCount = allTerms.size(); if(allTermsCount == 0 || !allTerms.get(allTermsCount-1).equals(term)) { allTerms.add(term); // got a new term. relying on terms being passed in sorted order if(allTermsCount > termLimit) { throw new IllegalArgumentException("Number of groups exceeds the limit " + new DecimalFormat("###,###").format(EZImhotepSession.GROUP_LIMIT) + ". Please simplify the query. " + "Try adding [] suffix to non-first groupings to disable addition of 0 rows. (e.g. 'group by country, lang[]')"); } } Map<Object, double[]> groupTerms = groupToTermsStats.get(group); if(groupTerms == null) { groupTerms = Maps.newHashMap(); groupToTermsStats.put(group, groupTerms); } groupTerms.put(term, getStats()); } private double[] getStats() { final double[] stats = new double[statRefs.size()]; for (int i = 0; i < statRefs.size(); i++) { stats[i] = getStat(statRefs.get(i)); } return stats; } public List<GroupStats> getResults() { final List<GroupStats> ret = Lists.newArrayList(); // warning: we are reusing the same array instance for all blank rows to save memory final double[] emptyArray = new double[statRefs.size()]; for (int group = 1; group <= groupKeys.size(); group++) { final Map<Object, double[]> termsStats = groupToTermsStats.get(group); if(termsStats == null) { // this grouping was skipped by FTGS, so assigning 0 stats to all terms for(Object missingTerm : allTerms) { ret.add(new GroupStats(groupKeys.get(group).add(missingTerm), emptyArray)); } continue; } for(Object term : allTerms) { double[] stats = termsStats.get(term); if(stats == null) { stats = emptyArray; } ret.add(new GroupStats(groupKeys.get(group).add(term), stats)); } } return ret; } }