/*
* Copyright (C) 2014 Indeed Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.indeed.imhotep.iql;
import com.google.common.collect.Lists;
import com.indeed.imhotep.api.ImhotepOutOfMemoryException;
import com.indeed.imhotep.ez.EZImhotepSession;
import com.indeed.imhotep.ez.Field;
import com.indeed.imhotep.ez.GroupKey;
import com.indeed.imhotep.ez.StatReference;
import gnu.trove.TIntHashSet;
import gnu.trove.TIntIntHashMap;
import gnu.trove.TIntObjectHashMap;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
* @author vladimir
*/
public class DistinctGrouping extends Grouping {
// Fields to get distinct term counts for and their positions in the stats list
private final List<Field> fields = Lists.newArrayList();
private final List<Integer> distinctProjectionPositions = Lists.newArrayList();
public void addField(Field field, int projectionPosition) {
fields.add(field);
distinctProjectionPositions.add(projectionPosition);
}
public List<Field> getFields() {
return Lists.newArrayList(fields);
}
@Override
public Map<Integer, GroupKey> regroup(EZImhotepSession session, Map<Integer, GroupKey> groupKeys) throws ImhotepOutOfMemoryException {
throw new UnsupportedOperationException("DistinctGrouping requires FTGS so always should go last in the list of groupings");
}
public Iterator<GroupStats> getGroupStats(final EZImhotepSession session, final Map<Integer, GroupKey> groupKeys, final List<StatReference> statRefs, long timeoutTS) throws ImhotepOutOfMemoryException {
if(groupKeys.isEmpty()) { // we don't have any parent groups probably because all docs were filtered out
return Collections.<GroupStats>emptyList().iterator();
}
final int statCount = statRefs.size();
final int groupCount = groupKeys.size();
final List<GroupStats> result = Lists.newArrayList();
// list to set for use in lookups
final TIntHashSet distinctProjectionPositionsSet = new TIntHashSet(distinctProjectionPositions.size());
for(int pos : distinctProjectionPositions) {
distinctProjectionPositionsSet.add(pos);
}
// TODO: don't auto-get group stats on each FTGS iteration
// map of groups -> projection positions -> values
TIntObjectHashMap<TIntIntHashMap> distinctData = getDistinctData(session, groupKeys);
// get values for the normal stats
final TIntObjectHashMap<double[]> statsResults = (statCount > 0) ? getGroupStatsValues(session, statRefs, groupCount) : null;
// combine normal stats with distinct counts
for (int groupNum = 1; groupNum <= groupCount; groupNum++) {
TIntIntHashMap groupDistinctData = distinctData.get(groupNum);
double[] statsVals = statsResults != null ? statsResults.get(groupNum) : null;
double[] values = new double[statCount + fields.size()];
for(int i = 0, statsValsIndex = 0; i < values.length; i++) {
if(distinctProjectionPositionsSet.contains(i)) { // distinct value
values[i] = groupDistinctData != null ? groupDistinctData.get(i) : 0;
} else if(statsVals != null && statsValsIndex < statsVals.length) {
values[i] = statsVals[statsValsIndex++]; // normal stat value available
} else {
values[i] = 0; // normal stat not in stats array
}
}
GroupKey groupKey = groupKeys.get(groupNum);
result.add(new GroupStats(groupKey, values));
}
return result.iterator();
}
private TIntObjectHashMap<TIntIntHashMap> getDistinctData(EZImhotepSession session, Map<Integer, GroupKey> groupKeys) {
TIntObjectHashMap<TIntIntHashMap> distinctData = new TIntObjectHashMap<TIntIntHashMap>();
// get distinct data
for(int i = 0; i < fields.size(); i++) {
final Field field = fields.get(i);
final int projectionPosition = distinctProjectionPositions.get(i);
final DistinctFTGSCallback callback = new DistinctFTGSCallback(session.getStackDepth(), groupKeys);
session.ftgsIterate(Lists.newArrayList(field), callback);
final TIntIntHashMap distinctResults = callback.getResults();
for(int groupNum : groupKeys.keySet()) {
final int distinctResult = distinctResults.get(groupNum);
TIntIntHashMap groupDistinctData = distinctData.get(groupNum);
if(groupDistinctData == null) {
groupDistinctData = new TIntIntHashMap(distinctProjectionPositions.size());
distinctData.put(groupNum, groupDistinctData);
}
groupDistinctData.put(projectionPosition, distinctResult);
}
}
return distinctData;
}
private TIntObjectHashMap<double[]> getGroupStatsValues(EZImhotepSession session, List<StatReference> statRefs, int groupCount) {
final int statCount = statRefs.size();
final double[][] statGroupValues = new double[statCount][];
for (int i = 0; i < statCount; i++) {
statGroupValues[i] = session.getGroupStats(statRefs.get(i));
}
final TIntObjectHashMap<double[]> ret = new TIntObjectHashMap<double[]>(groupCount);
for (int group = 1; group <= groupCount; group++) {
final double[] groupStats = new double[statCount];
for (int statNum = 0; statNum < groupStats.length; statNum++) {
if(group < statGroupValues[statNum].length) {
groupStats[statNum] = statGroupValues[statNum][group];
}
}
ret.put(group, groupStats);
}
return ret;
}
}