/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.execution.steps; import java.util.ArrayList; import java.util.Arrays; import java.util.Deque; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicBoolean; import org.diqube.execution.consumers.AbstractThreadedColumnValueConsumer; import org.diqube.execution.consumers.AbstractThreadedGroupIntermediaryAggregationConsumer; import org.diqube.execution.consumers.ColumnValueConsumer; import org.diqube.execution.consumers.DoneConsumer; import org.diqube.execution.consumers.GenericConsumer; import org.diqube.execution.consumers.GroupIntermediaryAggregationConsumer; import org.diqube.execution.consumers.RowIdConsumer; import org.diqube.function.IntermediaryResult; import org.diqube.queries.QueryRegistry; import org.diqube.util.Triple; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Maps; import com.google.common.collect.Sets; /** * As Group IDs are valid for one TableShard only, they need to be mapped to the group IDs of equal groups from other * cluster nodes when receiving updates on the query master. This step does that and provides the cleaned list of * groupIds (= row IDs) as {@link RowIdConsumer} output. * * <p> * Input: {@link ColumnValueConsumer}, {@link GroupIntermediaryAggregationConsumer}<br> * Output: {@link GroupIntermediaryAggregationConsumer}, {@link RowIdConsumer} * * @author Bastian Gloeckle */ public class GroupIdAdjustingStep extends AbstractThreadedExecutablePlanStep { private static final Logger logger = LoggerFactory.getLogger(GroupIdAdjustingStep.class); private volatile ConcurrentMap<Long, Map<String, Object>> incomingGroupIdToValues = new ConcurrentHashMap<>(); private AtomicBoolean columnValueSourceIsDone = new AtomicBoolean(false); private AbstractThreadedColumnValueConsumer columnValueConsumer = new AbstractThreadedColumnValueConsumer(this) { @Override protected void allSourcesAreDone() { GroupIdAdjustingStep.this.columnValueSourceIsDone.set(true); } @Override protected void doConsume(String colName, Map<Long, Object> values) { for (Entry<Long, Object> valueEntry : values.entrySet()) { Map<String, Object> valueMap = incomingGroupIdToValues.computeIfAbsent(valueEntry.getKey(), l -> new ConcurrentHashMap<String, Object>()); valueMap.put(colName, valueEntry.getValue()); } } }; /** sync additions/removals by value of {@link #incomingGroupIntermediariesSync}. */ private volatile ConcurrentMap<Long, Deque<Triple<String, IntermediaryResult, IntermediaryResult>>> incomingGroupIntermediaries = new ConcurrentHashMap<>(); private AtomicBoolean groupInputIsDone = new AtomicBoolean(false); private ConcurrentMap<Long, Object> incomingGroupIntermediariesSync = new ConcurrentHashMap<>(); private AbstractThreadedGroupIntermediaryAggregationConsumer groupIntermediateAggregateConsumer = new AbstractThreadedGroupIntermediaryAggregationConsumer(this) { @Override protected void allSourcesAreDone() { GroupIdAdjustingStep.this.groupInputIsDone.set(true); } @Override protected void doConsumeIntermediaryAggregationResult(long groupId, String colName, IntermediaryResult oldIntermediaryResult, IntermediaryResult newIntermediaryResult) { incomingGroupIntermediariesSync.putIfAbsent(groupId, new Object()); synchronized (incomingGroupIntermediariesSync.get(groupId)) { incomingGroupIntermediaries.compute(groupId, (key, value) -> { if (value == null) value = new ConcurrentLinkedDeque<Triple<String, IntermediaryResult, IntermediaryResult>>(); value.addLast(new Triple<>(colName, oldIntermediaryResult, newIntermediaryResult)); return value; }); } } }; private Set<String> groupedColumnNames; private Map<Long, Long> groupIdMap = new HashMap<>(); private Map<Map<String, Object>, Long> valuesToGroupId = new HashMap<>(); private Set<Long> allKnownGroupIds = new HashSet<>(); public GroupIdAdjustingStep(int stepId, QueryRegistry queryRegistry, Set<String> groupedColumnNames) { super(stepId, queryRegistry); this.groupedColumnNames = groupedColumnNames; } @Override protected void validateOutputConsumer(GenericConsumer consumer) throws IllegalArgumentException { if (!(consumer instanceof DoneConsumer) && !(consumer instanceof GroupIntermediaryAggregationConsumer) && !(consumer instanceof RowIdConsumer)) throw new IllegalArgumentException("Only GroupIntermediaryAggregationConsumer and RowIdConsumer supported."); } @Override protected void execute() { execute(true); } private void execute(boolean checkIfDone) { if (!incomingGroupIdToValues.isEmpty()) { incomingGroupIdToValues.keySet().removeAll(allKnownGroupIds); List<Long> newGroupIds = new ArrayList<>(); List<Long> incomingGroupIds = new ArrayList<Long>(Sets.difference(incomingGroupIdToValues.keySet(), allKnownGroupIds)); incomingGroupIdToValues.keySet().removeAll(allKnownGroupIds); List<Long> groupIdsWorkedOn = new ArrayList<Long>(); for (Long groupId : incomingGroupIds) { Map<String, Object> values = incomingGroupIdToValues.get(groupId); if (Sets.difference(groupedColumnNames, values.keySet()).isEmpty()) { values = Maps.filterKeys(new HashMap<String, Object>(values), colName -> groupedColumnNames.contains(colName)); if (valuesToGroupId.containsKey(values)) { // we found a new groupId mapping! long availableGroupId = valuesToGroupId.get(values); groupIdMap.put(groupId, availableGroupId); logger.trace("Mapping new group ID {} to group ID {}", groupId, availableGroupId); } else { // new group found valuesToGroupId.put(values, groupId); groupIdMap.put(groupId, groupId); newGroupIds.add(groupId); logger.trace("Found new group ID {}", groupId); } groupIdsWorkedOn.add(groupId); } } for (Long groupIdDone : groupIdsWorkedOn) { incomingGroupIdToValues.remove(groupIdDone); allKnownGroupIds.add(groupIdDone); } if (!newGroupIds.isEmpty()) forEachOutputConsumerOfType(RowIdConsumer.class, c -> c.consume(newGroupIds.stream().toArray(l -> new Long[l]))); } processIncomingGroupIntermediaries(); if (checkIfDone) { if ((groupInputIsDone.get() && isEmpty(incomingGroupIntermediaries)) || // all groups processed. // all inputs done, we though might not have processed everything yet. (groupInputIsDone.get() && columnValueSourceIsDone.get())) { if (groupInputIsDone.get() && columnValueSourceIsDone.get()) // make sure we have processed everything, so lets execute one additional time. execute(false); forEachOutputConsumerOfType(GenericConsumer.class, c -> c.sourceIsDone()); doneProcessing(); } } } private void processIncomingGroupIntermediaries() { if (!isEmpty(incomingGroupIntermediaries)) { List<Long> activeGroupIds = new ArrayList<>(Sets.intersection(groupIdMap.keySet(), incomingGroupIntermediaries.keySet())); for (Long inputGroupId : activeGroupIds) { long newGroupId = groupIdMap.get(inputGroupId); Deque<Triple<String, IntermediaryResult, IntermediaryResult>> intermediaries = incomingGroupIntermediaries.get(inputGroupId); if (intermediaries.isEmpty()) { synchronized (incomingGroupIntermediariesSync.get(inputGroupId)) { // double-checked locking since there might have been something added to the deque in the meantime. if (intermediaries.isEmpty()) { incomingGroupIntermediaries.remove(inputGroupId); continue; } } } logger.trace("Processing collected changes for group {}", newGroupId); List<String> colNamesProcessed = new ArrayList<>(); while (!intermediaries.isEmpty()) { Triple<String, IntermediaryResult, IntermediaryResult> update = intermediaries.poll(); colNamesProcessed.add(update.getLeft()); forEachOutputConsumerOfType(GroupIntermediaryAggregationConsumer.class, c -> c.consumeIntermediaryAggregationResult(newGroupId, update.getLeft(), update.getMiddle(), update.getRight())); } logger.trace("Processed collected changes for group {}, there were updates for cols {}", newGroupId, colNamesProcessed); } } } private boolean isEmpty(Map<Long, Deque<Triple<String, IntermediaryResult, IntermediaryResult>>> map) { for (Deque<Triple<String, IntermediaryResult, IntermediaryResult>> deque : map.values()) { if (!deque.isEmpty()) return false; } return true; } @Override protected List<GenericConsumer> inputConsumers() { return new ArrayList<>( Arrays.asList(new GenericConsumer[] { columnValueConsumer, groupIntermediateAggregateConsumer })); } @Override protected String getAdditionalToStringDetails() { return null; } }