/*
 * Copyright 2014 GoDataDriven B.V.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.divolte.server;

import static io.divolte.server.processing.ItemProcessor.ProcessingDirective.*;

import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import javax.annotation.ParametersAreNonnullByDefault;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;

import io.divolte.server.config.ValidatedConfiguration;
import io.divolte.server.ip2geo.LookupService;
import io.divolte.server.processing.Item;
import io.divolte.server.processing.ItemProcessor;
import io.divolte.server.processing.ProcessingPool;
import io.undertow.util.AttachmentKey;

/**
 * Item processor that parses an incoming {@link UndertowEvent}, flags probable
 * duplicates, runs the event through every mapping configured for the source
 * it arrived on, and enqueues each resulting record on the sinks configured
 * for the mapping that produced it.
 */
@ParametersAreNonnullByDefault
public final class IncomingRequestProcessor implements ItemProcessor<UndertowEvent> {
    private static final Logger logger = LoggerFactory.getLogger(IncomingRequestProcessor.class);

    /** Exchange attachment under which the outcome of the duplicate check is stored. */
    public static final AttachmentKey<Boolean> DUPLICATE_EVENT_KEY = AttachmentKey.create(Boolean.class);

    private final ShortTermDuplicateMemory memory;

    // Indexed by source index: the mappings to apply to events from that source.
    private final ImmutableList<ImmutableList<Mapping>> mappingsBySourceIndex;

    // Indexed by mapping index: the sinks that receive records produced by that mapping.
    private final ImmutableList<ImmutableList<ProcessingPool<?, AvroRecordBuffer>>> sinksByMappingIndex;

    /**
     * Builds the duplicate memory and the two index-based lookup tables from
     * the supplied configuration.
     *
     * @param vc                 the validated configuration to read mappings, sources and sinks from.
     * @param sinksByName        the available sink pools, keyed by sink name.
     * @param geoipLookupService passed through to every {@link Mapping} that is created.
     * @param schemaRegistry     passed through to every {@link Mapping} that is created.
     * @param listener           passed through to every {@link Mapping} that is created.
     */
    public IncomingRequestProcessor(final ValidatedConfiguration vc,
                                    final ImmutableMap<String, ProcessingPool<?, AvroRecordBuffer>> sinksByName,
                                    final Optional<LookupService> geoipLookupService,
                                    final SchemaRegistry schemaRegistry,
                                    final IncomingRequestListener listener) {
        memory = new ShortTermDuplicateMemory(vc.configuration().global.mapper.duplicateMemorySize);

        final Map<String, Mapping> mappingsByName =
                createMappings(vc, geoipLookupService, schemaRegistry, listener);

        mappingsBySourceIndex = groupMappingsBySourceIndex(vc, mappingsByName);
        sinksByMappingIndex = groupSinksByMappingIndex(vc, sinksByName);
    }

    /**
     * Instantiates one {@link Mapping} per configured mapping, keyed by mapping name.
     */
    private static Map<String, Mapping> createMappings(final ValidatedConfiguration vc,
                                                       final Optional<LookupService> geoipLookupService,
                                                       final SchemaRegistry schemaRegistry,
                                                       final IncomingRequestListener listener) {
        return vc.configuration()
                 .mappings
                 .entrySet()
                 .stream()
                 .collect(Collectors.toMap(
                         entry -> entry.getKey(),
                         entry -> new Mapping(vc, entry.getKey(), geoipLookupService, schemaRegistry, listener)));
    }

    /**
     * Creates a mutable list holding {@code count} empty immutable lists. It
     * serves as the default content for every index before the grouped
     * results are written over it.
     */
    private static <T> ArrayList<ImmutableList<T>> emptyListPerIndex(final int count) {
        return IntStream.range(0, count)
                        .mapToObj(ignored -> ImmutableList.<T>of())
                        .collect(Collectors.toCollection(ArrayList::new));
    }

    /**
     * Produces, for every source index, the list of mappings that apply to
     * events generated by that source.
     *
     * The result is a list of lists rather than a map keyed by index, so that
     * a lookup is a plain positional access and needs no hashing.
     */
    private static ImmutableList<ImmutableList<Mapping>> groupMappingsBySourceIndex(
            final ValidatedConfiguration vc,
            final Map<String, Mapping> mappingsByName) {
        // Every source starts out with no mappings.
        final ArrayList<ImmutableList<Mapping>> perSource =
                IncomingRequestProcessor.<Mapping>emptyListPerIndex(vc.configuration().sources.size());

        final Map<Integer, ImmutableList<Mapping>> grouped =
                vc.configuration()
                  .mappings
                  .entrySet()
                  .stream()
                  // Expand every mapping into (source_index, mapping_name) pairs.
                  .flatMap(entry -> entry.getValue()
                                         .sources
                                         .stream()
                                         .map(sourceName -> Maps.immutableEntry(
                                                 vc.configuration().sourceIndex(sourceName),
                                                 entry.getKey())))
                  // Gather the Mapping instances under their source index.
                  .collect(Collectors.groupingBy(
                          Map.Entry::getKey,
                          Collectors.mapping(pair -> mappingsByName.get(pair.getValue()),
                                             MoreCollectors.toImmutableList())));

        // Overwrite the empty defaults for every source that has mappings.
        grouped.forEach(perSource::set);
        return ImmutableList.copyOf(perSource);
    }

    /**
     * Produces, for every mapping index, the list of sinks (processing pools)
     * that receive the records produced by that mapping. Sink names that are
     * not present in {@code sinksByName} are skipped.
     *
     * As with the source table, the result is a list of lists so that a
     * lookup is positional rather than hash based.
     *
     * The sinks for a mapping have to be known here, rather than on the sink
     * thread side, because there is one pool per sink at this moment. Later
     * this will likely move to one pool per sink type (i.e. Kafka, HDFS), with
     * that pool multiplexing events to the different sink destinations (HDFS
     * files or Kafka topics), which should move this code elsewhere.
     */
    private static ImmutableList<ImmutableList<ProcessingPool<?, AvroRecordBuffer>>> groupSinksByMappingIndex(
            final ValidatedConfiguration vc,
            final ImmutableMap<String, ProcessingPool<?, AvroRecordBuffer>> sinksByName) {
        // Every mapping starts out with no sinks.
        final ArrayList<ImmutableList<ProcessingPool<?, AvroRecordBuffer>>> perMapping =
                IncomingRequestProcessor.<ProcessingPool<?, AvroRecordBuffer>>emptyListPerIndex(
                        vc.configuration().mappings.size());

        /*
         * The grouped result is held in an explicitly typed variable on purpose:
         * without it the Eclipse compiler's type inference is reported not to
         * cope with this expression.
         */
        final Map<Integer, ImmutableList<ProcessingPool<?, AvroRecordBuffer>>> grouped =
                vc.configuration()
                  .mappings
                  .entrySet()
                  .stream()
                  // Expand every mapping into (mapping_index, sink_name) pairs.
                  .flatMap(entry -> entry.getValue()
                                         .sinks
                                         .stream()
                                         .map(sinkName -> Maps.immutableEntry(
                                                 vc.configuration().mappingIndex(entry.getKey()),
                                                 sinkName)))
                  // Only keep the sinks for which a pool is actually available.
                  .filter(pair -> sinksByName.containsKey(pair.getValue()))
                  .collect(Collectors.groupingBy(
                          Map.Entry::getKey,
                          Collectors.mapping(pair -> sinksByName.get(pair.getValue()),
                                             MoreCollectors.toImmutableList())));

        // Overwrite the empty defaults for every mapping that has sinks.
        grouped.forEach(perMapping::set);
        return ImmutableList.copyOf(perMapping);
    }

    /**
     * Parses the request carried by the item, records whether it is a probable
     * duplicate, applies the mappings for the item's source and forwards every
     * produced record to the sinks of the mapping that produced it.
     *
     * @param item the incoming event, together with the index of its source.
     * @return always {@code CONTINUE}, also when the request could not be parsed.
     */
    @Override
    public ProcessingDirective process(final Item<UndertowEvent> item) {
        final DivolteEvent event;
        try {
            event = item.payload.parseRequest();
        } catch (final IncompleteRequestException e) {
            // The source address can be absent; fall back to a placeholder in that case.
            final String remoteHost = Optional.ofNullable(item.payload.exchange.getSourceAddress())
                                              .map(InetSocketAddress::getHostString)
                                              .orElse("<UNKNOWN HOST>");
            logger.warn("Improper request received from {}.", remoteHost);
            return CONTINUE;
        }

        final boolean duplicate =
                memory.isProbableDuplicate(event.partyId.value, event.sessionId.value, event.eventId);
        event.exchange.putAttachment(DUPLICATE_EVENT_KEY, duplicate);

        // For each mapping that applies to this source...
        for (final Mapping mapping : mappingsBySourceIndex.get(item.sourceId)) {
            // ...an absent result means the mapping produced no record, so nothing is enqueued.
            mapping.map(item, event, duplicate).ifPresent(bufferItem -> {
                // The sink table is looked up with the sourceId of the produced item.
                for (final ProcessingPool<?, AvroRecordBuffer> sink : sinksByMappingIndex.get(bufferItem.sourceId)) {
                    sink.enqueue(bufferItem);
                }
            });
        }

        return CONTINUE;
    }
}