/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.runtime.job_monitor; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.Collection; import java.util.Map; import java.util.regex.Pattern; import org.apache.hadoop.fs.Path; import com.codahale.metrics.Counter; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; import com.typesafe.config.ConfigValue; import gobblin.metrics.GobblinTrackingEvent; import gobblin.metrics.event.sla.SlaEventKeys; import gobblin.metrics.reporter.util.NoopSchemaVersionWriter; import gobblin.metrics.reporter.util.SchemaVersionWriter; import gobblin.runtime.api.GobblinInstanceDriver; import gobblin.runtime.api.JobSpec; import gobblin.runtime.api.JobSpecMonitor; import gobblin.runtime.api.JobSpecMonitorFactory; import gobblin.runtime.api.MutableJobCatalog; import gobblin.runtime.metrics.RuntimeMetrics; import gobblin.util.Either; import gobblin.util.PathUtils; import gobblin.util.reflection.GobblinConstructorUtils; import 
lombok.Getter;


/**
 * A {@link KafkaJobMonitor} that parses SLA {@link GobblinTrackingEvent}s and generates {@link JobSpec}s. Used
 * to trigger jobs on data availability.
 *
 * <p>For every accepted event a {@link JobSpec} is built whose URI is the configured base URI merged with the
 * dataset URN of the event, whose template is the configured template URI, and whose configuration is made of
 * the event metadata entries selected through {@link #EXTRACT_KEYS}.
 */
@Getter
public class SLAEventKafkaJobMonitor extends KafkaAvroJobMonitor<GobblinTrackingEvent> {

  /** Prefix under which the configuration of this monitor lives in the instance system configuration. */
  public static final String CONFIG_PREFIX = "gobblin.jobMonitor.slaEvent";
  /** Optional regex; events whose dataset URN contains no match for it are rejected. */
  public static final String DATASET_URN_FILTER_KEY = "filter.urn";
  /** Optional regex; events whose name contains no match for it are rejected. */
  public static final String EVENT_NAME_FILTER_KEY = "filter.name";
  /** Required. URI of the template applied to every generated {@link JobSpec}. */
  public static final String TEMPLATE_KEY = "job_template";
  /**
   * Optional sub-configuration mapping an event metadata key to the job configuration key under which its value
   * is stored. Only entries whose value is a string are used.
   */
  public static final String EXTRACT_KEYS = "extract_keys";
  /** Base URI that is merged with the dataset URN to form the URI of a generated {@link JobSpec}. */
  public static final String BASE_URI_KEY = "baseUri";
  /** Required. Topic handed to the superclass. */
  public static final String TOPIC_KEY = "topic";
  /** Name of the {@link SchemaVersionWriter} class to instantiate reflectively and hand to the superclass. */
  public static final String SCHEMA_VERSION_READER_CLASS = "versionReaderClass";

  /** Values used for {@link #BASE_URI_KEY} and {@link #SCHEMA_VERSION_READER_CLASS} when they are not configured. */
  private static final Config DEFAULTS = ConfigFactory.parseMap(ImmutableMap.of(
      BASE_URI_KEY, SLAEventKafkaJobMonitor.class.getSimpleName(),
      SCHEMA_VERSION_READER_CLASS, NoopSchemaVersionWriter.class.getName()));

  private final Optional<Pattern> urnFilter;
  private final Optional<Pattern> nameFilter;
  private final URI baseURI;
  private final URI template;
  private final Map<String, String> extractKeys;

  /** Counts the events refused by {@link #acceptEvent(GobblinTrackingEvent)}; assigned in {@link #createMetrics()}. */
  private Counter rejectedEvents;

  /**
   * {@link JobSpecMonitorFactory} producing {@link SLAEventKafkaJobMonitor}s.
   */
  public static class Factory implements JobSpecMonitorFactory {

    /**
     * Create a {@link SLAEventKafkaJobMonitor} from the configuration found under {@link #CONFIG_PREFIX} in the
     * system configuration of the input {@link GobblinInstanceDriver}.
     *
     * @param instanceDriver driver whose system configuration is read.
     * @param jobCatalog catalog handed to the created monitor.
     * @throws IOException if the base URI or the template URI is not a valid {@link URI}.
     */
    @Override
    public JobSpecMonitor forJobCatalog(GobblinInstanceDriver instanceDriver, MutableJobCatalog jobCatalog)
        throws IOException {
      Config config = instanceDriver.getSysConfig().getConfig().getConfig(CONFIG_PREFIX).withFallback(DEFAULTS);
      return forConfig(config, jobCatalog);
    }

    /**
     * Create a {@link SLAEventKafkaJobMonitor} from an input {@link Config}. Useful for multiple monitors, where
     * the configuration of each monitor is scoped.
     *
     * @param localScopeConfig The sub-{@link Config} for this monitor without any namespacing (e.g. the key for
     *                         topic should simply be "topic"). {@link #TEMPLATE_KEY} and {@link #TOPIC_KEY} are
     *                         required; {@link #BASE_URI_KEY} and {@link #SCHEMA_VERSION_READER_CLASS} fall back
     *                         to their defaults when absent.
     * @param jobCatalog catalog handed to the created monitor.
     * @return the configured monitor.
     * @throws IOException if the base URI or the template URI is not a valid {@link URI}.
     * @throws IllegalArgumentException if a required key is missing or the schema version writer class cannot be
     *                                  instantiated.
     */
    public JobSpecMonitor forConfig(Config localScopeConfig, MutableJobCatalog jobCatalog) throws IOException {
      Preconditions.checkArgument(localScopeConfig.hasPath(TEMPLATE_KEY), "Missing required key %s", TEMPLATE_KEY);
      Preconditions.checkArgument(localScopeConfig.hasPath(TOPIC_KEY), "Missing required key %s", TOPIC_KEY);

      // Apply the defaults here as well, so that callers passing a scoped config directly get the same behavior
      // as forJobCatalog. This is a no-op when the defaults were already applied or the keys are all present.
      Config config = localScopeConfig.withFallback(DEFAULTS);

      String topic = config.getString(TOPIC_KEY);
      URI baseUri = parseUri(config.getString(BASE_URI_KEY), "base");
      URI template = parseUri(config.getString(TEMPLATE_KEY), "template");
      Map<String, String> extractKeys = readExtractKeys(config);
      Optional<Pattern> urnFilter = compileOptionalPattern(config, DATASET_URN_FILTER_KEY);
      Optional<Pattern> nameFilter = compileOptionalPattern(config, EVENT_NAME_FILTER_KEY);

      SchemaVersionWriter<?> versionWriter;
      try {
        versionWriter = (SchemaVersionWriter<?>) GobblinConstructorUtils.invokeLongestConstructor(
            Class.forName(config.getString(SCHEMA_VERSION_READER_CLASS)), config);
      } catch (ReflectiveOperationException roe) {
        throw new IllegalArgumentException(roe);
      }

      return new SLAEventKafkaJobMonitor(topic, jobCatalog, baseUri, config, versionWriter, urnFilter, nameFilter,
          template, extractKeys);
    }

    /** Parse a URI string, reporting a syntax error as an {@link IOException} that keeps the original cause. */
    private static URI parseUri(String uriString, String description) throws IOException {
      try {
        return new URI(uriString);
      } catch (URISyntaxException use) {
        throw new IOException("Invalid " + description + " URI " + uriString, use);
      }
    }

    /** Collect the string-valued entries found under {@link #EXTRACT_KEYS}; empty if the key is absent. */
    private static Map<String, String> readExtractKeys(Config config) {
      ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
      if (config.hasPath(EXTRACT_KEYS)) {
        for (Map.Entry<String, ConfigValue> entry : config.getConfig(EXTRACT_KEYS).entrySet()) {
          Object unwrappedValue = entry.getValue().unwrapped();
          // Non-string values cannot name a job configuration key and are skipped.
          if (unwrappedValue instanceof String) {
            builder.put(entry.getKey(), (String) unwrappedValue);
          }
        }
      }
      return builder.build();
    }

    /** Compile the regex stored under the input key, or return absent if the key is not configured. */
    private static Optional<Pattern> compileOptionalPattern(Config config, String key) {
      return config.hasPath(key)
          ? Optional.of(Pattern.compile(config.getString(key)))
          : Optional.<Pattern>absent();
    }
  }

  /**
   * @param topic topic handed to the superclass.
   * @param catalog catalog handed to the superclass.
   * @param baseURI base URI merged with the dataset URN of an event to form the {@link JobSpec} URI.
   * @param limitedScopeConfig configuration handed to the superclass.
   * @param versionWriter schema version writer handed to the superclass.
   * @param urnFilter if present, only events whose dataset URN contains a match are accepted.
   * @param nameFilter if present, only events whose name contains a match are accepted.
   * @param template template URI set on every generated {@link JobSpec}.
   * @param extractKeys map from event metadata key to the job configuration key receiving its value.
   * @throws IOException declared for the superclass constructor.
   */
  protected SLAEventKafkaJobMonitor(String topic, MutableJobCatalog catalog, URI baseURI,
      Config limitedScopeConfig, SchemaVersionWriter<?> versionWriter, Optional<Pattern> urnFilter,
      Optional<Pattern> nameFilter, URI template, Map<String, String> extractKeys) throws IOException {
    super(topic, catalog, limitedScopeConfig, GobblinTrackingEvent.SCHEMA$, versionWriter);

    this.baseURI = baseURI;
    this.urnFilter = urnFilter;
    this.nameFilter = nameFilter;
    this.template = template;
    this.extractKeys = extractKeys;
  }

  /**
   * Create the metrics of the superclass plus the counter of rejected events.
   */
  @Override
  protected void createMetrics() {
    super.createMetrics();
    this.rejectedEvents = getMetricContext().counter(RuntimeMetrics.GOBBLIN_JOB_MONITOR_SLAEVENT_REJECTEDEVENTS);
  }

  /**
   * Turn an SLA event into at most one {@link JobSpec}.
   *
   * @param event the event to parse.
   * @return an empty collection if the event is rejected by {@link #acceptEvent(GobblinTrackingEvent)} (the
   *         rejected events counter is incremented in that case), otherwise a collection holding a single
   *         {@link JobSpec}.
   */
  @Override
  public Collection<Either<JobSpec, URI>> parseJobSpec(GobblinTrackingEvent event) {
    // NOTE(review): rejectedEvents is only assigned in createMetrics(); presumably the superclass calls it
    // before any event is parsed -- confirm.
    if (!acceptEvent(event)) {
      this.rejectedEvents.inc();
      return Lists.newArrayList();
    }

    String datasetURN = event.getMetadata().get(SlaEventKeys.DATASET_URN_KEY);
    URI jobSpecURI = PathUtils.mergePaths(new Path(this.baseURI), new Path(datasetURN)).toUri();

    // Copy the requested metadata entries into the job configuration under their mapped names.
    Map<String, String> jobConfigMap = Maps.newHashMap();
    for (Map.Entry<String, String> entry : this.extractKeys.entrySet()) {
      if (event.getMetadata().containsKey(entry.getKey())) {
        jobConfigMap.put(entry.getValue(), event.getMetadata().get(entry.getKey()));
      }
    }

    Config jobConfig = ConfigFactory.parseMap(jobConfigMap);
    JobSpec jobSpec = JobSpec.builder(jobSpecURI).withTemplate(this.template).withConfig(jobConfig).build();

    return Lists.newArrayList(Either.<JobSpec, URI>left(jobSpec));
  }

  /**
   * Filter for {@link GobblinTrackingEvent}. Used to quickly determine whether an event should be used to produce
   * a {@link JobSpec}.
   *
   * @param event the event to test.
   * @return false if the event carries no dataset URN metadata, or if a configured URN or name filter finds no
   *         match (a partial match is enough) in the dataset URN or event name respectively; true otherwise.
   */
  protected boolean acceptEvent(GobblinTrackingEvent event) {
    if (!event.getMetadata().containsKey(SlaEventKeys.DATASET_URN_KEY)) {
      return false;
    }
    String datasetURN = event.getMetadata().get(SlaEventKeys.DATASET_URN_KEY);

    if (this.urnFilter.isPresent() && !this.urnFilter.get().matcher(datasetURN).find()) {
      return false;
    }

    if (this.nameFilter.isPresent() && !this.nameFilter.get().matcher(event.getName()).find()) {
      return false;
    }

    return true;
  }
}