/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.nifi.processors.kite;

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.io.Resources;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.Validator;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.util.StandardValidators;
import org.kitesdk.data.DatasetNotFoundException;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.SchemaNotFoundException;
import org.kitesdk.data.URIBuilder;
import org.kitesdk.data.spi.DefaultConfiguration;

/**
 * Common base for the processors in this package. It contributes the
 * "Hadoop configuration files" property, a set of reusable property
 * validators, and helpers for building a Hadoop {@link Configuration} and
 * resolving an Avro {@link Schema} from a URI or a JSON literal.
 */
abstract class AbstractKiteProcessor extends AbstractProcessor {

    /** Splits a comma-separated property value, trimming whitespace around each entry. */
    private static final Splitter COMMA = Splitter.on(',').trimResults();

    /**
     * Validates a comma-separated list of files by delegating each entry to
     * {@link StandardValidators#FILE_EXISTS_VALIDATOR}. The first failing
     * entry's result is returned as-is; a null or empty list is valid.
     */
    protected static final Validator FILES_EXIST = new Validator() {
        @Override
        public ValidationResult validate(String subject, String configFiles,
                ValidationContext context) {
            if (configFiles != null && !configFiles.isEmpty()) {
                for (String file : COMMA.split(configFiles)) {
                    ValidationResult result = StandardValidators.FILE_EXISTS_VALIDATOR
                            .validate(subject, file, context);
                    if (!result.isValid()) {
                        return result;
                    }
                }
            }
            return new ValidationResult.Builder()
                    .subject(subject)
                    .input(configFiles)
                    .explanation("Files exist")
                    .valid(true)
                    .build();
        }
    };

    /** Optional list of Hadoop configuration files, validated by {@link #FILES_EXIST}. */
    protected static final PropertyDescriptor CONF_XML_FILES =
            new PropertyDescriptor.Builder()
                    .name("Hadoop configuration files")
                    .description("A comma-separated list of Hadoop configuration files")
                    .addValidator(FILES_EXIST)
                    .build();

    /**
     * Validates that a value can be turned into a dataset URI by
     * {@link URIBuilder}. A null or blank value is invalid ("not set").
     * Values containing expression language (when the property supports it)
     * are accepted without being parsed.
     */
    protected static final Validator RECOGNIZED_URI = new Validator() {
        @Override
        public ValidationResult validate(String subject, String uri,
                ValidationContext context) {
            // The explanation is only meaningful when the result is invalid.
            String message = "not set";
            boolean isValid = true;

            // Guard against null so a missing value is reported, not thrown as an NPE.
            if (uri == null || uri.trim().isEmpty()) {
                isValid = false;
            } else {
                final boolean elPresent = context.isExpressionLanguageSupported(subject)
                        && context.isExpressionLanguagePresent(uri);
                if (!elPresent) {
                    try {
                        new URIBuilder(URI.create(uri)).build();
                    } catch (RuntimeException e) {
                        message = e.getMessage();
                        isValid = false;
                    }
                }
            }

            return new ValidationResult.Builder()
                    .subject(subject)
                    .input(uri)
                    .explanation("Dataset URI is invalid: " + message)
                    .valid(isValid)
                    .build();
        }
    };

    /**
     * Resolves a {@link Schema} for the given string, either a URI or a JSON literal.
     *
     * <p>If the string is not a syntactically valid URI it is parsed as a
     * schema literal. Otherwise the URI scheme selects the source:
     * {@code dataset}/{@code view} load the schema from the dataset
     * descriptor, {@code resource} reads a classpath resource, and anything
     * else is opened as a file through the Hadoop {@link FileSystem}.
     *
     * @param uriOrLiteral a schema location URI or a schema literal
     * @param conf the Hadoop configuration used to resolve file system paths
     * @return the resolved schema
     * @throws SchemaNotFoundException if the dataset or classpath resource is
     *         missing, the source cannot be read, or the content cannot be parsed
     */
    protected static Schema getSchema(String uriOrLiteral, Configuration conf) {
        URI uri;
        try {
            uri = new URI(uriOrLiteral);
        } catch (URISyntaxException e) {
            // not a URI: try to parse the schema as a literal
            return parseSchema(uriOrLiteral);
        }

        try {
            if ("dataset".equals(uri.getScheme()) || "view".equals(uri.getScheme())) {
                return Datasets.load(uri).getDataset().getDescriptor().getSchema();
            } else if ("resource".equals(uri.getScheme())) {
                try (InputStream in = openResource(uri)) {
                    return parseSchema(uri, in);
                }
            } else {
                // try to open the file
                Path schemaPath = new Path(uri);
                FileSystem fs = schemaPath.getFileSystem(conf);
                try (InputStream in = fs.open(schemaPath)) {
                    return parseSchema(uri, in);
                }
            }
        } catch (DatasetNotFoundException e) {
            throw new SchemaNotFoundException(
                    "Cannot read schema of missing dataset: " + uri, e);
        } catch (IOException e) {
            throw new SchemaNotFoundException(
                    "Failed while reading " + uri + ": " + e.getMessage(), e);
        }
    }

    /**
     * Opens the classpath resource named by the scheme-specific part of a
     * {@code resource:} URI.
     *
     * <p>The resource lookup signals a missing resource with an
     * {@link IllegalArgumentException}; that is translated here so callers of
     * {@link #getSchema(String, Configuration)} see the same
     * {@link SchemaNotFoundException} as for every other missing source.
     */
    private static InputStream openResource(URI uri) throws IOException {
        try {
            return Resources.getResource(uri.getSchemeSpecificPart()).openStream();
        } catch (IllegalArgumentException e) {
            throw new SchemaNotFoundException(
                    "Cannot read schema of missing resource: " + uri, e);
        }
    }

    /**
     * Parses a schema literal.
     *
     * @throws SchemaNotFoundException if the literal cannot be parsed
     */
    private static Schema parseSchema(String literal) {
        try {
            return new Schema.Parser().parse(literal);
        } catch (RuntimeException e) {
            throw new SchemaNotFoundException(
                    "Failed to parse schema: " + literal, e);
        }
    }

    /**
     * Parses a schema from a stream; {@code uri} is used only for the error message.
     *
     * @throws IOException if reading the stream fails
     * @throws SchemaNotFoundException if the content cannot be parsed
     */
    private static Schema parseSchema(URI uri, InputStream in) throws IOException {
        try {
            return new Schema.Parser().parse(in);
        } catch (RuntimeException e) {
            throw new SchemaNotFoundException("Failed to parse schema at " + uri, e);
        }
    }

    /**
     * Validates that a schema can be resolved with
     * {@link #getSchema(String, Configuration)} using the configuration files
     * named by {@link #CONF_XML_FILES}. A null or blank value is invalid.
     * Values containing expression language (when the property supports it)
     * are accepted without being resolved.
     */
    protected static final Validator SCHEMA_VALIDATOR = new Validator() {
        @Override
        public ValidationResult validate(String subject, String uri,
                ValidationContext context) {
            // Reject a missing value up front instead of letting URI parsing fail on it.
            if (uri == null || uri.trim().isEmpty()) {
                return new ValidationResult.Builder()
                        .subject(subject)
                        .input(uri)
                        .explanation("Schema is not set")
                        .valid(false)
                        .build();
            }

            Configuration conf = getConfiguration(
                    context.getProperty(CONF_XML_FILES).getValue());

            // Track validity separately from the message: an exception with a
            // null message must still mark the value as invalid.
            boolean isValid = true;
            String error = null;

            final boolean elPresent = context.isExpressionLanguageSupported(subject)
                    && context.isExpressionLanguagePresent(uri);
            if (!elPresent) {
                try {
                    getSchema(uri, conf);
                } catch (SchemaNotFoundException e) {
                    isValid = false;
                    error = e.getMessage();
                }
            }

            return new ValidationResult.Builder()
                    .subject(subject)
                    .input(uri)
                    .explanation(error)
                    .valid(isValid)
                    .build();
        }
    };

    /** The property descriptors contributed by this base class. */
    protected static final List<PropertyDescriptor> ABSTRACT_KITE_PROPS =
            ImmutableList.<PropertyDescriptor>builder()
                    .add(CONF_XML_FILES)
                    .build();

    /**
     * Returns the property descriptors contributed by this base class.
     *
     * @return the immutable list {@link #ABSTRACT_KITE_PROPS}
     */
    static List<PropertyDescriptor> getProperties() {
        return ABSTRACT_KITE_PROPS;
    }

    /**
     * Installs the configuration built from {@link #CONF_XML_FILES} as the
     * Kite default configuration when the processor is scheduled.
     *
     * @param context the process context supplying the property value
     * @throws IOException declared for overriding implementations; not thrown here
     */
    @OnScheduled
    protected void setDefaultConfiguration(ProcessContext context)
            throws IOException {
        DefaultConfiguration.set(getConfiguration(
                context.getProperty(CONF_XML_FILES).getValue()));
    }

    /**
     * Builds a Hadoop configuration starting from the Kite default and adding
     * each listed configuration file as a resource.
     *
     * @param configFiles a comma-separated list of files; may be null or empty
     * @return the configuration, unchanged from the default when no files are given
     */
    protected static Configuration getConfiguration(String configFiles) {
        Configuration conf = DefaultConfiguration.get();

        if (configFiles == null || configFiles.isEmpty()) {
            return conf;
        }

        for (String file : COMMA.split(configFiles)) {
            // process each resource only once
            if (conf.getResource(file) == null) {
                // use Path instead of String to get the file from the FS
                conf.addResource(new Path(file));
            }
        }

        return conf;
    }

    /**
     * Returns the property descriptors supported by this processor.
     *
     * @return the immutable list {@link #ABSTRACT_KITE_PROPS}
     */
    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return ABSTRACT_KITE_PROPS;
    }
}