/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.streams.plugins.hive;
import org.apache.streams.util.schema.FieldType;
import org.apache.streams.util.schema.FieldUtil;
import org.apache.streams.util.schema.FileUtil;
import org.apache.streams.util.schema.Schema;
import org.apache.streams.util.schema.SchemaStore;
import org.apache.streams.util.schema.SchemaStoreImpl;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.commons.lang3.StringUtils;
import org.jsonschema2pojo.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import static org.apache.streams.util.schema.FileUtil.dropExtension;
import static org.apache.streams.util.schema.FileUtil.dropSourcePathPrefix;
import static org.apache.streams.util.schema.FileUtil.swapExtension;
import static org.apache.streams.util.schema.FileUtil.writeFile;
/**
* Generates Hive table definitions for use with org.openx.data.jsonserde.JsonSerDe
* on newline-delimited JSON documents.
*/
public class StreamsHiveResourceGenerator implements Runnable {
private static final Logger LOGGER = LoggerFactory.getLogger(StreamsHiveResourceGenerator.class);
private static final String LS = System.lineSeparator();
private StreamsHiveGenerationConfig config;
private SchemaStore schemaStore = new SchemaStoreImpl();
private int currentDepth = 0;
/**
* Run from the CLI without Maven.
*
* <p>
* java -jar streams-plugin-hive-jar-with-dependencies.jar StreamsHiveResourceGenerator src/main/jsonschema target/generated-resources
*
* @param args [sourceDirectory, targetDirectory]
*/
public static void main(String[] args) {
StreamsHiveGenerationConfig config = new StreamsHiveGenerationConfig();
String sourceDirectory = "src/main/jsonschema";
String targetDirectory = "target/generated-resources/hive";
if ( args.length > 0 ) {
sourceDirectory = args[0];
}
if ( args.length > 1 ) {
targetDirectory = args[1];
}
config.setSourceDirectory(sourceDirectory);
config.setTargetDirectory(targetDirectory);
StreamsHiveResourceGenerator streamsHiveResourceGenerator = new StreamsHiveResourceGenerator(config);
streamsHiveResourceGenerator.run();
}
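/*
* Minimal programmatic usage, equivalent to the CLI entry point above
* (directory paths here are illustrative):
*
*   StreamsHiveGenerationConfig config = new StreamsHiveGenerationConfig();
*   config.setSourceDirectory("src/main/jsonschema");
*   config.setTargetDirectory("target/generated-resources/hive");
*   new StreamsHiveResourceGenerator(config).run();
*/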
public StreamsHiveResourceGenerator(StreamsHiveGenerationConfig config) {
this.config = config;
}
@Override
public void run() {
Objects.requireNonNull(config);
generate(config);
}
/**
* Run generation using the supplied StreamsHiveGenerationConfig.
*
* @param config StreamsHiveGenerationConfig
*/
public void generate(StreamsHiveGenerationConfig config) {
LinkedList<File> sourceFiles = new LinkedList<>();
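// seed the list of source files from the configured source URLs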
for (Iterator<URL> sources = config.getSource(); sources.hasNext();) {
URL source = sources.next();
sourceFiles.add(URLUtil.getFileFromURL(source));
}
LOGGER.info("Seeded with {} source paths:", sourceFiles.size());
FileUtil.resolveRecursive(config, sourceFiles);
LOGGER.info("Resolved {} schema files:", sourceFiles.size());
for (File item : sourceFiles) {
schemaStore.create(item.toURI());
}
LOGGER.info("Identified {} objects:", schemaStore.getSize());
for (Iterator<Schema> schemaIterator = schemaStore.getSchemaIterator(); schemaIterator.hasNext(); ) {
Schema schema = schemaIterator.next();
currentDepth = 0;
if ( schema.getUri().getScheme().equals("file")) {
String inputFile = schema.getUri().getPath();
String resourcePath = dropSourcePathPrefix(inputFile, config.getSourceDirectory());
for (String sourcePath : config.getSourcePaths()) {
resourcePath = dropSourcePathPrefix(resourcePath, sourcePath);
}
String outputFile = config.getTargetDirectory() + "/" + swapExtension(resourcePath, "json", "hql");
LOGGER.info("Processing {}:", resourcePath);
String resourceId = dropExtension(resourcePath).replace("/", "_");
String resourceContent = generateResource(schema, resourceId);
writeFile(outputFile, resourceContent);
LOGGER.info("Wrote {}:", outputFile);
}
}
}
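/*
* Illustrative shape of the HQL emitted by generateResource, assuming a schema with
* scalar, object, and array properties (the table and field names below are hypothetical,
* and whitespace is simplified relative to the actual output):
*
*   CREATE TABLE `example`
*   (
*   `id` STRING,
*   `actor` STRUCT<`id`:STRING,`displayName`:STRING>,
*   `tags` ARRAY<STRING>
*   )
*   ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
*   WITH SERDEPROPERTIES ("ignore.malformed.json" = "true")
*   STORED AS TEXTFILE
*   LOCATION '${hiveconf:path}';
*/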
/**
* Generates a CREATE TABLE statement (HQL) for the given schema.
*
* @param schema Schema
* @param resourceId String
* @return the generated CREATE TABLE statement
*/
public String generateResource(Schema schema, String resourceId) {
StringBuilder resourceBuilder = new StringBuilder();
resourceBuilder.append("CREATE TABLE ");
resourceBuilder.append(hqlEscape(resourceId));
resourceBuilder.append(LS);
resourceBuilder.append("(");
resourceBuilder.append(LS);
resourceBuilder = appendRootObject(resourceBuilder, schema, resourceId, ' ');
resourceBuilder.append(")");
resourceBuilder.append(LS);
resourceBuilder.append("ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'");
resourceBuilder.append(LS);
resourceBuilder.append("WITH SERDEPROPERTIES (\"ignore.malformed.json\" = \"true\"");
resourceBuilder.append(LS);
resourceBuilder.append("STORED AS TEXTFILE");
resourceBuilder.append(LS);
resourceBuilder.append("LOCATION '${hiveconf:path}';");
resourceBuilder.append(LS);
return resourceBuilder.toString();
}
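/**
* Appends the schema's resolved top-level properties as the table's column list.
*/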
protected StringBuilder appendRootObject(StringBuilder builder, Schema schema, String resourceId, Character seperator) {
ObjectNode propertiesNode = schemaStore.resolveProperties(schema, null, resourceId);
if ( propertiesNode != null && propertiesNode.isObject() && propertiesNode.size() > 0) {
builder = appendPropertiesNode(builder, schema, propertiesNode, seperator);
}
return builder;
}
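/**
* Renders a scalar property as a single column definition: escaped name, separator, Hive type.
*/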
private StringBuilder appendValueField(StringBuilder builder, Schema schema, String fieldId, FieldType fieldType, Character seperator) {
// safe to append nothing
Objects.requireNonNull(builder);
builder.append(hqlEscape(fieldId));
builder.append(seperator);
builder.append(hqlType(fieldType));
return builder;
}
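/**
* Renders an array property from its items schema: recurses into nested arrays,
* delegates object items to appendArrayObject, and otherwise emits {@code ARRAY<TYPE>};
* logs a warning when the item type cannot be determined.
*/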
protected StringBuilder appendArrayItems(StringBuilder builder, Schema schema, String fieldId, ObjectNode itemsNode, Character seperator) {
// not safe to append nothing
Objects.requireNonNull(builder);
if ( itemsNode == null ) {
return builder;
}
if ( itemsNode.has("type")) {
try {
FieldType itemType = FieldUtil.determineFieldType(itemsNode);
switch ( itemType ) {
case OBJECT:
builder = appendArrayObject(builder, schema, fieldId, itemsNode, seperator);
break;
case ARRAY:
ObjectNode subArrayItems = (ObjectNode) itemsNode.get("items");
builder = appendArrayItems(builder, schema, fieldId, subArrayItems, seperator);
break;
default:
builder = appendArrayField(builder, schema, fieldId, itemType, seperator);
}
} catch (Exception ex) {
LOGGER.warn("No item type resolvable for {}", fieldId);
}
}
Objects.requireNonNull(builder);
return builder;
}
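/**
* Renders an array of scalar items as {@code ARRAY<TYPE>}.
*/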
private StringBuilder appendArrayField(StringBuilder builder, Schema schema, String fieldId, FieldType fieldType, Character seperator) {
// safe to append nothing
Objects.requireNonNull(builder);
Objects.requireNonNull(fieldId);
builder.append(hqlEscape(fieldId));
builder.append(seperator);
builder.append("ARRAY<" + hqlType(fieldType) + ">");
Objects.requireNonNull(builder);
return builder;
}
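/**
* Renders an array of object items as {@code ARRAY<STRUCT<...>>} using the resolved item properties.
*/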
private StringBuilder appendArrayObject(StringBuilder builder, Schema schema, String fieldId, ObjectNode fieldNode, Character seperator) {
// safe to append nothing
Objects.requireNonNull(builder);
Objects.requireNonNull(fieldNode);
if (StringUtils.isNotBlank(fieldId)) {
builder.append(hqlEscape(fieldId));
builder.append(seperator);
}
builder.append("ARRAY");
builder.append(LS);
builder.append("<");
builder.append(LS);
ObjectNode propertiesNode = schemaStore.resolveProperties(schema, fieldNode, fieldId);
builder = appendStructField(builder, schema, "", propertiesNode, ':');
builder.append(">");
Objects.requireNonNull(builder);
return builder;
}
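/**
* Renders an object property as {@code STRUCT<...>} of its resolved child properties,
* incrementing currentDepth while the nested properties are rendered so the configured
* max depth is honored.
*/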
private StringBuilder appendStructField(StringBuilder builder, Schema schema, String fieldId, ObjectNode propertiesNode, Character seperator) {
// safe to append nothing
Objects.requireNonNull(builder);
Objects.requireNonNull(propertiesNode);
if ( propertiesNode.isObject() && propertiesNode.size() > 0 ) {
currentDepth += 1;
if (StringUtils.isNotBlank(fieldId)) {
builder.append(hqlEscape(fieldId));
builder.append(seperator);
}
builder.append("STRUCT");
builder.append(LS);
builder.append("<");
builder.append(LS);
builder = appendPropertiesNode(builder, schema, propertiesNode, ':');
builder.append(">");
builder.append(LS);
currentDepth -= 1;
}
Objects.requireNonNull(builder);
return builder;
}
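/**
* Renders each non-excluded property as a column definition, limiting nested
* objects and arrays to config.getMaxDepth(), and joins the results with commas.
*/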
private StringBuilder appendPropertiesNode(StringBuilder builder, Schema schema, ObjectNode propertiesNode, Character seperator) {
Objects.requireNonNull(builder);
Objects.requireNonNull(propertiesNode);
Iterator<Map.Entry<String, JsonNode>> fields = propertiesNode.fields();
List<String> fieldStrings = new ArrayList<>();
while (fields.hasNext()) {
Map.Entry<String, JsonNode> field = fields.next();
String fieldId = field.getKey();
if ( !config.getExclusions().contains(fieldId) && field.getValue().isObject()) {
ObjectNode fieldNode = (ObjectNode) field.getValue();
FieldType fieldType = FieldUtil.determineFieldType(fieldNode);
if (fieldType != null ) {
switch (fieldType) {
case ARRAY:
ObjectNode itemsNode = (ObjectNode) fieldNode.get("items");
if ( currentDepth <= config.getMaxDepth()) {
StringBuilder arrayItemsBuilder = appendArrayItems(new StringBuilder(), schema, fieldId, itemsNode, seperator);
if (StringUtils.isNotBlank(arrayItemsBuilder.toString())) {
fieldStrings.add(arrayItemsBuilder.toString());
}
}
break;
case OBJECT:
ObjectNode childProperties = schemaStore.resolveProperties(schema, fieldNode, fieldId);
if ( currentDepth < config.getMaxDepth()) {
StringBuilder structFieldBuilder = appendStructField(new StringBuilder(), schema, fieldId, childProperties, seperator);
if (StringUtils.isNotBlank(structFieldBuilder.toString())) {
fieldStrings.add(structFieldBuilder.toString());
}
}
break;
default:
StringBuilder valueFieldBuilder = appendValueField(new StringBuilder(), schema, fieldId, fieldType, seperator);
if (StringUtils.isNotBlank(valueFieldBuilder.toString())) {
fieldStrings.add(valueFieldBuilder.toString());
}
}
}
}
}
builder.append(String.join("," + LS, fieldStrings)).append(LS);
Objects.requireNonNull(builder);
return builder;
}
private static String hqlEscape( String fieldId ) {
return "`" + fieldId + "`";
}
private static String hqlType( FieldType fieldType ) {
switch ( fieldType ) {
case INTEGER:
return "INT";
case NUMBER:
return "FLOAT";
case OBJECT:
return "STRUCT";
default:
return fieldType.name().toUpperCase();
}
}
}