/** * Copyright 2011-2017 Asakusa Framework Team. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.asakusafw.dmdl.directio.hive.orc; import java.math.BigInteger; import java.util.Map; import org.apache.hadoop.hive.ql.io.orc.CompressionKind; import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import com.asakusafw.directio.hive.serde.DataModelMapping.ExceptionHandlingStrategy; import com.asakusafw.directio.hive.serde.DataModelMapping.FieldMappingStrategy; import com.asakusafw.dmdl.Diagnostic; import com.asakusafw.dmdl.Diagnostic.Level; import com.asakusafw.dmdl.directio.hive.common.HiveDataModelTrait; import com.asakusafw.dmdl.directio.hive.common.HiveFieldTrait; import com.asakusafw.dmdl.model.AstAttribute; import com.asakusafw.dmdl.model.AstAttributeElement; import com.asakusafw.dmdl.model.AstLiteral; import com.asakusafw.dmdl.model.LiteralKind; import com.asakusafw.dmdl.semantics.DmdlSemantics; import com.asakusafw.dmdl.semantics.ModelDeclaration; import com.asakusafw.dmdl.semantics.PropertyDeclaration; import com.asakusafw.dmdl.spi.ModelAttributeDriver; import com.asakusafw.dmdl.util.AttributeUtil; /** * Processes <code>@directio.hive.orc</code> attributes. <h2>'@directio.hive.orc' attribute</h2> The attributed declaration can have: <ul> <li> with {@code table_name=[string-literal]} as explicit table name (default: data model name) </li> <li> with {@code format_version=[string-literal]} as ORCFile format version (default: system default) </li> <li> with {@code compression=[string-literal]} as compression kind name (default: {@code "snappy"}) </li> <li> with {@code stripe_size=integer} as stripe size (default: system default) </li> <li> with {@code field_mappping=[string-literal]} (default: {@code "position"}) </li> <li> with {@code missing_source=[string-literal]} (default: {@code "logging"}) </li> <li> with {@code missing_target=[string-literal]} (default: {@code "logging"}) </li> <li> with {@code incompatible_type=[string-literal]} (default: {@code "fail"}) </li> </ul> * @since 0.7.0 */ public class OrcFileDriver extends ModelAttributeDriver { /** * The attribute name. */ public static final String TARGET_NAME = "directio.hive.orc"; //$NON-NLS-1$ /** * The element name of explicit table name. */ public static final String ELEMENT_TABLE_NAME = "table_name"; //$NON-NLS-1$ /** * The element name of ORCFile format version. */ public static final String ELEMENT_FORMAT_VERSION = "format_version"; //$NON-NLS-1$ /** * The element name of compression kind name. */ public static final String ELEMENT_COMPRESSION_KIND = "compression"; //$NON-NLS-1$ /** * The element name of stripe size. */ public static final String ELEMENT_STRIPE_SIZE = "stripe_size"; //$NON-NLS-1$ /** * The element name of field mapping strategy. */ public static final String ELEMENT_FIELD_MAPPING = "field_mapping"; //$NON-NLS-1$ /** * The element name of missing source handling strategy. */ public static final String ELEMENT_MISSING_SOURCE = "on_missing_source"; //$NON-NLS-1$ /** * The element name of missing target handling strategy. */ public static final String ELEMENT_MISSING_TARGET = "on_missing_target"; //$NON-NLS-1$ /** * The element name of missing incompatible type handling strategy. */ public static final String ELEMENT_INCOMPATIBLE_TYPE = "on_incompatible_type"; //$NON-NLS-1$ static final long MINIMUM_STRIPE_SIZE = 1L * 1024 * 1024; @Override public String getTargetName() { return TARGET_NAME; } @Override public void process(DmdlSemantics environment, ModelDeclaration declaration, AstAttribute attribute) { Map<String, AstAttributeElement> elements = AttributeUtil.getElementMap(attribute); OrcFileTrait trait = analyzeElements(environment, attribute, elements); declaration.putTrait(OrcFileTrait.class, trait); HiveDataModelTrait baseTrait = HiveDataModelTrait.get(declaration); baseTrait.addDataFormatNamer((context, model) -> OrcFileEmitter.getClassName(context, model)); baseTrait.setOriginalAst(attribute, false); } @Override public void verify(DmdlSemantics environment, ModelDeclaration declaration, AstAttribute attribute) { for (PropertyDeclaration property : declaration.getDeclaredProperties()) { if (HiveFieldTrait.get(property).isColumnPresent() == false) { continue; } TypeInfo typeInfo = HiveFieldTrait.getTypeInfo(property); if ((typeInfo instanceof PrimitiveTypeInfo) == false) { environment.report(new Diagnostic(Diagnostic.Level.ERROR, property.getOriginalAst(), Messages.getString("OrcFileDriver.diagnosticUnsupportedPropertyType"), //$NON-NLS-1$ typeInfo.getQualifiedName(), property.getName().identifier, property.getType())); } } } private OrcFileTrait analyzeElements( DmdlSemantics environment, AstAttribute attribute, Map<String, AstAttributeElement> elements) { OrcFileTrait result = new OrcFileTrait(); result.setOriginalAst(attribute, true); consumeTableName(environment, attribute, elements, result); consumeFormatVersion(environment, attribute, elements, result); consumeCompressionKind(environment, attribute, elements, result); consumeStripeSize(environment, attribute, elements, result); consumeFieldMapping(environment, attribute, elements, result); consumeMissingSource(environment, attribute, elements, result); consumeMissingTarget(environment, attribute, elements, result); consumeIncompatibleType(environment, attribute, elements, result); environment.reportAll(AttributeUtil.reportInvalidElements(attribute, elements.values())); return result; } private void consumeTableName( DmdlSemantics environment, AstAttribute attribute, Map<String, AstAttributeElement> elements, OrcFileTrait result) { AstLiteral tableName = take(environment, attribute, elements, ELEMENT_TABLE_NAME, LiteralKind.STRING); if (tableName != null) { String value = tableName.toStringValue(); if (AttributeUtil.checkPresent(environment, tableName, label(ELEMENT_TABLE_NAME), value)) { result.setTableName(value); } } } private void consumeFormatVersion( DmdlSemantics environment, AstAttribute attribute, Map<String, AstAttributeElement> elements, OrcFileTrait result) { AstLiteral formatVersion = take(environment, attribute, elements, ELEMENT_FORMAT_VERSION, LiteralKind.STRING); if (formatVersion != null) { String symbol = formatVersion.toStringValue(); try { OrcFile.Version value = OrcFile.Version.byName(symbol); result.configuration().withFormatVersion(value); } catch (IllegalArgumentException e) { environment.report(new Diagnostic( Level.ERROR, formatVersion, Messages.getString("OrcFileDriver.diagnosticUnknownElement"), //$NON-NLS-1$ TARGET_NAME, ELEMENT_FORMAT_VERSION, Messages.getString("OrcFileDriver.labelVersion"), //$NON-NLS-1$ symbol)); } } } private void consumeCompressionKind( DmdlSemantics environment, AstAttribute attribute, Map<String, AstAttributeElement> elements, OrcFileTrait result) { CompressionKind option = consumeOption( environment, attribute, elements, ELEMENT_COMPRESSION_KIND, Messages.getString("OrcFileDriver.labelCompression"), //$NON-NLS-1$ CompressionKind.values()); if (option != null) { result.configuration().withCompressionKind(option); } } private void consumeFieldMapping( DmdlSemantics environment, AstAttribute attribute, Map<String, AstAttributeElement> elements, OrcFileTrait result) { FieldMappingStrategy option = consumeOption( environment, attribute, elements, ELEMENT_FIELD_MAPPING, Messages.getString("OrcFileDriver.labelFieldMappingStrategy"), //$NON-NLS-1$ FieldMappingStrategy.values()); if (option != null) { result.configuration().withFieldMappingStrategy(option); } } private void consumeMissingSource( DmdlSemantics environment, AstAttribute attribute, Map<String, AstAttributeElement> elements, OrcFileTrait result) { ExceptionHandlingStrategy option = consumeOption( environment, attribute, elements, ELEMENT_MISSING_SOURCE, Messages.getString("OrcFileDriver.labelExceptionHandlingStrategy"), //$NON-NLS-1$ ExceptionHandlingStrategy.values()); if (option != null) { result.configuration().withOnMissingSource(option); } } private void consumeMissingTarget( DmdlSemantics environment, AstAttribute attribute, Map<String, AstAttributeElement> elements, OrcFileTrait result) { ExceptionHandlingStrategy option = consumeOption( environment, attribute, elements, ELEMENT_MISSING_TARGET, Messages.getString("OrcFileDriver.labelExceptionHandlingStrategy"), //$NON-NLS-1$ ExceptionHandlingStrategy.values()); if (option != null) { result.configuration().withOnMissingTarget(option); } } private void consumeIncompatibleType( DmdlSemantics environment, AstAttribute attribute, Map<String, AstAttributeElement> elements, OrcFileTrait result) { ExceptionHandlingStrategy option = consumeOption( environment, attribute, elements, ELEMENT_INCOMPATIBLE_TYPE, Messages.getString("OrcFileDriver.labelExceptionHandlingStrategy"), //$NON-NLS-1$ ExceptionHandlingStrategy.values()); if (option != null) { result.configuration().withOnIncompatibleType(option); } } private <T extends Enum<?>> T consumeOption( DmdlSemantics environment, AstAttribute attribute, Map<String, AstAttributeElement> elements, String key, String description, T[] options) { AstLiteral literal = take(environment, attribute, elements, key, LiteralKind.STRING); if (literal != null) { String symbol = literal.toStringValue(); T value = find(options, symbol); if (value == null) { environment.report(new Diagnostic( Level.ERROR, literal, Messages.getString("OrcFileDriver.diagnosticUnknownElement"), //$NON-NLS-1$ TARGET_NAME, key, description, symbol)); } else { return value; } } return null; } private void consumeStripeSize( DmdlSemantics environment, AstAttribute attribute, Map<String, AstAttributeElement> elements, OrcFileTrait result) { AstLiteral stripeSize = take(environment, attribute, elements, ELEMENT_STRIPE_SIZE, LiteralKind.INTEGER); if (stripeSize != null) { String label = label(ELEMENT_STRIPE_SIZE); BigInteger value = stripeSize.toIntegerValue(); if (AttributeUtil.checkRange(environment, stripeSize, label, value, MINIMUM_STRIPE_SIZE, Long.MAX_VALUE)) { result.configuration().withStripeSize(value.longValue()); } } } private AstLiteral take( DmdlSemantics environment, AstAttribute attribute, Map<String, AstAttributeElement> elements, String elementName, LiteralKind kind) { return AttributeUtil.takeLiteral(environment, attribute, elements, elementName, kind, false); } private static String label(String key) { return String.format("@%s(%s)", TARGET_NAME, key); //$NON-NLS-1$ } private <T extends Enum<?>> T find(T[] values, String symbol) { for (T value : values) { if (value.name().equalsIgnoreCase(symbol)) { return value; } } return null; } }