/* * Copyright © 2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data2.metadata.system; import co.cask.cdap.api.data.batch.BatchReadable; import co.cask.cdap.api.data.batch.BatchWritable; import co.cask.cdap.api.data.batch.InputFormatProvider; import co.cask.cdap.api.data.batch.OutputFormatProvider; import co.cask.cdap.api.data.batch.RecordScannable; import co.cask.cdap.api.dataset.Dataset; import co.cask.cdap.api.dataset.DatasetProperties; import co.cask.cdap.api.dataset.lib.FileSetProperties; import co.cask.cdap.api.dataset.lib.ObjectMappedTableProperties; import co.cask.cdap.api.dataset.table.Table; import co.cask.cdap.data2.metadata.store.MetadataStore; import co.cask.cdap.proto.Id; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; import java.util.Map; import javax.annotation.Nullable; /** * A {@link AbstractSystemMetadataWriter} for a {@link Id.DatasetInstance dataset}. */ public class DatasetSystemMetadataWriter extends AbstractSystemMetadataWriter { private static final Logger LOG = LoggerFactory.getLogger(DatasetSystemMetadataWriter.class); public static final String EXPLORE_TAG = "explore"; public static final String BATCH_TAG = "batch"; public static final String TYPE = "type"; @VisibleForTesting static final String FILESET_AVRO_SCHEMA_PROPERTY = "avro.schema.literal"; static final String FILESET_PARQUET_SCHEMA_OUTPUT_KEY = "parquet.avro.schema"; static final String FILESET_AVRO_SCHEMA_OUTPUT_KEY = "avro.schema.output.key"; private final Id.DatasetInstance dsInstance; private final String dsType; private final DatasetProperties dsProperties; private final Dataset dataset; private final long createTime; private final String description; public DatasetSystemMetadataWriter(MetadataStore metadataStore, Id.DatasetInstance dsInstance, DatasetProperties dsProperties, @Nullable Dataset dataset, @Nullable String dsType, @Nullable String description) { this(metadataStore, dsInstance, dsProperties, -1, dataset, dsType, description); } public DatasetSystemMetadataWriter(MetadataStore metadataStore, Id.DatasetInstance dsInstance, DatasetProperties dsProperties, long createTime, @Nullable Dataset dataset, @Nullable String dsType, @Nullable String description) { super(metadataStore, dsInstance); this.dsInstance = dsInstance; this.dsType = dsType; this.dsProperties = dsProperties; this.createTime = createTime; this.dataset = dataset; this.description = description; if (dataset == null) { LOG.warn("Dataset {} is null, some metadata will not be recorded for the dataset", dsInstance); } } @Override protected Map<String, String> getSystemPropertiesToAdd() { ImmutableMap.Builder<String, String> properties = ImmutableMap.builder(); Map<String, String> datasetProperties = dsProperties.getProperties(); if (dsType != null) { properties.put(TYPE, dsType); } if (datasetProperties.containsKey(Table.PROPERTY_TTL)) { properties.put(TTL_KEY, datasetProperties.get(Table.PROPERTY_TTL)); } if (description != null) { properties.put(DESCRIPTION, description); } if (createTime > 0) { properties.put(CREATION_TIME, String.valueOf(createTime)); } return properties.build(); } @Override protected String[] getSystemTagsToAdd() { List<String> tags = new ArrayList<>(); tags.add(dsInstance.getId()); if (dataset instanceof RecordScannable) { tags.add(EXPLORE_TAG); } if (dataset instanceof BatchReadable || dataset instanceof BatchWritable || dataset instanceof InputFormatProvider || dataset instanceof OutputFormatProvider) { tags.add(BATCH_TAG); } return tags.toArray(new String[tags.size()]); } @Nullable @Override protected String getSchemaToAdd() { // TODO: fix schema determination after CDAP-2790 is fixed (CDAP-5408) Map<String, String> datasetProperties = dsProperties.getProperties(); String schemaStr = null; if (datasetProperties.containsKey(DatasetProperties.SCHEMA)) { schemaStr = datasetProperties.get(DatasetProperties.SCHEMA); } else if (datasetProperties.containsKey(ObjectMappedTableProperties.OBJECT_SCHEMA)) { // If it is an ObjectMappedTable, the schema is in a property called 'object.schema' schemaStr = datasetProperties.get(ObjectMappedTableProperties.OBJECT_SCHEMA); } else if (datasetProperties.containsKey(getExplorePropName(FILESET_AVRO_SCHEMA_PROPERTY))) { // Fileset with avro schema (CDAP-5322) schemaStr = datasetProperties.get(getExplorePropName(FILESET_AVRO_SCHEMA_PROPERTY)); } else if (datasetProperties.containsKey(getOutputPropName(FILESET_AVRO_SCHEMA_OUTPUT_KEY))) { // Fileset with avro schema defined in output property (CDAP-5322) schemaStr = datasetProperties.get(getOutputPropName(FILESET_AVRO_SCHEMA_OUTPUT_KEY)); } else if (datasetProperties.containsKey(getOutputPropName(FILESET_PARQUET_SCHEMA_OUTPUT_KEY))) { // Fileset with parquet schema defined in output property (CDAP-5322) schemaStr = datasetProperties.get(getOutputPropName(FILESET_PARQUET_SCHEMA_OUTPUT_KEY)); } return schemaStr; } private static String getExplorePropName(String prop) { return FileSetProperties.PROPERTY_EXPLORE_TABLE_PROPERTY_PREFIX + prop; } private static String getOutputPropName(String prop) { return FileSetProperties.OUTPUT_PROPERTIES_PREFIX + prop; } }