/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data2.metadata.indexer;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.data2.metadata.dataset.MetadataDataset;
import co.cask.cdap.data2.metadata.dataset.MetadataEntry;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
/**
* An {@link Indexer} to generate indexes for a {@link Schema}
*/
public class SchemaIndexer implements Indexer {
@Override
public Set<String> getIndexes(MetadataEntry entry) {
return createIndexes(getSchema(entry.getValue()));
}
private Schema getSchema(String schemaStr) {
if (schemaStr.startsWith("\"") && schemaStr.endsWith("\"")) {
// simple type in lower case
schemaStr = schemaStr.substring(1, schemaStr.length() - 1);
return Schema.of(Schema.Type.valueOf(schemaStr.toUpperCase()));
}
// otherwise its a json
try {
return Schema.parseJson(schemaStr);
} catch (IOException e) {
throw new IllegalArgumentException(e);
}
}
private Set<String> createIndexes(Schema schema) {
if (schema == null) {
return Collections.emptySet();
}
Set<String> indexes = new HashSet<>();
processSchema(indexes, schema, schema.getRecordName());
return indexes;
}
private void processSchema(Set<String> indexes, Schema schema, @Nullable String fieldName) {
switch (schema.getType()) {
case NULL:
// Ignore null types
break;
case BOOLEAN:
case INT:
case LONG:
case FLOAT:
case DOUBLE:
case BYTES:
case ENUM:
case STRING:
createIndexes(indexes, schema, fieldName);
break;
case ARRAY:
createIndexes(indexes, schema, fieldName);
processSchema(indexes, schema.getComponentSchema(), schema.getComponentSchema().getRecordName());
break;
case MAP:
createIndexes(indexes, schema, fieldName);
Map.Entry<Schema, Schema> mapSchema = schema.getMapSchema();
processSchema(indexes, mapSchema.getKey(), mapSchema.getKey().getRecordName());
processSchema(indexes, mapSchema.getValue(), mapSchema.getValue().getRecordName());
break;
case RECORD:
createIndexes(indexes, schema, fieldName);
for (Schema.Field field : schema.getFields()) {
processSchema(indexes, field.getSchema(), field.getName());
}
break;
case UNION:
createIndexes(indexes, schema, fieldName);
for (Schema us : schema.getUnionSchemas()) {
processSchema(indexes, us, us.getRecordName());
}
}
}
private void createIndexes(Set<String> indexes, Schema schema, @Nullable String fieldName) {
if (fieldName != null) {
String type = getSimpleType(schema);
indexes.add(fieldName + MetadataDataset.KEYVALUE_SEPARATOR + type);
indexes.add(fieldName);
}
}
private String getSimpleType(Schema schema) {
if (schema.isNullable()) {
schema = schema.getNonNullable();
}
return schema.getType().toString();
}
}