/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.dfs;
import static com.google.common.collect.Collections2.transform;
import static com.google.common.collect.Sets.newHashSet;
import static java.util.Collections.unmodifiableList;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Pattern;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.schema.Function;
import org.apache.calcite.schema.FunctionParameter;
import org.apache.calcite.schema.Table;
import org.apache.calcite.schema.TableMacro;
import org.apache.calcite.schema.TranslatableTable;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.drill.common.config.LogicalPlanPersistence;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.common.scanner.persistence.ScanResult;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.dotdrill.DotDrillFile;
import org.apache.drill.exec.dotdrill.DotDrillType;
import org.apache.drill.exec.dotdrill.DotDrillUtil;
import org.apache.drill.exec.dotdrill.View;
import org.apache.drill.exec.planner.logical.CreateTableEntry;
import org.apache.drill.exec.planner.logical.DrillTable;
import org.apache.drill.exec.planner.logical.DrillTranslatableTable;
import org.apache.drill.exec.planner.logical.DrillViewTable;
import org.apache.drill.exec.planner.logical.DynamicDrillTable;
import org.apache.drill.exec.planner.logical.FileSystemCreateTableEntry;
import org.apache.drill.exec.planner.sql.ExpandingConcurrentMap;
import org.apache.drill.exec.store.AbstractSchema;
import org.apache.drill.exec.store.PartitionNotFoundException;
import org.apache.drill.exec.store.SchemaConfig;
import org.apache.drill.exec.store.StorageStrategy;
import org.apache.drill.exec.util.ImpersonationUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.AccessControlException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
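/**
* Creates {@link WorkspaceSchema} instances for a single workspace defined in a
* {@link FileSystemPlugin} configuration. Holds the format matchers used to resolve
* files and directories into Drill tables, including an optional catch-all matcher
* built from the workspace's default input format.
*/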
public class WorkspaceSchemaFactory {
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(WorkspaceSchemaFactory.class);
private final List<FormatMatcher> fileMatchers;
private final List<FormatMatcher> dropFileMatchers;
private final List<FormatMatcher> dirMatchers;
private final WorkspaceConfig config;
private final Configuration fsConf;
private final String storageEngineName;
private final String schemaName;
private final FileSystemPlugin plugin;
private final ObjectMapper mapper;
private final LogicalPlanPersistence logicalPlanPersistence;
private final Path wsPath;
private final FormatPluginOptionExtractor optionExtractor;
public WorkspaceSchemaFactory(
FileSystemPlugin plugin,
String schemaName,
String storageEngineName,
WorkspaceConfig config,
List<FormatMatcher> formatMatchers,
LogicalPlanPersistence logicalPlanPersistence,
ScanResult scanResult) throws ExecutionSetupException, IOException {
this.logicalPlanPersistence = logicalPlanPersistence;
this.fsConf = plugin.getFsConf();
this.plugin = plugin;
this.config = config;
this.mapper = logicalPlanPersistence.getMapper();
this.fileMatchers = Lists.newArrayList();
this.dirMatchers = Lists.newArrayList();
this.storageEngineName = storageEngineName;
this.schemaName = schemaName;
this.wsPath = new Path(config.getLocation());
this.optionExtractor = new FormatPluginOptionExtractor(scanResult);
for (FormatMatcher m : formatMatchers) {
if (m.supportDirectoryReads()) {
dirMatchers.add(m);
}
fileMatchers.add(m);
}
// NOTE: add a fallback format matcher if one is given in the configuration. fileMatchers must be an
// order-preserving list so that the fallback is consulted last.
final String defaultInputFormat = config.getDefaultInputFormat();
if (!Strings.isNullOrEmpty(defaultInputFormat)) {
final FormatPlugin formatPlugin = plugin.getFormatPlugin(defaultInputFormat);
if (formatPlugin == null) {
final String message = String.format("Unable to find default input format [%s] for workspace [%s.%s]",
defaultInputFormat, storageEngineName, schemaName);
throw new ExecutionSetupException(message);
}
final FormatMatcher fallbackMatcher = new BasicFormatMatcher(formatPlugin,
ImmutableList.of(Pattern.compile(".*")), ImmutableList.<MagicString>of());
fileMatchers.add(fallbackMatcher);
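// Exclude the catch-all fallback from drop-table matching; its ".*" pattern would otherwise claim every file.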
dropFileMatchers = fileMatchers.subList(0, fileMatchers.size() - 1);
} else {
dropFileMatchers = fileMatchers.subList(0, fileMatchers.size());
}
}
/**
* Checks whether the given user has permission to list files/directories under the workspace directory.
*
* @param userName User who is trying to access the workspace.
* @return True if the user has access. False otherwise.
* @throws IOException If an error occurs while accessing the filesystem.
*/
public boolean accessible(final String userName) throws IOException {
final FileSystem fs = ImpersonationUtil.createFileSystem(userName, fsConf);
try {
// We have to rely on listStatus because a FileSystem can enforce complicated controls such as regular unix-style
// permissions, Access Control Lists (ACLs) or Access Control Expressions (ACE). The Hadoop 2.7 version of FileSystem
// has a limited private API (FileSystem.access) to check the permissions directly
// (see https://issues.apache.org/jira/browse/HDFS-6570), but Drill currently relies on the Hadoop 2.5.0 version of
// FileSystem. TODO: Update this when DRILL-3749 is fixed.
fs.listStatus(wsPath);
} catch (final UnsupportedOperationException e) {
logger.trace("The filesystem for this workspace does not support this operation.", e);
} catch (final FileNotFoundException | AccessControlException e) {
return false;
}
return true;
}
private Path getViewPath(String name) {
return DotDrillType.VIEW.getPath(config.getLocation(), name);
}
public WorkspaceSchema createSchema(List<String> parentSchemaPath, SchemaConfig schemaConfig) throws IOException {
return new WorkspaceSchema(parentSchemaPath, schemaName, schemaConfig);
}
/**
* Implementation of a table macro that generates a table based on parameters
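*
* <p>For example (illustrative), a query can pass format plugin options as named parameters,
* which this macro expands into a table:
* <pre>SELECT * FROM table(dfs.`clicks.csv`(type => 'text', fieldDelimiter => ','))</pre>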
*/
static final class WithOptionsTableMacro implements TableMacro {
private final TableSignature sig;
private final WorkspaceSchema schema;
WithOptionsTableMacro(TableSignature sig, WorkspaceSchema schema) {
this.sig = sig;
this.schema = schema;
}
@Override
public List<FunctionParameter> getParameters() {
List<FunctionParameter> result = new ArrayList<>();
for (int i = 0; i < sig.params.size(); i++) {
final TableParamDef p = sig.params.get(i);
final int ordinal = i;
result.add(new FunctionParameter() {
@Override
public int getOrdinal() {
return ordinal;
}
@Override
public String getName() {
return p.name;
}
@Override
public RelDataType getType(RelDataTypeFactory typeFactory) {
return typeFactory.createJavaType(p.type);
}
@Override
public boolean isOptional() {
return p.optional;
}
});
}
return result;
}
@Override
public TranslatableTable apply(List<Object> arguments) {
return new DrillTranslatableTable(schema.getDrillTable(new TableInstance(sig, arguments)));
}
}
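// Shared by the value classes below so equals()/hashCode() can delegate to Arrays.equals/hashCode.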
private static Object[] array(Object... objects) {
return objects;
}
static final class TableInstance {
final TableSignature sig;
final List<Object> params;
TableInstance(TableSignature sig, List<Object> params) {
if (params.size() != sig.params.size()) {
throw UserException.parseError()
.message(
"got %d params, but the signature expects %d",
params.size(), sig.params.size())
.addContext("table", sig.name)
.build(logger);
}
this.sig = sig;
this.params = unmodifiableList(params);
}
String presentParams() {
StringBuilder sb = new StringBuilder("(");
boolean first = true;
for (int i = 0; i < params.size(); i++) {
Object param = params.get(i);
if (param != null) {
if (first) {
first = false;
} else {
sb.append(", ");
}
TableParamDef paramDef = sig.params.get(i);
sb.append(paramDef.name).append(": ").append(paramDef.type.getSimpleName()).append(" => ").append(param);
}
}
sb.append(")");
return sb.toString();
}
private Object[] toArray() {
return array(sig, params);
}
@Override
public int hashCode() {
return Arrays.hashCode(toArray());
}
@Override
public boolean equals(Object obj) {
if (obj instanceof TableInstance) {
return Arrays.equals(this.toArray(), ((TableInstance)obj).toArray());
}
return false;
}
@Override
public String toString() {
return sig.name + (params.size() == 0 ? "" : presentParams());
}
}
static final class TableParamDef {
final String name;
final Class<?> type;
final boolean optional;
TableParamDef(String name, Class<?> type) {
this(name, type, false);
}
TableParamDef(String name, Class<?> type, boolean optional) {
this.name = name;
this.type = type;
this.optional = optional;
}
TableParamDef optional() {
return new TableParamDef(name, type, true);
}
private Object[] toArray() {
return array(name, type, optional);
}
@Override
public int hashCode() {
return Arrays.hashCode(toArray());
}
@Override
public boolean equals(Object obj) {
if (obj instanceof TableParamDef) {
return Arrays.equals(this.toArray(), ((TableParamDef)obj).toArray());
}
return false;
}
@Override
public String toString() {
String p = name + ": " + type;
return optional ? "[" + p + "]" : p;
}
}
static final class TableSignature {
final String name;
final List<TableParamDef> params;
TableSignature(String name, TableParamDef... params) {
this(name, Arrays.asList(params));
}
TableSignature(String name, List<TableParamDef> params) {
this.name = name;
this.params = unmodifiableList(params);
}
private Object[] toArray() {
return array(name, params);
}
@Override
public int hashCode() {
return Arrays.hashCode(toArray());
}
@Override
public boolean equals(Object obj) {
if (obj instanceof TableSignature) {
return Arrays.equals(this.toArray(), ((TableSignature)obj).toArray());
}
return false;
}
@Override
public String toString() {
return name + params;
}
}
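/**
* Schema for a single workspace, backed by a {@link DrillFileSystem} created for the
* querying user. Tables are materialized lazily: this class acts as its own
* {@link ExpandingConcurrentMap.MapValueFactory}, so {@link #create(TableInstance)} runs
* the format matchers the first time a table is requested.
*/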
public class WorkspaceSchema extends AbstractSchema implements ExpandingConcurrentMap.MapValueFactory<TableInstance, DrillTable> {
private final ExpandingConcurrentMap<TableInstance, DrillTable> tables = new ExpandingConcurrentMap<>(this);
private final SchemaConfig schemaConfig;
private final DrillFileSystem fs;
public WorkspaceSchema(List<String> parentSchemaPath, String wsName, SchemaConfig schemaConfig) throws IOException {
super(parentSchemaPath, wsName);
this.schemaConfig = schemaConfig;
this.fs = ImpersonationUtil.createFileSystem(schemaConfig.getUserName(), fsConf);
}
DrillTable getDrillTable(TableInstance key) {
return tables.get(key);
}
@Override
public boolean createView(View view) throws IOException {
Path viewPath = getViewPath(view.getName());
boolean replaced = fs.exists(viewPath);
final FsPermission viewPerms =
new FsPermission(schemaConfig.getOption(ExecConstants.NEW_VIEW_DEFAULT_PERMS_KEY).string_val);
try (OutputStream stream = DrillFileSystem.create(fs, viewPath, viewPerms)) {
mapper.writeValue(stream, view);
}
return replaced;
}
@Override
public Iterable<String> getSubPartitions(String table,
List<String> partitionColumns,
List<String> partitionValues
) throws PartitionNotFoundException {
List<FileStatus> fileStatuses;
try {
fileStatuses = getFS().list(false, new Path(getDefaultLocation(), table));
} catch (IOException e) {
throw new PartitionNotFoundException("Error finding partitions for table " + table, e);
}
return new SubDirectoryList(fileStatuses);
}
@Override
public void dropView(String viewName) throws IOException {
fs.delete(getViewPath(viewName), false);
}
private Set<String> getViews() {
Set<String> viewSet = Sets.newHashSet();
// Look for files with ".view.drill" extension.
List<DotDrillFile> files;
try {
files = DotDrillUtil.getDotDrills(fs, new Path(config.getLocation()), DotDrillType.VIEW);
for (DotDrillFile f : files) {
viewSet.add(f.getBaseName());
}
} catch (UnsupportedOperationException e) {
logger.debug("The filesystem for this workspace does not support this operation.", e);
} catch (AccessControlException e) {
if (!schemaConfig.getIgnoreAuthErrors()) {
logger.debug(e.getMessage());
throw UserException
.permissionError(e)
.message("Not authorized to list view tables in schema [%s]", getFullSchemaName())
.build(logger);
}
} catch (Exception e) {
logger.warn("Failure while trying to list .view.drill files in workspace [{}]", getFullSchemaName(), e);
}
return viewSet;
}
private Set<String> rawTableNames() {
return newHashSet(
transform(tables.keySet(), new com.google.common.base.Function<TableInstance, String>() {
@Override
public String apply(TableInstance input) {
return input.sig.name;
}
}));
}
@Override
public Set<String> getTableNames() {
return Sets.union(rawTableNames(), getViews());
}
@Override
public Set<String> getFunctionNames() {
return rawTableNames();
}
@Override
public List<Function> getFunctions(String name) {
List<TableSignature> sigs = optionExtractor.getTableSignatures(name);
return Lists.transform(sigs, new com.google.common.base.Function<TableSignature, Function>() {
@Override
public Function apply(TableSignature input) {
return new WithOptionsTableMacro(input, WorkspaceSchema.this);
}
});
}
private View getView(DotDrillFile f) throws IOException {
assert f.getType() == DotDrillType.VIEW;
return f.getView(logicalPlanPersistence);
}
@Override
public Table getTable(String tableName) {
TableInstance tableKey = new TableInstance(new TableSignature(tableName), ImmutableList.of());
// first check existing tables.
if (tables.alreadyContainsKey(tableKey)) {
return tables.get(tableKey);
}
// then look for files that start with this name and end in .drill.
List<DotDrillFile> files = Collections.emptyList();
try {
try {
files = DotDrillUtil.getDotDrills(fs, new Path(config.getLocation()), tableName, DotDrillType.VIEW);
} catch (AccessControlException e) {
if (!schemaConfig.getIgnoreAuthErrors()) {
logger.debug(e.getMessage());
throw UserException.permissionError(e)
.message("Not authorized to list or query tables in schema [%s]", getFullSchemaName())
.build(logger);
}
} catch (IOException e) {
logger.warn("Failure while trying to list view tables in workspace [{}]", tableName, getFullSchemaName(), e);
}
for (DotDrillFile f : files) {
switch (f.getType()) {
case VIEW:
try {
return new DrillViewTable(getView(f), f.getOwner(), schemaConfig.getViewExpansionContext());
} catch (AccessControlException e) {
if (!schemaConfig.getIgnoreAuthErrors()) {
logger.debug(e.getMessage());
throw UserException.permissionError(e)
.message("Not authorized to read view [%s] in schema [%s]", tableName, getFullSchemaName())
.build(logger);
}
} catch (IOException e) {
logger.warn("Failure while trying to load {}.view.drill file in workspace [{}]", tableName, getFullSchemaName(), e);
}
}
}
} catch (UnsupportedOperationException e) {
logger.debug("The filesystem for this workspace does not support this operation.", e);
}
return tables.get(tableKey);
}
@Override
public boolean isMutable() {
return config.isWritable();
}
public DrillFileSystem getFS() {
return fs;
}
public String getDefaultLocation() {
return config.getLocation();
}
@Override
public CreateTableEntry createNewTable(String tableName, List<String> partitionColumns, StorageStrategy storageStrategy) {
String storage = schemaConfig.getOption(ExecConstants.OUTPUT_FORMAT_OPTION).string_val;
FormatPlugin formatPlugin = plugin.getFormatPlugin(storage);
if (formatPlugin == null) {
throw new UnsupportedOperationException(
String.format("Unsupported format '%s' in workspace '%s'", config.getDefaultInputFormat(),
Joiner.on(".").join(getSchemaPath())));
}
return new FileSystemCreateTableEntry(
(FileSystemConfig) plugin.getConfig(),
formatPlugin,
config.getLocation() + Path.SEPARATOR + tableName,
partitionColumns,
storageStrategy);
}
@Override
public String getTypeName() {
return FileSystemConfig.NAME;
}
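// Convenience wrapper: asks a matcher whether the selection is readable using this workspace's fs, plugin and user.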
private DrillTable isReadable(FormatMatcher m, FileSelection fileSelection) throws IOException {
return m.isReadable(fs, fileSelection, plugin, storageEngineName, schemaConfig.getUserName());
}
@Override
public DrillTable create(TableInstance key) {
try {
final FileSelection fileSelection = FileSelection.create(fs, config.getLocation(), key.sig.name);
if (fileSelection == null) {
return null;
}
final boolean hasDirectories = fileSelection.containsDirectories(fs);
if (key.sig.params.size() > 0) {
FormatPluginConfig fconfig = optionExtractor.createConfigForTable(key);
return new DynamicDrillTable(
plugin, storageEngineName, schemaConfig.getUserName(),
new FormatSelection(fconfig, fileSelection));
}
if (hasDirectories) {
for (final FormatMatcher matcher : dirMatchers) {
try {
DrillTable table = matcher.isReadable(fs, fileSelection, plugin, storageEngineName, schemaConfig.getUserName());
if (table != null) {
return table;
}
} catch (IOException e) {
logger.debug("File read failed.", e);
}
}
}
final FileSelection newSelection = hasDirectories ? fileSelection.minusDirectories(fs) : fileSelection;
if (newSelection == null) {
return null;
}
for (final FormatMatcher matcher : fileMatchers) {
DrillTable table = matcher.isReadable(fs, newSelection, plugin, storageEngineName, schemaConfig.getUserName());
if (table != null) {
return table;
}
}
return null;
} catch (AccessControlException e) {
if (!schemaConfig.getIgnoreAuthErrors()) {
logger.debug(e.getMessage());
throw UserException.permissionError(e)
.message("Not authorized to read table [%s] in schema [%s]", key, getFullSchemaName())
.build(logger);
}
} catch (IOException e) {
logger.debug("Failed to create DrillTable with root {} and name {}", config.getLocation(), key, e);
}
return null;
}
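// Returns the first drop-eligible matcher (the catch-all fallback is excluded) that recognizes the file, or null.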
private FormatMatcher findMatcher(FileStatus file) {
FormatMatcher matcher = null;
try {
for (FormatMatcher m : dropFileMatchers) {
if (m.isFileReadable(fs, file)) {
return m;
}
}
} catch (IOException e) {
logger.debug("Failed to find format matcher for file: %s", file, e);
}
return matcher;
}
@Override
public void destroy(DrillTable value) {
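// No-op: created DrillTables do not hold resources that need explicit cleanup here.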
}
/**
* Checks whether the table contains only homogeneous files that Drill can read, e.g. parquet, json, csv.
* If the table mixes more than one of these formats, or contains a file format that Drill cannot
* understand, it is not considered homogeneous and the caller will refuse to drop it.
* @param tableName name of the table to be checked for the homogeneous property
* @return true if every file in the table shares a single Drill-readable format, false otherwise
* @throws IOException if listing the table's files fails
*/
private boolean isHomogeneous(String tableName) throws IOException {
FileSelection fileSelection = FileSelection.create(fs, config.getLocation(), tableName);
if (fileSelection == null) {
throw UserException
.validationError()
.message("Table [%s] not found", tableName)
.build(logger);
}
FormatMatcher matcher = null;
Queue<FileStatus> listOfFiles = new LinkedList<>();
listOfFiles.addAll(fileSelection.getStatuses(fs));
while (!listOfFiles.isEmpty()) {
FileStatus currentFile = listOfFiles.poll();
if (currentFile.isDirectory()) {
listOfFiles.addAll(fs.list(true, currentFile.getPath()));
} else {
if (matcher != null) {
if (!matcher.isFileReadable(fs, currentFile)) {
return false;
}
} else {
matcher = findMatcher(currentFile);
// Did not match any of the file patterns, exit
if (matcher == null) {
return false;
}
}
}
}
return true;
}
/**
* Checks that the table contains only homogeneous file formats that Drill can read. Once the check
* passes, we rename the table to start with an "_" (so it is treated as hidden) and then issue a
* recursive delete of the renamed path.
* @param table path of the table to be dropped
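* For example, dropping a table named {@code sales} first renames it to something like
* {@code _sales_<p1>_<p2>} before the recursive delete.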
*/
@Override
public void dropTable(String table) {
DrillFileSystem fs = getFS();
String defaultLocation = getDefaultLocation();
try {
if (!isHomogeneous(table)) {
throw UserException
.validationError()
.message("Table contains different file formats. \n" +
"Drop Table is only supported for directories that contain homogeneous file formats consumable by Drill")
.build(logger);
}
StringBuilder tableRenameBuilder = new StringBuilder();
int lastSlashIndex = table.lastIndexOf(Path.SEPARATOR);
if (lastSlashIndex != -1) {
tableRenameBuilder.append(table.substring(0, lastSlashIndex + 1));
}
// Generate unique identifier which will be added as a suffix to the table name
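// p1 packs an inverted seconds-since-epoch timestamp into the high 32 bits plus a random int;
// p2 is an independent random long (similar to how query ids are generated).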
ThreadLocalRandom r = ThreadLocalRandom.current();
long time = (System.currentTimeMillis()/1000);
long p1 = ((Integer.MAX_VALUE - time) << 32) + r.nextInt();
long p2 = r.nextLong();
final String fileNameDelimiter = DrillFileSystem.HIDDEN_FILE_PREFIX;
String[] pathSplit = table.split(Path.SEPARATOR);
/*
* Builds the string for the renamed table
* Prefixes the table name with an underscore (intent for this to be treated as a hidden file)
* and suffixes the table name with unique identifiers (similar to how we generate query id's)
* separated by underscores
*/
tableRenameBuilder
.append(DrillFileSystem.HIDDEN_FILE_PREFIX)
.append(pathSplit[pathSplit.length - 1])
.append(fileNameDelimiter)
.append(p1)
.append(fileNameDelimiter)
.append(p2);
String tableRename = tableRenameBuilder.toString();
fs.rename(new Path(defaultLocation, table), new Path(defaultLocation, tableRename));
fs.delete(new Path(defaultLocation, tableRename), true);
} catch (AccessControlException e) {
throw UserException
.permissionError(e)
.message("Unauthorized to drop table")
.build(logger);
} catch (IOException e) {
throw UserException
.dataWriteError(e)
.message("Failed to drop table: " + e.getMessage())
.build(logger);
}
}
@Override
public List<Pair<String, TableType>> getTableNamesAndTypes(boolean bulkLoad, int bulkSize) {
final List<Pair<String, TableType>> tableNamesAndTypes = Lists.newArrayList();
// Look for raw tables first
if (!tables.isEmpty()) {
for (Map.Entry<TableInstance, DrillTable> tableEntry : tables.entrySet()) {
tableNamesAndTypes
.add(Pair.of(tableEntry.getKey().sig.name, tableEntry.getValue().getJdbcTableType()));
}
}
// Then look for files that start with this name and end in .drill.
List<DotDrillFile> files = Collections.emptyList();
try {
files = DotDrillUtil.getDotDrills(fs, new Path(config.getLocation()), DotDrillType.VIEW);
} catch (AccessControlException e) {
if (!schemaConfig.getIgnoreAuthErrors()) {
logger.debug(e.getMessage());
throw UserException.permissionError(e)
.message("Not authorized to list or query tables in schema [%s]", getFullSchemaName())
.build(logger);
}
} catch (IOException e) {
logger.warn("Failure while trying to list view tables in workspace [{}]", getFullSchemaName(), e);
} catch (UnsupportedOperationException e) {
// the file system (e.g. the classpath filesystem) may not support listing
// of files. But see getViews(), it ignores the exception and continues
logger.debug("Failure while trying to list view tables in workspace [{}]", getFullSchemaName(), e);
}
try {
for (DotDrillFile f : files) {
if (f.getType() == DotDrillType.VIEW) {
tableNamesAndTypes.add(Pair.of(f.getBaseName(), TableType.VIEW));
}
}
} catch (UnsupportedOperationException e) {
logger.debug("The filesystem for this workspace does not support this operation.", e);
}
return tableNamesAndTypes;
}
}
}