/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.dfs.easy;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.google.common.base.Functions;
import com.google.common.collect.Maps;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.physical.base.AbstractGroupScan;
import org.apache.drill.exec.physical.base.AbstractWriter;
import org.apache.drill.exec.physical.base.PhysicalOperator;
import org.apache.drill.exec.physical.base.ScanStats;
import org.apache.drill.exec.physical.base.ScanStats.GroupScanProperty;
import org.apache.drill.exec.physical.impl.ScanBatch;
import org.apache.drill.exec.physical.impl.WriterRecordBatch;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.record.CloseableRecordBatch;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.server.DrillbitContext;
import org.apache.drill.exec.store.ImplicitColumnExplorer;
import org.apache.drill.exec.store.RecordReader;
import org.apache.drill.exec.store.RecordWriter;
import org.apache.drill.exec.store.StoragePluginOptimizerRule;
import org.apache.drill.exec.store.dfs.BasicFormatMatcher;
import org.apache.drill.exec.store.dfs.DrillFileSystem;
import org.apache.drill.exec.store.dfs.FileSelection;
import org.apache.drill.exec.store.dfs.FormatMatcher;
import org.apache.drill.exec.store.dfs.FormatPlugin;
import org.apache.drill.exec.store.schedule.CompleteFileWork;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;

public abstract class EasyFormatPlugin<T extends FormatPluginConfig> implements FormatPlugin {
  @SuppressWarnings("unused")
  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(EasyFormatPlugin.class);

  private final BasicFormatMatcher matcher;
  private final DrillbitContext context;
  private final boolean readable;
  private final boolean writable;
  private final boolean blockSplittable;
  private final Configuration fsConf;
  private final StoragePluginConfig storageConfig;
  protected final T formatConfig;
  private final String name;
  private final boolean compressible;

  protected EasyFormatPlugin(String name, DrillbitContext context, Configuration fsConf,
      StoragePluginConfig storageConfig, T formatConfig, boolean readable, boolean writable,
      boolean blockSplittable, boolean compressible, List<String> extensions, String defaultName) {
    this.matcher = new BasicFormatMatcher(this, fsConf, extensions, compressible);
    this.readable = readable;
    this.writable = writable;
    this.context = context;
    this.blockSplittable = blockSplittable;
    this.compressible = compressible;
    this.fsConf = fsConf;
    this.storageConfig = storageConfig;
    this.formatConfig = formatConfig;
    this.name = name == null ? defaultName : name;
  }

  @Override
  public Configuration getFsConf() {
    return fsConf;
  }

  @Override
  public DrillbitContext getContext() {
    return context;
  }

  @Override
  public String getName() {
    return name;
  }

  public abstract boolean supportsPushDown();

  /**
   * Indicates whether this format can be split into work units at block boundaries within a
   * file. If not, the simple format engine will only split on file boundaries.
   *
   * @return true if splittable
   */
  public boolean isBlockSplittable() {
    return blockSplittable;
  }

  /**
   * Indicates whether this format could also be held in a compression container (for example
   * csv.gz versus csv). If the format uses its own internal compression scheme, as Parquet does,
   * this should return false.
   *
   * @return <code>true</code> if it is compressible
   */
  public boolean isCompressible() {
    return compressible;
  }

  public abstract RecordReader getRecordReader(FragmentContext context, DrillFileSystem dfs, FileWork fileWork,
      List<SchemaPath> columns, String userName) throws ExecutionSetupException;

  @SuppressWarnings("resource")
  CloseableRecordBatch getReaderBatch(FragmentContext context, EasySubScan scan) throws ExecutionSetupException {
    final ImplicitColumnExplorer columnExplorer = new ImplicitColumnExplorer(context, scan.getColumns());

    if (!columnExplorer.isStarQuery()) {
      // Narrow the scan to the requested table columns, carrying the operator id
      // over from the original sub scan.
      EasySubScan newScan = new EasySubScan(scan.getUserName(), scan.getWorkUnits(), scan.getFormatPlugin(),
          columnExplorer.getTableColumns(), scan.getSelectionRoot());
      newScan.setOperatorId(scan.getOperatorId());
      scan = newScan;
    }

    OperatorContext oContext = context.newOperatorContext(scan);
    final DrillFileSystem dfs;
    try {
      dfs = oContext.newFileSystem(fsConf);
    } catch (IOException e) {
      throw new ExecutionSetupException(String.format("Failed to create FileSystem: %s", e.getMessage()), e);
    }

    List<RecordReader> readers = Lists.newArrayList();
    List<Map<String, String>> implicitColumns = Lists.newArrayList();
    Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
    for (FileWork work : scan.getWorkUnits()) {
      RecordReader recordReader = getRecordReader(context, dfs, work, scan.getColumns(), scan.getUserName());
      readers.add(recordReader);
      Map<String, String> implicitValues = columnExplorer.populateImplicitColumns(work, scan.getSelectionRoot());
      implicitColumns.add(implicitValues);
      if (implicitValues.size() > mapWithMaxColumns.size()) {
        mapWithMaxColumns = implicitValues;
      }
    }

    // all readers should have the same number of implicit columns, add missing ones with value null
    Map<String, String> diff = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    for (Map<String, String> map : implicitColumns) {
      map.putAll(Maps.difference(map, diff).entriesOnlyOnRight());
    }

    return new ScanBatch(scan, context, oContext, readers.iterator(), implicitColumns);
  }
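
  /*
   * Worked illustration of the implicit-column padding above (paths are hypothetical,
   * but dir0/dir1... are the partition column names ImplicitColumnExplorer produces):
   * with a selection root of /data, a work unit for /data/2016/a.csv resolves the
   * partition column {dir0=2016}, while a work unit for /data/b.csv, sitting directly
   * under the root, resolves none. The loop then pads the second map with dir0=null so
   * that every reader hands ScanBatch the same set of implicit columns.
   */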

  public abstract RecordWriter getRecordWriter(FragmentContext context, EasyWriter writer) throws IOException;

  public CloseableRecordBatch getWriterBatch(FragmentContext context, RecordBatch incoming, EasyWriter writer)
      throws ExecutionSetupException {
    try {
      return new WriterRecordBatch(writer, incoming, context, getRecordWriter(context, writer));
    } catch (IOException e) {
      throw new ExecutionSetupException(String.format("Failed to create the WriterRecordBatch. %s", e.getMessage()), e);
    }
  }

  protected ScanStats getScanStats(final PlannerSettings settings, final EasyGroupScan scan) {
    long data = 0;
    for (final CompleteFileWork work : scan.getWorkIterable()) {
      data += work.getTotalBytes();
    }

    // Rough estimate in the absence of real statistics: assume roughly 1KB per row.
    final long estRowCount = data / 1024;
    return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, estRowCount, 1, data);
  }

  @Override
  public AbstractWriter getWriter(PhysicalOperator child, String location, List<String> partitionColumns) throws IOException {
    return new EasyWriter(child, location, partitionColumns, this);
  }

  @Override
  public AbstractGroupScan getGroupScan(String userName, FileSelection selection, List<SchemaPath> columns)
      throws IOException {
    return new EasyGroupScan(userName, selection, this, columns, selection.selectionRoot);
  }

  @Override
  public T getConfig() {
    return formatConfig;
  }

  @Override
  public StoragePluginConfig getStorageConfig() {
    return storageConfig;
  }

  @Override
  public boolean supportsRead() {
    return readable;
  }

  @Override
  public boolean supportsWrite() {
    return writable;
  }

  @Override
  public boolean supportsAutoPartitioning() {
    return false;
  }

  @Override
  public FormatMatcher getMatcher() {
    return matcher;
  }

  @Override
  public Set<StoragePluginOptimizerRule> getOptimizerRules() {
    return ImmutableSet.of();
  }

  public abstract int getReaderOperatorType();

  public abstract int getWriterOperatorType();
}
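
/*
 * Illustrative sketch only: the names MyFormatPlugin, MyFormatConfig and MyRecordReader below
 * are hypothetical and not part of Drill. A concrete format plugin built on EasyFormatPlugin
 * typically only wires its capabilities through the constructor and supplies a reader (and
 * optionally a writer); the JSON and text plugins follow this same shape.
 *
 *   public class MyFormatPlugin extends EasyFormatPlugin<MyFormatConfig> {
 *
 *     public MyFormatPlugin(String name, DrillbitContext context, Configuration fsConf,
 *         StoragePluginConfig storageConfig, MyFormatConfig formatConfig) {
 *       // readable, not writable, block splittable, compressible, matches "*.myext" files
 *       super(name, context, fsConf, storageConfig, formatConfig,
 *           true, false, true, true, Lists.newArrayList("myext"), "myformat");
 *     }
 *
 *     @Override
 *     public boolean supportsPushDown() { return false; }
 *
 *     @Override
 *     public RecordReader getRecordReader(FragmentContext context, DrillFileSystem dfs,
 *         FileWork fileWork, List<SchemaPath> columns, String userName) {
 *       return new MyRecordReader(context, dfs, fileWork, columns);  // hypothetical reader
 *     }
 *
 *     @Override
 *     public RecordWriter getRecordWriter(FragmentContext context, EasyWriter writer) {
 *       throw new UnsupportedOperationException("writing not supported");
 *     }
 *
 *     @Override
 *     public int getReaderOperatorType() {
 *       // a real plugin returns its own CoreOperatorType constant
 *       return UserBitShared.CoreOperatorType.JSON_SUB_SCAN_VALUE;
 *     }
 *
 *     @Override
 *     public int getWriterOperatorType() {
 *       throw new UnsupportedOperationException();
 *     }
 *   }
 */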