package org.embulk.standards; import com.google.common.base.Optional; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.inject.Inject; import org.embulk.config.Config; import org.embulk.config.ConfigDefault; import org.embulk.config.ConfigException; import org.embulk.config.ConfigSource; import org.embulk.config.Task; import org.embulk.config.TaskSource; import org.embulk.spi.Column; import org.embulk.spi.ColumnVisitor; import org.embulk.spi.Exec; import org.embulk.spi.FilterPlugin; import org.embulk.spi.Page; import org.embulk.spi.PageBuilder; import org.embulk.spi.PageOutput; import org.embulk.spi.PageReader; import org.embulk.spi.Schema; import org.embulk.spi.SchemaConfigException; import org.slf4j.Logger; import java.util.List; import java.util.Map; import java.util.HashMap; import static java.util.Locale.ENGLISH; import static org.embulk.spi.Exec.getBufferAllocator; public class RemoveColumnsFilterPlugin implements FilterPlugin { public interface PluginTask extends Task { @Config("remove") @ConfigDefault("null") public Optional<List<String>> getRemove(); // TODO remove_pattern option @Config("keep") @ConfigDefault("null") public Optional<List<String>> getKeep(); // TODO keep_pattern option @Config("accept_unmatched_columns") @ConfigDefault("false") public boolean getAcceptUnmatchedColumns(); public void setIndexMapping(int[] mapping); public int[] getIndexMapping(); } private final Logger LOG; @Inject public RemoveColumnsFilterPlugin() { LOG = Exec.getLogger(getClass()); } @Override public void transaction(ConfigSource config, Schema inputSchema, FilterPlugin.Control control) { PluginTask task = config.loadConfig(PluginTask.class); // validate remove: and keep: if (task.getRemove().isPresent() && task.getKeep().isPresent()) { throw new ConfigException("remove: and keep: must not be multi-select"); } if (!task.getRemove().isPresent() && !task.getKeep().isPresent()) { throw new ConfigException("Must require remove: or keep:"); } boolean acceptUnmatchedColumns = task.getAcceptUnmatchedColumns(); ImmutableList.Builder<Column> outputColumns = ImmutableList.builder(); int index = 0; int[] indexMapping = new int[inputSchema.size()]; for (int i = 0; i < indexMapping.length; i++) { indexMapping[i] = -1; } if (task.getRemove().isPresent()) { // specify remove: List<String> removeColumns = getExistentColumns(inputSchema, task.getRemove().get(), acceptUnmatchedColumns); for (Column column : inputSchema.getColumns()) { if (!removeColumns.contains(column.getName())) { outputColumns.add(new Column(index, column.getName(), column.getType())); indexMapping[column.getIndex()] = index; index++; } } } else { // specify keep: List<String> keepColumns = getExistentColumns(inputSchema, task.getKeep().get(), acceptUnmatchedColumns); for (Column column : inputSchema.getColumns()) { if (keepColumns.contains(column.getName())) { outputColumns.add(new Column(index, column.getName(), column.getType())); indexMapping[column.getIndex()] = index; index++; } } } task.setIndexMapping(indexMapping); control.run(task.dump(), new Schema(outputColumns.build())); } private List<String> getExistentColumns(Schema schema, List<String> specifiedColumns, boolean acceptUnmatch) { ImmutableList.Builder<String> existentColumns = ImmutableList.builder(); for (String column : specifiedColumns) { try { schema.lookupColumn(column); existentColumns.add(column); } catch (SchemaConfigException e) { if (!acceptUnmatch) { throw new ConfigException(String.format(ENGLISH, "Column '%s' doesn't exist in the schema", column)); } } } return existentColumns.build(); } @Override public PageOutput open(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output) { PluginTask task = taskSource.loadTask(PluginTask.class); PageReader pageReader = new PageReader(inputSchema); PageBuilder pageBuilder = new PageBuilder(getBufferAllocator(), outputSchema, output); return new PageConverter(pageReader, pageBuilder, task.getIndexMapping()); } static class PageConverter implements PageOutput { private final PageReader pageReader; private final PageBuilder pageBuilder; private final int[] indexMapping; PageConverter(PageReader pageReader, PageBuilder pageBuilder, int[] indexMapping) { this.pageReader = pageReader; this.pageBuilder = pageBuilder; this.indexMapping = indexMapping; } @Override public void add(Page page) { pageReader.setPage(page); while (pageReader.nextRecord()) { pageReader.getSchema().visitColumns(new ColumnVisitor() { @Override public void booleanColumn(Column inputColumn) { int index = indexMapping[inputColumn.getIndex()]; if (index >= 0) { if (pageReader.isNull(inputColumn)) { pageBuilder.setNull(index); } else { pageBuilder.setBoolean(index, pageReader.getBoolean(inputColumn)); } } } @Override public void longColumn(Column inputColumn) { int index = indexMapping[inputColumn.getIndex()]; if (index >= 0) { if (pageReader.isNull(inputColumn)) { pageBuilder.setNull(index); } else { pageBuilder.setLong(index, pageReader.getLong(inputColumn)); } } } @Override public void doubleColumn(Column inputColumn) { int index = indexMapping[inputColumn.getIndex()]; if (index >= 0) { if (pageReader.isNull(inputColumn)) { pageBuilder.setNull(index); } else { pageBuilder.setDouble(index, pageReader.getDouble(inputColumn)); } } } @Override public void stringColumn(Column inputColumn) { int index = indexMapping[inputColumn.getIndex()]; if (index >= 0) { if (pageReader.isNull(inputColumn)) { pageBuilder.setNull(index); } else { pageBuilder.setString(index, pageReader.getString(inputColumn)); } } } @Override public void timestampColumn(Column inputColumn) { int index = indexMapping[inputColumn.getIndex()]; if (index >= 0) { if (pageReader.isNull(inputColumn)) { pageBuilder.setNull(index); } else { pageBuilder.setTimestamp(index, pageReader.getTimestamp(inputColumn)); } } } @Override public void jsonColumn(Column inputColumn) { int index = indexMapping[inputColumn.getIndex()]; if (index >= 0) { if (pageReader.isNull(inputColumn)) { pageBuilder.setNull(index); } else { pageBuilder.setJson(index, pageReader.getJson(inputColumn)); } } } }); pageBuilder.addRecord(); } } private Map<String, Integer> newColumnIndex(Schema schema) { ImmutableMap.Builder<String, Integer> builder = ImmutableMap.builder(); for (Column column : schema.getColumns()) { builder.put(column.getName(), column.getIndex()); } return builder.build(); } @Override public void finish() { pageBuilder.finish(); } @Override public void close() { pageBuilder.close(); } } }