/** * */ package edu.washington.escience.myria.operator; import java.io.File; import java.util.List; import java.util.Map; import java.util.Objects; import com.almworks.sqlite4java.SQLiteConnection; import com.almworks.sqlite4java.SQLiteException; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import edu.washington.escience.myria.DbException; import edu.washington.escience.myria.MyriaConstants; import edu.washington.escience.myria.RelationKey; import edu.washington.escience.myria.accessmethod.AccessMethod; import edu.washington.escience.myria.accessmethod.AccessMethod.IndexRef; import edu.washington.escience.myria.operator.network.distribute.DistributeFunction; import edu.washington.escience.myria.accessmethod.ConnectionInfo; import edu.washington.escience.myria.accessmethod.SQLiteInfo; import edu.washington.escience.myria.parallel.RelationWriteMetadata; import edu.washington.escience.myria.storage.TupleBatch; import edu.washington.escience.myria.storage.TupleUtils; /** @author valmeida */ public class DbInsert extends AbstractDbInsert { /** Required for Java serialization. */ private static final long serialVersionUID = 1L; /** The connection to the database database. */ private AccessMethod accessMethod; /** The information for the database connection. */ private ConnectionInfo connectionInfo; /** The name of the table the tuples should be inserted into. */ private final RelationKey relationKey; /** Whether to overwrite an existing table or not. */ private final boolean overwriteTable; /** The name of the table the tuples should be inserted into. */ private RelationKey tempRelationKey; /** The indexes to be created on the table. Each entry is a list of columns. */ private final List<List<IndexRef>> indexes; /** The DistributeFunction used to distribute the table across workers. */ private final DistributeFunction distributeFunction; /** Constructs an insertion operator to store the tuples from the specified child into the specified database. If the * table does not exist, it will be created; if it does exist then old data will persist and new data will be * inserted. * * @param child the source of tuples to be inserted. * @param relationKey the key of the table the tuples should be inserted into. * @param connectionInfo the parameters of the database connection. */ public DbInsert( final Operator child, final RelationKey relationKey, final ConnectionInfo connectionInfo) { this(child, relationKey, connectionInfo, false); } /** Constructs an insertion operator to store the tuples from the specified child into the worker's default database. * If the table does not exist, it will be created. If <code>overwriteTable</code> is <code>true</code>, any existing * data will be dropped. * * @param child the source of tuples to be inserted. * @param relationKey the key of the table the tuples should be inserted into. * @param overwriteTable whether to overwrite a table that already exists. */ public DbInsert( final Operator child, final RelationKey relationKey, final boolean overwriteTable) { this(child, relationKey, null, overwriteTable); } /** Constructs an insertion operator to store the tuples from the specified child into the specified database. If the * table does not exist, it will be created. If <code>overwriteTable</code> is <code>true</code>, any existing data * will be dropped. * * @param child the source of tuples to be inserted. * @param relationKey the key of the table the tuples should be inserted into. * @param overwriteTable whether to overwrite a table that already exists. * @param indexes indexes created. */ public DbInsert( final Operator child, final RelationKey relationKey, final boolean overwriteTable, final List<List<IndexRef>> indexes) { this(child, relationKey, null, overwriteTable, indexes); } /** Constructs an insertion operator to store the tuples from the specified child into the specified database. If the * table does not exist, it will be created. If <code>overwriteTable</code> is <code>true</code>, any existing data * will be dropped. * * @param child the source of tuples to be inserted. * @param relationKey the key of the table the tuples should be inserted into. * @param connectionInfo the parameters of the database connection. * @param overwriteTable whether to overwrite a table that already exists. */ public DbInsert( final Operator child, final RelationKey relationKey, final ConnectionInfo connectionInfo, final boolean overwriteTable) { this(child, relationKey, connectionInfo, overwriteTable, null); } /** Constructs an insertion operator to store the tuples from the specified child into the specified database. If the * table does not exist, it will be created. If <code>overwriteTable</code> is <code>true</code>, any existing data * will be dropped. * * @param child the source of tuples to be inserted. * @param relationKey the key of the table the tuples should be inserted into. * @param connectionInfo the parameters of the database connection. * @param overwriteTable whether to overwrite a table that already exists. * @param indexes the indexes to be created on the table. Each entry is a list of columns. */ public DbInsert( final Operator child, final RelationKey relationKey, final ConnectionInfo connectionInfo, final boolean overwriteTable, final List<List<IndexRef>> indexes) { this(child, relationKey, connectionInfo, overwriteTable, indexes, null); } /** Constructs an insertion operator to store the tuples from the specified child into the specified database. If the * table does not exist, it will be created. If <code>overwriteTable</code> is <code>true</code>, any existing data * will be dropped. * * @param child the source of tuples to be inserted. * @param relationKey the key of the table the tuples should be inserted into. * @param connectionInfo the parameters of the database connection. * @param overwriteTable whether to overwrite a table that already exists. * @param indexes the indexes to be created on the table. Each entry is a list of columns. * @param partitionFunction the PartitionFunction used to partition the table across workers. */ public DbInsert( final Operator child, final RelationKey relationKey, final ConnectionInfo connectionInfo, final boolean overwriteTable, final List<List<IndexRef>> indexes, final DistributeFunction distributeFunction) { super(child); Objects.requireNonNull(relationKey, "relationKey"); this.connectionInfo = connectionInfo; this.relationKey = relationKey; this.overwriteTable = overwriteTable; this.distributeFunction = distributeFunction; /* Sanity check arguments -- cannot create an index in append mode. */ Preconditions.checkArgument( overwriteTable || indexes == null || indexes.size() == 0, "Cannot create indexes when appending to a relation."); /* 1) construct immutable copies of the given indexes. * 2) ensure that the index requests are valid: * - lists of column references must be non-null. * - column references are unique per index. */ if (indexes != null) { ImmutableList.Builder<List<IndexRef>> index = ImmutableList.builder(); for (List<IndexRef> i : indexes) { Objects.requireNonNull(i); Preconditions.checkArgument( i.size() == ImmutableSet.copyOf(i).size(), "Column references cannot be repeated in index definition: %s", i); index.add(ImmutableList.copyOf(i)); } this.indexes = index.build(); } else { this.indexes = ImmutableList.of(); } } @Override public void cleanup() { try { if (accessMethod != null) { accessMethod.close(); } } catch (DbException e) { throw new RuntimeException(e); } } @Override protected void consumeTuples(final TupleBatch tupleBatch) throws DbException { Objects.requireNonNull(accessMethod, "accessMethod"); Objects.requireNonNull(tempRelationKey, "tempRelationKey"); Preconditions.checkArgument( tupleBatch.getSchema().equals(getSchema()), "tuple schema %s does not match operator schema %s", tupleBatch.getSchema(), getSchema()); accessMethod.tupleBatchInsert(tempRelationKey, tupleBatch); } @Override protected void init(final ImmutableMap<String, Object> execEnvVars) throws DbException { setThreshold(TupleUtils.getBatchSize(getSchema())); /* retrieve connection information from the environment variables, if not already set */ if (connectionInfo == null && execEnvVars != null) { connectionInfo = (ConnectionInfo) execEnvVars.get(MyriaConstants.EXEC_ENV_VAR_DATABASE_CONN_INFO); } if (connectionInfo == null) { throw new DbException("Unable to instantiate DbInsert: connection information unknown"); } /* open the database connection */ accessMethod = AccessMethod.of(connectionInfo.getDbms(), connectionInfo, false); if (overwriteTable) { /* If overwriting, we insert into a temp table and then on success we drop the old and rename. */ tempRelationKey = RelationKey.of(relationKey.getUserName(), "MyriaSysTemp", relationKey.getRelationName()); /* Drop the temp table, if it exists. */ accessMethod.dropTableIfExists(tempRelationKey); } else { /* Otherwise go ahead and write into the same table. */ tempRelationKey = relationKey; } /* Create the table */ accessMethod.createTableIfNotExists(tempRelationKey, getSchema()); /* Create indexes. */ accessMethod.createIndexes(tempRelationKey, getSchema(), indexes); } @Override protected void childEOS() throws DbException { /* If the child finished, we're done too. If in overwrite mode, drop the existing table and rename. */ if (overwriteTable) { accessMethod.dropAndRenameTables(relationKey, tempRelationKey); } } @Override protected void childEOI() throws DbException {} /** @return the name of the relation that this operator will write to. */ public RelationKey getRelationKey() { return relationKey; } @Override public Map<RelationKey, RelationWriteMetadata> writeSet() { return ImmutableMap.of( relationKey, new RelationWriteMetadata( relationKey, getSchema(), overwriteTable, false, distributeFunction)); } }