package com.linkedin.cubert.utils; import com.linkedin.cubert.block.BlockSchema; import com.linkedin.cubert.block.ColumnType; import com.linkedin.cubert.block.DataType; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; /** * @author Mani Parkhe */ public class TupleCopier { private final Copier copier; private final BlockSchema schema; public TupleCopier(BlockSchema schema) { this.schema = schema; if (schemaSupportShallowCopy(schema)) { copier = new ShallowCopier(); } else if (schemaSupportsDeepCopyWithReuse(schema)) { copier = new DeepReuseCopier(); } else { copier = new DeepCopier(); } } interface Copier { public void copy(Tuple src, Tuple dest) throws ExecException; public Tuple initializeOutput(BlockSchema schema) throws ExecException; } class ShallowCopier implements Copier { @Override public void copy(Tuple src, Tuple dest) throws ExecException { TupleUtils.copy(src, dest); } @Override public Tuple initializeOutput(BlockSchema schema) { return TupleFactory.getInstance().newTuple(schema.getNumColumns()); } } class DeepCopier implements Copier { @Override public void copy(Tuple src, Tuple dest) throws ExecException { TupleUtils.deepCopy(src, dest); } @Override public Tuple initializeOutput(BlockSchema schema) { return TupleFactory.getInstance().newTuple(schema.getNumColumns()); } } class DeepReuseCopier implements Copier { @Override public void copy(Tuple src, Tuple dest) throws ExecException { TupleUtils.deepCopyWithReuse(src, dest); } @Override public Tuple initializeOutput(BlockSchema schema) throws ExecException { Tuple tuple = TupleFactory.getInstance().newTuple(schema.getNumColumns()); int idx = 0; for (ColumnType ct : schema.getColumnTypes()) { DataType type = ct.getType(); if (type == DataType.RECORD || type == DataType.TUPLE) { tuple.set(idx, TupleFactory.getInstance().newTuple(ct.getColumnSchema().getNumColumns())); } idx++; } return tuple; } } public Tuple newTuple() throws ExecException { return copier.initializeOutput(schema); } public void copy(Tuple src, Tuple dest) throws ExecException { copier.copy(src, dest); } private boolean schemaSupportsDeepCopyWithReuse(BlockSchema schema) { for (ColumnType ct : schema.getColumnTypes()) { DataType type = ct.getType(); if (type.allowShallowCopy()) { // Numeric, String or Enum type continue; } else if (type == DataType.RECORD || type == DataType.TUPLE) { if (schemaSupportsDeepCopyWithReuse(ct.getColumnSchema())) { continue; } else { return false; } } // Everything else return false; } return true; } /** * @param schema * @return true if all columns are either numeric, string or enum type. */ private boolean schemaSupportShallowCopy(BlockSchema schema) { for (ColumnType ct : schema.getColumnTypes()) { if (ct.getType().allowShallowCopy()) { continue; } return false; } return true; } }