package edu.washington.escience.myria; import java.io.Serializable; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.NoSuchElementException; import java.util.Objects; import java.util.regex.Pattern; import net.jcip.annotations.Immutable; import com.almworks.sqlite4java.SQLiteConstants; import com.almworks.sqlite4java.SQLiteException; import com.almworks.sqlite4java.SQLiteStatement; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import edu.washington.escience.myria.util.MyriaUtils; /** * Schema describes the schema of a tuple. */ @Immutable public final class Schema implements Serializable { /** Required for Java serialization. */ private static final long serialVersionUID = 1L; /** The regular expression specifying what names are valid. */ public static final String VALID_NAME_REGEX = "^[a-zA-Z_]\\w*$"; /** The regular expression matcher for {@link #VALID_NAME_REGEX}. */ private static final Pattern VALID_NAME_PATTERN = Pattern.compile(VALID_NAME_REGEX); /** * Validate a potential column name for use in a Schema. Valid names are given by {@link #VALID_NAME_REGEX}. * * @param name the candidate column name. * @return the supplied name, if it is valid. * @throws IllegalArgumentException if the name does not match the regex {@link #VALID_NAME_REGEX}. */ private static String checkName(final String name) { Objects.requireNonNull(name, "name"); Preconditions.checkArgument( VALID_NAME_PATTERN.matcher(name).matches(), "supplied column name %s does not match the valid name regex %s", name, VALID_NAME_REGEX); return name; } /** * Converts a JDBC ResultSetMetaData object into a Schema. * * @param rsmd the input ResultSetMetaData. * @return the output Schema. * @throws SQLException if JDBC throws a SQLException. */ @Deprecated public static Schema fromResultSetMetaData(final ResultSetMetaData rsmd) throws SQLException { /* How many columns in this result set? */ final int columnCount = rsmd.getColumnCount(); /* Allocate space for the type and string arrays */ final ImmutableList.Builder<Type> columnTypes = new ImmutableList.Builder<Type>(); final ImmutableList.Builder<String> columnNames = new ImmutableList.Builder<String>(); /* Fill them out */ for (int i = 0; i < columnCount; ++i) { /* JDBC numbers columns from 1. Yes, really. */ // Type final int type = rsmd.getColumnType(i + 1); switch (type) { case java.sql.Types.BOOLEAN: columnTypes.add(Type.BOOLEAN_TYPE); break; case java.sql.Types.FLOAT: columnTypes.add(Type.FLOAT_TYPE); break; case java.sql.Types.DOUBLE: columnTypes.add(Type.DOUBLE_TYPE); break; case java.sql.Types.INTEGER: columnTypes.add(Type.INT_TYPE); break; case java.sql.Types.BIGINT: /* Yes, really. http://dev.mysql.com/doc/refman/5.0/en/numeric-types.html#integer-types */ columnTypes.add(Type.LONG_TYPE); break; case java.sql.Types.VARCHAR: case java.sql.Types.CHAR: columnTypes.add(Type.STRING_TYPE); break; case java.sql.Types.TIMESTAMP: columnTypes.add(Type.DATETIME_TYPE); break; case java.sql.Types.BLOB: columnTypes.add(Type.BLOB_TYPE); break; default: throw new UnsupportedOperationException( "JDBC type (java.SQL.Types) of " + type + " is not supported"); } // Name columnNames.add(rsmd.getColumnName(i + 1)); } return new Schema(columnTypes.build(), columnNames.build()); } /** * Converts a SQLiteStatement object into a Schema. * * @param statement the input SQLiteStatement (must have been stepped). * @return the output Schema. * @throws SQLiteException if SQLite throws an exception. */ @Deprecated public static Schema fromSQLiteStatement(final SQLiteStatement statement) throws SQLiteException { assert (statement.hasStepped()); /* How many columns in this result set? */ final int columnCount = statement.columnCount(); /* Allocate space for the type and string arrays */ final ImmutableList.Builder<Type> columnTypes = new ImmutableList.Builder<Type>(); final ImmutableList.Builder<String> columnNames = new ImmutableList.Builder<String>(); /* Fill them out */ for (int i = 0; i < columnCount; ++i) { // Type final int type = statement.columnType(i); switch (type) { case SQLiteConstants.SQLITE_INTEGER: /* * TODO SQLite uses variable-width ints, so there's no way to tell. Default conservatively to long. * http://www.sqlite.org/datatype3.html */ columnTypes.add(Type.LONG_TYPE); break; case SQLiteConstants.SQLITE_TEXT: columnTypes.add(Type.STRING_TYPE); break; case SQLiteConstants.SQLITE_FLOAT: /* TODO Yes really, see above. */ columnTypes.add(Type.DOUBLE_TYPE); break; default: throw new UnsupportedOperationException( "SQLite type (SQLiteConstants) " + type + " is not supported"); } // Name columnNames.add(statement.getColumnName(i)); } return new Schema(columnTypes.build(), columnNames.build()); } /** * Create a new Schema using an existing Schema and a new column. * * @param schema the existing schema. * @param type the type of the new column. * @param name the name of the new column. * @return the new Schema. */ public static Schema appendColumn(final Schema schema, final Type type, final String name) { List<Type> types = ImmutableList.<Type>builder().addAll(schema.getColumnTypes()).add(type).build(); List<String> names = ImmutableList.<String>builder().addAll(schema.getColumnNames()).add(name).build(); return new Schema(types, names); } /** * Merge two Schemas into one. The result has the columns of the first concatenated with the columns of the second. * <p> * Note that if there are duplicate column names from the two merging schemas, the duplicate columns from the first * schema will be automatically renamed by adding a suffix "_1", and the duplicate columns from the second schema will * be automatically renamed by adding a suffix "_2". * * @param first The Schema with the first columns of the new Schema. * @param second The Schema with the last columns of the Schema. * @return the new Schema. */ public static Schema merge(final Schema first, final Schema second) { final ImmutableList.Builder<Type> types = ImmutableList.builder(); types.addAll(first.getColumnTypes()).addAll(second.getColumnTypes()); List<String> names1 = new ArrayList<String>(); names1.addAll(first.getColumnNames()); List<String> names2 = new ArrayList<String>(); names2.addAll(second.getColumnNames()); for (int i = 0; i < names1.size(); ++i) { for (int j = 0; j < names2.size(); ++j) { if (names1.get(i).equals(names2.get(j))) { names1.set(i, names1.get(i) + "_1"); names2.set(j, names2.get(j) + "_2"); break; } } } final ImmutableList.Builder<String> names = ImmutableList.builder(); names.addAll(names1).addAll(names2); return new Schema(types.build(), names.build()); } /** * Static factory method. * * @param types the types of columns in this Schema. It must contain at least one entry. * @param names the names of the columns. Note that names may be null. * @return a Schema representing the specified column types and names. */ @JsonCreator public static Schema of( @JsonProperty(value = "columnTypes", required = true) final List<Type> types, @JsonProperty("columnNames") final List<String> names) { if (names == null) { return new Schema(types); } return new Schema(types, names); } /** The types of the columns in this relation. */ @JsonProperty private final List<Type> columnTypes; /** The names of the columns in this relation. */ @JsonProperty private final List<String> columnNames; /** * Helper function to build a Schema from builders. * * @param types the types of the columns in this Schema. * @param names the names of the columns in this Schema. */ public Schema( final ImmutableList.Builder<Type> types, final ImmutableList.Builder<String> names) { this(types.build(), names.build()); } /** * Helper function to generate the list of column names given a {@link Type} array. * * @param types the types of the columns * @return the list of column names given a {@link Type} array. Every column is named <code>colI</code> where * <code>I</code> counts from <code>0</code> to <code>types.size()</code>. */ private static List<String> generateNames(final List<Type> types) { Objects.requireNonNull(types, "types"); final ImmutableList.Builder<String> names = ImmutableList.builder(); for (int i = 0; i < types.size(); i++) { names.add("col" + i); } return names.build(); } /** * Create a Schema given an array of column types. Column names will be col0, col1, .... * * @param types the types of the columns. */ public Schema(final List<Type> types) { this(types, generateNames(types)); } /** * Create a new Schema with typeAr.length columns with columns of the specified types, with associated named columns. * * @param columnTypes array specifying the number of and types of columns in this Schema. It must contain at least one * entry. * @param columnNames array specifying the names of the columns. */ public Schema(final List<Type> columnTypes, final List<String> columnNames) { Objects.requireNonNull(columnTypes, "columnTypes"); Objects.requireNonNull(columnNames, "columnNames"); if (columnTypes.size() != columnNames.size()) { throw new IllegalArgumentException( "Invalid Schema: must have the same number of column types and column"); } MyriaUtils.checkHasNoNulls(columnTypes, "columnTypes may not contain null elements"); MyriaUtils.checkHasNoNulls(columnNames, "columnNames may not contain null elements"); HashSet<String> uniqueNames = new HashSet<>(); for (String name : columnNames) { checkName(name); if (!uniqueNames.add(name)) { throw new IllegalArgumentException("schema has duplicated column name " + name); } } this.columnTypes = ImmutableList.copyOf(columnTypes); this.columnNames = ImmutableList.copyOf(columnNames); } /** * Compares the specified object with this Schema for equality. Two Schemas are considered equal if they have the same * size, column types, and column names. * * @param o the Object to be compared with. * @return true if schema is equal to this Schema. */ @Override public boolean equals(final Object o) { if (this == o) { return true; } if (!(o instanceof Schema)) { return false; } final Schema other = (Schema) o; return (this == o) || columnTypes.equals(other.columnTypes) && columnNames.equals(other.columnNames); } /** * Return true if the two schema are "compatible": they have the same size and column types; column names are ignored. * * @param s2 the Schema object to compare * * @return true if the schemas are compatible */ public boolean compatible(final Schema s2) { return columnTypes.equals(s2.columnTypes); } /** * Find the index of the column with a given name. * * @param name name of the column. * @return the index of the column that is first to have the given name. * @throws NoSuchElementException if no column with a matching name is found. */ public int columnNameToIndex(final String name) { final int ret = columnNames.indexOf(name); if (ret == -1) { throw new NoSuchElementException("No column named " + name + " found"); } return ret; } /** * Return a subset of the current schema. * * @param index indices to be selected. * @return the subschema. */ public Schema getSubSchema(final int[] index) { final ImmutableList.Builder<Type> types = ImmutableList.builder(); final ImmutableList.Builder<String> names = ImmutableList.builder(); for (int i : index) { Preconditions.checkElementIndex(i, numColumns()); types.add(getColumnType(i)); names.add(getColumnName(i)); } return new Schema(types, names); } /** * Gets the (possibly null) column name of the ith column of this Schema. * * @param index index of the column name to return. It must be a valid index. * @return the name of the ith column * @throws IndexOutOfBoundsException if index is negative or not less than numColumns. */ public String getColumnName(final int index) { return columnNames.get(index); } /** * Returns a list containing the names of the columns in this Schema. * * @return a list containing the names of the columns in this Schema. */ public List<String> getColumnNames() { return columnNames; } /** * Gets the type of the ith column of this Schema. * * @param index index of the column to get the type of. It must be a valid index. * @return the type of the ith column * @throws IndexOutOfBoundsException if index is negative or not less than numColumns. */ public Type getColumnType(final int index) { return columnTypes.get(index); } /** * Returns an immutable list containing the types of the columns in this Schema. * * @return an immutable list containing the types of the columns in this Schema. */ public List<Type> getColumnTypes() { return columnTypes; } @Override public int hashCode() { return Arrays.hashCode(new Object[] {columnNames, columnTypes}); } /** * @return the number of columns in this Schema */ public int numColumns() { return columnTypes.size(); } /** * Returns a String describing this descriptor. It should be of the form * "columnType[0](columnName[0]), ..., columnType[M](columnName[M])", although the exact format does not matter. * * @return String describing this descriptor. */ @Override public String toString() { final StringBuilder sb = new StringBuilder(); for (int i = 0; i < columnTypes.size(); ++i) { if (i > 0) { sb.append(", "); } sb.append(columnNames.get(i)).append(" (").append(columnTypes.get(i)).append(')'); } return sb.toString(); } /** * The empty schema. */ public static final Schema EMPTY_SCHEMA = Schema.of(Arrays.asList(new Type[] {}), Arrays.asList(new String[] {})); /** * Construct a Schema from a list of {@link Type} and {@link String} objects. The types and names may be interleaved * in any order; ordering within types and within names is preserved. If there are no {@link String} objects given, * then the {@link #Schema(List)} constructor is used. * * @param fields any number of {@link Type} or {@link String} objects. * @return the {@link Schema} containing these objects. */ public static Schema ofFields(final Object... fields) { ImmutableList.Builder<Type> typesB = ImmutableList.builder(); ImmutableList.Builder<String> namesB = ImmutableList.builder(); for (Object o : fields) { Objects.requireNonNull(o, "field cannot be null"); if (o instanceof Type) { typesB.add((Type) o); } else if (o instanceof String) { namesB.add((String) o); } else { throw new IllegalArgumentException( "fields must be either " + Type.class.getCanonicalName() + " or " + String.class.getCanonicalName() + ", not " + o.getClass().getCanonicalName()); } } List<Type> types = typesB.build(); List<String> names = namesB.build(); if (names.isEmpty()) { return new Schema(types); } return Schema.of(types, names); } }