Schema.java example

Explorer
spork-streaming-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.zebra.schema;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.Iterator;
import java.io.StringReader;
import java.util.HashSet;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.zebra.types.Projection;
import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.hadoop.zebra.parser.TableSchemaParser;

/**
 * Logical schema of tabular data.
 */
public class Schema implements Comparable<Schema>, Writable {
  /** Separator used between column entries when splitting and joining schema strings. */
  public final static String COLUMN_DELIMITER = ",";

  /** Version number emitted by write() and checked by readFields(). */
  private static final long schemaVersion = 1L;

  /**
   * Schema of a single column: its name, its type, an optional nested schema
   * and its position inside the {@link Schema} it was added to.
   */
  public static class ColumnSchema {
    private String name;
    private ColumnType type;
    private Schema schema;
    private int index; // field index in schema; assigned by Schema.add()

    /**
     * construct a ColumnSchema for a native type
     * 
     * @param a
     *          column name
     * @param t
     *          native column type
     */
    public ColumnSchema(String a, ColumnType t) {
      name = a;
      type = t;
      schema = null;
    }

    /**
     * construct a Column schema for a RECORD column type
     * 
     * @param a
     *          column name
     * @param s
     *          column schema
     */
    public ColumnSchema(String a, Schema s) {
      name = a;
      type = ColumnType.RECORD;
      schema = s;
    }

    /**
     * access function to get the column name 
     *
     * @return name of the column
     */
    public String getName() {
      return name;
    }
    
    /**
     * access function to get the column type 
     *
     * @return column type
     */
    public ColumnType getType() {
      return type;
    }
    
    /**
     * access function to get the column index
     *
     * @return column index in the parent schema; only meaningful once the
     *         column has been added to a schema
     */
    public int getIndex() {
      return index;
    }
    
    /**
     * construct a column schema for a complex column type
     * 
     * @param a
     *          column name
     * @param s
     *          column schema; may be null
     * @param t
     *          complex column type
     * @throws ParseException
     *           if a non-null schema is supplied for a type that cannot carry
     *           one
     */
    public ColumnSchema(String a, Schema s, ColumnType t) throws ParseException {
      if ((null != s) && !(ColumnType.isSchemaType(t))) {
        throw new ParseException(
            "Only a COLLECTION or RECORD or MAP can have schemas.");
      }
      name = a;
      schema = s;
      type = t;
    }

    /**
     * copy ctor; copies name, type and the reference to the nested schema. The
     * index is not copied.
     * 
     * @param cs
     *          source column schema
     */
    public ColumnSchema(ColumnSchema cs) {
      name = cs.name;
      type = cs.type;
      schema = cs.schema;
    }

    /**
     * Compare two field schema for equality: same type, same name and, for
     * schema-carrying types, equal nested schemas.
     * 
     * @param fschema one column schema to be compared
     * @param fother the other column schema to be compared
     * @return true if ColumnSchema are equal, false otherwise (including when
     *         either argument is null)
     */
    public static boolean equals(ColumnSchema fschema, ColumnSchema fother) {
      if (fschema == null || fother == null) {
        return false;
      }

      if (fschema.type != fother.type) {
        return false;
      }

      if (fschema.name == null) {
        if (fother.name != null) {
          return false;
        }
      }
      else if (!fschema.name.equals(fother.name)) {
        return false;
      }

      if (ColumnType.isSchemaType(fschema.type)) {
        // Two absent nested schemas count as equal; exactly one absent schema
        // is a mismatch. Checking both sides up front avoids dereferencing a
        // null nested schema on the left-hand column.
        if (fschema.schema == null || fother.schema == null) {
          return fschema.schema == fother.schema;
        }
        // compare recursively using schema
        return fschema.schema.equals(fother.schema);
      }
      return true;
    }

    /**
     * string representation of the column: the name (when non-null)
     * immediately followed by the type name, then the nested schema in
     * parentheses when one is present
     */
    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      if (name != null) {
        sb.append(name);
      }
      sb.append(ColumnType.findTypeName(type));

      if (schema != null) {
        sb.append("(");
        sb.append(schema.toString());
        sb.append(")");
      }

      return sb.toString();
    }

    /**
     * Returns the schema for the next level structure, in record, collection
     * and map.
     *
     * @return Schema of the column; may be null
     */
    public Schema getSchema() {
      return schema;
    }
  }

  /**
   * Cursor over a qualified column name. Each call to
   * {@link #parseName(Schema.ColumnSchema)} peels off the leading section and
   * records which kind of parent (MAP or RECORD) the remainder belongs to.
   */
  public static class ParsedName {
    private String mName;
    private int mKeyOffset; // the offset where the keysstring starts
    private ColumnType mDT = ColumnType.ANY; // parent's type

    /**
     * Creates an empty parsed name; the type starts out as ANY.
     */
    public ParsedName() {
    }

    /**
     * Replaces the name, leaving the type untouched.
     *
     * @param name
     *            column name string
     */
    public void setName(String name) {
      mName = name;
    }

    /**
     * Replaces both the name and the type.
     *
     * @param name
     *            column name string
     * @param pdt
     *            column type
     */
    public void setName(String name, ColumnType pdt) {
      mName = name;
      mDT = pdt;
    }

    // Same as the two-argument form, additionally recording the key offset.
    void setName(String name, ColumnType pdt, int keyStrOffset) {
      this.setName(name, pdt);
      mKeyOffset = keyStrOffset;
    }

    /**
     * Replaces the type only.
     *
     * @param dt
     *          column type to be set with
     */
    public void setDT(ColumnType dt) {
      mDT = dt;
    }

    /**
     * @return the currently recorded column type
     */
    public ColumnType getDT() {
      return mDT;
    }

    /**
     * @return the currently recorded column name
     */
    public String getName() {
      return mName;
    }

    /**
     * Consumes the leading section of the name. The section ends at the first
     * '.' (RECORD separator) or '#' (MAP separator), whichever comes first;
     * the supplied column must be of the matching type. Afterwards this object
     * holds the text following the separator together with that type. When the
     * name has no separator it is returned whole, the stored name is kept and
     * the type is reset to ANY.
     *
     * @param fs
     *          column schema this column name is checked against
     * @return the section that was consumed
     * @throws ParseException
     *           if the separator found does not match the type of fs
     */
    public String parseName(Schema.ColumnSchema fs) throws ParseException {
      final int dotPos = mName.indexOf('.');
      final int hashPos = mName.indexOf('#');

      if (dotPos == -1 && hashPos == -1) {
        // no hash or subfield contained
        mDT = ColumnType.ANY;
        return mName;
      }

      // The separator that occurs first decides how the head is interpreted.
      final boolean mapFirst =
          hashPos != -1 && (dotPos == -1 || hashPos < dotPos);
      final int cut;
      final ColumnType expected;
      if (mapFirst) {
        if (fs.type != ColumnType.MAP) {
          throw new ParseException(mName + " : is not of type MAP");
        }
        cut = hashPos;
        expected = ColumnType.MAP;
      }
      else {
        if (fs.type != ColumnType.RECORD) {
          throw new ParseException(mName + " : is not of type RECORD");
        }
        cut = dotPos;
        expected = ColumnType.RECORD;
      }

      final String head = mName.substring(0, cut);
      setName(mName.substring(cut + 1), expected);
      return head;
    }
  }

  // Columns in insertion order; add() may store null entries when this is a
  // projection schema.
  private ArrayList<ColumnSchema> mFields;
  // Lookup from column name to column, for columns whose name is non-null.
  private HashMap<String, ColumnSchema> mNames;
  // When true, add() accepts null columns and duplicate names.
  private boolean projection;

  /**
   * Constructor - schema for empty schema (zero-column) .
   */
  public Schema() {
    init();
  }

  /**
   * Constructor - schema for empty projection/schema (zero-column) .
   *
   * @param projection
   *           A projection schema or not
   */
  public Schema(boolean projection) {
    this.projection = projection;
    init();
  }

  /**
   * Constructor - create a (non-projection) schema from a string
   * representation.
   * 
   * @param schema
   *          A string representation of the schema. For this version, the
   *          schema string is simply a comma separated list of column names. Of
   *          course, comma (,) and space characters are illegal in column
   *          names. To maintain forward compatibility, please use only
   *          alpha-numeric characters in column names. A null or blank string
   *          yields an empty schema.
   * @throws ParseException
   *           if the string cannot be parsed
   */
  public Schema(String schema) throws ParseException {
    init(schema, false);
  }

  /**
   * Constructor - create a schema or a projection schema from a string
   * representation.
   *
   * @param schema
   *          A string representation of the schema; null or blank yields an
   *          empty schema
   * @param projection
   *          true to parse the string as a projection
   * @throws ParseException
   *           if the string cannot be parsed
   */
  public Schema(String schema, boolean projection) throws ParseException {
    this.projection = projection;
    init(schema, projection);
  }

  /**
   * Constructor - create a single-column schema.
   *
   * @param fs
   *          the column to add
   * @throws ParseException
   *           if add() rejects the column (for example when it is null)
   */
  public Schema(ColumnSchema fs) throws ParseException {
    init();
    add(fs);
  }

  /**
   * Constructor - create a schema from an array of column names.
   * 
   * @param columns
   *          An array of column names. To maintain forward compatibility,
   *          please use only alpha-numeric characters in column names.
   * @throws ParseException
   *           if an entry contains the column delimiter or cannot be parsed
   */
  public Schema(String[] columns) throws ParseException {
    init(columns, false);
  }

  /**
   * add a column
   * 
   * A null column is only accepted by a projection schema, where it is stored
   * as a placeholder. A duplicate column name is only accepted by a projection
   * schema, where the newest column replaces the older one in the name lookup.
   * 
   * @param f
   *          Column to be added to the schema
   * @throws ParseException
   *           if f is null or its name is already present, and this is not a
   *           projection schema. The schema and f are left unmodified in that
   *           case.
   */
  public void add(ColumnSchema f) throws ParseException {
    if (f == null) {
      if (!projection)
        throw new ParseException("Empty column schema is not allowed");
      mFields.add(null);
      return;
    }
    // Validate before mutating so that a rejected column neither ends up in
    // the column list nor displaces the existing entry in the name lookup.
    if (f.name != null && !projection && mNames.containsKey(f.name)) {
      throw new ParseException("Duplicate field name: " + f.name);
    }
    f.index = mFields.size();
    mFields.add(f);
    if (f.name != null) {
      mNames.put(f.name, f);
    }
  }

  /**
   * get a column by name
   *
   * @param name
   *          top-level column name to look up
   * @return the column registered under that name, or null if there is none
   */
  public ColumnSchema getColumn(String name) {
    return mNames.get(name);
  }

  /**
   * Collects the name of every column, in column order.
   * 
   * @return An array of the column names; the entry for a null placeholder
   *         column is null.
   */
  public String[] getColumns() {
    final String[] names = new String[mFields.size()];
    int pos = 0;
    for (ColumnSchema column : mFields) {
      names[pos++] = (column == null) ? null : column.name;
    }
    return names;
  }

  /**
   * Get a particular column's schema
   *
   * @param index
   *          position of the column in this schema
   * @return the column stored at that position; may be null
   */
  public ColumnSchema getColumn(int index) {
    return mFields.get(index);
  }

  /**
   * Looks up the name of the column at the given position.
   *
   * @param index
   *          position of the column in this schema
   * @return the column's name, or null when the slot holds no column
   */
  public String getColumnName(int index) {
    final ColumnSchema column = mFields.get(index);
    return (column == null) ? null : column.name;
  }

  /**
   * Get the names and types of the individual columns.
   * 
   * @return An array with one entry per column, each formed as the column
   *         name, a colon and the column's toString() text. The entry for a
   *         null placeholder column is null, mirroring getColumns().
   */
  public String[] getTypedColumns() {
    String[] result = new String[mFields.size()];
    for (int i = 0; i < result.length; i++) {
      ColumnSchema cs = mFields.get(i);
      // Projection schemas may contain null placeholders; do not dereference.
      result[i] = (cs == null) ? null : cs.name + ":" + cs.toString();
    }
    return result;
  }

  /**
   * Resolves a column name to the index recorded on that column.
   * 
   * @param name
   *          input column name.
   * @return The column index if the name is valid; -1 otherwise.
   */
  public int getColumnIndex(String name) {
    final ColumnSchema column = mNames.get(name);
    if (column == null) {
      return -1;
    }
    return column.index;
  }

  /**
   * Get the number of columns as defined in the schema.
   * 
   * @return The number of column slots, including any null placeholder
   *         entries.
   */
  public int getNumColumns() {
    return mFields.size();
  }

  /**
   * Builds a non-projection schema from its string form.
   * 
   * @param schema
   *          comma separated schema string.
   * @return Schema object
   * @throws ParseException
   *           if the string cannot be parsed
   */
  public static Schema parse(String schema) throws ParseException {
    // The String constructor performs the same non-projection initialisation.
    return new Schema(schema);
  }

  /**
   * Convert the schema to a String.
   * 
   * @return the string representation of the schema.
   * @throws RuntimeException
   *           if the schema cannot be rendered; the underlying failure is
   *           attached as the cause
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    try {
      stringifySchema(sb, this, ColumnType.COLLECTION, true);
    }
    catch (Exception fee) {
      // Keep the original failure attached so the real problem is diagnosable.
      throw new RuntimeException("PROBLEM PRINTING SCHEMA", fee);
    }
    return sb.toString();
  }

  /**
   * Appends the textual form of a schema to the builder. Columns are
   * comma-separated; a null column contributes nothing between its commas.
   * Nested schemas (type COLLECTION, RECORD or MAP) are wrapped in
   * parentheses, the top-level schema is not.
   */
  private void stringifySchema(StringBuilder sb, Schema schema,
      ColumnType type, boolean top) {
    final boolean parenthesize = !top
        && (type == ColumnType.RECORD || type == ColumnType.COLLECTION
            || type == ColumnType.MAP);
    if (parenthesize) {
      sb.append("(");
    }

    for (int i = 0; i < schema.getNumColumns(); i++) {
      if (i > 0) {
        sb.append(",");
      }

      final ColumnSchema column = schema.getColumn(i);
      if (column == null) {
        continue;
      }

      final String columnName = column.name;
      if (columnName != null && !columnName.equals("")) {
        sb.append(columnName);
        sb.append(":");
      }
      sb.append(ColumnType.findTypeName(column.type));

      final boolean carriesSchema = column.type == ColumnType.RECORD
          || column.type == ColumnType.MAP
          || column.type == ColumnType.COLLECTION;
      if (!carriesSchema) {
        continue;
      }
      // safety net
      if (this == column.schema) {
        throw new IllegalArgumentException("Schema refers to itself "
            + "as inner schema");
      }
      stringifySchema(sb, column.schema, column.type, false);
    }

    if (parenthesize) {
      sb.append(")");
    }
  }

  /**
   * Normalize the schema string by trimming it and trimming every
   * delimiter-separated section.
   * 
   * @param value
   *          the input string representation of the schema.
   * @return the normalized string representation; empty for null or blank
   *         input.
   */
  public static String normalize(String value) {
    if (value == null) {
      return "";
    }
    final String trimmed = value.trim();
    if (trimmed.isEmpty()) {
      return "";
    }

    final StringBuilder joined = new StringBuilder();
    String separator = "";
    for (String section : trimmed.split(COLUMN_DELIMITER)) {
      joined.append(separator).append(section.trim());
      separator = COLUMN_DELIMITER;
    }
    return joined.toString();
  }

  /**
   * Orders schemas first by number of columns and then, column by column, by
   * their nested schemas: a column without a nested schema sorts before one
   * that has a nested schema, and two nested schemas are compared recursively.
   * Column names and types do not take part in the ordering.
   *
   * @param other
   *          the schema to compare against
   * @return a negative value, zero or a positive value as this schema sorts
   *         before, equal to or after the other
   * @see Comparable#compareTo(Object)
   */
  @Override
  public int compareTo(Schema other) {
    int mFieldsSize = this.mFields.size();
    if (mFieldsSize != other.mFields.size()) {
      return mFieldsSize - other.mFields.size();
    }
    for (int nx = 0; nx < mFieldsSize; nx++) {
      // Null placeholder columns are treated like columns without a nested
      // schema instead of being dereferenced.
      ColumnSchema mField = mFields.get(nx);
      ColumnSchema otherField = other.mFields.get(nx);
      Schema mFieldSchema = (mField == null) ? null : mField.schema;
      Schema otherFieldSchema = (otherField == null) ? null : otherField.schema;

      if (mFieldSchema == null && otherFieldSchema == null) {
        // Nothing to distinguish at this position; keep looking at the
        // remaining columns rather than declaring the schemas equal.
        continue;
      }
      if (mFieldSchema == null) return -1;
      if (otherFieldSchema == null) return 1;

      int ret = mFieldSchema.compareTo(otherFieldSchema);
      if (ret != 0) {
        return ret;
      }
    }
    return 0;
  }

  /**
   * Two schemas are equal when they have the same number of columns and every
   * pair of columns at the same position is equal according to
   * ColumnSchema.equals (same name, same type, equal nested schema). Two null
   * placeholder columns at the same position are considered equal.
   *
   * NOTE(review): hashCode() is not overridden alongside this method, so equal
   * schemas may land in different buckets of hash-based collections.
   *
   * @see Object#equals(Object)
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) return true;
    if (obj == null) return false;
    if (getClass() != obj.getClass()) return false;
    Schema other = (Schema) obj;

    int size = mFields.size();
    if (size != other.mFields.size()) return false;

    for (int nx = 0; nx < size; nx++) {
      ColumnSchema myFs = mFields.get(nx);
      ColumnSchema otherFs = other.mFields.get(nx);

      // Projection schemas may hold null placeholders: both null matches,
      // exactly one null does not.
      if (myFs == null || otherFs == null) {
        if (myFs != otherFs) return false;
        continue;
      }

      // Compares name and type, then recurses into the nested schema.
      if (!ColumnSchema.equals(myFs, otherFs)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Replaces the contents of this schema with the one read from the input: a
   * version number followed by the schema string, which is parsed as a
   * non-projection schema.
   *
   * @param in
   *          source to read from
   * @throws IOException
   *           if the stored version is newer than this code supports, or the
   *           schema string cannot be parsed (the parse failure is attached as
   *           the cause)
   * @see Writable#readFields(DataInput)
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    long version = org.apache.hadoop.zebra.tfile.Utils.readVLong(in);

    if (version > schemaVersion)
      throw new IOException("Schema version is newer than that in software.");

    // check-ups are needed for future versions for backward-compatibility
    String strSchema = org.apache.hadoop.zebra.tfile.Utils.readString(in);
    try {
      init(strSchema, false);
    }
    catch (Exception e) {
      // Chain the original exception so its stack trace is not lost.
      throw new IOException(e.getMessage(), e);
    }
  }

  /**
   * Writes the schema version followed by the string form of this schema
   * (as produced by toString()).
   *
   * @param out
   *          sink to write to
   * @throws IOException
   *           if writing fails
   * @see Writable#write(DataOutput)
   */
  @Override
  public void write(DataOutput out) throws IOException {
    org.apache.hadoop.zebra.tfile.Utils.writeVLong(out, schemaVersion);
    org.apache.hadoop.zebra.tfile.Utils.writeString(out, toString());
  }

  /**
   * Resets this schema and repopulates it by joining the given column entries
   * with commas and handing the result to the table schema parser.
   *
   * @param columnNames
   *          column entries; none may contain the column delimiter
   * @param projection
   *          true to parse with the projection grammar, false for the record
   *          grammar
   * @throws ParseException
   *           if an entry contains the delimiter or parsing fails
   */
  private void init(String[] columnNames, boolean projection) throws ParseException {
    // NOTE(review): the original remark here says entries must carry a type
    // or they are treated as the default type - presumably handled by the
    // parser; confirm against TableSchemaParser.
    mFields = new ArrayList<ColumnSchema>();
    mNames = new HashMap<String, ColumnSchema>();

    final StringBuilder joined = new StringBuilder();
    boolean needSeparator = false;
    for (String column : columnNames) {
      if (column.contains(COLUMN_DELIMITER)) {
        throw new ParseException("Column name should not contain "
            + COLUMN_DELIMITER);
      }
      if (needSeparator) {
        joined.append(",");
      }
      joined.append(column);
      needSeparator = true;
    }

    final TableSchemaParser parser =
        new TableSchemaParser(new StringReader(joined.toString()));
    if (projection) {
      parser.ProjectionSchema(this);
    }
    else {
      parser.RecordSchema(this);
    }
  }

  /**
   * Resets this schema to zero columns by replacing both the column list and
   * the name lookup with fresh, empty collections.
   */
  private void init() {
    mFields = new ArrayList<ColumnSchema>();
    mNames = new HashMap<String, ColumnSchema>();
  }

  /**
   * Initialises this schema from a schema string. A null or blank string gives
   * an empty schema; otherwise the string is cut at every column delimiter,
   * each piece is trimmed, and the pieces are passed on for parsing.
   *
   * @param columnString
   *          schema string, may be null
   * @param projection
   *          true to parse with the projection grammar
   * @throws ParseException
   *           if parsing fails
   */
  private void init(String columnString, boolean projection) throws ParseException {
    if (columnString == null) {
      init();
      return;
    }
    final String trimmed = columnString.trim();
    if (trimmed.isEmpty()) {
      init();
      return;
    }

    final String[] pieces = trimmed.split(COLUMN_DELIMITER);
    for (int k = 0; k < pieces.length; k++) {
      pieces[k] = pieces[k].trim();
    }
    init(pieces, projection);
  }

  /**
   * Builds the projection schema for a list of projected column
   * specifications, resolved against this schema.
   *
   * A virtual column is added as an INT column named after the source-table
   * virtual column. A specification that resolves to a column is added with
   * that column's type and nested schema; one that does not resolve is added
   * with type ANY. When the resolved column is a MAP, the text after the '#'
   * must be a brace-enclosed, '|'-separated key list, and those keys are
   * recorded in keysmap under the newly added column.
   *
   * @param projcols
   *          projected column specifications
   * @param keysmap
   *          receives the requested map keys per projected MAP column
   * @return the projection schema
   * @throws ParseException
   *           if a specification is invalid
   */
  public Schema getProjectionSchema(String[] projcols,
      HashMap<Schema.ColumnSchema, HashSet<String>> keysmap)
      throws ParseException {
    final int total = projcols.length;
    final Schema projected = new Schema(true);
    final ParsedName parsed = new ParsedName();

    for (int i = 0; i < total; i++) {
      final String spec = projcols[i];

      if (Projection.isVirtualColumn(spec)) {
        projected.add(new ColumnSchema(
            Projection.source_table_vcolumn_name, null, ColumnType.INT));
        continue;
      }

      parsed.setName(spec);
      final ColumnSchema resolved = getColumnSchemaOnParsedName(parsed);
      if (resolved == null) {
        projected.add(new ColumnSchema(parsed.mName, null, ColumnType.ANY));
        continue;
      }

      final ColumnSchema column =
          new ColumnSchema(parsed.mName, resolved.schema, resolved.type);
      projected.add(column);
      if (parsed.mDT != ColumnType.MAP) {
        continue;
      }

      // Everything after the '#' must look like {key1|key2|...}.
      String keySpec = spec.substring(parsed.mKeyOffset);
      if (!(keySpec.startsWith("{") && keySpec.endsWith("}"))) {
        throw new ParseException("Invalid map key specification in "
            + spec);
      }
      keySpec = keySpec.substring(1, keySpec.length() - 1);
      final boolean hasReservedChar = keySpec.indexOf('{') != -1
          || keySpec.indexOf('}') != -1
          || keySpec.indexOf('.') != -1
          || keySpec.indexOf('#') != -1;
      if (hasReservedChar) {
        throw new ParseException("Invalid map key specification in "
            + spec);
      }

      HashSet<String> requested = keysmap.get(column);
      if (requested == null) {
        requested = new HashSet<String>();
        keysmap.put(column, requested);
      }
      for (String key : keySpec.split("\\|")) {
        requested.add(key);
      }
    }
    return projected;
  }

  /**
   * Get a column's schema
   *
   * The name may be qualified with '.' (descend into a RECORD or COLLECTION)
   * and '#' (descend into a MAP). For a MAP the first column of the map's
   * nested schema is returned and anything after the '#' is not inspected.
   *
   * @param name
   *          column name
   * @return Column schema for the named column; null when the last section of
   *         the name is not a column of the schema reached
   * @throws ParseException
   *           if an intermediate section does not exist, has the wrong type
   *           for the separator that follows it, or the name ends in '.'
   */
  public ColumnSchema getColumnSchema(String name) throws ParseException {
    int hashIndex, fieldIndex;
    String currentName = name, prefix;
    Schema currentSchema = this;
    ColumnSchema fs = null;

    // Phase 1: repeatedly cut the name at its last separator until what is
    // left is a top-level column of this schema, or no separator remains.
    // fieldIndex/hashIndex keep the values from the final iteration and seed
    // the first pass of phase 2.
    while (true) {
      /**
       * this loop is necessary because the top columns in schema may contain .s
       * and #s, particularly for column group schemas
       */
      fieldIndex = currentName.lastIndexOf('.');
      hashIndex = currentName.lastIndexOf('#');
      if (fieldIndex == -1 && hashIndex == -1) {
        break;
      }
      else if (hashIndex != -1 && (fieldIndex == -1 || hashIndex > fieldIndex)) {
        currentName = currentName.substring(0, hashIndex);
        if (getColumn(currentName) != null) break;
      }
      else {
        currentName = currentName.substring(0, fieldIndex);
        if (getColumn(currentName) != null) break;
      }
    }

    // Phase 2: walk the full name from the left, one section per pass.
    // NOTE(review): on the first pass both indices come from lastIndexOf in
    // phase 1 and the branch below picks the smaller one; if the matched
    // top-level name itself contains the other separator this looks like it
    // would cut at the wrong place - confirm.
    currentName = name;
    while (true) {
      if (fieldIndex == -1 && hashIndex == -1) {
        // No separator left: the remainder names a column of currentSchema.
        fs = currentSchema.getColumn(currentName);
        return fs;
      }
      else if (hashIndex != -1 && (fieldIndex == -1 || hashIndex < fieldIndex)) {
        // '#' comes first: the section before it must be a MAP column.
        prefix = currentName.substring(0, hashIndex);
        fs = currentSchema.getColumn(prefix);
        if (fs == null)
          throw new ParseException("Column " + name + " does not exist");
        currentSchema = fs.schema;
        if (fs.type != ColumnType.MAP)
          throw new ParseException(name + " is not of type MAP");
        // Result is the first column of the map's nested schema.
        fs = currentSchema.getColumn(0);
        return fs;
      }
      else {
        // '.' comes first: descend into the RECORD/COLLECTION named before it.
        prefix = currentName.substring(0, fieldIndex);
        if ((fs = currentSchema.getColumn(prefix)) == null)
          throw new ParseException("Column " + name + " does not exist");
        currentSchema = fs.schema;
        if (fs.type != ColumnType.RECORD && fs.type != ColumnType.COLLECTION)
          throw new ParseException(name
              + " is not of type RECORD or COLLECTION");
        currentName = currentName.substring(fieldIndex + 1);
        if (currentName.length() == 0)
          throw new ParseException("Column " + name
              + " does not have field after the record field separator '.'");
      }
      // Subsequent passes look for the first separator in what remains.
      fieldIndex = currentName.indexOf('.');
      hashIndex = currentName.indexOf('#');
    }
  }

  /**
   * Get a subcolumn's schema and move the name just parsed into the next subtype
   *
   * The name held by pn may be qualified with '.' (descend into a RECORD) and
   * '#' (select keys of a MAP). Descending into a COLLECTION is rejected.
   *
   * @param pn
   *           The name of subcolumn to be parsed. On return, when a MAP column
   *           was resolved, it holds the name up to (not including) the '#',
   *           type MAP and the offset just past the '#'; otherwise it holds
   *           the full name and the type of the last parent descended into
   *           (ANY when there was none).
   *
   * @return   the discovered Column Schema for the subcolumn; null when the
   *           last section of the name is not a column of the schema reached
   * @throws ParseException
   *           if an intermediate section does not exist, has the wrong type
   *           for the separator that follows it, is a COLLECTION, or the name
   *           ends in '.'
   */
  public ColumnSchema getColumnSchemaOnParsedName(ParsedName pn)
      throws ParseException {
    // offset counts how many characters of name have been consumed so far.
    int hashIndex, fieldIndex, offset = 0;
    String name = pn.mName;

    /**
     * strip off any possible type specs
     */
    String currentName = name, prefix;
    Schema currentSchema = this;
    ColumnSchema fs = null;

    // Phase 1: repeatedly cut the name at its last separator until what is
    // left is a top-level column of this schema, or no separator remains.
    // fieldIndex/hashIndex keep the values from the final iteration and seed
    // the first pass of phase 2.
    while (true) {
      /**
       * this loop is necessary because the top columns in schema may contain .s
       * and #s, particularly for column group schemas
       */
      fieldIndex = currentName.lastIndexOf('.');
      hashIndex = currentName.lastIndexOf('#');
      if (fieldIndex == -1 && hashIndex == -1) {
        break;
      }
      else if (hashIndex != -1 && (fieldIndex == -1 || hashIndex > fieldIndex)) {
        currentName = currentName.substring(0, hashIndex);
        if ((fs = getColumn(currentName)) != null) {
          // A top-level column followed by '#': it must be a MAP, and the
          // result is returned straight away.
          if (fs.type != ColumnType.MAP)
            throw new ParseException(name + " is not of type MAP");
          // NOTE(review): offset is not read again before the return below.
          offset += hashIndex;
          pn.setName(name.substring(0, hashIndex), ColumnType.MAP,
              hashIndex + 1);
          return fs;
        }
      }
      else {
        currentName = currentName.substring(0, fieldIndex);
        if (getColumn(currentName) != null) break;
      }
    }

    // Phase 2: walk the full name from the left, one section per pass.
    currentName = name;
    ColumnType ct = ColumnType.ANY;

    while (true) {
      if (fieldIndex == -1 && hashIndex == -1) {
        // No separator left: the remainder names a column of currentSchema.
        offset += currentName.length();
        pn.setName(name.substring(0, offset), ct);
        fs = currentSchema.getColumn(currentName);
        return fs;
      }
      else if (hashIndex != -1 && (fieldIndex == -1 || hashIndex < fieldIndex)) {
        // '#' comes first: the section before it must be a MAP column.
        prefix = currentName.substring(0, hashIndex);
        fs = currentSchema.getColumn(prefix);
        if (fs == null)
          throw new ParseException("Column " + name + " does not exist");
        currentSchema = fs.schema;
        if (fs.type != ColumnType.MAP)
          throw new ParseException(name + " is not of type MAP");
        offset += hashIndex;
        pn.setName(name.substring(0, offset), ColumnType.MAP, offset + 1);
        return fs;
      }
      else {
        // '.' comes first: descend into the RECORD named before it.
        prefix = currentName.substring(0, fieldIndex);
        if ((fs = currentSchema.getColumn(prefix)) == null)
          throw new ParseException("Column " + name + " does not exist");
        currentSchema = fs.schema;
        // NOTE(review): the COLLECTION message below is concatenated to the
        // name without any separator.
        if (fs.type != ColumnType.RECORD && fs.type != ColumnType.COLLECTION)
          throw new ParseException(name
              + " is not of type RECORD or COLLECTION");
        else if( fs.type == ColumnType.COLLECTION ) 
		  throw new ParseException( name + "Projection within COLLECTION is not supported" );
        currentName = currentName.substring(fieldIndex + 1);
        if (currentName.length() == 0)
          throw new ParseException("Column " + name
              + " does not have field after the record field separator '.'");
        // Account for the consumed section plus its '.' separator.
        offset += fieldIndex + 1;
        ct = fs.type;
      }
      // Subsequent passes look for the first separator in what remains.
      fieldIndex = currentName.indexOf('.');
      hashIndex = currentName.indexOf('#');
    }
  }

  /**
   * find the most fitting top-level column for the name: the column whose name
   * is the longest one that either equals the parsed name or is followed in it
   * by a '.' or '#' separator. On success the parsed name is advanced past the
   * matched column name and the separator.
   *
   * This is used to help discover the most fitting column schema in multiple
   * CG schemas.
   *
   * For instance, if pn contains a name of r.r1.f11 and current schema has
   * r.r1:record(f11:int, f12), it will return the column schema of r.r1, and
   * pn is set at "f11" with type RECORD.
   *
   * @param pn
   *           The name of subcolumn to be parsed. On return it contains the
   *           subcolumn at the next level after parsing; it is left untouched
   *           when no column matches
   *
   * @return   the discovered Column Schema for the subcolumn, or null when no
   *           column matches
   * @throws ParseException
   *           if the separator following the matched column does not fit the
   *           column's type
   */
  public ColumnSchema getColumnSchema(ParsedName pn) throws ParseException {
    int maxlen = 0, size = getNumColumns();
    Schema.ColumnSchema fs = null;
    String name = pn.getName();
    boolean record = false, map = false;
    for (int i = 0; i < size; i++) {
      String fname = getColumnName(i);
      if (fname == null) {
        // Null placeholder column or unnamed column: nothing to match.
        continue;
      }
      // Evaluate the three kinds of match afresh for every column so that a
      // flag set for an earlier candidate can never leak into this one.
      boolean whole = name.equals(fname);
      boolean asRecord = !whole && name.startsWith(fname + ".");
      boolean asMap = !whole && !asRecord && name.startsWith(fname + "#");
      if (!(whole || asRecord || asMap)) {
        continue;
      }
      int len = fname.length();
      if (len > maxlen) {
        maxlen = len;
        record = asRecord;
        map = asMap;
        fs = getColumn(i);
        if (whole) break;
      }
    }
    if (fs == null) {
      return null;
    }

    name = name.substring(maxlen);
    if (record) {
      if (fs.type != ColumnType.RECORD && fs.type != ColumnType.COLLECTION)
        throw new ParseException(name
            + " is not of type RECORD or COLLECTION");
      // drop the '.' separator
      name = name.substring(1);
      pn.setName(name, fs.type);
    }
    else if (map) {
      if (fs.type != ColumnType.MAP)
        throw new ParseException(name + " is not of type MAP");
      // drop the '#' separator
      name = name.substring(1);
      pn.setName(name, ColumnType.MAP);
    }
    else {
      pn.setName(name, ColumnType.ANY);
    }
    return fs;
  }

  /**
   * union compatible schemas. Exception will be thrown if a name appears in
   * multiple schemas but the types are different.
   *
   * Columns of the other schema that are missing here are appended as copies,
   * so that assigning them an index in this schema does not overwrite the
   * index they hold in the other schema. Null placeholder columns of the other
   * schema are skipped.
   *
   * @param other
   *          schema whose columns are merged into this one; not modified
   * @throws ParseException
   *           if a column exists in both schemas with a different definition
   */
  public void unionSchema(Schema other) throws ParseException {
    int size = other.getNumColumns();
    for (int i = 0; i < size; i++) {
      ColumnSchema otherfs = other.getColumn(i);
      if (otherfs == null) continue;
      ColumnSchema fs = getColumn(otherfs.name);
      if (fs == null) {
        // add() stamps the column with its index in this schema, so hand it a
        // private copy rather than the instance still owned by other.
        add(new ColumnSchema(otherfs));
      }
      else if (!ColumnSchema.equals(fs, otherfs)) {
        throw new ParseException("Different types of column " + fs.name
            + " in tables of a union");
      }
    }
  }

  /**
   * Renders the column names as a comma-separated list without type
   * information. A null placeholder column yields an empty entry.
   *
   * @return untyped schema string for projection
   */
  public String toProjectionString() {
    final StringBuilder joined = new StringBuilder();
    final int count = mFields.size();
    for (int i = 0; i < count; i++) {
      if (i > 0) {
        joined.append(",");
      }
      final ColumnSchema column = mFields.get(i);
      if (column != null) {
        joined.append(column.name);
      }
    }
    return joined.toString();
  }
}