/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.common.utils;

import com.linkedin.pinot.common.data.FieldSpec;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import javax.annotation.Nonnull;


/**
 * The <code>DataSchema</code> class describes the schema of {@link DataTable}: an ordered list of
 * column names with their corresponding {@link FieldSpec.DataType}s. Instances can be serialized to
 * and deserialized from a compact byte representation for shipping between server and broker.
 */
public class DataSchema {
  // Use the JDK constant instead of Charset.forName("UTF-8"): no runtime lookup, cannot throw.
  private static final Charset UTF_8 = StandardCharsets.UTF_8;

  private String[] _columnNames;
  private FieldSpec.DataType[] _columnTypes;

  /**
   * Constructs a data schema from parallel arrays of column names and column types.
   * <p>The arrays are stored directly (not copied); both must have the same length.
   *
   * @param columnNames names of the columns, in order.
   * @param columnTypes data types of the columns, parallel to {@code columnNames}.
   */
  public DataSchema(@Nonnull String[] columnNames, @Nonnull FieldSpec.DataType[] columnTypes) {
    _columnNames = columnNames;
    _columnTypes = columnTypes;
  }

  /**
   * Returns the number of columns in the schema.
   */
  public int size() {
    return _columnNames.length;
  }

  /**
   * Returns the name of the column at the given index.
   *
   * @param index column index.
   * @return column name.
   */
  @Nonnull
  public String getColumnName(int index) {
    return _columnNames[index];
  }

  /**
   * Returns the data type of the column at the given index.
   *
   * @param index column index.
   * @return column data type.
   */
  @Nonnull
  public FieldSpec.DataType getColumnType(int index) {
    return _columnTypes[index];
  }

  /**
   * Indicates whether the given {@link DataSchema} is type compatible with this one.
   * <ul>
   *   <li>All numbers are type compatible.</li>
   *   <li>Number is not type compatible with String.</li>
   *   <li>Single-value is not type compatible with multi-value.</li>
   * </ul>
   *
   * @param anotherDataSchema data schema to compare.
   * @return whether the two data schemas are type compatible.
   */
  public boolean isTypeCompatibleWith(@Nonnull DataSchema anotherDataSchema) {
    // Column names must match exactly (same order); only the types may differ.
    if (!Arrays.equals(_columnNames, anotherDataSchema._columnNames)) {
      return false;
    }
    int numColumns = _columnNames.length;
    for (int i = 0; i < numColumns; i++) {
      if (!_columnTypes[i].isCompatible(anotherDataSchema._columnTypes[i])) {
        return false;
      }
    }
    return true;
  }

  /**
   * Upgrade the current data schema to cover the column types in the given data schema.
   * <p>Type <code>long</code> can cover <code>int</code> and <code>long</code>.
   * <p>Type <code>double</code> can cover all numbers, but with potential precision loss when use it to cover
   * <code>long</code>.
   * <p>The given data schema should be type compatible with this one.
   *
   * @param anotherDataSchema data schema to be covered.
   */
  public void upgradeToCover(@Nonnull DataSchema anotherDataSchema) {
    int numColumns = _columnTypes.length;
    for (int i = 0; i < numColumns; i++) {
      FieldSpec.DataType thisColumnType = _columnTypes[i];
      FieldSpec.DataType thatColumnType = anotherDataSchema._columnTypes[i];
      if (thisColumnType != thatColumnType) {
        if (thisColumnType.isSingleValue()) {
          // Two distinct integer types are both covered by LONG; any other mix needs DOUBLE.
          if (thisColumnType.isInteger() && thatColumnType.isInteger()) {
            _columnTypes[i] = FieldSpec.DataType.LONG;
          } else {
            _columnTypes[i] = FieldSpec.DataType.DOUBLE;
          }
        } else {
          // Multi-value columns: apply the same widening rule to the element type.
          if (thisColumnType.toSingleValue().isInteger() && thatColumnType.toSingleValue().isInteger()) {
            _columnTypes[i] = FieldSpec.DataType.LONG_ARRAY;
          } else {
            _columnTypes[i] = FieldSpec.DataType.DOUBLE_ARRAY;
          }
        }
      }
    }
  }

  /**
   * Serializes this data schema into bytes.
   * <p>Wire format: column count (int), then for each column name a length-prefixed UTF-8 byte
   * array, then for each column type its enum name as a length-prefixed UTF-8 byte array.
   *
   * @return serialized bytes.
   * @throws IOException if the underlying stream write fails.
   */
  @Nonnull
  public byte[] toBytes()
      throws IOException {
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream);
    int length = _columnNames.length;

    // Write the number of columns.
    dataOutputStream.writeInt(length);

    // Write the column names.
    for (String columnName : _columnNames) {
      byte[] bytes = columnName.getBytes(UTF_8);
      dataOutputStream.writeInt(bytes.length);
      dataOutputStream.write(bytes);
    }

    // Write the column types.
    for (FieldSpec.DataType columnType : _columnTypes) {
      // We don't want to use ordinal of the enum since adding a new data type will break things if server and broker
      // use different versions of DataType class.
      byte[] bytes = columnType.name().getBytes(UTF_8);
      dataOutputStream.writeInt(bytes.length);
      dataOutputStream.write(bytes);
    }
    return byteArrayOutputStream.toByteArray();
  }

  /**
   * Deserializes a data schema from bytes produced by {@link #toBytes()}.
   *
   * @param buffer serialized bytes.
   * @return deserialized data schema.
   * @throws IOException if the buffer is truncated or otherwise unreadable.
   */
  @Nonnull
  public static DataSchema fromBytes(@Nonnull byte[] buffer)
      throws IOException {
    ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(buffer);
    DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream);

    // Read the number of columns.
    int numColumns = dataInputStream.readInt();
    String[] columnNames = new String[numColumns];
    FieldSpec.DataType[] columnTypes = new FieldSpec.DataType[numColumns];

    // Read the column names.
    for (int i = 0; i < numColumns; i++) {
      int length = dataInputStream.readInt();
      byte[] bytes = new byte[length];
      // readFully() guarantees the whole payload is consumed and throws EOFException on a
      // truncated buffer, unlike read() which may return fewer bytes without error.
      dataInputStream.readFully(bytes);
      columnNames[i] = new String(bytes, UTF_8);
    }

    // Read the column types.
    for (int i = 0; i < numColumns; i++) {
      int length = dataInputStream.readInt();
      byte[] bytes = new byte[length];
      dataInputStream.readFully(bytes);
      columnTypes[i] = FieldSpec.DataType.valueOf(new String(bytes, UTF_8));
    }

    return new DataSchema(columnNames, columnTypes);
  }

  /**
   * Returns a copy of this data schema with freshly cloned backing arrays, so that mutating the
   * copy (e.g. via {@link #upgradeToCover(DataSchema)}) does not affect this instance.
   */
  @SuppressWarnings("CloneDoesntCallSuperClone")
  @Override
  public DataSchema clone() {
    return new DataSchema(_columnNames.clone(), _columnTypes.clone());
  }

  @Override
  public String toString() {
    int numColumns = _columnNames.length;
    // Guard the empty case: the setCharAt() trick below would otherwise overwrite the opening
    // bracket and return "]" instead of "[]".
    if (numColumns == 0) {
      return "[]";
    }
    StringBuilder stringBuilder = new StringBuilder();
    stringBuilder.append('[');
    for (int i = 0; i < numColumns; i++) {
      stringBuilder.append(_columnNames[i]).append('(').append(_columnTypes[i]).append(')').append(',');
    }
    // Replace the trailing comma with the closing bracket.
    stringBuilder.setCharAt(stringBuilder.length() - 1, ']');
    return stringBuilder.toString();
  }

  @Override
  public boolean equals(Object anObject) {
    if (this == anObject) {
      return true;
    }
    if (anObject instanceof DataSchema) {
      DataSchema anotherDataSchema = (DataSchema) anObject;
      return Arrays.equals(_columnNames, anotherDataSchema._columnNames) && Arrays.equals(_columnTypes,
          anotherDataSchema._columnTypes);
    }
    return false;
  }

  @Override
  public int hashCode() {
    int hashCode = EqualityUtils.hashCodeOf(_columnNames);
    hashCode = EqualityUtils.hashCodeOf(hashCode, _columnTypes);
    return hashCode;
  }
}