/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.realtime.utils;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.realtime.impl.dictionary.BaseOnHeapMutableDictionary;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
 * Serializes and deserializes the dimension columns of a realtime row into a flat
 * buffer of dictionary ids.
 *
 * <p>Buffer layout (all 32-bit big-endian ints, written with absolute positions):
 * a header of {@code numDimensions + 1} offsets followed by the dictionary ids of
 * every dimension in schema order. Each offset is an absolute int index into the
 * buffer (already shifted past the header), so the ids for dimension {@code i}
 * occupy the int range {@code [offset[i], offset[i + 1])}. Single-value columns
 * store exactly one id; multi-value columns store one id per value (sorted), and a
 * null/empty multi-value column stores the single default id {@code 0}.
 *
 * <p>Not thread-safe beyond the thread-safety of the supplied dictionaries.
 */
public class RealtimeDimensionsSerDe {
  // Retained for interface compatibility; lookups below go through dataSchema instead.
  private final List<String> dimensionsList;
  private final Schema dataSchema;
  private final Map<String, BaseOnHeapMutableDictionary> dictionaryMap;

  /**
   * @param dimensionName ordered list of dimension column names
   * @param schema schema describing the dimension field specs
   * @param dictionary per-column mutable dictionaries used to translate values to ids
   */
  public RealtimeDimensionsSerDe(List<String> dimensionName, Schema schema,
      Map<String, BaseOnHeapMutableDictionary> dictionary) {
    this.dimensionsList = dimensionName;
    this.dataSchema = schema;
    this.dictionaryMap = dictionary;
  }

  /**
   * Encodes the dimension values of {@code row} into the offset + dictionary-id
   * layout described on the class. The input row is not modified.
   *
   * @param row row whose dimension columns are read via {@code row.getValue(name)}
   * @return a freshly allocated buffer; its position is at the limit (callers use
   *         absolute reads, so no flip/rewind is performed)
   */
  public ByteBuffer serialize(GenericRow row) {
    List<String> dimensionNames = dataSchema.getDimensionNames();
    List<Integer> dictionaryIds = new ArrayList<Integer>();
    List<Integer> columnOffsets = new ArrayList<Integer>(dimensionNames.size() + 1);
    int pointer = 0;
    for (String dimensionName : dimensionNames) {
      columnOffsets.add(pointer);
      BaseOnHeapMutableDictionary dictionary = dictionaryMap.get(dimensionName);
      if (dataSchema.getFieldSpecFor(dimensionName).isSingleValueField()) {
        dictionaryIds.add(dictionary.indexOf(row.getValue(dimensionName)));
        pointer += 1;
      } else {
        Object[] multiValues = (Object[]) row.getValue(dimensionName);
        if (multiValues != null && multiValues.length > 0) {
          // Sort a copy so the caller's row is not mutated as a side effect.
          // Elements are assumed mutually Comparable — TODO confirm upstream guarantee.
          Object[] sortedValues = multiValues.clone();
          Arrays.sort(sortedValues);
          for (Object value : sortedValues) {
            dictionaryIds.add(dictionary.indexOf(value));
          }
          pointer += sortedValues.length;
        } else {
          // Null/empty multi-value columns are stored as a single default id 0.
          dictionaryIds.add(0);
          pointer += 1;
        }
      }
    }
    // Trailing offset marks the end of the last column's id range (omitted for an
    // empty dimension list, matching the original zero-length buffer behavior).
    if (!dimensionNames.isEmpty()) {
      columnOffsets.add(pointer);
    }
    ByteBuffer buffer = ByteBuffer.allocate((columnOffsets.size() + dictionaryIds.size()) * 4);
    // Shift each offset past the header so it is an absolute int index into the buffer.
    for (Integer columnOffset : columnOffsets) {
      buffer.putInt(columnOffset + columnOffsets.size());
    }
    for (Integer dictionaryId : dictionaryIds) {
      buffer.putInt(dictionaryId);
    }
    return buffer;
  }

  /**
   * Extracts the dictionary ids stored for {@code column} from a buffer produced by
   * {@link #serialize(GenericRow)}. Uses absolute reads only, so the buffer's
   * position is left untouched.
   *
   * @param column dimension name; must be present in the schema's dimension list
   *        (an unknown column yields index -1 and an IndexOutOfBoundsException)
   * @param buffer buffer in the serialized layout described on the class
   * @return the dictionary ids for the column, possibly of length &gt; 1 for
   *         multi-value columns
   */
  public int[] deSerializeAndReturnDicIdsFor(String column, ByteBuffer buffer) {
    int dimIndex = dataSchema.getDimensionNames().indexOf(column);
    // Offsets in the header are absolute int indexes; [start, end) bounds this column's ids.
    int start = buffer.getInt(dimIndex * 4);
    int end = buffer.getInt((dimIndex + 1) * 4);
    int[] dictionaryIds = new int[end - start];
    for (int i = start; i < end; i++) {
      dictionaryIds[i - start] = buffer.getInt(i * 4);
    }
    return dictionaryIds;
  }

  /**
   * Rebuilds a {@link GenericRow} from a serialized buffer by mapping each stored
   * dictionary id back to its value. Single-value columns become a scalar, multi-value
   * columns an {@code Object[]}.
   *
   * @param buffer buffer produced by {@link #serialize(GenericRow)}
   * @return a new row initialized with the decoded dimension values
   */
  public GenericRow deSerialize(ByteBuffer buffer) {
    Map<String, Object> rowValues = new HashMap<String, Object>();
    for (String dimension : dataSchema.getDimensionNames()) {
      int[] dictionaryIds = deSerializeAndReturnDicIdsFor(dimension, buffer);
      BaseOnHeapMutableDictionary dictionary = dictionaryMap.get(dimension);
      if (dataSchema.getFieldSpecFor(dimension).isSingleValueField()) {
        rowValues.put(dimension, dictionary.get(dictionaryIds[0]));
      } else {
        Object[] values = new Object[dictionaryIds.length];
        for (int i = 0; i < dictionaryIds.length; i++) {
          values[i] = dictionary.get(dictionaryIds[i]);
        }
        rowValues.put(dimension, values);
      }
    }
    GenericRow row = new GenericRow();
    row.init(rowValues);
    return row;
  }
}