/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.engine.utils;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.fs.Path;
import org.apache.tajo.catalog.Column;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.SortSpec;
import org.apache.tajo.catalog.statistics.ColumnStats;
import org.apache.tajo.datum.DatumFactory;
import org.apache.tajo.datum.NullDatum;
import org.apache.tajo.engine.eval.EvalNode;
import org.apache.tajo.storage.RowStoreUtil;
import org.apache.tajo.storage.Tuple;
import org.apache.tajo.storage.TupleRange;
import org.apache.tajo.storage.VTuple;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
public class TupleUtil {

  /**
   * Serializes a tuple range into an HTTP query-string fragment of the form
   * {@code start=<base64>&end=<base64>[&final=true]}. The boundary tuples are
   * row-store encoded, base64 encoded, and finally URL encoded.
   *
   * @param schema the schema used to encode the range's boundary tuples
   * @param range the tuple range to serialize
   * @param last if true, appends {@code &final=true} to mark the terminal range
   * @return the encoded query string
   * @throws UnsupportedEncodingException if UTF-8 is unsupported (cannot happen on a
   *         conforming JVM)
   */
  public static String rangeToQuery(Schema schema, TupleRange range, boolean last)
      throws UnsupportedEncodingException {
    byte[] firstKeyBytes = RowStoreUtil.RowStoreEncoder.toBytes(schema, range.getStart());
    byte[] endKeyBytes = RowStoreUtil.RowStoreEncoder.toBytes(schema, range.getEnd());

    // Decode the base64 output with an explicit charset instead of the platform
    // default; base64 output is pure ASCII, so UTF-8 is always correct and portable.
    String firstKeyBase64 = new String(Base64.encodeBase64(firstKeyBytes), "utf-8");
    String lastKeyBase64 = new String(Base64.encodeBase64(endKeyBytes), "utf-8");

    StringBuilder sb = new StringBuilder();
    sb.append("start=")
        .append(URLEncoder.encode(firstKeyBase64, "utf-8"))
        .append("&")
        .append("end=")
        .append(URLEncoder.encode(lastKeyBase64, "utf-8"));

    if (last) {
      sb.append("&final=true");
    }
    return sb.toString();
  }

  /**
   * Builds a {@link TupleRange} from per-column statistics. For each target column the
   * range boundary is taken from the column's min/max value: an ascending column uses
   * [min, max] while a descending column uses [max, min].
   *
   * @param sortSpecs sort specifications, assumed parallel to {@code target}'s columns
   * @param target the schema of the columns making up the range
   * @param colStats statistics that must cover every column in {@code target}
   * @return the tuple range spanning the given statistics
   * @throws IllegalStateException if a target column has no statistics entry
   */
  public static TupleRange columnStatToRange(SortSpec [] sortSpecs, Schema target, List<ColumnStats> colStats) {
    Map<Column, ColumnStats> statSet = Maps.newHashMap();
    for (ColumnStats stat : colStats) {
      statSet.put(stat.getColumn(), stat);
    }

    for (Column col : target.getColumns()) {
      Preconditions.checkState(statSet.containsKey(col),
          "ERROR: Invalid Column Stats (column stats: " + colStats + ", there exists not target " + col);
    }

    Tuple startTuple = new VTuple(target.size());
    Tuple endTuple = new VTuple(target.size());
    int i = 0;

    // In outer join, empty table could be searched.
    // As a result, min value and max value would be null.
    // So, we should put NullDatum for this case.
    for (Column col : target.getColumns()) {
      ColumnStats stats = statSet.get(col); // hoisted: avoids four map lookups per column
      if (sortSpecs[i].isAscending()) {
        startTuple.put(i, stats.getMinValue() != null ? stats.getMinValue() : DatumFactory.createNullDatum());
        endTuple.put(i, stats.getMaxValue() != null ? stats.getMaxValue() : DatumFactory.createNullDatum());
      } else {
        // Descending order reverses the roles of min and max.
        startTuple.put(i, stats.getMaxValue() != null ? stats.getMaxValue() : DatumFactory.createNullDatum());
        endTuple.put(i, stats.getMinValue() != null ? stats.getMinValue() : DatumFactory.createNullDatum());
      }
      i++;
    }
    return new TupleRange(sortSpecs, startTuple, endTuple);
  }

  /**
   * It creates a tuple of a given size filled with NULL values in all fields
   * It is usually used in outer join algorithms.
   *
   * @param size The number of columns of a creating tuple
   * @return The created tuple filled with NULL values
   */
  public static Tuple createNullPaddedTuple(int size) {
    VTuple aTuple = new VTuple(size);
    for (int i = 0; i < size; i++) {
      aTuple.put(i, DatumFactory.createNullDatum());
    }
    return aTuple;
  }

  /**
   * Evaluates the given filter condition against every tuple in the block and returns
   * only the tuples for which the condition evaluates to true.
   *
   * @param schema the schema of the tuples
   * @param tupleBlock the tuples to filter
   * @param filterCondition the predicate evaluated per tuple
   * @return the tuples satisfying {@code filterCondition}
   */
  @SuppressWarnings("unused")
  public static Collection<Tuple> filterTuple(Schema schema, Collection<Tuple> tupleBlock, EvalNode filterCondition) {
    TupleBlockFilterScanner filter = new TupleBlockFilterScanner(schema, tupleBlock, filterCondition);
    return filter.nextBlock();
  }

  /** One-shot scanner that applies a qualification predicate to a collection of tuples. */
  private static class TupleBlockFilterScanner {
    private EvalNode qual;
    private Iterator<Tuple> iterator;
    private Schema schema;

    public TupleBlockFilterScanner(Schema schema, Collection<Tuple> tuples, EvalNode qual) {
      this.schema = schema;
      this.qual = qual;
      this.iterator = tuples.iterator();
    }

    /** Drains the underlying iterator, collecting every tuple that satisfies the qual. */
    public List<Tuple> nextBlock() {
      List<Tuple> results = Lists.newArrayList();
      while (iterator.hasNext()) {
        Tuple tuple = iterator.next();
        if (qual.eval(schema, tuple).isTrue()) {
          results.add(tuple);
        }
      }
      return results;
    }
  }

  /**
   * Take a look at a column partition path. A partition path consists
   * of a table path part and column values part. This method transforms
   * a partition path into a tuple with a given partition column schema.
   *
   * hdfs://192.168.0.1/tajo/warehouse/table1/col1=abc/col2=def/col3=ghi
   * ^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^
   * table path part column values part
   *
   * When a file path is given, it can perform two ways depending on beNullIfFile flag.
   * If it is true, it returns NULL when a given path is a file.
   * Otherwise, it returns a built tuple regardless of file or directory.
   *
   * @param partitionColumnSchema The partition column schema
   * @param partitionPath The partition path
   * @param beNullIfFile If true, this method returns NULL when a given path is a file.
   * @return The tuple transformed from a column values part.
   */
  public static Tuple buildTupleFromPartitionPath(Schema partitionColumnSchema, Path partitionPath,
                                                  boolean beNullIfFile) {
    String pathString = partitionPath.toString(); // hoisted: toString() was computed twice

    // NOTE(review): indexOf could match a table-path directory that coincidentally
    // starts with "<firstColumn>=" before the real column-values part — confirm
    // callers never create such directories under the table path.
    int startIdx = pathString.indexOf(getColumnPartitionPathPrefix(partitionColumnSchema));
    if (startIdx == -1) { // if there is no partition column in the path
      return null;
    }

    String columnValuesPart = pathString.substring(startIdx);
    String[] columnValues = columnValuesPart.split("/");

    // More path segments than partition columns means the trailing segment is a file.
    if (beNullIfFile && partitionColumnSchema.size() < columnValues.length) {
      return null;
    }

    Tuple tuple = new VTuple(partitionColumnSchema.size());
    int i = 0;
    for (; i < columnValues.length && i < partitionColumnSchema.size(); i++) {
      // Each segment must have exactly the form "<column>=<value>"; segments whose
      // value itself contains '=' (or is empty) are rejected as malformed.
      String[] parts = columnValues[i].split("=");
      if (parts.length != 2) {
        return null;
      }
      int columnId = partitionColumnSchema.getColumnIdByName(parts[0]);
      Column keyColumn = partitionColumnSchema.getColumn(columnId);
      tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), parts[1]));
    }
    // Pad partition columns missing from the path with NULLs.
    for (; i < partitionColumnSchema.size(); i++) {
      tuple.put(i, NullDatum.get());
    }
    return tuple;
  }

  /**
   * Get a prefix of column partition path. For example, consider a column partition (col1, col2).
   * Then, you will get a string 'col1='.
   *
   * @param partitionColumn the schema of column partition
   * @return The first part string of column partition path.
   */
  private static String getColumnPartitionPathPrefix(Schema partitionColumn) {
    // Plain concatenation; a StringBuilder is unnecessary for two operands.
    return partitionColumn.getColumn(0).getSimpleName() + "=";
  }
}