/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.rcfile.text;
import com.facebook.presto.rcfile.ColumnEncoding;
import com.facebook.presto.rcfile.RcFileEncoding;
import com.facebook.presto.spi.type.Type;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import org.joda.time.DateTimeZone;
import java.util.List;
import java.util.stream.Collectors;
/**
 * {@link RcFileEncoding} implementation that hands out the text-format column
 * encoders defined in this package.
 *
 * <p>Every factory method forwards the configuration captured at construction
 * time (null sequence, separators, escape byte, storage time zone) to the
 * encoder it creates; the instance itself holds no other state.
 */
public class TextRcFileEncoding
        implements RcFileEncoding
{
    /**
     * Separator bytes used when none are supplied explicitly: the ASCII control
     * characters 1 through 29, with the values marked RESERVED below left out.
     *
     * <p>NOTE(review): the position in this array presumably selects the
     * separator for a given nesting depth of list/map/struct values - confirm
     * against ListEncoding, MapEncoding and StructEncoding.
     *
     * <p>The array is public and therefore mutable by any caller; instances of
     * this class copy it on construction so they are not affected by later writes.
     */
    public static final byte[] DEFAULT_SEPARATORS = new byte[] {
            1,  // Start of Heading
            2,  // Start of text
            3,  // End of Text
            4,  // End of Transmission
            5,  // Enquiry
            6,  // Acknowledge
            7,  // Bell
            8,  // Backspace
            // RESERVED 9,  // Horizontal Tab
            // RESERVED 10, // Line Feed
            11, // Vertical Tab
            // RESERVED 12, // Form Feed
            // RESERVED 13, // Carriage Return
            14, // Shift Out
            15, // Shift In
            16, // Data Link Escape
            17, // Device Control One
            18, // Device Control Two
            19, // Device Control Three
            20, // Device Control Four
            21, // Negative Acknowledge
            22, // Synchronous Idle
            23, // End of Transmission Block
            24, // Cancel
            25, // End of medium
            26, // Substitute
            // RESERVED 27, // Escape
            28, // File Separator
            29, // Group separator
            // RESERVED 30, // Record Separator
            // RESERVED 31, // Unit separator
    };

    /** Null marker used when none is supplied explicitly: the two characters backslash and {@code N}. */
    public static final Slice DEFAULT_NULL_SEQUENCE = Slices.utf8Slice("\\N");

    private final DateTimeZone hiveStorageTimeZone;
    private final Slice nullSequence;
    private final byte[] separators;
    private final Byte escapeByte;
    private final boolean lastColumnTakesRest;

    /**
     * Creates an encoding with {@link #DEFAULT_NULL_SEQUENCE}, a copy of
     * {@link #DEFAULT_SEPARATORS}, a {@code null} escape byte and
     * {@code lastColumnTakesRest} set to {@code false}.
     *
     * @param hiveStorageTimeZone time zone handed to the timestamp encoder
     */
    public TextRcFileEncoding(DateTimeZone hiveStorageTimeZone)
    {
        this(hiveStorageTimeZone,
                DEFAULT_NULL_SEQUENCE,
                DEFAULT_SEPARATORS,
                null,
                false);
    }

    /**
     * Creates an encoding with explicit text-format settings.
     *
     * @param hiveStorageTimeZone time zone handed to the timestamp encoder
     * @param nullSequence null marker handed to every encoder created by this instance
     * @param separators separator bytes handed to the list, map and struct encoders;
     *         the array is copied, so later changes by the caller are not observed
     * @param escapeByte escape byte handed to the string, list, map and struct encoders;
     *         may be {@code null} (presumably meaning "no escaping" - confirm against
     *         the encoders that consume it)
     * @param lastColumnTakesRest flag handed to the struct encoder
     */
    public TextRcFileEncoding(DateTimeZone hiveStorageTimeZone, Slice nullSequence, byte[] separators, Byte escapeByte, boolean lastColumnTakesRest)
    {
        this.hiveStorageTimeZone = hiveStorageTimeZone;
        this.nullSequence = nullSequence;
        // Defensive copy: the default constructor passes the public DEFAULT_SEPARATORS
        // array, and without a copy any write to that array (or to a caller-owned one)
        // would change the separators of already-constructed encodings.
        // A null array is passed through unchanged to keep the previous contract.
        this.separators = (separators == null) ? null : separators.clone();
        this.escapeByte = escapeByte;
        this.lastColumnTakesRest = lastColumnTakesRest;
    }

    /** Returns a new {@code BooleanEncoding} for {@code type} using the configured null sequence. */
    @Override
    public ColumnEncoding booleanEncoding(Type type)
    {
        return new BooleanEncoding(type, nullSequence);
    }

    /** Delegates to {@link #longEncoding(Type)}. */
    @Override
    public ColumnEncoding byteEncoding(Type type)
    {
        return longEncoding(type);
    }

    /** Delegates to {@link #longEncoding(Type)}. */
    @Override
    public ColumnEncoding shortEncoding(Type type)
    {
        return longEncoding(type);
    }

    /** Delegates to {@link #longEncoding(Type)}. */
    @Override
    public ColumnEncoding intEncoding(Type type)
    {
        return longEncoding(type);
    }

    /** Returns a new {@code LongEncoding} for {@code type} using the configured null sequence. */
    @Override
    public ColumnEncoding longEncoding(Type type)
    {
        return new LongEncoding(type, nullSequence);
    }

    /** Returns a new {@code DecimalEncoding} for {@code type} using the configured null sequence. */
    @Override
    public ColumnEncoding decimalEncoding(Type type)
    {
        return new DecimalEncoding(type, nullSequence);
    }

    /** Returns a new {@code FloatEncoding} for {@code type} using the configured null sequence. */
    @Override
    public ColumnEncoding floatEncoding(Type type)
    {
        return new FloatEncoding(type, nullSequence);
    }

    /** Returns a new {@code DoubleEncoding} for {@code type} using the configured null sequence. */
    @Override
    public ColumnEncoding doubleEncoding(Type type)
    {
        return new DoubleEncoding(type, nullSequence);
    }

    /** Returns a new {@code StringEncoding} for {@code type} using the configured null sequence and escape byte. */
    @Override
    public ColumnEncoding stringEncoding(Type type)
    {
        return new StringEncoding(type, nullSequence, escapeByte);
    }

    /** Returns a new {@code BinaryEncoding} for {@code type}; the escape byte is deliberately not passed. */
    @Override
    public ColumnEncoding binaryEncoding(Type type)
    {
        // binary text encoding is not escaped
        return new BinaryEncoding(type, nullSequence);
    }

    /** Returns a new {@code DateEncoding} for {@code type} using the configured null sequence. */
    @Override
    public ColumnEncoding dateEncoding(Type type)
    {
        return new DateEncoding(type, nullSequence);
    }

    /** Returns a new {@code TimestampEncoding} for {@code type} using the configured null sequence and storage time zone. */
    @Override
    public ColumnEncoding timestampEncoding(Type type)
    {
        return new TimestampEncoding(type, nullSequence, hiveStorageTimeZone);
    }

    /**
     * Returns a new {@code ListEncoding} for {@code type}.
     *
     * @param elementEncoding encoder for the list elements; must be a {@code TextColumnEncoding}
     * @throws ClassCastException if {@code elementEncoding} is not a {@code TextColumnEncoding}
     */
    @Override
    public ColumnEncoding listEncoding(Type type, ColumnEncoding elementEncoding)
    {
        return new ListEncoding(
                type,
                nullSequence,
                separators,
                escapeByte,
                (TextColumnEncoding) elementEncoding);
    }

    /**
     * Returns a new {@code MapEncoding} for {@code type}.
     *
     * @param keyEncoding encoder for the map keys; must be a {@code TextColumnEncoding}
     * @param valueEncoding encoder for the map values; must be a {@code TextColumnEncoding}
     * @throws ClassCastException if either encoder is not a {@code TextColumnEncoding}
     */
    @Override
    public ColumnEncoding mapEncoding(Type type, ColumnEncoding keyEncoding, ColumnEncoding valueEncoding)
    {
        return new MapEncoding(
                type,
                nullSequence,
                separators,
                escapeByte,
                (TextColumnEncoding) keyEncoding,
                (TextColumnEncoding) valueEncoding);
    }

    /**
     * Returns a new {@code StructEncoding} for {@code type}.
     *
     * @param fieldEncodings encoders for the struct fields, in the order given;
     *         each must be a {@code TextColumnEncoding}
     * @throws ClassCastException if any field encoder is not a {@code TextColumnEncoding}
     */
    @Override
    public ColumnEncoding structEncoding(Type type, List<ColumnEncoding> fieldEncodings)
    {
        return new StructEncoding(
                type,
                nullSequence,
                separators,
                escapeByte,
                lastColumnTakesRest,
                fieldEncodings.stream()
                        .map(TextColumnEncoding.class::cast)
                        .collect(Collectors.toList()));
    }
}