/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.localfile; import com.facebook.presto.spi.HostAddress; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.RecordCursor; import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.predicate.Domain; import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.type.Type; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.base.Strings; import com.google.common.base.Throwables; import com.google.common.collect.Iterables; import io.airlift.slice.Slice; import io.airlift.slice.Slices; import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.DateTimeFormatterBuilder; import org.joda.time.format.ISODateTimeFormat; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.RandomAccessFile; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.OptionalInt; import java.util.Set; import java.util.zip.GZIPInputStream; import static com.facebook.presto.localfile.LocalFileColumnHandle.SERVER_ADDRESS_ORDINAL_POSITION; import static com.facebook.presto.localfile.LocalFileErrorCode.LOCAL_FILE_READ_ERROR; import static com.facebook.presto.spi.type.BigintType.BIGINT; import static 
com.facebook.presto.spi.type.BooleanType.BOOLEAN;
import static com.facebook.presto.spi.type.DoubleType.DOUBLE;
import static com.facebook.presto.spi.type.IntegerType.INTEGER;
import static com.facebook.presto.spi.type.TimestampType.TIMESTAMP;
import static com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toSet;
import static java.util.zip.GZIPInputStream.GZIP_MAGIC;

/**
 * {@link RecordCursor} that walks the files of a local-file table line by line.
 * <p>
 * Every line is split on tab characters (each piece is trimmed) and the resulting
 * pieces are exposed as column values. The column whose ordinal position equals
 * {@code SERVER_ADDRESS_ORDINAL_POSITION} is not read from the file; it is answered
 * with the {@link HostAddress} handed to the constructor.
 * <p>
 * When the predicate on the table's server address column excludes this host, no
 * file is opened and the cursor yields no rows.
 */
public class LocalFileRecordCursor
        implements RecordCursor
{
    private static final Splitter LINE_SPLITTER = Splitter.on("\t").trimResults();

    // TODO This should be a config option as it may be different for different log files
    public static final DateTimeFormatter ISO_FORMATTER = new DateTimeFormatterBuilder()
            .append(ISODateTimeFormat.dateHourMinuteSecondFraction())
            .appendTimeZoneOffset("Z", true, 2, 2)
            .toFormatter();

    // fieldToColumnIndex[i] is the ordinal position (index into a split line) of the i-th requested column
    private final int[] fieldToColumnIndex;
    private final HostAddress address;
    private final List<LocalFileColumnHandle> columns;
    // null when includeServer is false
    private final FilesReader reader;
    private final boolean includeServer;
    // pieces of the current line; null until the cursor is advanced and after the input is exhausted
    private List<String> fields;

    /**
     * Creates a cursor over the files of {@code tableName}.
     *
     * @param localFileTables source of the table handle and of the list of files to read
     * @param columns columns to expose, in the order in which fields will be requested
     * @param tableName table whose files are read
     * @param address address reported for the server address column and matched against the predicate
     * @param predicate constraint used to decide whether this server is read at all and to skip files
     */
    public LocalFileRecordCursor(LocalFileTables localFileTables, List<LocalFileColumnHandle> columns, SchemaTableName tableName, HostAddress address, TupleDomain<LocalFileColumnHandle> predicate)
    {
        this.columns = requireNonNull(columns, "columns is null");
        this.address = requireNonNull(address, "address is null");

        fieldToColumnIndex = new int[columns.size()];
        for (int i = 0; i < columns.size(); i++) {
            LocalFileColumnHandle columnHandle = columns.get(i);
            fieldToColumnIndex[i] = columnHandle.getOrdinalPosition();
        }
        this.includeServer = isThisServerIncluded(address, predicate, localFileTables.getTable(tableName));
        // Files are only opened when this server can contribute rows.
        this.reader = includeServer ? getFilesReader(localFileTables, predicate, tableName) : null;
    }

    /**
     * Decides whether rows of this server can satisfy {@code predicate}.
     *
     * @return {@code true} when the table has no server address column, when the predicate
     *         exposes no domains or none for that column, or when a domain on that column
     *         includes {@code address}; {@code false} otherwise
     */
    private static boolean isThisServerIncluded(HostAddress address, TupleDomain<LocalFileColumnHandle> predicate, LocalFileTableHandle table)
    {
        if (!table.getServerAddressColumn().isPresent()) {
            return true;
        }

        Optional<Map<LocalFileColumnHandle, Domain>> domains = predicate.getDomains();
        if (!domains.isPresent()) {
            return true;
        }

        Set<Domain> serverAddressDomain = domains.get().entrySet().stream()
                .filter(entry -> entry.getKey().getOrdinalPosition() == table.getServerAddressColumn().getAsInt())
                .map(Map.Entry::getValue)
                .collect(toSet());

        if (serverAddressDomain.isEmpty()) {
            return true;
        }

        for (Domain domain : serverAddressDomain) {
            if (domain.includesNullableValue(Slices.utf8Slice(address.toString()))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the {@link FilesReader} for the table's files; an {@link IOException} raised
     * while opening the first file is rethrown unchecked.
     */
    private static FilesReader getFilesReader(LocalFileTables localFileTables, TupleDomain<LocalFileColumnHandle> predicate, SchemaTableName tableName)
    {
        LocalFileTableHandle table = localFileTables.getTable(tableName);
        List<File> fileNames = localFileTables.getFiles(tableName);
        try {
            return new FilesReader(table.getTimestampColumn(), fileNames.iterator(), predicate);
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
    }

    /** Not tracked by this cursor; always 0. */
    @Override
    public long getTotalBytes()
    {
        return 0;
    }

    /** Not tracked by this cursor; always 0. */
    @Override
    public long getCompletedBytes()
    {
        return 0;
    }

    /** Not tracked by this cursor; always 0. */
    @Override
    public long getReadTimeNanos()
    {
        return 0;
    }

    /**
     * @return the declared type of the requested column
     * @throws IllegalArgumentException if {@code field} is not below the number of columns
     */
    @Override
    public Type getType(int field)
    {
        checkArgument(field < columns.size(), "Invalid field index");
        return columns.get(field).getColumnType();
    }

    /**
     * Moves to the next line of input.
     *
     * @return {@code true} if a line was read; {@code false} when this server is excluded
     *         by the predicate or all files are exhausted
     */
    @Override
    public boolean advanceNextPosition()
    {
        if (!includeServer) {
            return false;
        }

        try {
            fields = reader.readFields();
            return fields != null;
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Returns the raw text of a column on the current line.
     *
     * @return the server address for the server address column, {@code null} when the
     *         current line has fewer pieces than the column's ordinal position requires,
     *         otherwise the corresponding piece of the line
     * @throws IllegalStateException if there is no current line
     */
    private String getFieldValue(int field)
    {
        checkState(fields != null, "Cursor has not been advanced yet");

        int columnIndex = fieldToColumnIndex[field];
        if (columnIndex == SERVER_ADDRESS_ORDINAL_POSITION) {
            return address.toString();
        }
        if (columnIndex >= fields.size()) {
            return null;
        }
        return fields.get(columnIndex);
    }

    @Override
    public boolean getBoolean(int field)
    {
        checkFieldType(field, BOOLEAN);
        return Boolean.parseBoolean(getFieldValue(field));
    }

    /**
     * Returns the value of a TIMESTAMP, BIGINT or INTEGER column. TIMESTAMP values are
     * parsed with {@link #ISO_FORMATTER} and returned as epoch milliseconds.
     */
    @Override
    public long getLong(int field)
    {
        if (getType(field).equals(TIMESTAMP)) {
            return ISO_FORMATTER.parseDateTime(getFieldValue(field)).getMillis();
        }
        else {
            checkFieldType(field, BIGINT, INTEGER);
            return Long.parseLong(getFieldValue(field));
        }
    }

    @Override
    public double getDouble(int field)
    {
        checkFieldType(field, DOUBLE);
        return Double.parseDouble(getFieldValue(field));
    }

    @Override
    public Slice getSlice(int field)
    {
        checkFieldType(field, createUnboundedVarcharType());
        return Slices.utf8Slice(getFieldValue(field));
    }

    @Override
    public Object getObject(int field)
    {
        throw new UnsupportedOperationException();
    }

    /**
     * A column is null when its text is missing (line too short), empty, or the literal
     * string {@code "null"}.
     */
    @Override
    public boolean isNull(int field)
    {
        checkArgument(field < columns.size(), "Invalid field index");
        String fieldValue = getFieldValue(field);
        // The null/empty test must come first: getFieldValue returns null for short lines.
        return Strings.isNullOrEmpty(fieldValue) || fieldValue.equals("null");
    }

    /**
     * @throws IllegalArgumentException if the column's type is none of {@code expected}
     */
    private void checkFieldType(int field, Type... expected)
    {
        Type actual = getType(field);
        for (Type type : expected) {
            if (actual.equals(type)) {
                return;
            }
        }
        String expectedTypes = Joiner.on(", ").join(expected);
        throw new IllegalArgumentException(format("Expected field %s to be type %s but is %s", field, expectedTypes, actual));
    }

    @Override
    public void close()
    {
        // No reader exists when this server was excluded by the predicate.
        if (reader != null) {
            reader.close();
        }
    }

    /**
     * Reads lines sequentially from a series of files, transparently decompressing files
     * that start with the GZIP magic number.
     */
    private static class FilesReader
    {
        private final Iterator<File> files;
        // domain of the timestamp column taken from the predicate, if any
        private final Optional<Domain> domain;
        private final OptionalInt timestampOrdinalPosition;
        // reader over the current file; null once every file has been consumed
        private BufferedReader reader;

        public FilesReader(OptionalInt timestampOrdinalPosition, Iterator<File> files, TupleDomain<LocalFileColumnHandle> predicate)
                throws IOException
        {
            requireNonNull(files, "files is null");
            this.files = files;

            requireNonNull(predicate, "predicate is null");
            this.domain = getDomain(timestampOrdinalPosition, predicate);

            this.timestampOrdinalPosition = timestampOrdinalPosition;

            reader = createNextReader();
        }

        /**
         * Extracts the predicate's domain for the column at {@code timestampOrdinalPosition}.
         *
         * @return the domain, or empty when the position is absent, the predicate exposes
         *         no domains, or no domain is keyed by a column at that position
         */
        private static Optional<Domain> getDomain(OptionalInt timestampOrdinalPosition, TupleDomain<LocalFileColumnHandle> predicate)
        {
            Optional<Map<LocalFileColumnHandle, Domain>> domains = predicate.getDomains();
            Domain domain = null;
            if (domains.isPresent() && timestampOrdinalPosition.isPresent()) {
                Map<LocalFileColumnHandle, Domain> domainMap = domains.get();
                Set<Domain> timestampDomain = domainMap.entrySet().stream()
                        .filter(entry -> entry.getKey().getOrdinalPosition() == timestampOrdinalPosition.getAsInt())
                        .map(Map.Entry::getValue)
                        .collect(toSet());

                if (!timestampDomain.isEmpty()) {
                    domain = Iterables.getOnlyElement(timestampDomain);
                }
            }
            return Optional.ofNullable(domain);
        }

        /**
         * Opens the next file, if any.
         *
         * @return a reader decoding the (possibly gunzipped) file as UTF-8, or {@code null}
         *         when no files remain
         */
        private BufferedReader createNextReader()
                throws IOException
        {
            if (!files.hasNext()) {
                return null;
            }
            File file = files.next();
            FileInputStream fileInputStream = new FileInputStream(file);
            try {
                InputStream in = isGZipped(file) ? new GZIPInputStream(fileInputStream) : fileInputStream;
                // Decode explicitly as UTF-8 instead of the platform default; values are
                // later re-encoded with Slices.utf8Slice.
                return new BufferedReader(new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8));
            }
            catch (IOException | RuntimeException e) {
                // Do not leak the file descriptor when sniffing or wrapping the stream fails.
                try {
                    fileInputStream.close();
                }
                catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
        }

        /**
         * Tests whether the first two bytes of {@code file} form the GZIP magic number
         * (little-endian).
         *
         * @throws PrestoException with {@code LOCAL_FILE_READ_ERROR} if the file cannot be read
         */
        public static boolean isGZipped(File file)
                throws IOException
        {
            try (RandomAccessFile inputFile = new RandomAccessFile(file, "r")) {
                int magic = inputFile.read() & 0xff | ((inputFile.read() << 8) & 0xff00);
                return magic == GZIP_MAGIC;
            }
            catch (IOException e) {
                throw new PrestoException(LOCAL_FILE_READ_ERROR, "Error reading file: " + file.getName(), e);
            }
        }

        /**
         * Reads the next line and splits it into its pieces, moving on to the next file
         * when the current one is exhausted.
         * <p>
         * The timestamp predicate is evaluated only on the first line of a file opened
         * during this call; if that line does not satisfy it, the whole file is skipped.
         * NOTE(review): later lines of a skipped file are never examined, so this presumably
         * relies on how timestamps are distributed across files - confirm.
         *
         * @return the pieces of the next line, or {@code null} when no input remains
         */
        public List<String> readFields()
                throws IOException
        {
            boolean newReader = false;

            while (reader != null) {
                String line = reader.readLine();
                if (line != null) {
                    List<String> fields = LINE_SPLITTER.splitToList(line);
                    if (!newReader || meetsPredicate(fields)) {
                        return fields;
                    }
                    // First line of a freshly opened file was rejected: drop the line and
                    // fall through to skip the file, rather than returning the rejected row.
                }
                reader.close();
                reader = createNextReader();
                newReader = true;
            }
            return null;
        }

        /**
         * @return {@code true} when there is no timestamp column or no domain for it, or
         *         when the line's timestamp (epoch milliseconds) is included in the domain
         */
        private boolean meetsPredicate(List<String> fields)
        {
            if (!timestampOrdinalPosition.isPresent() || !domain.isPresent()) {
                return true;
            }

            long millis = ISO_FORMATTER.parseDateTime(fields.get(timestampOrdinalPosition.getAsInt())).getMillis();
            return domain.get().includesNullableValue(millis);
        }

        /** Closes the current file, if one is open. */
        public void close()
        {
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException ignored) {
                    // Best effort: the stream is read-only, so a failed close loses no data.
                }
            }
        }
    }
}