/**
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE
* file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
package org.apache.drill.exec.store.httpd;
import java.io.IOException;
import java.util.List;
import nl.basjes.parse.core.exceptions.DissectionFailure;
import nl.basjes.parse.core.exceptions.InvalidDissectorException;
import nl.basjes.parse.core.exceptions.MissingDissectorsException;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.physical.impl.OutputMutator;
import org.apache.drill.exec.server.DrillbitContext;
import org.apache.drill.exec.store.AbstractRecordReader;
import org.apache.drill.exec.store.RecordWriter;
import org.apache.drill.exec.store.dfs.DrillFileSystem;
import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
import org.apache.drill.exec.store.dfs.easy.EasyWriter;
import org.apache.drill.exec.store.dfs.easy.FileWork;
import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.LineRecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.Map;
import org.apache.drill.exec.store.RecordReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Format plugin that reads httpd access log files. Files with the {@code httpd} extension are read line by line
 * and every line is handed to an {@link HttpdParser}, which writes the parsed fields into the output vectors.
 * Writing httpd logs is not supported.
 */
public class HttpdLogFormatPlugin extends EasyFormatPlugin<HttpdLogFormatPlugin.HttpdLogFormatConfig> {

  private static final Logger LOG = LoggerFactory.getLogger(HttpdLogFormatPlugin.class);

  /** Used as the file extension, the default name passed to the super constructor and the JSON type name. */
  private static final String PLUGIN_EXTENSION = "httpd";

  /** Upper bound on the number of records written into a single batch by the record reader. */
  private static final int VECTOR_MEMORY_ALLOCATION = 4095;

  /**
   * Creates the plugin.
   *
   * @param name the name of this plugin instance
   * @param context the drillbit context
   * @param fsConf the file system configuration
   * @param storageConfig the configuration of the enclosing storage plugin
   * @param formatConfig the httpd specific format configuration
   */
  public HttpdLogFormatPlugin(final String name, final DrillbitContext context, final Configuration fsConf,
      final StoragePluginConfig storageConfig, final HttpdLogFormatConfig formatConfig) {
    // NOTE(review): the four boolean flags are presumably readable / writable / blockSplittable / compressible;
    // confirm against the EasyFormatPlugin constructor.
    super(name, context, fsConf, storageConfig, formatConfig, true, false, true, true,
        Lists.newArrayList(PLUGIN_EXTENSION), PLUGIN_EXTENSION);
  }

  /**
   * This class is a POJO to hold the configuration for the HttpdLogFormat Parser. This is automatically
   * serialized/deserialized from JSON format.
   */
  @JsonTypeName(PLUGIN_EXTENSION)
  public static class HttpdLogFormatConfig implements FormatPluginConfig {

    private String logFormat;
    private String timestampFormat;

    /**
     * @return the logFormat
     */
    public String getLogFormat() {
      return logFormat;
    }

    /**
     * @return the timestampFormat
     */
    public String getTimestampFormat() {
      return timestampFormat;
    }

    /**
     * Hash code derived from both configuration values, consistent with {@link #equals(Object)}.
     *
     * @return the hash code of this configuration
     */
    @Override
    public int hashCode() {
      int result = logFormat != null ? logFormat.hashCode() : 0;
      result = 31 * result + (timestampFormat != null ? timestampFormat.hashCode() : 0);
      return result;
    }

    /**
     * Value based equality: two configurations are equal when their log format and timestamp format are equal.
     * Without this, two configurations carrying identical settings would be treated as different objects.
     *
     * @param o the object to compare with
     * @return true if {@code o} is a configuration with the same values
     */
    @Override
    public boolean equals(final Object o) {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }
      final HttpdLogFormatConfig that = (HttpdLogFormatConfig) o;
      final boolean sameLogFormat = logFormat == null
          ? that.logFormat == null
          : logFormat.equals(that.logFormat);
      final boolean sameTimestampFormat = timestampFormat == null
          ? that.timestampFormat == null
          : timestampFormat.equals(that.timestampFormat);
      return sameLogFormat && sameTimestampFormat;
    }
  }

  /**
   * This class performs the work for the plugin. This is where all logic goes to read records. In this case httpd logs
   * are lines terminated with a new line character.
   */
  private class HttpdLogRecordReader extends AbstractRecordReader {

    private final DrillFileSystem fs;
    private final FileWork work;
    private final FragmentContext fragmentContext;
    private ComplexWriter writer;
    private HttpdParser parser;
    private LineRecordReader lineReader;
    // Stays null until setup() has completed successfully.
    private LongWritable lineNumber;

    /**
     * @param context the fragment context, used to look up the line reader buffer size
     * @param fs the file system the log file lives on
     * @param work the file (split) to read
     * @param columns the columns requested by the query
     */
    public HttpdLogRecordReader(final FragmentContext context, final DrillFileSystem fs, final FileWork work,
        final List<SchemaPath> columns) {
      this.fs = fs;
      this.work = work;
      this.fragmentContext = context;
      setColumns(columns);
    }

    /**
     * The query fields passed in are formatted in a way that Drill requires. Those must be cleaned up to work with the
     * parser.
     *
     * @return {@code Map<DrillFieldNames, ParserFieldNames>}
     */
    private Map<String, String> makeParserFields() {
      final Map<String, String> fieldMapping = Maps.newHashMap();
      for (final SchemaPath sp : getColumns()) {
        final String drillField = sp.getRootSegment().getPath();
        final String parserField = HttpdParser.parserFormattedFieldName(drillField);
        fieldMapping.put(drillField, parserField);
      }
      return fieldMapping;
    }

    /**
     * Creates the parser and opens a line reader on the split described by the file work.
     *
     * @param context the operator context that supplies the managed buffer for the parser
     * @param output the mutator the vector writer is created on
     * @throws ExecutionSetupException declared by the overridden method
     */
    @Override
    public void setup(final OperatorContext context, final OutputMutator output) throws ExecutionSetupException {
      try {
        /**
         * Extract the list of field names for the parser to use if it is NOT a star query. If it is a star query
         * null is passed instead of a mapping, because the parser is going to have to build all possibilities.
         */
        final Map<String, String> fieldMapping = !isStarQuery() ? makeParserFields() : null;
        writer = new VectorContainerWriter(output);
        parser = new HttpdParser(writer.rootAsMap(), context.getManagedBuffer(),
            HttpdLogFormatPlugin.this.getConfig().getLogFormat(),
            HttpdLogFormatPlugin.this.getConfig().getTimestampFormat(),
            fieldMapping);

        final Path path = fs.makeQualified(new Path(work.getPath()));
        final FileSplit split = new FileSplit(path, work.getStart(), work.getLength(), new String[]{""});
        final TextInputFormat inputFormat = new TextInputFormat();
        final JobConf job = new JobConf(fs.getConf());
        job.setInt("io.file.buffer.size",
            fragmentContext.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BUFFER_SIZE));
        job.setInputFormat(inputFormat.getClass());
        lineReader = (LineRecordReader) inputFormat.getRecordReader(split, job, Reporter.NULL);
        lineNumber = lineReader.createKey();
      }
      catch (NoSuchMethodException | MissingDissectorsException | InvalidDissectorException e) {
        throw handleAndGenerate("Failure creating HttpdParser", e);
      }
      catch (IOException e) {
        throw handleAndGenerate("Failure creating HttpdRecordReader", e);
      }
    }

    /**
     * Builds (and logs) a data read error that carries the position of this reader as context.
     *
     * @param s the message describing what failed
     * @param e the underlying cause
     * @return the exception for the caller to throw
     */
    private RuntimeException handleAndGenerate(final String s, final Exception e) {
      final UserException.Builder builder = UserException.dataReadError(e)
          .message(s + "\n%s", e.getMessage())
          .addContext("Path", work.getPath())
          .addContext("Split Start", work.getStart())
          .addContext("Split Length", work.getLength());
      // lineNumber is assigned by the last statement of setup(), so it is still null for every failure reported
      // from there. Dereferencing it unconditionally would replace the real error with a NullPointerException.
      if (lineNumber != null) {
        builder.addContext("Local Line Number", lineNumber.get());
      }
      return builder.build(LOG);
    }

    /**
     * This record reader is given a batch of records (lines) to read. Next acts upon a batch of records.
     *
     * @return Number of records in this batch.
     */
    @Override
    public int next() {
      try {
        final Text line = lineReader.createValue();
        writer.allocate();
        writer.reset();

        int recordCount = 0;
        // Stop when the batch is full or the line reader has no more lines.
        while (recordCount < VECTOR_MEMORY_ALLOCATION && lineReader.next(lineNumber, line)) {
          writer.setPosition(recordCount);
          parser.parse(line.toString());
          recordCount++;
        }

        writer.setValueCount(recordCount);
        return recordCount;
      }
      catch (DissectionFailure | InvalidDissectorException | MissingDissectorsException | IOException e) {
        throw handleAndGenerate("Failure while parsing log record.", e);
      }
    }

    /**
     * Closes the underlying line reader if one was opened. A failure to close is logged as a warning and not
     * propagated.
     */
    @Override
    public void close() throws Exception {
      try {
        if (lineReader != null) {
          lineReader.close();
        }
      }
      catch (IOException e) {
        LOG.warn("Failure while closing Httpd reader.", e);
      }
    }
  }

  /**
   * This plugin supports pushing down into the parser. Only fields specifically asked for within the configuration will
   * be parsed. If no fields are asked for then all possible fields will be returned.
   *
   * @return true
   */
  @Override
  public boolean supportsPushDown() {
    return true;
  }

  /**
   * Creates the reader for one file (split).
   *
   * @param context the fragment context
   * @param dfs the file system to read from
   * @param fileWork the file (split) to read
   * @param columns the columns requested by the query
   * @param userName not used by this plugin
   * @return a new record reader for the given work
   */
  @Override
  public RecordReader getRecordReader(final FragmentContext context, final DrillFileSystem dfs,
      final FileWork fileWork, final List<SchemaPath> columns, final String userName)
      throws ExecutionSetupException {
    return new HttpdLogRecordReader(context, dfs, fileWork, columns);
  }

  /**
   * Writing is not supported by this plugin.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public RecordWriter getRecordWriter(final FragmentContext context, final EasyWriter writer) throws IOException {
    throw new UnsupportedOperationException("Drill doesn't currently support writing HTTPd logs");
  }

  /**
   * @return -1; no specific reader operator type is reported by this plugin
   */
  @Override
  public int getReaderOperatorType() {
    return -1;
  }

  /**
   * @return -1; no specific writer operator type is reported by this plugin
   */
  @Override
  public int getWriterOperatorType() {
    return -1;
  }
}