/**
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE
 * file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the
 * License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package org.apache.drill.exec.store.httpd;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import nl.basjes.parse.core.exceptions.DissectionFailure;
import nl.basjes.parse.core.exceptions.InvalidDissectorException;
import nl.basjes.parse.core.exceptions.MissingDissectorsException;

import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.FormatPluginConfig;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.ops.OperatorContext;
import org.apache.drill.exec.physical.impl.OutputMutator;
import org.apache.drill.exec.server.DrillbitContext;
import org.apache.drill.exec.store.AbstractRecordReader;
import org.apache.drill.exec.store.RecordReader;
import org.apache.drill.exec.store.RecordWriter;
import org.apache.drill.exec.store.dfs.DrillFileSystem;
import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
import org.apache.drill.exec.store.dfs.easy.EasyWriter;
import org.apache.drill.exec.store.dfs.easy.FileWork;
import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.LineRecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

public class HttpdLogFormatPlugin extends EasyFormatPlugin<HttpdLogFormatPlugin.HttpdLogFormatConfig> {

  private static final Logger LOG = LoggerFactory.getLogger(HttpdLogFormatPlugin.class);
  private static final String PLUGIN_EXTENSION = "httpd";
  private static final int VECTOR_MEMORY_ALLOCATION = 4095;

  public HttpdLogFormatPlugin(final String name, final DrillbitContext context, final Configuration fsConf,
      final StoragePluginConfig storageConfig, final HttpdLogFormatConfig formatConfig) {

    super(name, context, fsConf, storageConfig, formatConfig, true, false, true, true,
        Lists.newArrayList(PLUGIN_EXTENSION), PLUGIN_EXTENSION);
  }

  /**
   * This class is a POJO to hold the configuration for the HttpdLogFormat parser. It is automatically
   * serialized/deserialized to and from JSON.
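   * <p>
   * For illustration only, a storage plugin "formats" entry might look like the following (the LogFormat and
   * timestamp values shown here are hypothetical examples, not defaults):
   * <pre>
   * "httpd" : {
   *   "type" : "httpd",
   *   "logFormat" : "%h %l %u %t \"%r\" %>s %b",
   *   "timestampFormat" : "dd/MMM/yyyy:HH:mm:ss ZZ"
   * }
   * </pre>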
   */
  @JsonTypeName(PLUGIN_EXTENSION)
  public static class HttpdLogFormatConfig implements FormatPluginConfig {

    private String logFormat;
    private String timestampFormat;

    /**
     * @return the logFormat
     */
    public String getLogFormat() {
      return logFormat;
    }

    /**
     * @return the timestampFormat
     */
    public String getTimestampFormat() {
      return timestampFormat;
    }
  }

  /**
   * This class performs the work for the plugin. This is where all of the logic for reading records lives. In this
   * case, httpd logs are lines terminated with a newline character.
   */
  private class HttpdLogRecordReader extends AbstractRecordReader {

    private final DrillFileSystem fs;
    private final FileWork work;
    private final FragmentContext fragmentContext;
    private ComplexWriter writer;
    private HttpdParser parser;
    private LineRecordReader lineReader;
    private LongWritable lineNumber;

    public HttpdLogRecordReader(final FragmentContext context, final DrillFileSystem fs, final FileWork work,
        final List<SchemaPath> columns) {
      this.fs = fs;
      this.work = work;
      this.fragmentContext = context;
      setColumns(columns);
    }

    /**
     * The query fields passed in are formatted in a way that Drill requires. These must be cleaned up to work with
     * the parser.
     *
     * @return a map of Drill field names to parser field names
     */
    private Map<String, String> makeParserFields() {
      final Map<String, String> fieldMapping = Maps.newHashMap();
      for (final SchemaPath sp : getColumns()) {
        final String drillField = sp.getRootSegment().getPath();
        final String parserField = HttpdParser.parserFormattedFieldName(drillField);
        fieldMapping.put(drillField, parserField);
      }
      return fieldMapping;
    }

    @Override
    public void setup(final OperatorContext context, final OutputMutator output) throws ExecutionSetupException {
      try {
        /*
         * Extract the list of field names for the parser to use if it is NOT a star query. If it is a star query,
         * pass through null, because the parser then has to build all possible fields.
         */
        final Map<String, String> fieldMapping = !isStarQuery() ? makeParserFields() : null;
        writer = new VectorContainerWriter(output);
        parser = new HttpdParser(writer.rootAsMap(), context.getManagedBuffer(),
            HttpdLogFormatPlugin.this.getConfig().getLogFormat(),
            HttpdLogFormatPlugin.this.getConfig().getTimestampFormat(),
            fieldMapping);

        final Path path = fs.makeQualified(new Path(work.getPath()));
        FileSplit split = new FileSplit(path, work.getStart(), work.getLength(), new String[]{""});
        TextInputFormat inputFormat = new TextInputFormat();
        JobConf job = new JobConf(fs.getConf());
        job.setInt("io.file.buffer.size", fragmentContext.getConfig().getInt(ExecConstants.TEXT_LINE_READER_BUFFER_SIZE));
        job.setInputFormat(inputFormat.getClass());
        lineReader = (LineRecordReader) inputFormat.getRecordReader(split, job, Reporter.NULL);
        lineNumber = lineReader.createKey();
      } catch (NoSuchMethodException | MissingDissectorsException | InvalidDissectorException e) {
        throw handleAndGenerate("Failure creating HttpdParser", e);
      } catch (IOException e) {
        throw handleAndGenerate("Failure creating HttpdRecordReader", e);
      }
    }

    private RuntimeException handleAndGenerate(final String s, final Exception e) {
      // lineNumber may not have been initialized yet if setup itself failed.
      throw UserException.dataReadError(e)
          .message(s + "\n%s", e.getMessage())
          .addContext("Path", work.getPath())
          .addContext("Split Start", work.getStart())
          .addContext("Split Length", work.getLength())
          .addContext("Local Line Number", lineNumber != null ? lineNumber.get() : 0)
          .build(LOG);
    }

    /**
     * This record reader is given a batch of records (lines) to read. Each call to next() acts upon one such batch.
     *
     * @return the number of records in this batch
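     * <p>
     * At most VECTOR_MEMORY_ALLOCATION (4095) lines are read and parsed per call, which caps the number of values
     * written into the value vectors in a single batch.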
     */
    @Override
    public int next() {
      try {
        final Text line = lineReader.createValue();

        writer.allocate();
        writer.reset();

        int recordCount = 0;
        while (recordCount < VECTOR_MEMORY_ALLOCATION && lineReader.next(lineNumber, line)) {
          writer.setPosition(recordCount);
          parser.parse(line.toString());
          recordCount++;
        }
        writer.setValueCount(recordCount);

        return recordCount;
      } catch (DissectionFailure | InvalidDissectorException | MissingDissectorsException | IOException e) {
        throw handleAndGenerate("Failure while parsing log record.", e);
      }
    }

    @Override
    public void close() throws Exception {
      try {
        if (lineReader != null) {
          lineReader.close();
        }
      } catch (IOException e) {
        LOG.warn("Failure while closing Httpd reader.", e);
      }
    }
  }

  /**
   * This plugin supports pushing projected fields down into the parser. Only fields specifically asked for within
   * the query will be parsed. If no specific fields are asked for, then all possible fields will be returned.
   *
   * @return true
   */
  @Override
  public boolean supportsPushDown() {
    return true;
  }

  @Override
  public RecordReader getRecordReader(final FragmentContext context, final DrillFileSystem dfs,
      final FileWork fileWork, final List<SchemaPath> columns, final String userName) throws ExecutionSetupException {
    return new HttpdLogRecordReader(context, dfs, fileWork, columns);
  }

  @Override
  public RecordWriter getRecordWriter(final FragmentContext context, final EasyWriter writer) throws IOException {
    throw new UnsupportedOperationException("Drill doesn't currently support writing HTTPd logs");
  }

  @Override
  public int getReaderOperatorType() {
    return -1;
  }

  @Override
  public int getWriterOperatorType() {
    return -1;
  }
}
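/*
 * Example usage (illustrative; the workspace and file names below are hypothetical):
 *
 *   SELECT `request_receive_time`, `request_status` FROM dfs.logs.`access.httpd`;
 *
 * Because supportsPushDown() returns true, only the projected columns above are handed to the parser as a field
 * mapping; a star query instead passes null so the parser materializes every field the configured LogFormat can
 * yield.
 */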