/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.hooks;
import java.io.IOException;
import java.io.PrintStream;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.io.HdfsUtils;
import org.apache.orc.tools.FileDump;
import org.apache.orc.FileFormatException;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.session.SessionState;
import com.google.common.collect.Lists;
/**
 * Post exec hook to print orc file dump for files that will be read by fetch task. The file dump
 * output will be printed before fetch task output. The dump is requested with the
 * {@code --rowindex=*} option so that row index information is included in the output, which is
 * useful to verify the impact of bloom filter fpp.
 *
 * <p>While the dump runs, the process-wide {@link System#out} is temporarily redirected to the
 * session console's error stream; it is always restored before {@link #run(HookContext)} returns,
 * including when the dump fails with an exception.
 */
public class PostExecOrcFileDump implements ExecuteWithHookContext {
  private static final Logger LOG = LoggerFactory.getLogger(PostExecOrcFileDump.class.getName());

  /** Accepts only paths whose last component does not start with "_" or ".". */
  private static final PathFilter hiddenFileFilter = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      String name = p.getName();
      return !name.startsWith("_") && !name.startsWith(".");
    }
  };

  /**
   * Prints an ORC file dump for every non-hidden, non-empty ORC file under the directories the
   * query's fetch task reads from. Does nothing when there is no query plan or no fetch task.
   *
   * @param hookContext context of the hook invocation; supplies the configuration and query plan
   * @throws Exception if listing the directories fails or the dump tool fails with something
   *     other than an {@link IOException} (per-file {@link IOException}s are logged and skipped)
   */
  @Override
  public void run(HookContext hookContext) throws Exception {
    assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);
    HiveConf conf = hookContext.getConf();

    LOG.info("Executing post execution hook to print orc file dump..");

    QueryPlan plan = hookContext.getQueryPlan();
    if (plan == null) {
      return;
    }

    FetchTask fetchTask = plan.getFetchTask();
    if (fetchTask == null) {
      return;
    }

    SessionState ss = SessionState.get();
    SessionState.LogHelper console = ss.getConsole();

    // file dump should write to session state console's error stream
    PrintStream old = System.out;
    System.setOut(console.getErrStream());
    try {
      for (Path dir : directoriesToDump(fetchTask.getWork())) {
        FileSystem fs = dir.getFileSystem(conf);
        List<FileStatus> fileList = HdfsUtils.listLocatedStatus(fs, dir, hiddenFileFilter);
        for (FileStatus fileStatus : fileList) {
          dumpFile(fs, fileStatus, console);
        }
      }
    } finally {
      // Restore the old out stream even when the dump throws; System.out is global state and
      // must not stay pointed at this session's error stream.
      System.out.flush();
      System.setOut(old);
    }
  }

  /** Returns the partition directories for a partitioned fetch, else the single table directory. */
  private static List<Path> directoriesToDump(FetchWork fetchWork) {
    if (fetchWork.isPartitioned()) {
      LOG.info("Printing orc file dump for files from partitioned directory..");
      return fetchWork.getPartDir();
    }
    LOG.info("Printing orc file dump for files from table directory..");
    List<Path> directories = Lists.newArrayList();
    directories.add(fetchWork.getTblDir());
    return directories;
  }

  /** Dumps one file between BEGIN/END markers; empty, non-ORC or unreadable files are skipped. */
  private static void dumpFile(FileSystem fs, FileStatus fileStatus,
      SessionState.LogHelper console) throws Exception {
    Path file = fileStatus.getPath();
    LOG.info("Printing orc file dump for " + file);
    if (fileStatus.getLen() <= 0) {
      LOG.warn("Zero length file encountered. Skip printing orc file dump.");
      return;
    }
    try {
      // just creating orc reader is going to do sanity checks to make sure its valid ORC file
      OrcFile.createReader(fs, file);
      console.printError("-- BEGIN ORC FILE DUMP --");
      FileDump.main(new String[]{file.toString(), "--rowindex=*"});
      console.printError("-- END ORC FILE DUMP --");
    } catch (FileFormatException e) {
      LOG.warn("File " + file + " is not ORC. Skip printing orc file dump");
    } catch (IOException e) {
      // Best effort: keep going with the next file, but retain the cause in the log.
      LOG.warn("Skip printing orc file dump. Exception: " + e.getMessage(), e);
    }
  }
}