/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.stats.fs;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.stats.StatsAggregator;
import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
/**
 * {@link StatsAggregator} that reads statistics from files in a temporary
 * stats directory on a Hadoop {@link FileSystem}.
 *
 * <p>{@link #connect(StatsCollectionContext)} deserializes every file in the stats
 * directory whose name starts with {@link StatsSetupConst#STATS_FILE_PREFIX} into a
 * map of partition id to (stat type to value).
 * {@link #aggregateStats(String, String)} then sums one stat across all loaded maps,
 * and {@link #closeConnection(StatsCollectionContext)} deletes the directory.
 */
public class FSStatsAggregator implements StatsAggregator {
  private final Logger LOG = LoggerFactory.getLogger(this.getClass().getName());
  /** One entry per stats file read by {@link #connect}: partition id -> (stat type -> value). */
  private List<Map<String, Map<String, String>>> statsList;
  /** File system of the stats directory; stays null until {@link #connect} resolves it. */
  private FileSystem fs;

  /**
   * Loads all stats files found in the single stats tmp dir of the given context.
   *
   * @param scc context providing the stats tmp dir and the Hive configuration
   * @return {@code true} if every matching file was read, {@code false} if an
   *         {@link IOException} occurred while accessing the file system
   */
  @Override
  public boolean connect(StatsCollectionContext scc) {
    List<String> statsDirs = scc.getStatsTmpDirs();
    assert statsDirs.size() == 1 : "Found multiple stats dirs: " + statsDirs;
    Path statsDir = new Path(statsDirs.get(0));
    LOG.debug("About to read stats from : {}", statsDir);

    try {
      fs = statsDir.getFileSystem(scc.getHiveConf());
      statsList = new ArrayList<Map<String, Map<String, String>>>();
      FileStatus[] status = fs.listStatus(statsDir, new PathFilter() {
        @Override
        public boolean accept(Path file) {
          return file.getName().startsWith(StatsSetupConst.STATS_FILE_PREFIX);
        }
      });
      for (FileStatus file : status) {
        statsList.add(readStatsFile(file.getPath()));
      }
      return true;
    } catch (IOException e) {
      LOG.error("Failed to read stats from filesystem ", e);
      return false;
    }
  }

  /**
   * Deserializes a single stats file into a partition id -> (stat type -> value) map.
   * The input stream is closed and the borrowed Kryo instance is released even when
   * deserialization fails.
   */
  private Map<String, Map<String, String>> readStatsFile(Path statsFile) throws IOException {
    Input in = new Input(fs.open(statsFile));
    try {
      Kryo kryo = SerializationUtilities.borrowKryo();
      try {
        // Stats files are read back as HashMap; generic type arguments are erased
        // at runtime, hence the unchecked assignment.
        @SuppressWarnings("unchecked")
        Map<String, Map<String, String>> fileStats = kryo.readObject(in, HashMap.class);
        LOG.info("Read stats : {}", fileStats);
        return fileStats;
      } finally {
        SerializationUtilities.releaseKryo(kryo);
      }
    } finally {
      // Always close, otherwise a failed read leaks the underlying file stream.
      in.close();
    }
  }

  /**
   * Sums the values of one stat type for one partition across all loaded stats files.
   * Files that do not mention the partition, or have no value for the stat type, are
   * skipped.
   *
   * @param partID   key of the partition in the loaded stats maps
   * @param statType key of the statistic within the partition's map
   * @return the sum rendered as a decimal string ("0" when nothing matched)
   * @throws NumberFormatException if a stored value cannot be parsed as a {@code long}
   */
  @Override
  public String aggregateStats(String partID, String statType) {
    long counter = 0;
    LOG.debug("Part ID: {}\t{}", partID, statType);
    for (Map<String, Map<String, String>> fileStats : statsList) {
      Map<String, String> partStat = fileStats.get(partID);
      if (null == partStat) { // not all partitions are scanned in all mappers, so this could be null.
        continue;
      }
      String statVal = partStat.get(statType);
      if (null == statVal) { // partition was found, but was empty.
        continue;
      }
      counter += Long.parseLong(statVal);
    }
    LOG.info("Read stats for : {}\t{}\t{}", partID, statType, counter);
    return String.valueOf(counter);
  }

  /**
   * Recursively deletes the stats tmp dir of the given context. Deletion is best
   * effort: a failure is logged and the method still reports success.
   *
   * @param scc context providing the stats tmp dir and the Hive configuration
   * @return always {@code true}
   */
  @Override
  public boolean closeConnection(StatsCollectionContext scc) {
    List<String> statsDirs = scc.getStatsTmpDirs();
    assert statsDirs.size() == 1 : "Found multiple stats dirs: " + statsDirs;
    Path statsDir = new Path(statsDirs.get(0));
    LOG.debug("About to delete stats tmp dir :{}", statsDir);

    try {
      // fs is null when connect() was never called or failed before resolving the
      // file system; resolve it here so cleanup does not die with a NullPointerException.
      FileSystem target = (fs != null) ? fs : statsDir.getFileSystem(scc.getHiveConf());
      target.delete(statsDir, true);
      return true;
    } catch (IOException e) {
      LOG.error("Failed to delete stats dir", e);
      return true;
    }
  }
}