package com.splout.db.benchmark;
/*
* #%L
* Splout SQL Hadoop library
* %%
* Copyright (C) 2012 Datasalt Systems S.L.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;
import com.datasalt.pangool.utils.HadoopUtils;
import com.google.common.base.Joiner;
import com.google.common.collect.HashBasedTable;
import com.google.common.io.Files;
import com.splout.db.common.JSONSerDe;
import com.splout.db.common.SploutClient;
import com.splout.db.common.Tablespace;
import com.splout.db.hadoop.JSONTablespaceDefinition;
import com.splout.db.qnode.beans.QNodeStatus;
import com.splout.db.qnode.beans.QueryStatus;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.*;
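/**
 * Command-line tool that analyses how the rows of a deployed tablespace are
 * distributed across its partitions. For each partitioned table it issues a
 * COUNT(*) per partition and computes the top-N partition keys by row count,
 * then writes two tab-separated reports to the working directory:
 * "&lt;tablespace&gt;-counts.txt" and "&lt;tablespace&gt;-tops.txt".
 * <p>
 * An illustrative invocation (jar name and QNode address are examples only):
 * <pre>
 * hadoop jar splout-hadoop-*-hadoop.jar com.splout.db.benchmark.TablespaceAnalyserCMD \
 *   -tf my-tablespace.json -q http://qnode1:4412 -t 20
 * </pre>
 */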
public class TablespaceAnalyserCMD extends Configured implements Tool {
@Parameter(required = true, names = {"-tf", "--tablespacefile"}, description = "The JSON config file with the Tablespace specifications. Multiple files can be provided. Non full qualified URLs forces to load the file from the current Hadoop filesystem.")
private String tablespaceFile;
@Parameter(required = true, names = {"-q", "--qnodes"}, description = "Comma-separated list QNode addresses.")
private String qNodes;
@Parameter(required = false, names = {"-t", "--top-size"}, description = "Size of calculated tops")
private int topSize = 10;
@Parameter(required = false, names = {"-p", "--partition"},
description = "Restrict the analysis to particular partitions.")
private List<Integer> partitions = new ArrayList<Integer>();
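/**
 * Reads and deserializes the JSON tablespace definition. The path is resolved
 * against whatever filesystem its URI points to (local, HDFS, ...), falling
 * back to the default Hadoop filesystem for scheme-less paths.
 */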
protected JSONTablespaceDefinition loadTablespaceFile(String tablespaceFile) throws IOException, JSONSerDe.JSONSerDeException {
Path file = new Path(tablespaceFile);
FileSystem fS = FileSystem.get(file.toUri(), getConf());
if (!fS.exists(file)) {
throw new IllegalArgumentException("Config input file: " + file + " doesn't exist!");
}
String strContents = HadoopUtils.fileToString(fS, file);
JSONTablespaceDefinition def = JSONSerDe.deSer(strContents, JSONTablespaceDefinition.class);
return def;
}
public int start() throws Exception {
JSONTablespaceDefinition def = loadTablespaceFile(tablespaceFile);
String tsName = def.getName();
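// 24-hour client timeout: the full-table COUNT(*) and GROUP BY scans issued
// below may take very long on big partitions.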
SploutClient client = new SploutClient(1000 * 60 * 60 * 24, qNodes.split(","));
QNodeStatus overview = client.overview();
Tablespace tablespace = overview.getTablespaceMap().get(tsName);
if (tablespace == null) {
System.err.println("Tablespace " + tsName + " not found in QNodes " + qNodes + ".");
return -1;
}
int nPartitions = tablespace.getPartitionMap().getPartitionEntries().size();
System.out.println("TABLESPACE [" + tsName + "]");
System.out.println("#Partitions: " + nPartitions);
HashBasedTable<Integer, String, Long> counts = HashBasedTable.create();
HashBasedTable<Integer, String, LinkedHashMap<String, Long>> tops = HashBasedTable.create();
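// Query every selected partition: one COUNT(*) plus one top-keys GROUP BY
// per partitioned table, each routed explicitly to that partition.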
for (int part = 0; part < nPartitions; part++) {
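// Skip partitions that were not explicitly selected with --partition.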
if (partitions.size() > 0 && !partitions.contains(part)) {
continue;
}
for (int i = 0; i < def.getPartitionedTables().size(); i++) {
JSONTablespaceDefinition.JSONTableDefinition table = def.getPartitionedTables().get(i);
String tblName = table.getName();
String query = "SELECT COUNT(*) FROM " + tblName;
QueryStatus status = client.query(tsName, null, query, part + "");
if (status.getError() != null) {
throw new Exception("Query error: " + status.getError());
}
System.out.println(query + ": " + JSONSerDe.ser(status));
long count = ((Number) ((Map) status.getResult().get(0)).values().iterator().next()).longValue();
counts.put(part, tblName, count);
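// Top keys per partition: concatenate the partition fields with the SQL "||"
// string operator so that composite keys group as a single value.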
String[] partFields = table.getPartitionFields().split(",");
String concatFields = Joiner.on("||").join(partFields);
query = "SELECT " + concatFields + " AS key, COUNT(*) AS c FROM "
+ tblName + " GROUP BY key ORDER BY c DESC LIMIT " + topSize;
status = client.query(tsName, null, query, part + "");
if (status.getError() != null) {
throw new Exception("Query error: " + status.getError());
}
System.out.println(query + ": " + JSONSerDe.ser(status));
LinkedHashMap<String, Long> top = new LinkedHashMap<String, Long>();
for (Map row : (ArrayList<Map<String, Long>>) status.getResult()) {
top.put(row.get("key").toString(), new Long(row.get("c").toString()));
}
tops.put(part, tblName, top);
}
}
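// Aggregate the per-partition counts into a grand total per table; the totals
// are used below to express each count as a percentage.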
Map<String, Long> totalsPerTable = new HashMap<String, Long>();
for (String table : counts.columnKeySet()) {
long count = 0;
for (Map.Entry<Integer, Long> entry : counts.column(table).entrySet()) {
count += entry.getValue();
}
totalsPerTable.put(table, count);
}
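// Report 1: row count per table and partition.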
BufferedWriter countsFile = Files.newWriter(new File(tsName + "-counts.txt"), Charset.defaultCharset());
countsFile.write("Table\tPartition\tRows\tPercent from total rows\n");
for (String table : counts.columnKeySet()) {
for (int partition : counts.column(table).keySet()) {
long count = counts.get(partition, table);
long total = totalsPerTable.get(table);
double percent = 100.0 * count / total;
countsFile.write(table + "\t" + partition + "\t" + count + "\t" + percent + "\n");
}
}
countsFile.close();
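// Report 2: top-N keys per table and partition.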
BufferedWriter topsFile = Files.newWriter(new File(tsName + "-tops.txt"), Charset.defaultCharset());
topsFile.write("Table\tPartition\tKey\tRows\tPercent from total rows\n");
for (String table : tops.columnKeySet()) {
for (int partition : tops.column(table).keySet()) {
long total = totalsPerTable.get(table);
LinkedHashMap<String, Long> top = tops.get(partition, table);
for (Map.Entry<String, Long> entry : top.entrySet()) {
double percent = 100.0 * entry.getValue() / total;
topsFile.write(table + "\t" + partition + "\t" + entry.getKey() + "\t" + entry.getValue() + "\t" + percent + "\n");
}
}
}
topsFile.close();
return 0;
}
public static void main(String[] args) throws Exception {
System.exit(ToolRunner.run(new TablespaceAnalyserCMD(), args));
}
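/**
 * Parses the command line with JCommander, printing usage on error, and then
 * delegates the analysis to {@link #start()}.
 */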
@Override
public int run(String[] args) throws Exception {
JCommander jComm = new JCommander(this);
jComm.setProgramName("Tablespace Analyser Tool");
try {
jComm.parse(args);
} catch (ParameterException e) {
System.err.println(e.getMessage());
System.err.println();
jComm.usage();
return -1;
} catch (Throwable t) {
t.printStackTrace();
jComm.usage();
return -1;
}
return start();
}
}