/*
* JBoss, Home of Professional Open Source.
* See the COPYRIGHT.txt file distributed with this work for information
* regarding copyright ownership. Some portions may be licensed
* to Red Hat, Inc. under one or more contributor license agreements.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301 USA.
*/
package com.jboss.teiid.sizing;
/**
 * Estimates a heap size and a CPU core count from the sizing inputs held by a
 * {@link CaculationEntity}.
 *
 * <p>All byte-size products are computed in {@code long}/{@code double} rather
 * than {@code int}, so large row counts or row sizes do not wrap around and
 * silently select the wrong estimation branch.
 */
public class CaculationTool {

    /** Lower bound applied to the heap estimate, in GB. */
    private static final int MIN_HEAP_GB = 16;

    /** Lower bound applied to the core estimate. */
    private static final int MIN_CORES = 16;

    /** Upper bound applied to the core estimate. */
    private static final int MAX_CORES = 128;

    private CaculationEntity caculationEntity;

    /**
     * Creates a tool that reads its inputs from the given entity.
     *
     * @param caculationEntity the sizing inputs
     */
    public CaculationTool(CaculationEntity caculationEntity) {
        this.caculationEntity = caculationEntity;
    }

    /**
     * @return the entity the calculations read their inputs from
     */
    public CaculationEntity getCaculationEntity() {
        return caculationEntity;
    }

    /**
     * @param caculationEntity the entity subsequent calculations will read from
     */
    public void setCaculationEntity(CaculationEntity caculationEntity) {
        this.caculationEntity = caculationEntity;
    }

    /**
     * Estimates the heap size using
     * {@code #concurrency * (5mb) * #source queries + 300mb}, converted to GB
     * and rounded up by one, with a floor of {@value #MIN_HEAP_GB}.
     *
     * @return heap size in GB
     */
    public int heapCaculation() {
        int sources = caculationEntity.getSource_count();
        int concurrent = caculationEntity.getQueries_concurrent();

        // Multiply in long: the int product wraps as soon as
        // concurrent * 5 * sources exceeds Integer.MAX_VALUE.
        long totalMb = 5L * concurrent * sources + 300;
        long heapGb = totalMb / 1024 + 1;

        if (heapGb < MIN_HEAP_GB) {
            return MIN_HEAP_GB;
        }
        // Saturate instead of truncating when narrowing back to int.
        return (int) Math.min(heapGb, Integer.MAX_VALUE);
    }

    /**
     * Estimates the number of CPU cores from the per-source and federated
     * result sizes, the source latency, the sample query wall time and the
     * target query rate.
     *
     * @return core count, clamped to the range
     *         [{@value #MIN_CORES}, {@value #MAX_CORES}]
     */
    public int coreCaculation() {
        int sources = caculationEntity.getSource_count();
        int rowCountEach = caculationEntity.getRow_count_each();
        int rowSizeEach = caculationEntity.getRow_size_each();
        int sourceLatency = caculationEntity.getAvg_time_each();
        int rowCountFederated = caculationEntity.getRow_count_federated();
        int rowSizeFederated = caculationEntity.getRow_size_federated();
        int wallTime = caculationEntity.getAvg_time_sample();
        boolean isAggregation = caculationEntity.isAggregation();
        int queriesPerSec = caculationEntity.getQueries_per_sec();

        long sourceProcessing = getSourceProcessing(rowCountEach, rowSizeEach, sources);
        long initialLatency = getInitialLatency(rowCountEach, rowSizeEach, sources, sourceLatency);
        long additionalLatency = getAdditionalLatency(sources, sourceLatency);
        long clientProcessing = getClientProcessing(rowCountFederated, rowSizeFederated);
        long engineTime = getEngineTime(isAggregation, rowSizeFederated, rowCountFederated, wallTime,
                sourceProcessing, clientProcessing, initialLatency, additionalLatency);

        long cores = getCoreNumbers(sources, sourceProcessing, initialLatency, additionalLatency,
                engineTime, clientProcessing, queriesPerSec);
        // Safe narrowing: getCoreNumbers clamps to [MIN_CORES, MAX_CORES].
        return (int) cores;
    }

    /*
     * How much time it took to deserialize the rows coming back from the sources.
     *
     * The estimate is piecewise linear in the total number of bytes returned by
     * all sources; up to 10,000 bytes the cost is treated as zero.
     * (Presumably milliseconds, like the other timings here - confirm.)
     */
    private long getSourceProcessing(int rowCountEach, int rowSizeEach, int sources) {
        // Widen before multiplying so the three-way product cannot wrap in int.
        double totalBytes = (double) rowCountEach * rowSizeEach * sources;
        double sourceProcessing;
        if (totalBytes > 1000000) {
            double sizeInMb = totalBytes / 1000000;
            sourceProcessing = 100 + 4.6 * sizeInMb;
        } else if (totalBytes > 100000) {
            double slope = 80.0 / 900000.0;
            sourceProcessing = slope * (totalBytes - 100000) + 20;
        } else if (totalBytes > 10000) {
            double slope = 18.0 / 90000.0;
            sourceProcessing = slope * (totalBytes - 10000) + 2;
        } else {
            sourceProcessing = 0;
        }
        return Math.round(sourceProcessing);
    }

    /*
     * Latency until the first source returns results, which is when processing
     * starts; this can be thought of as "low(source_latency)".
     *
     * 'sourceLatency' is the average latency of each data source. With a single
     * source it is used as is; with several sources it is scaled by 0.8 when one
     * source returns more than 100,000,000 bytes and by 0.6 otherwise.
     */
    private long getInitialLatency(int rowCountEach, int rowSizeEach, int sources, int sourceLatency) {
        // Multiply in long; an int product above ~2.1e9 would wrap negative and
        // wrongly fall into the 0.6 branch.
        long totalBytes = (long) rowCountEach * rowSizeEach;
        double initialLatency;
        if (sources == 1) {
            initialLatency = sourceLatency;
        } else if (totalBytes > 100000000) {
            initialLatency = sourceLatency * 0.8;
        } else {
            initialLatency = sourceLatency * 0.6;
        }
        return Math.round(initialLatency);
    }

    /*
     * Even after the first rows have arrived (lowest source latency), how much
     * *additional* time is spent waiting for the remaining results.
     *
     * The original design note suggests about half (0.5) when sources are read in
     * parallel and 1 in serialized (XA) situations. The factors actually applied
     * here are 0.5 for a single source and 0.4 for several sources.
     */
    private long getAdditionalLatency(int sources, int sourceLatency) {
        double factor = (sources == 1) ? 0.5 : 0.4;
        return Math.round(sourceLatency * factor);
    }

    /*
     * How much time it took to serialize the results and put them on the socket.
     *
     * Piecewise linear in the size of the federated result in bytes; a
     * non-positive size costs nothing.
     */
    private long getClientProcessing(int rowCountFederated, int rowSizeFederated) {
        // Widen before multiplying so the product cannot wrap in int.
        double totalBytes = (double) rowCountFederated * rowSizeFederated;
        double clientProcessing;
        if (totalBytes > 10000000) {
            double sizeInMb = totalBytes / 1000000;
            clientProcessing = 210 + 4.6 * sizeInMb;
        } else if (totalBytes > 1000000) {
            double slope = 125.0 / 9000000.0;
            clientProcessing = slope * (totalBytes - 1000000) + 85;
        } else if (totalBytes > 100000) {
            double slope = 75.0 / 900000.0;
            clientProcessing = slope * (totalBytes - 100000) + 10;
        } else if (totalBytes > 10000) {
            double slope = 5.0 / 90000.0;
            clientProcessing = slope * (totalBytes - 10000) + 5;
        } else if (totalBytes > 1000) {
            double slope = 2.0 / 9000.0;
            clientProcessing = slope * (totalBytes - 1000) + 3;
        } else if (totalBytes > 0) {
            double slope = 2.0 / 1000.0;
            clientProcessing = slope * totalBytes;
        } else {
            clientProcessing = 0;
        }
        return Math.round(clientProcessing);
    }

    /*
     * Time spent inside the engine. With a lot of sorting or aggregation this can
     * be high; in pass-through scenarios it can be very low.
     *
     * Starting from the wall time of a sample query, the source and client
     * processing times and both latencies are subtracted, which leaves a rough
     * engine time. A fraction of that remainder is then attributed to the engine:
     * 90% for aggregations over a federated result larger than 100,000,000 bytes,
     * 60% for other aggregations and 30% otherwise. If the remainder is 10 or
     * less, the engine time is fixed at 10.
     */
    private long getEngineTime(boolean isAggregation,
                               int rowSizeFederated,
                               int rowCountFederated,
                               int wallTime,
                               long sourceProcessing,
                               long clientProcessing,
                               long initialLatency,
                               long additionalLatency) {
        // The sample wall time is expected to be larger than the latencies plus
        // the source and client processing times.
        double roughEngineTime =
                wallTime - sourceProcessing - clientProcessing - initialLatency - additionalLatency;
        // Multiply in long so large federated results do not wrap in int.
        long totalFederatedBytes = (long) rowSizeFederated * rowCountFederated;

        double engineTime;
        if (roughEngineTime <= 10) {
            engineTime = 10;
        } else if (isAggregation && totalFederatedBytes > 100000000) {
            engineTime = roughEngineTime * 0.9;
        } else if (isAggregation) {
            engineTime = roughEngineTime * 0.6;
        } else {
            engineTime = roughEngineTime * 0.3;
        }
        return Math.round(engineTime);
    }

    /*
     * CPU calculation logic & formula:
     *   cpu_time = sum(source_processing) + engine_time + client_processing
     *   wall_time = low(source_latency) + cpu_time + additional_latency
     *   cpu_utilization_per_query = cpu_time / wall_time
     *   total_cpu_time_available = cpu_core_count * 2 * 1000ms
     *   queries/sec = total_cpu_time_available
     *                 / (threads_used_per_query * cpu_utilization_per_query * cpu_time)
     *
     * The last equation is solved for cpu_core_count, using sources + 1 threads
     * per query, and the result is clamped to [MIN_CORES, MAX_CORES].
     */
    private long getCoreNumbers(int sources,
                                long sourceProcessing,
                                long initialLatency,
                                long additionalLatency,
                                long engineTime,
                                long clientProcessing,
                                int queriesPerSec) {
        double cpuTime = sourceProcessing + engineTime + clientProcessing;
        double wallTime = cpuTime + initialLatency + additionalLatency;
        double cpuUtilizationPerQuery = cpuTime / wallTime;
        int threadsUsedPerQuery = sources + 1;

        double cores = (cpuTime * queriesPerSec * cpuUtilizationPerQuery * threadsUsedPerQuery)
                / (1000 * 2);
        if (cores < MIN_CORES) {
            cores = MIN_CORES;
        }
        if (cores > MAX_CORES) {
            cores = MAX_CORES;
        }
        return Math.round(cores);
    }
}