package org.geogebra.common.util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.TreeSet;
import org.geogebra.common.kernel.StringTemplate;
import org.geogebra.common.kernel.algos.AlgoDependentBoolean;
import org.geogebra.common.kernel.algos.AlgoElement;
import org.geogebra.common.kernel.algos.GetCommand;
import org.geogebra.common.kernel.arithmetic.ExpressionNode;
import org.geogebra.common.kernel.arithmetic.Traversing.GeoCollector;
import org.geogebra.common.kernel.geos.GeoElement;
import org.geogebra.common.util.debug.Log;
/**
 * Helper class for Prover.
 *
 * Walks the dependency graph of a statement (the statement and all of its
 * predecessors), derives numeric features from it (node/edge counts, degree
 * statistics, relative frequencies of commands and object types, longest
 * path, complexity) and writes them to the debug log as one CSV header line
 * and one CSV data line, followed by a Graphviz digraph of the dependencies.
 *
 * All state is kept in static fields that are reset by
 * {@link #init(GeoElement)}.
 */
class StatementFeatures {
    /**
     * Command names used as categories for the "Wi" statistics. The order
     * and length of this array determine the emitted NF(...) CSV columns.
     */
    private static final String[] rules = { "Intersect", "Segment", "Midpoint",
            "OrthogonalLine", "Circle", "Line", "Point", "Free Point",
            "Ray", "Area", "Distance", "LineBisector", "Expression",
            "Translate", "Vector", "Polygon", "Tangent", "Parabola",
            "Mirror", "Ellipse", "AngularBisector", "Rotate", "Angle",
            "Hyperbola" };
    /**
     * Object type names used as categories for the "types" statistics.
     *
     * NOTE(review): "Triangle" appears twice. It is kept as is because the
     * array length and order determine the emitted CSV columns and the
     * category count used for normalization; confirm whether this is
     * intended.
     */
    private static final String[] obj_types = { "Point", "Circle", "Line",
            "Segment", "Triangle", "Numeric", "Pentagon", "Angle",
            "Triangle", "Parabola", "Ray", "Ellipse", "Hyperbola",
            "Quadrilateral", "Vector" };
    /** Comma-terminated CSV column names / values collected so far. */
    private static String csv_header = "", csv_data = "";
    /** Longest known distance (in edges) from the statement to each node. */
    private static HashMap<GeoElement, Integer> nodeLongestPath;
    /** Memoized result of {@link #computeNodeComplexity(GeoElement)}. */
    private static HashMap<GeoElement, Integer> nodeComplexity;
    /** Largest depth reached by {@link #computeNodeLongestPath}. */
    private static int longestPath;
    /** Dependency edges, each stored as a two-element list [input, node]. */
    private static HashSet<ArrayList<GeoElement>> deps;

    /**
     * Records the depth of <code>node</code>, collects the edges from its
     * inputs into {@link #deps} and recurses into the inputs with depth + 1.
     *
     * A node that was already reached at the same or a greater depth is
     * skipped: its edges are already collected and revisiting it could not
     * produce a longer path. This keeps the per-node entry at the longest
     * depth (instead of the most recently visited one) and avoids walking
     * shared sub-graphs once per path.
     *
     * @param node
     *            node to visit
     * @param set
     *            depth of the node on the current path (0 for the statement)
     */
    private static void computeNodeLongestPath(GeoElement node, int set) {
        Integer known = nodeLongestPath.get(node);
        if (known != null && known >= set) {
            return;
        }
        nodeLongestPath.put(node, set);
        if (set > longestPath) {
            longestPath = set;
        }
        AlgoElement ae = node.getParentAlgorithm();
        if (ae == null) {
            return;
        }
        for (GeoElement dependency : ae.getInput()) {
            ArrayList<GeoElement> item = new ArrayList<GeoElement>();
            item.add(dependency);
            item.add(node);
            deps.add(item);
            computeNodeLongestPath(dependency, set + 1);
        }
    }

    /**
     * Computes (and memoizes in {@link #nodeComplexity}) the complexity of a
     * node: 0 for a node without parent algorithm, otherwise 1 plus the sum
     * of the complexities of its dependencies.
     *
     * @param node
     *            node to evaluate
     * @return complexity of the node
     */
    private static int computeNodeComplexity(GeoElement node) {
        Integer complexity = nodeComplexity.get(node);
        if (complexity != null) {
            return complexity;
        }
        AlgoElement ae = node.getParentAlgorithm();
        if (ae == null) {
            nodeComplexity.put(node, 0);
            return 0;
        }
        int parentsComplexity = 1;
        if (ae instanceof AlgoDependentBoolean) {
            /*
             * Compute node complexity by counting multiplicities in
             * occurrences of GeoElement objects, if an expression is found.
             */
            ExpressionNode root = ((AlgoDependentBoolean) ae)
                    .getExpression();
            HashMap<GeoElement, Integer> gSet = new HashMap<GeoElement, Integer>();
            GeoCollector gc = GeoCollector.getCollector(gSet);
            root.traverse(gc);
            for (Entry<GeoElement, Integer> entry : gSet.entrySet()) {
                parentsComplexity += computeNodeComplexity(entry.getKey())
                        * entry.getValue();
            }
        } else {
            /* Otherwise just count each GeoElement once. */
            for (GeoElement dependency : ae.getInput()) {
                parentsComplexity += computeNodeComplexity(dependency);
            }
        }
        nodeComplexity.put(node, parentsComplexity);
        return parentsComplexity;
    }

    /**
     * Computes maximum, minimum, mean, variation and entropy of a population
     * and appends them to the CSV output. If categories are given, the
     * relative frequency of each category is appended first (as
     * "NF(category)") and the statistics are computed on relative
     * frequencies and labeled "NF(description)".
     *
     * @param description
     *            name of the population, used in the CSV column names
     * @param nodes
     *            the population; Integer entries are treated as values, any
     *            other entries as labels whose frequencies are counted
     * @param categories
     *            the full list of possible labels, or null if the population
     *            is not categorized
     */
    static void generateStatistics(String description, List<Object> nodes,
            String[] categories) {
        /*
         * collecting algos, generating population and computing basic
         * statistics
         */
        int size = 0;
        double mean, variation_coefficient, minimum, maximum, entropy;
        HashMap<Object, Integer> frequencies = new HashMap<Object, Integer>();
        int number_of_nodes = 0;
        /*
         * NOTE(review): the maximum starts at 1, so a population of integer
         * values that are all 0 reports a maximum of 1 -- confirm whether
         * this is intended.
         */
        maximum = 1;
        minimum = -1; // "not yet set" marker, assuming non-negative values
        mean = 0;
        for (Object node : nodes) {
            number_of_nodes++;
            int freq = 1;
            if (frequencies.containsKey(node)) {
                freq = frequencies.get(node) + 1;
            }
            frequencies.put(node, freq);
            if (node instanceof Integer) {
                maximum = Math.max(maximum, (Integer) node);
                if (minimum == -1) {
                    minimum = (Integer) node;
                } else {
                    minimum = Math.min(minimum, (Integer) node);
                }
                mean += (Integer) node;
            } else {
                maximum = Math.max(maximum, freq);
                if (categories == null) {
                    if (minimum == -1) {
                        minimum = freq;
                    } else {
                        minimum = Math.min(minimum, freq);
                    }
                    mean += freq;
                }
            }
        }
        int zeros;
        if (categories != null) {
            size = categories.length;
            // start from the largest count; the real minimum is searched below
            minimum = maximum;
            // normalize
            maximum /= number_of_nodes;
            mean = (double) number_of_nodes / size;
            /*
             * NOTE(review): this assumes every observed label is one of the
             * categories; otherwise the count can become negative.
             */
            zeros = size - frequencies.size();
        } else {
            size = number_of_nodes;
            mean /= size;
            zeros = 0;
        }
        /* computing rest of statistics */
        /* ((3/7-1/23)^2+(1/7-1/23)^2*4+18*(1/23)^2)/23 == .00925 */
        variation_coefficient = 0;
        /*
         * -((3/7)*log(3/7;A)+(1/7)*log(1/7;A)+(1/7)*log(1/7;A)+(1/7)*log(1/
         * 7 ;A)+(1/7)*log(1/7;A))
         */
        entropy = 0;
        for (Entry<Object, Integer> entry : frequencies.entrySet()) {
            Object node = entry.getKey();
            int freq = entry.getValue();
            /*
             * Only a categorized population takes its minimum from the
             * frequency counts. For integer populations the minimum is the
             * smallest value found above and must not be replaced by a
             * count.
             */
            if (categories != null && freq < minimum) {
                minimum = freq;
            }
            double rel_freq = freq / (double) number_of_nodes;
            double value;
            if (node instanceof Integer) {
                value = ((Integer) node) - mean;
                variation_coefficient += freq * value * value;
            } else {
                value = rel_freq - 1.0 / size;
                variation_coefficient += value * value;
            }
            entropy -= rel_freq * Math.log(rel_freq) / Math.log(2);
        }
        if (categories != null) {
            if (zeros > 0) {
                // at least one category does not occur at all
                minimum = 0;
            } else {
                // normalize
                minimum /= number_of_nodes;
            }
        }
        // categories that do not occur contribute (0 - 1/size)^2 each
        double value = 1.0 / size;
        variation_coefficient += zeros * value * value;
        variation_coefficient /= size;
        Log.debug("population=" + frequencies);
        Log.debug("minimum=" + minimum + " maximum=" + maximum + " mean="
                + mean + " variation_coefficient=" + variation_coefficient
                + " entropy=" + entropy);
        String description1 = description;
        if (categories != null) {
            description1 = "NF(" + description1 + ")";
            double rel_freq;
            for (String category : categories) {
                if (frequencies.containsKey(category)) {
                    rel_freq = (double) frequencies.get(category)
                            / number_of_nodes;
                } else {
                    rel_freq = 0;
                }
                csvAdd("NF(" + category + ")", rel_freq);
            }
        }
        csvAdd("max " + description1, maximum);
        csvAdd("min " + description1, minimum);
        csvAdd("mean " + description1, mean);
        csvAdd("variation " + description1, variation_coefficient);
        csvAdd("entropy " + description1, entropy);
    }

    /**
     * Appends a numeric column to the CSV output.
     *
     * @param header
     *            column name
     * @param data
     *            column value
     */
    private static void csvAdd(String header, double data) {
        csv_header += header + ",";
        csv_data += data + ",";
    }

    /**
     * Appends a text column to the CSV output. The value is written as
     * given, without quoting or escaping.
     *
     * @param header
     *            column name
     * @param data
     *            column value
     */
    private static void csvAdd(String header, String data) {
        csv_header += header + ",";
        csv_data += data + ",";
    }

    /**
     * Resets the collected data, computes all features of the statement and
     * writes the CSV header, the CSV data and a digraph of the dependencies
     * to the debug log.
     *
     * @param statement
     *            element whose dependency graph is analyzed
     */
    static void init(GeoElement statement) {
        nodeLongestPath = new HashMap<GeoElement, Integer>();
        nodeComplexity = new HashMap<GeoElement, Integer>();
        longestPath = 0;
        deps = new HashSet<ArrayList<GeoElement>>();
        csv_header = "";
        csv_data = "";
        TreeSet<GeoElement> geos = statement.getAllPredecessors();
        geos.add(statement);
        List<Object> geo_nodes = new ArrayList<Object>();
        List<Object> nodes_in_deg = new ArrayList<Object>();
        List<Object> nodes_out_deg = new ArrayList<Object>();
        List<Object> nodes_deg = new ArrayList<Object>();
        List<Object> types = new ArrayList<Object>();
        List<Object> objs = new ArrayList<Object>();
        int number_of_nodes = 0, free = 0, edges = 0;
        Iterator<GeoElement> it = geos.iterator();
        while (it.hasNext()) {
            GeoElement geo = it.next();
            /*
             * out-degree: children inside the graph that take this geo
             * directly as input; the statement itself is not counted
             */
            int out = 0;
            for (GeoElement child : geo.getAllChildren()) {
                if (!geos.contains(child)) {
                    continue;
                }
                boolean directChild = false;
                for (GeoElement father : child.getParentAlgorithm()
                        .getInput()) {
                    if (father.equals(geo)) {
                        directChild = true;
                        break;
                    }
                }
                if (directChild && !child.equals(statement)) {
                    out++;
                }
            }
            int in = 0;
            AlgoElement ae = geo.getParentAlgorithm();
            String algo = "Free Point";
            if (ae != null) {
                GetCommand gc = ae.getClassName();
                if (gc != null) {
                    /*
                     * Some algos don't have commands, e.g. IsPointOnPath.
                     * For those the default label set above is kept.
                     */
                    algo = gc.getCommand();
                }
                GeoElement[] inputs = ae.getInput();
                if (!geo.equals(statement)) {
                    edges += inputs.length;
                }
                in = inputs.length;
                for (GeoElement ref : inputs) {
                    objs.add(ref);
                }
            } else {
                free++;
            }
            // the statement itself is not part of the node populations
            if (!geo.equals(statement)) {
                geo_nodes.add(algo);
                types.add(geo.getTypeString());
                nodes_in_deg.add(in);
                nodes_out_deg.add(out);
                nodes_deg.add(in + out);
                number_of_nodes++;
            }
        }
        computeNodeLongestPath(statement, 0);
        // presumably discounts the edge that leads into the statement itself
        longestPath--;
        computeNodeComplexity(statement);
        // CSV output
        csvAdd("number of nodes", number_of_nodes);
        csvAdd("number of nodes with in-degree 0", free);
        csvAdd("number of edges", edges);
        csvAdd("num of nodes/num of edges",
                (double) number_of_nodes / edges);
        csvAdd("num of edges/num of nodes",
                (double) edges / number_of_nodes);
        csvAdd("max path length/num of nodes",
                (double) longestPath / number_of_nodes);
        csvAdd("num of nodes/max path length",
                (double) number_of_nodes / longestPath);
        csvAdd("max path length/num of edges",
                (double) longestPath / edges);
        csvAdd("num of edges/max path length",
                (double) edges / longestPath);
        csvAdd("statement complexity", nodeComplexity.get(statement));
        /*
         * A statement without parent algorithm has no dominant predicate;
         * the column is left empty in that case instead of failing.
         */
        AlgoElement statementAlgo = statement.getParentAlgorithm();
        GetCommand dominantPredicate = null;
        if (statementAlgo != null) {
            dominantPredicate = statementAlgo.getClassName();
        }
        String dominantPredicateS = "";
        if (dominantPredicate != null) {
            dominantPredicateS = dominantPredicate.toString();
        }
        csvAdd("statement dominant predicate",
                dominantPredicateS);
        csvAdd("statement predicates",
                '"' + statement.getDefinition(StringTemplate.ogpTemplate)
                        + '"');
        generateStatistics("node in-degree", nodes_in_deg, null);
        generateStatistics("node out-degree", nodes_out_deg, null);
        generateStatistics("node degree", nodes_deg, null);
        /*
         * csvAdd("num of nodes not labeled by A or B or C with in-degree 0"
         * , 0);
         */
        generateStatistics("Wi", geo_nodes, rules);
        generateStatistics("types", types, obj_types);
        generateStatistics("objs", objs, null);
        csvAdd("statement size", number_of_nodes - free);
        Log.debug("portfolio csv_header:" + csv_header);
        Log.debug("portfolio csv_data:" + csv_data);
        /*
         * Digraph output: every node is named label_complexity, the
         * statement is drawn filled and the edges into it are dashed.
         */
        StringBuilder digraph = new StringBuilder("digraph dependencies { ");
        digraph.append(statement.getLabelSimple());
        digraph.append("_");
        digraph.append(nodeComplexity.get(statement));
        digraph.append(" [style=filled]; ");
        for (ArrayList<GeoElement> edge : deps) {
            GeoElement from = edge.get(0);
            GeoElement to = edge.get(1);
            digraph.append(from.getLabelSimple());
            digraph.append("_");
            digraph.append(nodeComplexity.get(from));
            digraph.append(" -> ");
            digraph.append(to.getLabelSimple());
            digraph.append("_");
            digraph.append(nodeComplexity.get(to));
            if (to.equals(statement)) {
                digraph.append(" [style=dashed]");
            }
            digraph.append("; ");
        }
        digraph.append("}");
        Log.debug(digraph);
    }
}