/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.utils.clustering;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.WeightedVectorWritable;
import org.apache.mahout.common.StringUtils;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
import java.io.IOException;
import java.io.Writer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.regex.Pattern;
/**
 * Writes clusters and their member points as a GraphML document.
 *
 * <p>Each cluster becomes a node, each point of the cluster becomes a node carrying the cluster's
 * color, and an edge weighted by the (scaled) distance connects every point to its cluster node.
 * The document header is written by the constructor and the closing tags by {@link #close()}.
 *
 * <p>GraphML -- see http://gephi.org/users/supported-graph-formats/graphml-format/
 */
public class GraphMLClusterWriter extends AbstractClusterWriter {

  /** Characters of a formatted vector that are replaced by '_' to build a display label. */
  private static final Pattern VEC_PATTERN = Pattern.compile("\\{|\\:|\\,|\\}");

  /** Spacing between consecutive cluster nodes, used for both the column step and the row step. */
  private static final int GRID_SPACING = 1000;
  /** Factor applied to the measured distance to spread the points around their cluster node. */
  private static final int DISTANCE_SCALE = 500;
  /** Increment applied to a single channel while that channel still has room to grow. */
  private static final int COLOR_STEP = 20;
  /** Increment applied to every channel once the single-channel walk no longer applies. */
  private static final int COLOR_FALLBACK_STEP = 3;
  /** Exclusive upper bound of a color channel. */
  private static final int COLOR_LIMIT = 256;

  /** Color already assigned to each cluster id, so a cluster written twice keeps its color. */
  private final Map<Integer, Color> colors = new HashMap<Integer, Color>();
  /** Color of the most recently written cluster; the next new color is derived from it. */
  private Color lastClusterColor = new Color();
  /** Position of the most recently written cluster node. */
  private float lastX, lastY;
  private final Random random = new Random();
  /** Extra width (beyond one grid step) a row may reach before the layout wraps to a new row. */
  private final int posStep;
  private final String[] dictionary;
  private final int numTopFeatures;
  private final int subString;

  /**
   * Creates the writer and immediately emits the GraphML header to {@code writer}.
   *
   * @param writer            destination of the GraphML output
   * @param clusterIdToPoints points of each cluster, keyed by cluster id
   * @param measure           distance measure used to place points and weight edges; when
   *                          {@code null} every point gets a distance of 1
   * @param numTopFeatures    number of top terms requested for the cluster label
   * @param dictionary        term dictionary; when {@code null} the label carries no top terms
   * @param subString         maximum length of a point label; values {@code <= 0} disable truncation
   * @throws IOException if the header cannot be written
   */
  public GraphMLClusterWriter(Writer writer, Map<Integer, List<WeightedVectorWritable>> clusterIdToPoints,
                              DistanceMeasure measure,
                              int numTopFeatures, String[] dictionary, int subString)
    throws IOException {
    super(writer, clusterIdToPoints, measure);
    this.dictionary = dictionary;
    this.numTopFeatures = numTopFeatures;
    this.subString = subString;
    this.posStep = (int) (0.1 * clusterIdToPoints.size()) + 100;
    writeHeader(writer);
  }

  /**
   * Writes the XML declaration, the {@code graphml} root element, the attribute keys
   * (r, g, b, size, weight, x, y) and the opening {@code graph} element.
   */
  private static void writeHeader(Writer writer) throws IOException {
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.append("<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns\"\n" +
        "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" +
        "xsi:schemaLocation=\"http://graphml.graphdrawing.org/xmlns\n" +
        "http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd\">");
    //support rgb
    writer.append("<key attr.name=\"r\" attr.type=\"int\" for=\"node\" id=\"r\"/>\n" +
        "<key attr.name=\"g\" attr.type=\"int\" for=\"node\" id=\"g\"/>\n" +
        "<key attr.name=\"b\" attr.type=\"int\" for=\"node\" id=\"b\"/>" +
        "<key attr.name=\"size\" attr.type=\"int\" for=\"node\" id=\"size\"/>" +
        "<key attr.name=\"weight\" attr.type=\"float\" for=\"edge\" id=\"weight\"/>" +
        "<key attr.name=\"x\" attr.type=\"float\" for=\"node\" id=\"x\"/>" +
        "<key attr.name=\"y\" attr.type=\"float\" for=\"node\" id=\"y\"/>");
    writer.append("<graph edgedefault=\"undirected\">");
  }

  /*
    Minimal example of the document structure:

    <?xml version="1.0" encoding="UTF-8"?>
    <graphml xmlns="http://graphml.graphdrawing.org/xmlns"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
    http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
    <graph id="G" edgedefault="undirected">
    <node id="n0"/>
    <node id="n1"/>
    <edge id="e1" source="n0" target="n1"/>
    </graph>
    </graphml>
   */

  /**
   * Writes one cluster: its own node, one node per point and one edge from the cluster node to
   * each point, all on a single output line.
   *
   * <p>Cluster nodes are laid out on a grid: each cluster is placed one grid step to the right of
   * the previous one, and the layout wraps to a new row once x exceeds one grid step plus
   * {@code posStep}. Points are placed around their cluster node at a random angle, at the
   * scaled distance from the cluster center.
   *
   * @param cluster the cluster to write
   * @throws IOException if the underlying writer fails
   */
  @Override
  public void write(Cluster cluster) throws IOException {
    StringBuilder line = new StringBuilder();
    Color rgb = getColor(cluster.getId());
    String topTerms = "";
    if (dictionary != null) {
      topTerms = getTopTerms(cluster.getCenter(), dictionary, numTopFeatures);
    }
    String clusterLabel = String.valueOf(cluster.getId()) + "_" + topTerms;
    //do some positioning so that items are visible and grouped together
    //TODO: put in a real layout algorithm
    float x = lastX + GRID_SPACING;
    float y = lastY;
    if (x > (GRID_SPACING + posStep)) {
      // row is full: start the next row at the left edge
      y = lastY + GRID_SPACING;
      x = 0;
    }
    line.append(createNode(clusterLabel, rgb, x, y));
    List<WeightedVectorWritable> points = clusterIdToPoints.get(cluster.getId());
    if (points != null) {
      // the center does not change while iterating, so look it up once per cluster
      Vector center = cluster.getCenter();
      double centerLengthSquared = measure == null ? 0.0 : center.getLengthSquared();
      for (WeightedVectorWritable point : points) {
        Vector theVec = point.getVector();
        double distance = 1;
        if (measure != null) {
          //scale the distance
          distance = measure.distance(centerLengthSquared, center, theVec) * DISTANCE_SCALE;
        }
        //pick an angle at random and then scale along that angle
        double angleRads = Math.toRadians(random.nextInt(360));
        float targetX = x + (float) (distance * Math.cos(angleRads));
        float targetY = y + (float) (distance * Math.sin(angleRads));
        String vecStr = pointLabel(theVec);
        line.append(createNode(vecStr, rgb, targetX, targetY));
        line.append(createEdge(clusterLabel, vecStr, distance));
      }
    }
    lastClusterColor = rgb;
    lastX = x;
    lastY = y;
    getWriter().append(line).append("\n");
  }

  /**
   * Builds the display label of a point: the name of a {@link NamedVector}, otherwise the
   * formatted vector with its punctuation replaced by '_'; truncated to {@code subString}
   * characters when {@code subString} is positive.
   */
  private String pointLabel(Vector theVec) {
    String vecStr;
    if (theVec instanceof NamedVector) {
      vecStr = ((NamedVector) theVec).getName();
    } else {
      //do some basic manipulations for display
      vecStr = VEC_PATTERN.matcher(theVec.asFormatString()).replaceAll("_");
    }
    if (subString > 0 && vecStr.length() > subString) {
      vecStr = vecStr.substring(0, subString);
    }
    return vecStr;
  }

  /**
   * Returns the color of the given cluster, assigning a new one on first use.
   *
   * <p>A new color is derived from the color of the previously written cluster: red is advanced
   * while all three channels have room, then green once red is exhausted, then blue once red and
   * green are exhausted. In every other situation all three channels are nudged by a small step.
   * Channel values wrap around at 256.
   *
   * @param clusterId id of the cluster
   * @return the color associated with {@code clusterId}
   */
  private Color getColor(int clusterId) {
    Color result = colors.get(clusterId);
    if (result == null) {
      result = new Color();
      //there is probably some better way to color a graph
      boolean redHasRoom = lastClusterColor.r + COLOR_STEP < COLOR_LIMIT;
      boolean greenHasRoom = lastClusterColor.g + COLOR_STEP < COLOR_LIMIT;
      boolean blueHasRoom = lastClusterColor.b + COLOR_STEP < COLOR_LIMIT;
      int incR = 0;
      int incG = 0;
      int incB = 0;
      if (redHasRoom && greenHasRoom && blueHasRoom) {
        incR = COLOR_STEP;
      } else if (!redHasRoom && greenHasRoom && blueHasRoom) {
        incG = COLOR_STEP;
      } else if (!redHasRoom && !greenHasRoom && blueHasRoom) {
        incB = COLOR_STEP;
      } else {
        // shift every channel evenly; blue must move too, otherwise the palette only drifts
        // along red/green after the roll-over
        incR = COLOR_FALLBACK_STEP;
        incG = COLOR_FALLBACK_STEP;
        incB = COLOR_FALLBACK_STEP;
      }
      result.r = (lastClusterColor.r + incR) % COLOR_LIMIT;
      result.g = (lastClusterColor.g + incG) % COLOR_LIMIT;
      result.b = (lastClusterColor.b + incB) % COLOR_LIMIT;
      colors.put(clusterId, result);
    }
    return result;
  }

  /**
   * Builds an {@code edge} element between two node ids carrying the distance as its weight.
   * Both ids are XML-escaped; the edge id is {@code left_right}.
   */
  private static String createEdge(String left, String right, double distance) {
    String source = StringUtils.escapeXML(left);
    String target = StringUtils.escapeXML(right);
    return "<edge id=\"" + source + '_' + target + "\" source=\"" + source + "\" target=\"" + target + "\">"
        + "<data key=\"weight\">" + distance + "</data></edge>";
  }

  /**
   * Builds an empty {@code node} element with the XML-escaped id. Not called from within this
   * class at present.
   */
  private static String createNode(String s) {
    return "<node id=\"" + StringUtils.escapeXML(s) + "\"/>";
  }

  /**
   * Builds a {@code node} element with the XML-escaped id and its r, g, b, x and y data values.
   */
  private static String createNode(String s, Color rgb, float x, float y) {
    StringBuilder node = new StringBuilder();
    node.append("<node id=\"").append(StringUtils.escapeXML(s)).append("\">");
    node.append("<data key=\"r\">").append(rgb.r).append("</data>");
    node.append("<data key=\"g\">").append(rgb.g).append("</data>");
    node.append("<data key=\"b\">").append(rgb.b).append("</data>");
    node.append("<data key=\"x\">").append(x).append("</data>");
    node.append("<data key=\"y\">").append(y).append("</data>");
    node.append("</node>");
    return node.toString();
  }

  /**
   * Writes the closing {@code graph} and {@code graphml} tags, then closes the parent writer.
   *
   * @throws IOException if the underlying writer fails
   */
  @Override
  public void close() throws IOException {
    getWriter().append("</graph>").append("</graphml>");
    super.close();
  }

  /** Mutable RGB triple; static because it needs no access to the enclosing writer. */
  private static final class Color {
    int r;
    int g;
    int b;
  }
}