// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dq.indicators.graph; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.lang.StringUtils; import org.talend.dataquality.helpers.IndicatorHelper; import org.talend.utils.collections.MultipleKey; import edu.uci.ics.jung.graph.Edge; import edu.uci.ics.jung.graph.Graph; import edu.uci.ics.jung.graph.Vertex; import edu.uci.ics.jung.graph.decorators.NumberEdgeValue; import edu.uci.ics.jung.graph.decorators.NumberVertexValue; import edu.uci.ics.jung.graph.decorators.UserDatumNumberEdgeValue; import edu.uci.ics.jung.graph.decorators.UserDatumNumberVertexValue; import edu.uci.ics.jung.graph.impl.UndirectedSparseEdge; import edu.uci.ics.jung.graph.impl.UndirectedSparseGraph; import edu.uci.ics.jung.graph.impl.UndirectedSparseVertex; import edu.uci.ics.jung.utils.UserData; /** * @author scorreia * * Class to help building a graph from data. */ public class GraphBuilder { /** * The key to set the index of a column used to identify instances of a same column. */ public static final String COLUMN_IDX_KEY = "COLUMNID"; //$NON-NLS-1$ /** * The key to set the label of a vertex (instance of a column). */ public static final String V_LABEL_KEY = "V_LABEL"; //$NON-NLS-1$ /** * The key to set the index of the row in the edge. */ public static final String E_ROWNUM_KEY = "ROWID"; //$NON-NLS-1$ /** * The key to set the weight of the edge (function of the row count). */ public static final String E_WEIGHT_KEY = "WEIGHT"; //$NON-NLS-1$ /** * The key to set the label of the edge. */ public static final String E_LABEL_KEY = "E_LABEL"; //$NON-NLS-1$ /** * the graph. */ private Graph graph; protected NumberEdgeValue edgeWeight; private long totalWeight; boolean proportionalWidth = false; public void setProportionalWidth(boolean proportionalWidth) { this.proportionalWidth = proportionalWidth; } /** * Getter for totalWeight. * * @return the totalWeight */ public long getTotalWeight() { return this.totalWeight; } /** * Sets the totalWeight. * * @param totalWeight the totalWeight to set */ public void setTotalWeight(long totalWeight) { this.totalWeight = totalWeight; } /** * Getter for edgeWeight. * * @return the edgeWeight */ public NumberEdgeValue getEdgeWeight() { return this.edgeWeight; } /** * Getter for vertexWeight. * * @return the vertexWeight */ public NumberVertexValue getVertexWeight() { return this.vertexWeight; } protected NumberVertexValue vertexWeight; /** * Method "createMultiGraph". * * @param listOfRows a list of rows. Each row contains instance values. The last column gives the number of * instances of the row. * @return the graph */ public Graph createMultiGraph(List<Object[]> listOfRows) { initAttributes(); createVertices(listOfRows); return graph; } private void initAttributes() { graph = new UndirectedSparseGraph(); graph.getEdgeConstraints().remove(Graph.NOT_PARALLEL_EDGE); // allow multigraph edgeWeight = new UserDatumNumberEdgeValue("edge_weight"); //$NON-NLS-1$ vertexWeight = new UserDatumNumberVertexValue("vertex_weight"); //$NON-NLS-1$ } /** * create vertices from given list of rows. * * @param listOfRows list of rows * @return true */ private boolean createVertices(List<Object[]> listOfRows) { // map [key -> vertex] to store created vertices Map<MultipleKey, Vertex> key2Vertex = new HashMap<MultipleKey, Vertex>(); // link each vertex of the row together Map<MultipleKey, Edge> v1v2ToEdge = new HashMap<MultipleKey, Edge>(); // loop on each row for (int rowIdx = 0; rowIdx < listOfRows.size(); rowIdx++) { Object[] row = listOfRows.get(rowIdx); createVertices(key2Vertex, v1v2ToEdge, row, rowIdx); } return true; } /** * DOC scorreia Comment method "createVertices". * * @param key2Vertex * @param toEdge * @param row * @param rowIdx * @return */ private Vertex[] createVertices(Map<MultipleKey, Vertex> key2Vertex, Map<MultipleKey, Edge> toEdge, Object[] row, int rowIdx) { // last column is the result of count(*) final int nbNominalColumn = row.length - 1; // vertices in one row Vertex[] verticesInRow = new Vertex[nbNominalColumn]; for (int colIdx = 0; colIdx < nbNominalColumn; colIdx++) { final Object value = row[colIdx]; Object[] key = { value, String.valueOf(colIdx) }; MultipleKey mulkey = new MultipleKey(key, 2); // search for already created vertex Vertex myVertex = key2Vertex.get(mulkey); // create the new vertex if (myVertex == null) { myVertex = newVertex(key2Vertex, colIdx, value, mulkey); vertexWeight.setNumber(myVertex, getIntegerValue(row[nbNominalColumn])); } else { // existing vertex vertexWeight.setNumber(myVertex, vertexWeight.getNumber(myVertex).intValue() + getIntegerValue(row[nbNominalColumn])); } verticesInRow[colIdx] = myVertex; } for (int i = 0; i < verticesInRow.length; i++) { Vertex vi = verticesInRow[i]; for (int j = 0; j < i; j++) { Vertex vj = verticesInRow[j]; MultipleKey mulkey = new MultipleKey(new Object[] { vi, vj }, 2); Edge edge = toEdge.get(mulkey); if (edge == null) { // either allow parallel edges or avoid create a new edge when it already exists edge = newEdge(row, rowIdx, nbNominalColumn, vi, vj); toEdge.put(mulkey, edge); } else { updateEdge(edge, row, rowIdx, nbNominalColumn, vi, vj); } } } return verticesInRow; } /** * DOC scorreia Comment method "getIntegerValue". * * @param object * @return */ private Integer getIntegerValue(Object object) { assert object != null; return (object instanceof Integer) ? (Integer) object : IndicatorHelper.getIntegerFromObject(String.valueOf(object)); } /** * DOC scorreia Comment method "newEdge". * * @param row * @param rowIdx * @param countColumnIndex * @param vertexFrom * @param vertexTo * @return */ private Edge newEdge(Object[] row, int rowIdx, final int countColumnIndex, Vertex vertexFrom, Vertex vertexTo) { final Edge edge = graph.addEdge(new UndirectedSparseEdge(vertexFrom, vertexTo)); String fullRow = StringUtils.join(row, " , "); //$NON-NLS-1$ edge.addUserDatum(GraphBuilder.E_ROWNUM_KEY, rowIdx, UserData.CLONE); // TODO remove this line ? edge.addUserDatum(GraphBuilder.E_WEIGHT_KEY, getIntegerValue(row[countColumnIndex]), UserData.CLONE); edge.addUserDatum(E_LABEL_KEY, fullRow, UserData.SHARED); edgeWeight.setNumber(edge, getIntegerValue(row[countColumnIndex])); return edge; } /** * DOC scorreia Comment method "updateEdge". * * @param edge * @param row * @param rowIdx * @param countColumnIndex * @param vi * @param vj */ private void updateEdge(Edge edge, Object[] row, int rowIdx, int countColumnIndex, Vertex vi, Vertex vj) { edgeWeight.setNumber(edge, edgeWeight.getNumber(edge).intValue() + getIntegerValue(row[countColumnIndex])); } /** * DOC scorreia Comment method "newVertex". * * @param key2Vertex * @param columnIndex * @param value * @param multikey * @return */ private Vertex newVertex(Map<MultipleKey, Vertex> key2Vertex, int columnIndex, final Object value, MultipleKey multikey) { Vertex myVertex; myVertex = new UndirectedSparseVertex(); myVertex.addUserDatum(GraphBuilder.COLUMN_IDX_KEY, columnIndex, UserData.CLONE); myVertex.addUserDatum(GraphBuilder.V_LABEL_KEY, String.valueOf(value), UserData.SHARED); key2Vertex.put(multikey, myVertex); graph.addVertex(myVertex); return myVertex; } /** * Getter for graph. * * @return the graph */ public Graph getGraph() { return graph; } /** * Method "computeEdgeWidth". * * @param weight the weight of an edge * @return the width to be used for the edge */ public float getEdgeWidth(int weight) { return (proportionalWidth) ? (float) (10 * Math.pow((double) weight / this.getTotalWeight(), 1.0d / 2)) : 10.0f / weight; } }