/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.tinkerpop.gremlin.algorithm.generator;
import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Supplier;
/**
 * Generates a synthetic network with a community structure, that is, several densely connected
 * sub-networks that are loosely connected with one another.
 * <p>
 * The supplied vertices are first partitioned, in iteration order, into communities whose sizes are drawn
 * from the community-size {@link Distribution}. Then, for every vertex, an out-degree is drawn from the
 * degree {@link Distribution} and that many target vertices are chosen at random, each one either from the
 * vertex's own community or from another community according to the cross-community percentage.
 * <p>
 * Instances are created through {@link #build(Graph)}.
 *
 * @author Matthias Broecheler (me@matthiasb.com)
 * @author Stephen Mallette (http://stephen.genoprime.com)
 */
public class CommunityGenerator extends AbstractGenerator {
    public static final double DEFAULT_CROSS_COMMUNITY_PERCENTAGE = 0.1;
    public static final int DEFAULT_NUMBER_OF_COMMUNITIES = 2;

    /**
     * Maximum number of random draws made while looking for a not-yet-linked target vertex. The limit
     * prevents an infinite loop when no eligible vertex exists given the structure of the graph.
     */
    private static final int MAX_SELECTION_TRIES = 100;

    private final Distribution communitySize;
    private final Distribution edgeDegree;
    private final double crossCommunityPercentage;
    private final Iterable<Vertex> vertices;
    private final int expectedNumCommunities;
    private final int expectedNumEdges;
    private final Random random;

    private CommunityGenerator(final Graph g, final String label, final Optional<Consumer<Edge>> edgeProcessor,
                               final Optional<BiConsumer<Vertex, Map<String, Object>>> vertexProcessor,
                               final Supplier<Long> seedGenerator, final Distribution communitySize,
                               final Distribution edgeDegree, final double crossCommunityPercentage,
                               final Iterable<Vertex> vertices, final int expectedNumCommunities,
                               final int expectedNumEdges) {
        super(g, label, edgeProcessor, vertexProcessor, seedGenerator);
        random = new Random(this.seedSupplier.get());
        this.communitySize = communitySize;
        this.edgeDegree = edgeDegree;
        this.crossCommunityPercentage = crossCommunityPercentage;
        this.vertices = vertices;
        this.expectedNumCommunities = expectedNumCommunities;
        this.expectedNumEdges = expectedNumEdges;
    }

    /**
     * Generates a synthetic network for provided vertices in the given graph such that the provided expected number
     * of communities are generated with the specified expected number of edges.
     *
     * @return The actual number of edges generated. May be different from the expected number.
     * @throws IllegalArgumentException if the cross-community percentage is greater than zero but fewer than
     *                                  two communities could be formed from the supplied vertices
     */
    @Override
    public int generate() {
        final int numVertices = SizableIterable.sizeOf(vertices);
        final List<List<Vertex>> communities = partitionIntoCommunities(numVertices);

        final double inCommunityPercentage = 1.0 - crossCommunityPercentage;
        final Distribution degreeDist = edgeDegree.initialize(numVertices, expectedNumEdges);
        if (crossCommunityPercentage > 0 && communities.size() < 2) {
            throw new IllegalArgumentException("Cannot have cross links with only one community");
        }

        int addedEdges = 0;
        for (final List<Vertex> community : communities) {
            // Upper bound on the out-degree of every member of this community; it only depends on the
            // community size, so it is computed once per community. For a single-vertex community the
            // expression evaluates to -1, i.e. such a vertex originates no edges at all.
            final int maxDegree = (int) Math.ceil((community.size() - 1) / inCommunityPercentage) - 1;
            for (final Vertex v : community) {
                final int randomDegree = degreeDist.nextValue(random);
                final int degree = Math.min(randomDegree, maxDegree);
                final Set<Vertex> inlinks = new HashSet<>();
                final Set<Vertex> outlinks = new HashSet<>();
                for (int i = 0; i < degree; i++) {
                    // Go cross-community either by chance or because every other member of the own
                    // community has already been linked. The random draw always happens first.
                    final boolean crossCommunity = random.nextDouble() < crossCommunityPercentage
                            || community.size() - 1 <= inlinks.size();
                    final Vertex selected = crossCommunity
                            ? selectCrossCommunityVertex(communities, community, outlinks)
                            : selectInCommunityVertex(community, v, inlinks);

                    // only add an edge if a vertex was actually selected (the tries may have expired).
                    if (selected != null) {
                        (crossCommunity ? outlinks : inlinks).add(selected);
                        addEdge(v, selected);
                        addedEdges++;
                    }
                }
            }
        }
        return addedEdges;
    }

    /**
     * Splits the configured vertices, in iteration order, into consecutive communities whose sizes are drawn
     * from the community-size distribution. Every vertex is passed through {@code processVertex} together with
     * a context map holding the index of its community under the key {@code "communityIndex"}.
     *
     * @param numVertices the number of vertices to partition
     * @return the non-empty communities, in creation order
     */
    private List<List<Vertex>> partitionIntoCommunities(final int numVertices) {
        final Iterator<Vertex> iter = vertices.iterator();
        final List<List<Vertex>> communities = new ArrayList<>(expectedNumCommunities);
        final Distribution communityDist = communitySize.initialize(expectedNumCommunities, numVertices);
        final Map<String, Object> context = new HashMap<>();
        while (iter.hasNext()) {
            final int nextSize = communityDist.nextValue(random);
            context.put("communityIndex", communities.size());
            // Nothing visible here guarantees that the drawn size is positive or bounded, so the value is
            // clamped before it is used as a capacity hint: a negative capacity would make the ArrayList
            // constructor throw and a huge one would allocate needlessly. A non-positive draw simply
            // produces an empty community, which is skipped below and followed by another draw.
            final int capacity = Math.max(0, Math.min(nextSize, numVertices));
            final List<Vertex> community = new ArrayList<>(capacity);
            for (int i = 0; i < nextSize && iter.hasNext(); i++) {
                community.add(processVertex(iter.next(), context));
            }
            if (!community.isEmpty()) {
                communities.add(community);
            }
        }
        return communities;
    }

    /**
     * Picks a random vertex from a randomly chosen community other than {@code community} that is not yet
     * contained in {@code outlinks}. Requires {@code communities} to hold at least two communities.
     * <p>
     * NOTE(review): the other community is chosen once and kept for all retries, so the search fails when
     * all of its members are already linked even if further communities exist - confirm this is intended.
     *
     * @return the selected vertex or {@code null} if none was found within {@link #MAX_SELECTION_TRIES} draws
     */
    private Vertex selectCrossCommunityVertex(final List<List<Vertex>> communities, final List<Vertex> community,
                                              final Set<Vertex> outlinks) {
        // choose another community to connect to and make sure it's not the community of the current
        // vertex; identity is what matters here, an element-wise list comparison is neither needed nor cheap.
        List<Vertex> otherCommunity = null;
        while (null == otherCommunity) {
            otherCommunity = communities.get(random.nextInt(communities.size()));
            if (otherCommunity == community) {
                otherCommunity = null;
            }
        }

        for (int tries = 0; tries < MAX_SELECTION_TRIES; tries++) {
            final Vertex candidate = otherCommunity.get(random.nextInt(otherCommunity.size()));
            if (!outlinks.contains(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Picks a random vertex from {@code community} that is neither {@code source} itself nor already
     * contained in {@code inlinks}.
     *
     * @return the selected vertex or {@code null} if none was found within {@link #MAX_SELECTION_TRIES} draws
     */
    private Vertex selectInCommunityVertex(final List<Vertex> community, final Vertex source,
                                           final Set<Vertex> inlinks) {
        for (int tries = 0; tries < MAX_SELECTION_TRIES; tries++) {
            final Vertex candidate = community.get(random.nextInt(community.size()));
            if (!source.equals(candidate) && !inlinks.contains(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Creates a {@link Builder} for a generator operating on the given graph.
     */
    public static Builder build(final Graph g) {
        return new Builder(g);
    }

    /**
     * Collects the configuration of a {@link CommunityGenerator}. Both the community-size and the degree
     * distribution must be set before {@link #create()} is called.
     */
    public static final class Builder extends AbstractGeneratorBuilder<Builder> {
        private final Graph g;
        private Distribution communitySize = null;
        private Distribution edgeDegree = null;
        private double crossCommunityPercentage = DEFAULT_CROSS_COMMUNITY_PERCENTAGE;
        private Iterable<Vertex> vertices;
        private int expectedNumCommunities = DEFAULT_NUMBER_OF_COMMUNITIES;
        private int expectedNumEdges;

        /**
         * Defaults to generating edges for all vertices of the graph, with an expected number of edges of
         * twice the number of vertices present in the graph at the time the builder is created.
         */
        private Builder(final Graph g) {
            super(Builder.class);
            this.g = g;
            final List<Vertex> allVertices = IteratorUtils.list(g.vertices());
            this.vertices = allVertices;
            this.expectedNumEdges = allVertices.size() * 2;
        }

        /**
         * Sets the vertices to partition into communities and to generate edges for. This does not adjust
         * the expected number of edges; use {@link #expectedNumEdges(int)} for that.
         */
        public Builder verticesToGenerateEdgesFor(final Iterable<Vertex> vertices) {
            this.vertices = vertices;
            return this;
        }

        /**
         * Sets the expected number of communities handed to the community-size distribution.
         */
        public Builder expectedNumCommunities(final int expectedNumCommunities) {
            this.expectedNumCommunities = expectedNumCommunities;
            return this;
        }

        /**
         * Sets the expected number of edges handed to the degree distribution.
         */
        public Builder expectedNumEdges(final int expectedNumEdges) {
            this.expectedNumEdges = expectedNumEdges;
            return this;
        }

        /**
         * Sets the distribution to be used to generate the sizes of communities.
         */
        public Builder communityDistribution(final Distribution community) {
            this.communitySize = community;
            return this;
        }

        /**
         * Sets the distribution to be used to generate the out-degrees of vertices.
         */
        public Builder degreeDistribution(final Distribution degree) {
            this.edgeDegree = degree;
            return this;
        }

        /**
         * Sets the percentage of edges that cross a community, i.e. connect a vertex to a vertex in
         * another community. The lower this value, the higher the modularity of the generated communities.
         *
         * @param percentage Percentage of community crossing edges. Must be in [0,1]
         * @throws IllegalArgumentException if the percentage is outside of [0,1] or is not a number
         */
        public Builder crossCommunityPercentage(final double percentage) {
            // Written as a negated range check so that NaN, for which every comparison is false, is
            // rejected as well instead of silently producing a generator that creates no edges.
            if (!(percentage >= 0.0 && percentage <= 1.0)) {
                throw new IllegalArgumentException("Percentage must be between 0 and 1");
            }
            this.crossCommunityPercentage = percentage;
            return this;
        }

        /**
         * Creates the generator from the current configuration.
         *
         * @throws IllegalStateException if the community-size or the degree distribution has not been set
         */
        public CommunityGenerator create() {
            if (null == communitySize) {
                throw new IllegalStateException("Need to initialize community size distribution");
            }
            if (null == edgeDegree) {
                throw new IllegalStateException("Need to initialize degree distribution");
            }
            return new CommunityGenerator(this.g, this.label, this.edgeProcessor, this.vertexProcessor, this.seedSupplier,
                    this.communitySize, this.edgeDegree, crossCommunityPercentage, vertices,
                    expectedNumCommunities, expectedNumEdges);
        }
    }
}