/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.graph;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import com.aliyun.odps.io.Writable;
import com.aliyun.odps.io.WritableComparable;
/**
* RemoveDuplicatesLoadingResolver 是解决 {@link GraphLoader} 载入图数据时引入的点冲突的一种实现.
*
* <p>
* 在图载入阶段,用户可以调用 {@link MutationContext} 的接口向图中添加、删除点或边,由此引入的冲突默认由此类解决, 用户也可以通过
* {@link GraphJob} 提供的
* {@linkplain JobConf#setLoadingVertexResolverClass(Class)
* setLoadingVertexResolverClass} 方法指定自己的实现。
* </p>
*
* <p>
* 对于同一个点ID,RemoveDuplicatesLoadingResolver 解决该点在图载入阶段的冲突是按照以下顺序进行的:
* <ol>
* <li>解决 {@linkplain MutationContext#addVertexRequest(Vertex) addVertexRequest}
* 引起的冲突: 添加点时,选择第一个添加的点。
* <li>解决 {@linkplain MutationContext#addEdgeRequest(WritableComparable, Edge)
* addEdgeRequest} 引起的冲突: 添加边时,首先删除点中已有的重复边(终点相同),然后添加不重复的边。
* <li>
* 忽略
* {@linkplain MutationContext#removeEdgeRequest(WritableComparable, WritableComparable)
* removeEdgeRequest} 以及 {@linkplain MutationContext#removeVertexRequest(WritableComparable)
* removeVertexRequest}。
* </ol>
* </p>
*
* @param <I>
* Vertex ID 类型
* @param <V>
* Vertex Value 类型
* @param <E>
* Edge Value 类型
* @param <M>
* Message 类型
* @see JobConf#setLoadingVertexResolverClass(Class)
*/
@SuppressWarnings("rawtypes")
public class RemoveDuplicatesLoadingResolver<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable>
extends LoadingVertexResolver<I, V, E, M> {
/**
* 提供图载入时的一种去重冲突处理方法.
*
*
* <p>
* 首先处理添加点请求,然后处理添加边的请求,详细处理规则见:{@linkplain RemoveDuplicatesLoadingResolver
* 本类说明}
* </p>
*
* @param vertexId
* 冲突点的ID
* @param vertexChanges
* 关于该点的添加和删除请求
*/
@Override
public Vertex<I, V, E, M> resolve(I vertexId, VertexChanges<I, V, E, M> vertexChanges)
throws IOException {
/**
* 1. If creation of vertex desired, pick first vertex.
*/
Vertex<I, V, E, M> vertex = addVertexIfDesired(vertexId, vertexChanges);
if (vertex != null) {
/** 2. If edge addition, add the unique edges */
addEdges(vertexId, vertex, vertexChanges);
} else {
System.err.println("Ignore all addEdgeRequests for vertex#" + vertexId);
}
return vertex;
}
/**
* 图载入阶段,处理添加点的请求.
*
* @param vertexId
* 请求添加的点的ID
* @param vertexChanges
* 包含请求添加的点
* @return 第一个请求添加的点,或者没有请求添加点时,返回null
*/
protected Vertex<I, V, E, M> addVertexIfDesired(I vertexId,
VertexChanges<I, V, E, M> vertexChanges) {
Vertex<I, V, E, M> vertex = null;
if (hasVertexAdditions(vertexChanges)) {
vertex = vertexChanges.getAddedVertexList().get(0);
}
return vertex;
}
/**
* 图载入阶段,处理添加边的请求.
*
* @param vertexId
* 请求添加的边所在的点的ID
* @param vertex
* 请求点的边所在的点
* @param vertexChanges
* 包含请求添加的边
* @throws IOException
* 去除点本身拥有的重复边,以及请求添加的边中的重复边
*/
protected void addEdges(I vertexId, Vertex<I, V, E, M> vertex,
VertexChanges<I, V, E, M> vertexChanges) throws IOException {
// I. Remove duplicate edges from vertex's edge list.
Set<I> destVertexId = new HashSet<I>();
if (vertex.hasEdges()) {
List<Edge<I, E>> edgeList = vertex.getEdges();
for (Iterator<Edge<I, E>> edges = edgeList.iterator(); edges.hasNext(); ) {
Edge<I, E> edge = edges.next();
if (destVertexId.contains(edge.getDestVertexId())) {
edges.remove();
} else {
destVertexId.add(edge.getDestVertexId());
}
}
}
if (hasEdgeAdditions(vertexChanges)) {
// II. Ignore duplicate edge request
for (Edge<I, E> edge : vertexChanges.getAddedEdgeList()) {
if (destVertexId.contains(edge.getDestVertexId())) {
continue;
}
destVertexId.add(edge.getDestVertexId());
vertex.addEdge(edge.getDestVertexId(), edge.getValue());
}
}
}
/**
* 检查是否存在添加点的请求。
*
* @param changes
* 待检查的点变化的集合
* @return 集合中包含添加点的请求,返回true,否则返回false
*/
protected boolean hasVertexAdditions(VertexChanges<I, V, E, M> changes) {
return changes != null && changes.getAddedVertexList() != null
&& !changes.getAddedVertexList().isEmpty();
}
/**
* 检查是否存在添加边的请求。
*
* @param changes
* 待检查的点变化的集合
* @return 集合中包含添加边的请求,则返回true,否则返回false
*/
protected boolean hasEdgeAdditions(VertexChanges<I, V, E, M> changes) {
return changes != null && changes.getAddedEdgeList() != null
&& !changes.getAddedEdgeList().isEmpty();
}
}