并查集-校验图是否有回路

并查集(Disjoint Set)是一种树型的数据结构,用于处理一些不交集(Disjoint Sets)的合并及查询问题。有一个联合-查找算法(union-find algorithm)定义了两个用于此数据结构的操作

1.Find 确定元素属于哪一个子集。它可以被用来确定两个元素是否属于同一子集

2.Union 将两个子集合并成同一个集合


假设我们按照边的序号开始遍历,子集为C

0. 初始   C = 1 2 3 4 5 6 7 8 = 8
1. 1-2    C = 12 3 4 5 6 7 8 = 7
2. 2-5    C = 125 3 4 6 7 8 = 6
3. 5-6    C = 1256 3 4 7 8 = 5
4. 6-8    C = 12568 3 4 7 = 4
5. 3-4    C = 12568 34 7 = 3
6. 3-7    C = 12568 347 = 2
7. 7-6    C = 12345678 = 1
8. 4-2    此时2和4已经在同一个子集里,说明组成了环路

下面是未经过优化的例子

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 

// 图的边
struct Edge 
{
    int src, dest; 
}; 

// 图
struct Graph 
{ 
    int V;  //顶点数
    int E;  //边数
    struct Edge* edge;
};

// Creates a graph with V vertices and E edges 
struct Graph* createGraph(int V, int E) 
{ 
    struct Graph* graph = (struct Graph*) malloc( sizeof(struct Graph) ); 
    graph->V = V; 
    graph->E = E; 

    graph->edge = (struct Edge*) malloc( graph->E * sizeof( struct Edge ) ); 

    return graph; 
}

int find(int set[], int i) 
{ 
    if (set[i] == 0) 
        return i; 
    return find(set, set[i]); 
} 

void Union(int set[], int x, int y) 
{ 
    int xset = find(set, x); 
    int yset = find(set, y); 
    if(xset!=yset) {
        set[xset] = yset;
        printf("set[%d] = %d\n", xset, yset);
    }
} 


int isCycle( struct Graph* graph ) 
{ 
    int *set = (int*) malloc( graph->V * sizeof(int) ); 

    
    memset(set, 0, sizeof(int) * graph->V);

    //遍历所有的边
    for(int i = 1; i < graph->E; ++i) 
    { 
        int x = find(set, graph->edge[i].src); 
        int y = find(set, graph->edge[i].dest); 

        if (x == y) 
            return 1; 

        Union(set, x, y); 
    } 
    return 0; 
} 

// Driver program to test above functions 
int main() 
{ 
    int V = 9, E = 9;
    struct Graph* graph = createGraph(V, E); 

    graph->edge[1].src = 1; 
    graph->edge[1].dest = 2; 

    graph->edge[2].src = 2; 
    graph->edge[2].dest = 5;

    graph->edge[3].src = 5; 
    graph->edge[3].dest = 6;

    graph->edge[4].src = 6; 
    graph->edge[4].dest = 8;

    graph->edge[5].src = 3; 
    graph->edge[5].dest = 4;

    graph->edge[6].src = 3; 
    graph->edge[6].dest = 7;

    graph->edge[7].src = 7; 
    graph->edge[7].dest = 6;

    graph->edge[8].src = 4; 
    graph->edge[8].dest = 2;

    if (isCycle(graph)) 
        printf( "graph contains cycle\n" ); 
    else
        printf( "graph doesn't contain cycle\n" ); 

    return 0; 
}
[root@izbp1irxwqt7ei21awv6wvz ccc]# ./a.out 
set[1] = 2
set[2] = 5
set[5] = 6
set[6] = 8
set[3] = 4
set[4] = 7
set[7] = 8
graph contains cycle

最后的树为


按照上面的方法,创建的树可能严重不平衡,这里引入两种优化方法

1.按秩合并(union by rank),即总是将更小的树连接至更大的树上

2.路径压缩(Path Compression),在执行“查找”时扁平化树结构的方法。关键在于在路径上的每个节点都可以直接连接到根上


下面是优化后的例子

// A union by rank and path compression based program to detect cycle in a graph 
#include <stdio.h> 
#include <stdlib.h> 

// 图的边
struct Edge 
{
    int src, dest; 
}; 

// 图
struct Graph 
{ 
    int V;  //顶点数
    int E;  //边数
    struct Edge* edge;
};

struct subset 
{ 
    int parent; 
    int rank; 
}; 

struct Graph* createGraph(int V, int E) 
{ 
    struct Graph* graph = (struct Graph*) malloc( sizeof(struct Graph) ); 
    graph->V = V; 
    graph->E = E; 

    graph->edge = (struct Edge*) malloc( graph->E * sizeof( struct Edge ) ); 

    return graph; 
} 

//查找子集的父节点
int find(struct subset subsets[], int i) 
{ 
    // 超找,顺带路径压缩
    if (subsets[i].parent != i) 
        subsets[i].parent = find(subsets, subsets[i].parent); 

    return subsets[i].parent; 
}

void Union(struct subset subsets[], int x, int y) 
{ 
    int xroot = find(subsets, x); 
    int yroot = find(subsets, y); 

    // 秩大的作为父节点
    if (subsets[xroot].rank < subsets[yroot].rank) {
        subsets[xroot].parent = yroot;
        printf("%d->%d\n", yroot, xroot);
    }
    else if (subsets[xroot].rank > subsets[yroot].rank) {
        subsets[yroot].parent = xroot;
        printf("%d->%d\n", xroot, yroot);
    }

    // 如果秩相同,那么将xroot作为父并秩加一
    else
    {
        subsets[yroot].parent = xroot; 
        subsets[xroot].rank++;

        printf("%d->%d\n", xroot, yroot);
    } 
} 

// The main function to check whether a given graph contains cycle or not 
int isCycle( struct Graph* graph ) 
{ 
    int V = graph->V; 
    int E = graph->E; 

    // Allocate memory for creating V sets 
    struct subset *subsets = (struct subset*) malloc( V * sizeof(struct subset) ); 

    for (int v = 1; v < V; ++v) 
    { 
        subsets[v].parent = v; 
        subsets[v].rank = 0;
    } 

    for(int e = 1; e < E; ++e) 
    { 
        int x = find(subsets, graph->edge[e].src); 
        int y = find(subsets, graph->edge[e].dest); 

        if (x == y) 
            return 1;

        Union(subsets, x, y); 
    } 
    return 0; 
} 

int main() 
{ 
    int V = 9, E = 9;
    struct Graph* graph = createGraph(V, E); 

    graph->edge[1].src = 1; 
    graph->edge[1].dest = 2; 

    graph->edge[2].src = 2; 
    graph->edge[2].dest = 5;

    graph->edge[3].src = 5; 
    graph->edge[3].dest = 6;

    graph->edge[4].src = 6; 
    graph->edge[4].dest = 8;

    graph->edge[5].src = 3; 
    graph->edge[5].dest = 4;

    graph->edge[6].src = 3; 
    graph->edge[6].dest = 7;

    graph->edge[7].src = 7; 
    graph->edge[7].dest = 6;

    graph->edge[8].src = 4; 
    graph->edge[8].dest = 2;

    if (isCycle(graph)) 
        printf( "graph contains cycle\n" ); 
    else
        printf( "graph doesn't contain cycle\n" ); 

    return 0; 
}
[root@izbp1irxwqt7ei21awv6wvz ccc]# ./a.out 
1->2
1->5
1->6
1->8
3->4
3->7
3->1
graph contains cycle


最后生成的经过压缩后的树如下图




参考:

geeksforgeeks-union-by-rank

维基-并查集

上一篇: 图算法
下一篇: 拓扑排序
作者邮箱: 203328517@qq.com