大图存储在邻接表中。

huangapple go评论50阅读模式
英文:

big graph storing in adjacency list

问题

你的项目是创建一个包含100-200万个节点的图,使用邻接列表来表示。输入数据来自文本文件。你需要计算连接组件的数量以及每个组件中顶点的最大/最小度数。你的代码在节点数量较小的情况下可以正常工作,但如果节点太多,它会在addEdge函数处停止运行并耗尽内存。

编辑:在发布之前,我已经更改了输入文件的格式以使其与我的程序匹配,但无论如何它仍然崩溃。

以下是你提供的代码中翻译好的部分:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <stdbool.h>

struct Node {
    struct Node* next;
    int vertex;
};

struct Graph {
    struct Node** adj_list;
    int num_vertices;
};

struct Node* createNode(int v) {
    struct Node *newNode = (struct Node*)malloc(sizeof(struct Node));
    newNode->next = NULL;
    newNode->vertex = v;
    return newNode;
}

void addEdge(struct Graph* graph, FILE* file) {
    printf("\n开始创建边");
    int src,dest;
    struct Node* check,* newNode;

    while (fscanf(file, "%d\t%d", &src, &dest) != EOF) {
        newNode = createNode(dest);
        if (graph->adj_list[src] != NULL) {
            check = graph->adj_list[src];       
            while (check->next) {
                check = check->next;
            }
            check->next = newNode;
            printf("\n结束创建边");
        }
        else {
            graph->adj_list[src] = newNode;
            printf("\n结束创建边");
        }
    }
}
// 以下略...

希望这有助于你解决问题。如果有其他问题或需要进一步帮助,请告诉我。

英文:

My project is to create a graph with 1-2 milions nodes using adjacency list. Input data is from text file. I need to count the connected components and max/min degree of vetices of each components. My code can work fine with small number of nodes, but if the nodes are too big, it can't. It always stop at addEdge fucntion and run out of memory.

Edit: before posting, i have already change the fomat of input file so it match my progaram, but it still crash anyway.
大图存储在邻接表中。

Hope sb can give me some answers !

#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;time.h&gt;
#include &lt;stdbool.h&gt;

struct Node {
    struct Node* next;
    int vertex;
};

struct Graph {
    struct Node** adj_list;
    int num_vertices;
};

struct Node* createNode(int v) {
    struct Node *newNode = (struct Node*)malloc(sizeof(struct Node));
    newNode-&gt;next = NULL;
    newNode-&gt;vertex = v;
    return newNode;
}


void addEdge(struct Graph* graph, FILE* file) {
    printf(&quot;\nSTART create EDGE&quot;);
    int src,dest;
    struct Node* check,* newNode;

    while (fscanf(file, &quot;%d\t%d&quot;, &amp;src, &amp;dest) != EOF) {
        newNode=createNode(dest);
        if (graph-&gt;adj_list[src] !=NULL) {
            check = graph-&gt;adj_list[src];       
            while (check-&gt;next) {
                check = check-&gt;next;
            }
            check-&gt;next= newNode;
            printf(&quot;\nENDING 2 create EDGE&quot;);
        }
        else {
            graph-&gt;adj_list[src] = newNode;
            printf(&quot;\nENDING 1 create EDGE&quot;);
        }
    }
   
    
}

struct Graph* createGraph(int num_vertices) {
    struct Graph* graph = (struct Graph*)malloc(sizeof(struct Graph));
    graph-&gt;adj_list = (struct Node**)malloc(num_vertices * sizeof(struct Node*));
    graph-&gt;num_vertices = num_vertices;
    int i;
    for (i = 0; i &lt; num_vertices; ++i) {
        graph-&gt;adj_list[i] = NULL;
    }

    return graph;
}

void printGraph(struct Graph* graph)
{
    int v;
    for (v = 0; v &lt; graph-&gt;num_vertices; v++) {
        if(graph-&gt;adj_list[v]!=NULL){
            struct Node* pCrawl = graph-&gt;adj_list[v];
            printf(&quot;\n Adjacency list of vertex %d\n head &quot;, v);

            while (pCrawl-&gt;next !=NULL) {
                printf(&quot;-&gt; %d&quot;, pCrawl-&gt;vertex);
                pCrawl = pCrawl-&gt;next;
            }
            printf(&quot;-&gt; %d&quot;, pCrawl-&gt;vertex);
            printf(&quot;\n&quot;);
        }
    }
}


void BFS(struct Graph* graph, int start_vertex, bool* visited, int* num_vertices, int* num_edges, int* max_degree, int* min_degree) {
    int* queue = (int*)malloc(graph-&gt;num_vertices * sizeof(int));
    struct Node* temp=NULL;
    int front = 0, rear = 0;
    int current_vertex;
    
    visited[start_vertex] = true;
    queue[rear++] = start_vertex;
    
    while (front &lt; rear) {
        int degree=0;
        current_vertex = queue[front++];
        ++(*num_vertices);

        temp = graph-&gt;adj_list[current_vertex];
        while (temp) {
            int adj_vertex = temp-&gt;vertex;
            ++(*num_edges);
            if (!visited[adj_vertex]) {
                
                visited[adj_vertex] = true;
                queue[rear++] = adj_vertex;
            }
            ++degree;
            temp = temp-&gt;next;
        }
        *max_degree = (*max_degree &lt;= degree) ? degree : *max_degree;
        *min_degree = (*min_degree &gt;= degree) ? degree : *min_degree;


    }

    free(queue);
}

void findConnectedComponents(struct Graph* graph) {
    bool* visited = (bool*)calloc(graph-&gt;num_vertices, sizeof(bool));
    for (int i = 0; i &lt; graph-&gt;num_vertices; i++) {
        visited[i] = false;
    }
    int num_components = 0;


    int v;
    for (v = 0; v &lt; graph-&gt;num_vertices; v++) {
        if (graph-&gt;adj_list[v]!=NULL &amp;&amp;!visited[v]) {
            num_components++;
            int num_vertices = 0;
            int num_edges = 0;
            int component_max_degree = 0;
            int component_min_degree = graph-&gt;num_vertices;

            BFS(graph, v, visited, &amp;num_vertices, &amp;num_edges, &amp;component_max_degree, &amp;component_min_degree);

            printf(&quot;Connected Component %d:\n&quot;, num_components);
            printf(&quot;Number of vertices: %d\n&quot;, num_vertices);
            printf(&quot;Number of edges: %d\n&quot;, num_edges);
            printf(&quot;Max degree: %d\n&quot;, component_max_degree);
            printf(&quot;Min degree: %d\n&quot;, component_min_degree);

        }
    }

    printf(&quot;Total number of connected components: %d\n&quot;,num_components);
    free(visited);
}

int main() {
    char filename[] = &quot;roadNet-CA.txt&quot;;
    FILE* file = fopen(filename, &quot;r+&quot;);
    if (file == NULL) {
        printf(&quot;Failed to open the file.\n&quot;);
        return -1;
    }
   
    int num_vertices;
    fscanf(file, &quot;%d&quot;, &amp;num_vertices);
    
    struct Graph* graph = createGraph(num_vertices);
    
    addEdge(graph, file);
    
    fclose(file);

    printGraph(graph);

    printf(&quot;Breadth-First Search (BFS): &quot;);
    findConnectedComponents(graph);
    printf(&quot;\n&quot;);


    return 0;
}

Here are my input file <https://snap.stanford.edu/data/roadNet-CA.html>

答案1

得分: 2

问题不是你的程序耗尽了内存,问题是输入文件格式与你的程序期望的不匹配。但程序并不在意,尝试读取它,因此导致段错误。

我可以发表关于为什么清理输入以防止此类恶性错误很重要的演讲,但你可能只是想要一个快速解决方法,所以在这里给你。

用输入文件中的前4行替换为节点数量的保守估计

之前:

# 有向图(每个无序节点对只保存一次):roadNet-CA.txt
# 加利福尼亚州道路网络
# 节点:1965206 边:5533214
# FromNodeId	ToNodeId

之后:

2000000
英文:

The problem is not that your program runs out of memory. The problem is that the input file format does not match what your program is expecting. But the program doesn't care and tries to read it in anyways, and so it segfaults.

I could give a whole speech about why it's important to sanitize your inputs to prevent nasty bugs like this but you're probably just looking for a quick fix so here you go.

Replace the first 4 lines in your input file with a conservative estimate of the number of nodes

Before:

# Directed graph (each unordered pair of nodes is saved once): roadNet-CA.txt 
# California road network
# Nodes: 1965206 Edges: 5533214
# FromNodeId	ToNodeId

After:

2000000

huangapple
  • 本文由 发表于 2023年5月14日 03:35:30
  • 转载请务必保留本文链接:https://go.coder-hub.com/76244562.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定