与Python相比，具有动态分配的结构数组在C中运行非常慢

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Initial number of slots in the edge array and in the name table. */
enum { MAXL = 200 };

/* Number of extra edge slots added each time the edge array fills up. */
enum { EDGE_GROWTH = 40 };

/* One input line: the 1-based ids of the two names that appear on it. */
typedef struct {
    unsigned int first;
    unsigned int second;
} edge;

/*
 * Table of distinct names. Despite the type name, nothing is hashed:
 * the strings live in a growable array that is searched linearly.
 */
typedef struct {
    unsigned int hashed;   /* number of names currently stored in map */
    unsigned int capacity; /* number of slots allocated for map */
    char **map;            /* heap-allocated copies of the names */
} hash;

/*
 * Return the 1-based id of entry, storing a private copy of it first if
 * it has not been seen before.
 *
 * Every call scans the stored names from the start, so one lookup costs
 * time proportional to the number of names already stored.
 *
 * Returns 0 if memory for the new name could not be allocated; 0 is
 * never a valid id.
 */
int insertInMap(hash *map, const char *entry)
{
    for (unsigned int i = 0; i < map->hashed; i++) {
        if (strcmp(map->map[i], entry) == 0) {
            return (int)(i + 1);
        }
    }

    /* Grow the slot array before it overflows. */
    if (map->hashed == map->capacity) {
        unsigned int new_capacity = map->capacity ? map->capacity * 2 : MAXL;
        char **grown = realloc(map->map, new_capacity * sizeof *grown);
        if (!grown) {
            return 0; /* map->map is still valid and unchanged */
        }
        map->map = grown;
        map->capacity = new_capacity;
    }

    /* Copy the name: the caller reuses its buffer for the next token. */
    size_t len = strlen(entry) + 1;
    char *copy = malloc(len);
    if (!copy) {
        return 0;
    }
    memcpy(copy, entry, len);

    map->map[map->hashed++] = copy;
    return (int)map->hashed;
}

/*
 * Read whitespace-separated name pairs from ./test.txt, replace every
 * name by its 1-based id (assigned in order of first appearance) and
 * print one "first -- second" line per pair.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the file cannot be opened or
 * memory runs out. On memory exhaustion the pairs read so far are still
 * printed.
 */
int main(void)
{
    char node1[30];
    char node2[30];
    size_t count = 0;     /* number of edges stored */
    size_t maxl = MAXL;   /* number of edge slots allocated */
    hash map = { 0, 0, NULL };
    int status = EXIT_SUCCESS;

    FILE *fp = fopen("./test.txt", "r");
    if (!fp) {
        perror("./test.txt");
        return EXIT_FAILURE;
    }

    edge *edges = malloc(maxl * sizeof *edges);
    if (!edges) {
        fprintf(stderr, "error: malloc - virtual memory exhausted.\n");
        fclose(fp);
        return EXIT_FAILURE;
    }

    /* The field width keeps each token inside the 30-byte buffers. */
    while (fscanf(fp, "%29s %29s", node1, node2) == 2) {
        if (count == maxl) {
            /* Edge array is full: extend it before storing this pair. */
            edge *tmp = realloc(edges, (maxl + EDGE_GROWTH) * sizeof *edges);
            if (!tmp) {
                fprintf(stderr, "error: realloc - virtual memory exhausted.\n");
                status = EXIT_FAILURE;
                break; /* on failure, continue with the existing data */
            }
            edges = tmp;
            maxl += EDGE_GROWTH;
        }

        int first = insertInMap(&map, node1);
        int second = insertInMap(&map, node2);
        if (first == 0 || second == 0) {
            fprintf(stderr, "error: name table - virtual memory exhausted.\n");
            status = EXIT_FAILURE;
            break;
        }

        edges[count].first = (unsigned int)first;
        edges[count].second = (unsigned int)second;
        count++;
    }
    fclose(fp);

    for (size_t i = 0; i < count; i++) {
        printf("%u -- %u\n", edges[i].first, edges[i].second);
    }

    for (unsigned int i = 0; i < map.hashed; i++) {
        free(map.map[i]);
    }
    free(map.map);
    free(edges);
    return status;
}

import fileinput

# Maps each distinct token to a 1-based id, assigned in order of first
# appearance.
cui2int = {}


def _id_of(cui):
    """Return the id of ``cui``, assigning the next free id on first sight."""
    if cui not in cui2int:
        # Ids are handed out consecutively from 1, so the next free id is
        # always one more than the number of tokens seen so far.
        cui2int[cui] = len(cui2int) + 1
    return cui2int[cui]


# Each input line must hold exactly two whitespace-separated tokens.
for line in fileinput.input():
    cui1, cui2 = line.split()
    print(_id_of(cui1), _id_of(cui2))

#include <stdio.h> #include <stdlib.h> #include <glib.h> #include <stdint.h> int main() { GHashTable *table; table = g_hash_table_new(g_int_hash, g_int_equal); FILE *fp = NULL; char node1[30]; char node2[30]; fp = fopen("./test.txt", "r"); int i = 0; while (fscanf(fp, "%s %s", &node1, &node2) == 2) { char *key1 = malloc(sizeof(char)*1024); char *key2 = malloc(sizeof(char)*1024); uint32_t* value = (uint32_t *)malloc(sizeof(uint32_t)); key1 = g_strdup(node1); key2 = g_strdup(node2); *value = i; uint32_t *x; if (g_hash_table_contains(table, key1)) { x = (uint32_t *)g_hash_table_lookup(table, key1); } else { i++; g_hash_table_insert(table, (gpointer)key1, (gpointer)value); x = (uint32_t *)value; } uint32_t *y; if (g_hash_table_contains(table, key2)) { y = (uint32_t *)g_hash_table_lookup(table, key2); } else { g_hash_table_insert(table, (gpointer)key2, (gpointer)value); y = (uint32_t *)value; } printf("%d -- %d\n", *x, *y); } fclose(fp); g_hash_table_destroy(table); table = NULL; return 0; }

3条回答

网友

1楼 · 编辑于 2024-09-28 03:19:39

这两个程序使用的数据结构完全不同，时间复杂度也不同。Python 程序使用的是字典，它是一个经过高度调优的哈希表，查找和删除的摊还复杂度为 O（1）。

所以 Python 程序的渐进复杂度是 O（字数）。

现在，谈到您的 C 程序，您创建的基本上只是一个键值对数组。向其中插入一个元素需要 O（数组大小）的时间，因为可能要一直遍历到数组末尾才能确定是否存在匹配项。

如果你做一些数学运算，结果是 O（（字数）²）。

C++具有内置的哈希表实现，名为unordered_map，如果您没有问题切换到C++，则可以使用它。或者看看这个问题，学习用C写你自己的哈希表。What is a hash table and how do you make it in C?

网友
2楼 · 编辑于 2024-09-28 03:19:39

你的代码的问题是，尽管它的名字，它不是一个工作的哈希表。你用一种非常慢的线性搜索来浏览地图。你应该做的是：
将哈希表大小设置为固定大小。避免任何基于重新分配的解决方案。
想出一个哈希函数来确定表索引。网上应该有很多针对字符串的代码示例。
实现存储/检查索引的方法。这可以存储在下一个可用的表索引中，或者通过实现“链接”，其中每个索引都是一个链接列表，等等

网友
3楼 · 编辑于 2024-09-28 03:19:39

您 C 代码中的“hash”用起来更像一个链表，插入和查找都是线性时间。另一方面，Python 的字典是工业级的实现，平均插入和查找（即 in 运算符）都是 O（1）。如果您打算用 C 从头编写哈希表，那么要在性能上接近 Python 的实现，需要把大量理论付诸实践。

在我看来，最好的办法是在可能的情况下用 C++ 编写代码，使用 std::unordered_map。这是两全其美的：所有的工作都已经为您完成了，而且您不需要在性能上做出妥协。
如果您决定（或只能）使用 C，互联网上有相当多的资源，但我不太愿意在这里贴任何链接，因为我无法保证它们的质量。不过这会是一次很有教育意义的尝试。

相关问题更多 >

编程相关推荐

热门问题

热门文章