蚁群算法在Python和C++最接近字符串问题中的异常行为

def solve_(self, problem: CSProblem) -> CSSolution:
    """Search for a closest string with an elitist ant-colony heuristic.

    Every iteration each ant samples one character per position from that
    position's pheromone row, the candidate is scored with
    ``utils.problem_metric`` (lower is better), all pheromone evaporates by
    the factor ``1 - RHO``, and only the iteration's best ant deposits
    ``1 - metric / m`` on the characters it chose.

    Reads ``RHO``, ``COLONY_SIZE`` and ``MAX_ITERS`` from ``self.config``.

    Returns a ``CSSolution`` built from the best string seen and its metric.
    If no ant was ever evaluated (empty colony or zero iterations) the
    string is empty and the metric is the initial bound ``m``.
    """
    m, n, alphabet, strings = problem.m, problem.n, problem.alphabet, problem.strings
    A = len(alphabet)
    rho = self.config['RHO']
    colony_size = self.config['COLONY_SIZE']

    global_best_ant = None
    global_best_metric = m

    ants = np.full((colony_size, m), '')
    world_trails = np.full((m, A), 1 / A)

    for iteration in range(self.config['MAX_ITERS']):
        local_best_ant = None
        local_best_metric = m

        for ant_idx in range(colony_size):
            for next_character_index in range(m):
                ants[ant_idx][next_character_index] = random.choices(
                    alphabet, weights=world_trails[next_character_index], k=1)[0]

            ant_metric = utils.problem_metric(ants[ant_idx], strings)

            # Always accept the first ant so there is a best ant even when
            # nobody scores strictly below the initial bound m.
            if local_best_ant is None or ant_metric < local_best_metric:
                local_best_metric = ant_metric
                # Copy the row: ants[ant_idx] is a view that the next
                # iteration overwrites, which would silently corrupt the
                # remembered best ant.
                local_best_ant = ants[ant_idx].copy()

        if local_best_ant is None:
            # Empty colony: nothing was sampled, so there is nothing to reinforce.
            continue

        # First we perform pheromone evaporation
        world_trails *= (1 - rho)

        # Now, using the elitist strategy, only the best ant is allowed to
        # update his pheromone trails
        for x, a in enumerate(local_best_ant):
            y = alphabet.index(a)
            world_trails[x][y] = world_trails[x][y] + (1 - local_best_metric / m)

        if global_best_ant is None or local_best_metric < global_best_metric:
            global_best_metric = local_best_metric
            global_best_ant = local_best_ant

    best_string = '' if global_best_ant is None else ''.join(global_best_ant)
    return CSSolution(best_string, global_best_metric)

#include <algorithm>
#include <chrono>
#include <cstddef>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <numeric>
#include <random>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

/// A closest-string problem instance: an alphabet plus the input strings.
class CSPProblem{
public:
    int m;  ///< Length of a candidate solution (the solver builds strings of m characters).
    int n;  ///< Second header value of the input file; presumably the number of strings -- not used by the solver.
    std::vector<char> alphabet;
    std::vector<std::string> strings;

    CSPProblem(int m, int n, std::vector<char> alphabet, std::vector<std::string> strings)
        : m(m), n(n), alphabet(std::move(alphabet)), strings(std::move(strings)) { }

    /// Loads a problem from a text file.
    ///
    /// Layout: line 0 = alphabet size, line 1 = n, line 2 = m, then one
    /// alphabet symbol per line (only the first character of each line is
    /// used), then the input strings, one per line. Blank lines in the
    /// string section are skipped.
    ///
    /// @throws std::runtime_error if the file cannot be opened or is too short.
    /// @throws std::invalid_argument / std::out_of_range from std::stoi on a malformed header.
    static CSPProblem from_csp(std::string filepath){
        std::ifstream file(filepath);
        if (!file){
            throw std::runtime_error("cannot open problem file: " + filepath);
        }

        std::vector<std::string> input_lines;
        for (std::string line; std::getline(file, line);){
            input_lines.push_back(line);
        }
        if (input_lines.size() < 3){
            throw std::runtime_error("problem file is missing its header: " + filepath);
        }

        const int alphabet_size = std::stoi(input_lines[0]);
        const int n = std::stoi(input_lines[1]);
        const int m = std::stoi(input_lines[2]);
        if (alphabet_size < 0){
            throw std::runtime_error("negative alphabet size in: " + filepath);
        }

        const std::size_t first_string = 3 + static_cast<std::size_t>(alphabet_size);
        if (input_lines.size() < first_string){
            throw std::runtime_error("problem file is missing alphabet lines: " + filepath);
        }

        std::vector<char> alphabet;
        alphabet.reserve(static_cast<std::size_t>(alphabet_size));
        for (std::size_t i = 3; i < first_string; ++i){
            alphabet.push_back(input_lines[i][0]);
        }

        std::vector<std::string> strings;
        for (std::size_t i = first_string; i < input_lines.size(); ++i){
            if (!input_lines[i].empty()){
                strings.push_back(input_lines[i]);
            }
        }
        return CSPProblem(m, n, std::move(alphabet), std::move(strings));
    }

    /// Counts the positions of s1 that differ from s2.
    ///
    /// Positions of s1 beyond the end of s2 count as mismatches (the original
    /// code read out of bounds there); extra characters of s2 are ignored.
    int hamm(const std::string& s1, const std::string& s2) const{
        const std::size_t common = std::min(s1.size(), s2.size());
        int h = static_cast<int>(s1.size() - common);
        for (std::size_t i = 0; i < common; ++i){
            if (s1[i] != s2[i]) ++h;
        }
        return h;
    }

    /// Returns the largest hamm() distance from sol to any input string (0 if there are none).
    int measure(const std::string& sol) const{
        int mm = 0;
        for (const auto& s: strings){
            mm = std::max(mm, hamm(sol, s));
        }
        return mm;
    }

    /// Prints a human-readable dump of the problem.
    friend std::ostream& operator<<(std::ostream& out, const CSPProblem& problem){
        out << "m: " << problem.m << std::endl;
        out << "n: " << problem.n << std::endl;
        out << "alphabet_size: " << problem.alphabet.size() << std::endl;
        out << "alphabet: ";
        for (const auto& a: problem.alphabet){
            out << a << " ";
        }
        out << std::endl;
        out << "strings:" << std::endl;
        for (const auto& s: problem.strings){
            out << "\t" << s << std::endl;
        }
        return out;
    }
};

// One engine for the whole program, seeded once from the system entropy source.
std::random_device rd;
std::mt19937 gen(rd());

/// Samples an index with probability proportional to weights[index].
int get_from_distrib(const std::vector<float>& weights){
    std::discrete_distribution<> d(std::begin(weights), std::end(weights));
    return d(gen);
}

// Solver tunables.
int max_iter = 250;    // number of colony iterations
float rho = 0.1f;      // fraction of pheromone that evaporates per iteration
int colony_size = 10;  // ants sampled per iteration

/// Runs the elitist ant-colony heuristic and returns the best (lowest)
/// CSPProblem::measure found, or problem.m if nothing could be sampled.
int ant_colony_solver(const CSPProblem& problem){
    const int m = problem.m;
    const auto& alphabet = problem.alphabet;
    const int A = static_cast<int>(alphabet.size());

    // Nothing to sample from: report the initial bound instead of indexing
    // into empty containers.
    if (m <= 0 || A == 0 || colony_size <= 0){
        return m;
    }

    const float init_pher = 1.0f / A;
    std::vector<std::vector<float>> world_trails(m, std::vector<float>(A, init_pher));

    int global_best_metric = m;
    std::string ant(m, ' ');  // reused buffer; only the iteration's best ant is kept

    for (int iteration = 0; iteration < max_iter; ++iteration){
        std::string local_best_ant;
        int local_best_metric = m;

        for (int ant_idx = 0; ant_idx < colony_size; ++ant_idx){
            for (int pos = 0; pos < m; ++pos){
                ant[pos] = alphabet[get_from_distrib(world_trails[pos])];
            }
            const int ant_metric = problem.measure(ant);

            // The first ant is always accepted so that a best ant exists even
            // when no ant scores strictly below m.
            if (local_best_ant.empty() || ant_metric < local_best_metric){
                local_best_metric = ant_metric;
                local_best_ant = ant;
            }
        }

        // Evaporation: scale every trail by (1 - rho). This must be a
        // multiplication; adding (1 - rho) instead makes all trails grow and
        // swamps the deposit of the best ant.
        const float keep = 1.0f - rho;
        for (auto& row: world_trails){
            for (auto& trail: row){
                trail *= keep;
            }
        }

        // Elitist update: only the iteration's best ant deposits pheromone.
        const float deposit = 1.0f - static_cast<float>(local_best_metric) / m;
        for (int pos = 0; pos < m; ++pos){
            const auto loc = std::find(alphabet.begin(), alphabet.end(), local_best_ant[pos]);
            world_trails[pos][loc - alphabet.begin()] += deposit;
        }

        global_best_metric = std::min(global_best_metric, local_best_metric);
    }
    return global_best_metric;
}

/// Solves "in.csp" TRIES times and prints the average running time (ms),
/// the average result and every individual result.
int main(){
    try{
        const auto problem = CSPProblem::from_csp("in.csp");
        const int TRIES = 20;
        std::vector<int> times;
        std::vector<int> measures;

        for (int i = 0; i < TRIES; ++i){
            const auto start = std::chrono::high_resolution_clock::now();
            const int m = ant_colony_solver(problem);
            const auto stop = std::chrono::high_resolution_clock::now();
            const int duration = static_cast<int>(
                std::chrono::duration_cast<std::chrono::milliseconds>(stop - start).count());
            times.push_back(duration);
            measures.push_back(m);
        }

        const float average_time = static_cast<float>(std::accumulate(std::begin(times), std::end(times), 0)) / TRIES;
        const float average_measure = static_cast<float>(std::accumulate(std::begin(measures), std::end(measures), 0)) / TRIES;

        std::cout << "Average running time: " << average_time << std::endl;
        std::cout << "Average solution: " << average_measure << std::endl;
        std::cout << "all solutions: ";
        for (const auto& m: measures) std::cout << m << " ";
        std::cout << std::endl;
    } catch (const std::exception& e){
        std::cerr << "error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}

// Shared entropy source and Mersenne Twister engine, created once at start-up.
std::random_device rd;
std::mt19937 gen(rd());

/// Draws an index with probability proportional to weights[index],
/// using the shared engine `gen`.
int get_from_distrib(const std::vector<float>& weights){
    std::discrete_distribution<int> picker(weights.cbegin(), weights.cend());
    const int chosen = picker(gen);
    return chosen;
}

/// Draws an index with probability proportional to weights[index].
///
/// The engine is created and seeded once per thread on first use. Building a
/// std::random_device and re-seeding a std::mt19937 on every call is costly
/// and this function sits on the sampling hot path.
int get_from_distrib(const std::vector<float>& weights){
    static thread_local std::mt19937 gen{std::random_device{}()};
    std::discrete_distribution<> d(std::begin(weights), std::end(weights));
    return d(gen);
}

1条回答

网友

1楼 · 发布于 2024-10-17 00:30:16

除了我在问题编辑中提到的愚蠢错误之外，我似乎终于找到了一种像样的方法来优化Python解决方案。首先，把world_trails和ants保存为numpy数组而不是列表，实际上反而拖慢了速度。此外，我不再保存整个蚂蚁列表，因为每次迭代只需要其中最好的一只。最后，运行cProfile表明很多时间都花在了random.choices上，因此我决定实现一个专门适合这种情况的版本。做法是：在每次迭代结束时，为下一次迭代预先计算每个字符位置的权重总和（保存在trail_row_wise_sums数组中），并使用以下函数进行采样：

def fast_pick(arr, weights, ws):
    """Pick one element of ``arr`` with probability proportional to its weight.

    Args:
        arr: candidate elements.
        weights: one weight per element of ``arr``.
        ws: precomputed sum of ``weights`` (passed in so it is not recomputed
            on every call).

    Returns:
        The chosen element. If the scan runs off the end (``ws`` slightly
        larger than the weights actually add up to, e.g. through
        floating-point rounding) the last element with a positive weight is
        returned. ``0`` is returned only when no element has a positive
        weight.
    """
    r = random.random() * ws
    for i in range(len(arr)):
        if r < weights[i]:
            return arr[i]
        r -= weights[i]

    # Rare fall-through: return a real element rather than the integer 0,
    # which would break callers that join the picks into a string.
    for i in range(len(arr) - 1, -1, -1):
        if weights[i] > 0:
            return arr[i]
    return 0

新版本现在如下所示：

def solve_(self, problem: CSProblem) -> CSSolution:
    """Elitist ant-colony search for a closest string (list-based version).

    Per iteration: every ant samples one character per position with
    ``fast_pick`` from that position's pheromone row, the candidate is scored
    with ``utils.problem_metric``, all pheromone is scaled by ``1 - RHO``,
    and the iteration's best ant deposits ``1 - metric / m`` on the
    characters it chose. Row sums are refreshed at the end of the iteration
    so the next round of sampling can reuse them.

    Reads ``RHO``, ``COLONY_SIZE`` and ``MAX_ITERS`` from ``self.config``.
    Returns a ``CSSolution`` holding the best string found and its metric.
    """
    string_len, _unused_n, symbols, targets = problem.m, problem.n, problem.alphabet, problem.strings
    alphabet_size = len(symbols)
    rho = self.config['RHO']
    colony_size = self.config['COLONY_SIZE']
    miters = self.config['MAX_ITERS']

    overall_best = None
    overall_best_score = string_len
    init_pher = 1.0 / alphabet_size
    pheromone = [[init_pher for _ in range(alphabet_size)] for _ in range(string_len)]
    row_totals = [1.0 for _ in range(string_len)]

    for iteration in tqdm(range(miters)):

        iter_best = None
        iter_best_score = string_len
        for _ in range(colony_size):
            picked = []
            for pos in range(string_len):
                picked.append(fast_pick(symbols, pheromone[pos], row_totals[pos]))
            candidate = ''.join(picked)
            candidate_score = utils.problem_metric(candidate, targets)

            # Ties go to the later ant, so a best ant exists after the first one.
            if candidate_score <= iter_best_score:
                iter_best_score = candidate_score
                iter_best = candidate

        # Evaporation: every trail keeps the fraction (1 - rho).
        for row in pheromone:
            for col in range(alphabet_size):
                row[col] = row[col] * (1 - rho)

        # Elitist deposit: only the iteration's best ant reinforces its choices.
        for pos, symbol in enumerate(iter_best):
            col = symbols.index(symbol)
            pheromone[pos][col] = pheromone[pos][col] + (1 - 1.0*iter_best_score / string_len)

        if iter_best_score < overall_best_score:
            overall_best_score = iter_best_score
            overall_best = iter_best

        # Refresh the per-position weight sums for the next iteration's sampling.
        row_totals = [sum(row) for row in pheromone]
    return CSSolution(overall_best, overall_best_score)

平均运行时间现在下降到800毫秒（之前为5秒）。当然，在C++解决方案中应用同样的fast_pick优化也加快了C++版本（大约150毫秒），但我想现在我可以把这归结为C++本来就比Python快了。

Profiler还显示，计算汉明距离花费了大量时间，但这是意料之中的；除此之外，我认为没有更高效的方法来计算任意字符串之间的汉明距离。

相关问题更多 >

编程相关推荐

热门问题

热门文章