Jensen-Shannon 散度（延森-香农散度）

from numpy import zeros, array
from math import sqrt, log


class JSD(object):
    """Similarity score between two discrete distributions based on the
    Jensen-Shannon divergence.

    Natural logarithms are used throughout.
    """

    def __init__(self):
        # Cache ln(2); it is used to scale the divergence in the final score.
        self.log2 = log(2)

    def KL_divergence(self, p, q):
        """Compute the Kullback-Leibler divergence K(p || q).

        Args:
            p: Sequence of probabilities.
            q: Sequence of probabilities, paired element-wise with ``p``.

        Returns:
            The sum of ``p_i * log(p_i / q_i)`` over all entries where
            ``p_i`` is non-zero (entries with ``p_i == 0`` contribute 0).
        """
        # A single ``!= 0`` test covers both 0 and 0.0.
        return sum(p_i * log(p_i / q_i) for p_i, q_i in zip(p, q) if p_i != 0)

    def Jensen_Shannon_divergence(self, p, q):
        """Return a score derived from the Jensen-Shannon divergence.

        The raw divergence ``0.5 * K(p || m) + 0.5 * K(q || m)``, with
        ``m`` the element-wise average of ``p`` and ``q``, is stored on
        ``self.JSD``.  The value returned is NOT the raw divergence but
        ``1 - self.JSD / sqrt(2 * ln 2)``.

        Args:
            p: Sequence of probabilities.
            q: Sequence of probabilities of the same length as ``p``.

        Returns:
            ``1 - JSD / sqrt(2 * ln 2)`` as a float.
        """
        weight = 0.5
        p = array(p, dtype=float)
        q = array(q, dtype=float)

        # Mixture of the two distributions, computed element-wise.
        average = weight * p + (1 - weight) * q

        self.JSD = (weight * self.KL_divergence(p, average)
                    + (1 - weight) * self.KL_divergence(q, average))
        return 1 - (self.JSD / sqrt(2 * self.log2))


if __name__ == '__main__':
    J = JSD()
    p = [1.0 / 10, 9.0 / 10, 0]
    q = [0, 1.0 / 10, 9.0 / 10]
    # print() with a single argument behaves the same on Python 2 and 3.
    print(J.Jensen_Shannon_divergence(p, q))

3条回答

网友

1楼 · 编辑于 2024-05-20 15:45:39

适用于 n 个概率分布的通用 Python 版本

import numpy as np
from scipy.stats import entropy as H


def JSD(prob_distributions, weights, logbase=2):
    """Return the generalized Jensen-Shannon divergence of n distributions.

    The divergence is computed as
    ``H(sum_i w_i * P_i) - sum_i w_i * H(P_i)``, where ``H`` is the
    Shannon entropy in base ``logbase``.

    Args:
        prob_distributions: Array-like of shape ``(n, k)``; row ``i`` is the
            distribution ``P_i`` over ``k`` outcomes.
        weights: Array-like of ``n`` weights, one per distribution.
        logbase: Logarithm base passed to the entropy computation.

    Returns:
        The divergence as a float.
    """
    prob_distributions = np.asarray(prob_distributions, dtype=float)
    weights = np.asarray(weights, dtype=float)

    # Left term: entropy of the mixture.  Each weight must scale a whole
    # distribution (a row), so contract over the distribution axis; a plain
    # ``weights * prob_distributions`` would broadcast along the outcome
    # axis instead and fail (or silently mis-weight) unless n == k.
    mixture = np.dot(weights, prob_distributions)
    entropy_of_mixture = H(mixture, base=logbase)

    # Right term: weighted sum of the individual entropies.
    entropies = np.array([H(P_i, base=logbase) for P_i in prob_distributions])
    sum_of_entropies = np.dot(weights, entropies)

    return entropy_of_mixture - sum_of_entropies

# Worked example: three distributions over three outcomes, equally weighted.
P_1 = np.array([1/2, 1/2, 0])
P_2 = np.array([0, 1/10, 9/10])
P_3 = np.array([1/3, 1/3, 1/3])

# Stack the distributions row-wise and give each one the same weight 1/n.
prob_distributions = np.vstack((P_1, P_2, P_3))
n = prob_distributions.shape[0]
weights = np.full(n, 1/n, dtype=float)

print(JSD(prob_distributions, weights))
# Output: 0.546621319446

网友

2楼 · 编辑于 2024-05-20 15:45:39

注意：下面代码中对 scipy 的 entropy 函数传入两个参数时，计算的是 Kullback-Leibler 散度。

见：http://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence

#!/usr/bin/env python
from scipy.stats import entropy
from numpy.linalg import norm
import numpy as np

def JSD(P, Q):
    """Return the Jensen-Shannon divergence between ``P`` and ``Q``.

    Both inputs are first scaled by their L1 norm.  The result is the
    average of the two Kullback-Leibler divergences from each scaled
    input to their midpoint, using ``entropy``'s default logarithm base.
    """
    p_scaled = P / norm(P, ord=1)
    q_scaled = Q / norm(Q, ord=1)

    # Midpoint (equal-weight mixture) of the two scaled inputs.
    midpoint = 0.5 * (p_scaled + q_scaled)

    # With two arguments, ``entropy`` computes the KL divergence.
    kl_p = entropy(p_scaled, midpoint)
    kl_q = entropy(q_scaled, midpoint)
    return 0.5 * (kl_p + kl_q)

还要注意问题中的测试用例看起来是错误的？？p分布的和不等于1.0。

见：http://www.itl.nist.gov/div898/handbook/eda/section3/eda361.htm

网友

3楼 · 编辑于 2024-05-20 15:45:39

找一些散度已知的分布数据，并将你的计算结果与这些已知值进行比较。

顺便说一下：KL_divergence 中的求和可以用内置函数 zip 重写，如下所示：

# Sum _p * log(_p / _q) over the paired entries of p and q, skipping
# every pair whose _p is zero.
sum(_p * log(_p / _q) for _p, _q in zip(p, q) if _p != 0)

这样可以消除很多“噪音”，而且更符合 Python 的惯用写法（更“Pythonic”）。不需要同时与 0.0 和 0 进行两次比较。

相关问题更多 >

编程相关推荐

热门问题

热门文章