熵计算的numpy数组划分

2024-09-29 23:29:53 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试创建一个shannon_熵函数,它同时适用于python2和python3。但是,下面的代码在python3中工作,用于计算norm_counts的语句在python2中返回等于0的narray,在python3中返回正确。你知道吗

我将代码分解并简化如下:

import unittest   

import numpy as np

def shannon_ent(labels, base=256):

    value, counts = np.unique(labels, return_counts=True)    
    sum_counts = counts.sum()

    norm_counts = counts / sum_counts

    print(norm_counts)

    base = e if base is None else base

    logged_counts = np.log(norm_counts)
    logged_base = np.log(base)
    logged = logged_counts/logged_base
    final = -(norm_counts * logged)

    return final.sum()


class function_tests(unittest.TestCase):

    def test_shannon_ent(self):

        chunk = [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126]
        ent = shannon_ent(chunk)
        print('*** is: {}'.format(ent))

        self.assertEqual(ent, 0.8212319510413685)

if __name__ == '__main__':
    unittest.main()

给出以下输出:

Python2

# python unittest_binGraph.py 
(array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1]), 95)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
E
======================================================================
ERROR: test_shannon_ent (__main__.function_tests)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "unittest_binGraph.py", line 39, in test_shannon_ent
    ent = shannon_ent(chunk)
  File "unittest_binGraph.py", line 22, in shannon_ent
    logged_counts = np.log(norm_counts)
FloatingPointError: divide by zero encountered in log

----------------------------------------------------------------------
Ran 1 test in 0.007s

FAILED (errors=1)

Python3

# python unittest_binGraph.py 
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] 95
[0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632
 0.01052632 0.01052632 0.01052632 0.01052632 0.01052632]
*** is: 0.8212319510413685
.
----------------------------------------------------------------------
Ran 1 test in 0.007s

OK

除非有人有更好的方法来计算熵?!我目前正在代码中使用scripy和statistics模块。你知道吗


Tags: inpytestlognormbasenpunittest

热门问题