<p>您可以将数据融合,然后将其分组以获得每个双计数,然后再次分组以获得总和。注意<code>.transform()</code>对数据进行分组,然后返回与原始数据帧长度相同的结果</p>
<p><strong>样本数据:</strong></p>
<pre><code>from numpy import nan
import pandas as pd
d = {'job1_category': {0: nan, 1: 4.0, 2: 5.0, 3: 5.0, 4: 4.0, 5: 4.0, 6: 5.0, 7: 4.0, 8: 4.0, 9: 4.0, 10: 4.0, 11: 4.0, 12: 4.0, 13: 4.0, 14: nan, 15: 4.0, 16: 3.0, 17: 7.0, 18: 4.0, 19: 4.0, 20: nan, 21: nan, 22: 4.0, 23: 1.0, 24: 4.0, 25: 1.0, 26: 4.0, 27: 2.0, 28: 5.0, 29: 3.0, 30: 4.0, 31: 5.0, 32: 4.0, 33: 4.0, 34: 4.0, 35: 4.0, 36: 3.0, 37: 4.0, 38: 4.0, 39: 4.0, 40: 9.0, 41: 4.0, 42: 4.0, 43: 3.0, 44: 4.0, 45: 9.0, 46: 10.0, 47: nan, 48: 10.0, 49: 4.0, 50: 8.0, 51: nan, 52: 5.0, 53: 8.0, 54: 4.0, 55: nan, 56: 4.0, 57: 8.0, 58: 4.0, 59: 4.0, 60: 4.0, 61: 4.0, 62: 8.0, 63: 4.0, 64: 8.0, 65: 7.0, 66: 4.0, 67: 7.0, 68: 8.0, 69: 7.0, 70: nan, 71: 7.0, 72: nan, 73: 10.0, 74: 7.0, 75: 6.0, 76: 7.0, 77: 4.0, 78: 7.0, 79: 7.0, 80: 7.0, 81: 7.0, 82: 4.0, 83: nan, 84: 4.0, 85: nan, 86: 4.0, 87: 9.0, 88: 4.0, 89: 4.0, 90: 4.0, 91: 4.0, 92: 4.0, 93: 7.0, 94: 2.0, 95: 4.0, 96: 4.0, 97: nan, 98: 1.0, 99: 9.0}, 'job2_category': {0: 6.0, 1: 5.0, 2: 4.0, 3: 5.0, 4: 6.0, 5: 4.0, 6: 5.0, 7: 5.0, 8: 2.0, 9: 4.0, 10: 4.0, 11: nan, 12: 5.0, 13: 4.0, 14: nan, 15: 1.0, 16: 4.0, 17: 4.0, 18: 4.0, 19: 4.0, 20: 4.0, 21: 3.0, 22: 4.0, 23: 4.0, 24: 4.0, 25: 4.0, 26: 8.0, 27: 6.0, 28: 6.0, 29: 3.0, 30: 4.0, 31: 5.0, 32: 7.0, 33: 4.0, 34: 4.0, 35: 4.0, 36: 4.0, 37: 4.0, 38: 4.0, 39: 4.0, 40: 4.0, 41: 5.0, 42: 5.0, 43: 4.0, 44: 4.0, 45: 1.0, 46: 10.0, 47: 10.0, 48: 1.0, 49: 4.0, 50: 4.0, 51: nan, 52: 7.0, 53: 4.0, 54: 4.0, 55: 9.0, 56: 9.0, 57: 4.0, 58: 3.0, 59: 4.0, 60: 4.0, 61: nan, 62: 7.0, 63: nan, 64: 7.0, 65: 7.0, 66: 7.0, 67: 7.0, 68: 8.0, 69: 7.0, 70: 7.0, 71: 7.0, 72: nan, 73: 10.0, 74: 8.0, 75: 7.0, 76: 8.0, 77: 5.0, 78: nan, 79: 7.0, 80: nan, 81: 4.0, 82: 7.0, 83: nan, 84: 4.0, 85: nan, 86: 4.0, 87: 4.0, 88: 4.0, 89: 4.0, 90: 10.0, 91: 5.0, 92: nan, 93: 7.0, 94: 4.0, 95: 4.0, 96: 4.0, 97: nan, 98: 1.0, 99: 9.0}, 'job3_category': {0: 5.0, 1: 5.0, 2: 4.0, 3: nan, 4: 10.0, 5: nan, 6: nan, 7: nan, 8: nan, 9: nan, 10: 4.0, 11: 4.0, 12: 5.0, 13: nan, 14: 4.0, 15: 1.0, 16: 4.0, 17: 4.0, 18: 4.0, 19: 4.0, 20: nan, 21: nan, 22: nan, 23: nan, 24: nan, 25: 1.0, 26: 4.0, 27: nan, 28: nan, 29: 3.0, 30: 4.0, 31: 3.0, 32: 4.0, 33: nan, 34: nan, 35: nan, 36: 4.0, 37: nan, 38: nan, 39: 4.0, 40: 7.0, 41: 5.0, 42: nan, 43: 4.0, 44: 4.0, 45: 1.0, 46: 7.0, 47: 1.0, 48: 7.0, 49: 7.0, 50: 4.0, 51: 8.0, 52: nan, 53: 4.0, 54: 7.0, 55: nan, 56: 8.0, 57: nan, 58: nan, 59: 3.0, 60: nan, 61: nan, 62: nan, 63: nan, 64: nan, 65: 7.0, 66: 7.0, 67: 7.0, 68: 7.0, 69: 8.0, 70: 7.0, 71: 4.0, 72: 8.0, 73: nan, 74: 3.0, 75: 10.0, 76: 4.0, 77: 8.0, 78: 8.0, 79: nan, 80: nan, 81: 4.0, 82: 7.0, 83: 4.0, 84: nan, 85: 8.0, 86: 4.0, 87: 4.0, 88: 4.0, 89: nan, 90: 5.0, 91: 7.0, 92: 4.0, 93: nan, 94: 1.0, 95: nan, 96: nan, 97: 10.0, 98: 1.0, 99: 1.0}, 'job4_category': {0: nan, 1: nan, 2: nan, 3: nan, 4: nan, 5: nan, 6: nan, 7: nan, 8: nan, 9: nan, 10: 4.0, 11: nan, 12: nan, 13: nan, 14: 4.0, 15: 8.0, 16: nan, 17: 4.0, 18: 3.0, 19: nan, 20: nan, 21: nan, 22: nan, 23: nan, 24: nan, 25: nan, 26: nan, 27: nan, 28: nan, 29: nan, 30: nan, 31: nan, 32: 4.0, 33: nan, 34: nan, 35: nan, 36: 4.0, 37: nan, 38: nan, 39: nan, 40: 4.0, 41: nan, 42: nan, 43: nan, 44: nan, 45: nan, 46: nan, 47: 7.0, 48: nan, 49: nan, 50: 5.0, 51: 8.0, 52: nan, 53: 4.0, 54: nan, 55: nan, 56: 8.0, 57: nan, 58: nan, 59: nan, 60: nan, 61: nan, 62: nan, 63: nan, 64: nan, 65: 8.0, 66: 7.0, 67: nan, 68: nan, 69: nan, 70: 8.0, 71: 5.0, 72: nan, 73: nan, 74: nan, 75: 10.0, 76: nan, 77: 4.0, 78: nan, 79: nan, 80: nan, 81: nan, 82: nan, 83: nan, 84: nan, 85: nan, 86: 10.0, 87: nan, 88: 8.0, 89: nan, 90: nan, 91: nan, 92: nan, 93: nan, 94: 4.0, 95: nan, 96: nan, 97: nan, 98: nan, 99: 5.0}}
df = pd.DataFrame(d)
</code></pre>
<p><strong>代码:</strong></p>
<pre><code>df_melted = df.melt()
df_grouped = df_melted.groupby(['variable', 'value'])['value'].count().to_frame()
df_grouped['pct'] = df_grouped['value'] / df_grouped.groupby(['variable'])['value'].transform('count')
df_grouped['pct']
</code></pre>
<p><strong>输出:</strong></p>
<pre><code>variable value
job1_category 1.0 0.300000
2.0 0.200000
3.0 0.400000
4.0 4.700000
5.0 0.600000
6.0 0.100000
7.0 1.200000
8.0 0.600000
9.0 0.400000
10.0 0.300000
job2_category 1.0 0.400000
2.0 0.100000
3.0 0.300000
4.0 4.100000
5.0 1.000000
6.0 0.400000
7.0 1.400000
8.0 0.400000
9.0 0.300000
10.0 0.400000
job3_category 1.0 1.000000
3.0 0.571429
4.0 3.571429
5.0 0.714286
7.0 1.714286
8.0 1.000000
10.0 0.428571
job4_category 3.0 0.166667
4.0 1.500000
5.0 0.500000
7.0 0.333333
8.0 1.000000
10.0 0.333333
Name: pct, dtype: float64
</code></pre>