回答此问题可获得 20 贡献值,回答如果被采纳可获得 50 分。
<p>我正在努力计算1-10的值相对于总值(百分比)在一列中出现的频率。获取4个不同列中的值1-10的百分比。我尝试使用以下代码,但这只适用于第1列和第2列,并为第3列和第4列提供NAN值。有人知道为什么吗?谢谢大家!</p>
<pre><code>{'job1_category': {0: nan, 1: 4.0, 2: 5.0, 3: 5.0, 4: 4.0, 5: 4.0, 6: 5.0, 7: 4.0, 8: 4.0, 9: 4.0, 10: 4.0, 11: 4.0, 12: 4.0, 13: 4.0, 14: nan, 15: 4.0, 16: 3.0, 17: 7.0, 18: 4.0, 19: 4.0, 20: nan, 21: nan, 22: 4.0, 23: 1.0, 24: 4.0, 25: 1.0, 26: 4.0, 27: 2.0, 28: 5.0, 29: 3.0, 30: 4.0, 31: 5.0, 32: 4.0, 33: 4.0, 34: 4.0, 35: 4.0, 36: 3.0, 37: 4.0, 38: 4.0, 39: 4.0, 40: 9.0, 41: 4.0, 42: 4.0, 43: 3.0, 44: 4.0, 45: 9.0, 46: 10.0, 47: nan, 48: 10.0, 49: 4.0, 50: 8.0, 51: nan, 52: 5.0, 53: 8.0, 54: 4.0, 55: nan, 56: 4.0, 57: 8.0, 58: 4.0, 59: 4.0, 60: 4.0, 61: 4.0, 62: 8.0, 63: 4.0, 64: 8.0, 65: 7.0, 66: 4.0, 67: 7.0, 68: 8.0, 69: 7.0, 70: nan, 71: 7.0, 72: nan, 73: 10.0, 74: 7.0, 75: 6.0, 76: 7.0, 77: 4.0, 78: 7.0, 79: 7.0, 80: 7.0, 81: 7.0, 82: 4.0, 83: nan, 84: 4.0, 85: nan, 86: 4.0, 87: 9.0, 88: 4.0, 89: 4.0, 90: 4.0, 91: 4.0, 92: 4.0, 93: 7.0, 94: 2.0, 95: 4.0, 96: 4.0, 97: nan, 98: 1.0, 99: 9.0}, 'job2_category': {0: 6.0, 1: 5.0, 2: 4.0, 3: 5.0, 4: 6.0, 5: 4.0, 6: 5.0, 7: 5.0, 8: 2.0, 9: 4.0, 10: 4.0, 11: nan, 12: 5.0, 13: 4.0, 14: nan, 15: 1.0, 16: 4.0, 17: 4.0, 18: 4.0, 19: 4.0, 20: 4.0, 21: 3.0, 22: 4.0, 23: 4.0, 24: 4.0, 25: 4.0, 26: 8.0, 27: 6.0, 28: 6.0, 29: 3.0, 30: 4.0, 31: 5.0, 32: 7.0, 33: 4.0, 34: 4.0, 35: 4.0, 36: 4.0, 37: 4.0, 38: 4.0, 39: 4.0, 40: 4.0, 41: 5.0, 42: 5.0, 43: 4.0, 44: 4.0, 45: 1.0, 46: 10.0, 47: 10.0, 48: 1.0, 49: 4.0, 50: 4.0, 51: nan, 52: 7.0, 53: 4.0, 54: 4.0, 55: 9.0, 56: 9.0, 57: 4.0, 58: 3.0, 59: 4.0, 60: 4.0, 61: nan, 62: 7.0, 63: nan, 64: 7.0, 65: 7.0, 66: 7.0, 67: 7.0, 68: 8.0, 69: 7.0, 70: 7.0, 71: 7.0, 72: nan, 73: 10.0, 74: 8.0, 75: 7.0, 76: 8.0, 77: 5.0, 78: nan, 79: 7.0, 80: nan, 81: 4.0, 82: 7.0, 83: nan, 84: 4.0, 85: nan, 86: 4.0, 87: 4.0, 88: 4.0, 89: 4.0, 90: 10.0, 91: 5.0, 92: nan, 93: 7.0, 94: 4.0, 95: 4.0, 96: 4.0, 97: nan, 98: 1.0, 99: 9.0}, 'job3_category': {0: 5.0, 1: 5.0, 2: 4.0, 3: nan, 4: 10.0, 5: nan, 6: nan, 7: nan, 8: nan, 9: nan, 10: 4.0, 11: 4.0, 12: 5.0, 13: nan, 14: 4.0, 15: 1.0, 16: 4.0, 17: 4.0, 18: 4.0, 19: 4.0, 20: nan, 21: nan, 22: nan, 23: nan, 24: nan, 25: 1.0, 26: 4.0, 27: nan, 28: nan, 29: 3.0, 30: 4.0, 31: 3.0, 32: 4.0, 33: nan, 34: nan, 35: nan, 36: 4.0, 37: nan, 38: nan, 39: 4.0, 40: 7.0, 41: 5.0, 42: nan, 43: 4.0, 44: 4.0, 45: 1.0, 46: 7.0, 47: 1.0, 48: 7.0, 49: 7.0, 50: 4.0, 51: 8.0, 52: nan, 53: 4.0, 54: 7.0, 55: nan, 56: 8.0, 57: nan, 58: nan, 59: 3.0, 60: nan, 61: nan, 62: nan, 63: nan, 64: nan, 65: 7.0, 66: 7.0, 67: 7.0, 68: 7.0, 69: 8.0, 70: 7.0, 71: 4.0, 72: 8.0, 73: nan, 74: 3.0, 75: 10.0, 76: 4.0, 77: 8.0, 78: 8.0, 79: nan, 80: nan, 81: 4.0, 82: 7.0, 83: 4.0, 84: nan, 85: 8.0, 86: 4.0, 87: 4.0, 88: 4.0, 89: nan, 90: 5.0, 91: 7.0, 92: 4.0, 93: nan, 94: 1.0, 95: nan, 96: nan, 97: 10.0, 98: 1.0, 99: 1.0}, 'job4_category': {0: nan, 1: nan, 2: nan, 3: nan, 4: nan, 5: nan, 6: nan, 7: nan, 8: nan, 9: nan, 10: 4.0, 11: nan, 12: nan, 13: nan, 14: 4.0, 15: 8.0, 16: nan, 17: 4.0, 18: 3.0, 19: nan, 20: nan, 21: nan, 22: nan, 23: nan, 24: nan, 25: nan, 26: nan, 27: nan, 28: nan, 29: nan, 30: nan, 31: nan, 32: 4.0, 33: nan, 34: nan, 35: nan, 36: 4.0, 37: nan, 38: nan, 39: nan, 40: 4.0, 41: nan, 42: nan, 43: nan, 44: nan, 45: nan, 46: nan, 47: 7.0, 48: nan, 49: nan, 50: 5.0, 51: 8.0, 52: nan, 53: 4.0, 54: nan, 55: nan, 56: 8.0, 57: nan, 58: nan, 59: nan, 60: nan, 61: nan, 62: nan, 63: nan, 64: nan, 65: 8.0, 66: 7.0, 67: nan, 68: nan, 69: nan, 70: 8.0, 71: 5.0, 72: nan, 73: nan, 74: nan, 75: 10.0, 76: nan, 77: 4.0, 78: nan, 79: nan, 80: nan, 81: nan, 82: nan, 83: nan, 84: nan, 85: nan, 86: 10.0, 87: nan, 88: 8.0, 89: nan, 90: nan, 91: nan, 92: nan, 93: nan, 94: 4.0, 95: nan, 96: nan, 97: nan, 98: nan, 99: 5.0}}
</code></pre>
<pre><code>job1_freq = data_rel1.groupby('job1_category').size()
job2_freq = data_rel1.groupby('job2_category').size()
job3_freq = data_rel1.groupby('job3_category').size()
job4_freq = data_rel1.groupby('job4_category').size()
data_freq = pd.concat([job1_freq, job2_freq, job3_freq, job4_freq], axis=1)
data_freq.columns = [1,2,3,4]
data_freq["prob_1"] = data_freq[1]/sum(data_freq[1])
data_freq["prob_2"] = data_freq[2]/sum(data_freq[2])
data_freq["prob_3"] = data_freq[3]/sum(data_freq[3])
data_freq["prob_4"] = data_freq[4]/sum(data_freq[4])
</code></pre>