从csv文件生成子图

2024-09-29 01:23:28 发布

您现在位置:Python中文网/ 问答频道 /正文

这是我正在处理的csv文件的要点:

CODE     AGEGROUP      SEX     CITY      HEALTHSTATUS 
----     ---------     ---     ----      ------------
E101      25 to 29      M      Denver    Recovered
E102      25 to 29      F      Chicago   Recovered
E105      45 to 49      M      Denver    Mild

我希望用条形图来展示这些数据,但不太清楚该如何编写子图。用下面这段代码,我已经可以显示受影响的男性和女性各有多少人,但还无法显示健康状况和年龄组:

import matplotlib.pyplot as plt

plt.style.use("bmh")

# Count the rows per SEX value and draw one bar per category.
# Series.plot returns the matplotlib Axes it drew on; keep that handle and
# label it directly instead of storing unused intermediates.
ax = df2["SEX"].value_counts().plot(kind="bar")

ax.set_xlabel("Sex", fontsize=12)
ax.set_ylabel("Number of People", fontsize=12)
ax.set_title("Number of people affected based on sex", fontsize=12)
plt.show()

我该如何显示另外两项(健康状况和年龄组)?


Tags: 文件ofcsvtonumbercodepltrecovered
2条回答

百分比条形图:
我想应该是这样的(ps): enter image description here

但plt是: enter image description here

代码如下:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# NOTE(review): the CSV is loaded here, but the chart below is built from the
# hard-coded demo counts in `data`, not from df2.
df2 = pd.read_csv('data.csv')

people = ('F', 'M')

# One label per stacked segment; segment_labels[k] describes row k of `data`.
segment_labels = ['25to29Recovered', '25to29Mild', '30to44Recovered',
                  '30to44Mild', '45to49Recovered', '45to49Mild']

# Rows are segments, columns follow the order of `people` (F, M).
data = np.asarray([
    [1, 2],    # 25to29Recovered
    [3, 4],    # 25to29Mild
    [5, 6],    # 30to44Recovered
    [7, 8],    # 30to44Mild
    [9, 10],   # 45to49Recovered
    [11, 12],  # 45to49Mild
])

# Share of each segment within its own column, in percent.
# Transposed so it is indexed as percentages[person, segment].
percentages = (data / data.sum(axis=0) * 100).T

y_pos = np.arange(len(people))

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111)

# Colours are cycled, so segments beyond the fourth reuse earlier colours.
colors = 'rgbm'
patch_handles = []

# Stack the segments: each new layer starts where the previous ones ended.
bottom = np.zeros(len(people))
for i, counts in enumerate(data):
    patch_handles.append(ax.bar(y_pos, counts,
                                color=colors[i % len(colors)], align='center',
                                bottom=bottom))
    bottom += counts

# Annotate every bar segment at its centre with its label and percentage.
for label, container, segment_pcts in zip(segment_labels, patch_handles,
                                          percentages.T):
    for patch, pct in zip(container.patches, segment_pcts):
        left, base = patch.get_xy()
        x_mid = 0.5 * patch.get_width() + left
        y_mid = 0.5 * patch.get_height() + base
        ax.text(x_mid, y_mid, "%s\n%d%%" % (label, pct), ha='center')

plt.show()

一种方法是为每个条件创建单独的子图:

# pandas must be bound to `pd`: the DataFrame constructor below is called as
# pd.DataFrame, and `df` is used for the frame itself.
import pandas as pd
import matplotlib.pyplot as plt

# Sample rows mirroring the CSV layout shown in the question.
df = pd.DataFrame({'CODE':["E101", "E102", "E105"],
                   'AGEGROUP':["25 to 29", "25 to 29", "45 to 49"],
                   'SEX':["M", "F", "M"],
                   'CITY':["Denver", "Chicago", "Denver"],
                   'HEALTHSTATUS':["Recovered", "Recovered", "Mild"],
                   })

fs = 6
plt.style.use("bmh")

# One entry per subplot:
# (column to count, x-axis label, x tick-label rotation or None to keep the
# rotation that the pandas bar plot applies by default).
panels = [
    ("SEX", "Sex", None),
    ("AGEGROUP", "AGEGROUP", 45),
    ("HEALTHSTATUS", "HEALTHSTATUS", 45),
]

# One row per condition, stacked vertically in a single figure.
fig, axes = plt.subplots(len(panels), 1)
for ax, (column, xlabel, rotation) in zip(axes, panels):
    df[column].value_counts().plot(kind="bar", ax=ax)
    ax.set_xlabel(xlabel, fontsize=fs)
    ax.set_ylabel("Number of People", fontsize=fs)
    ax.tick_params(axis='both', labelsize=fs)
    if rotation is not None:
        ax.tick_params(axis='x', labelrotation=rotation)

# The title is placed on the top subplot only.
axes[0].set_title("Number of people affected based on condition", fontsize=fs)
plt.tight_layout()
plt.show()

enter image description here

相关问题 更多 >