我试着做两件事:
对每列应用一种颜色,而不是对每个单独的项目应用一种颜色
对各列进行排序,使数值最大的列排在最下方(这一点不太重要)
这是我当前的代码(改编自 Stack Overflow 上的回答)。目前,变量 colors 可以是任意颜色列表(如果列数多于颜色数,颜色应循环重复使用)
我尝试过使用一个数字 id(参见被注释掉的几行),但由于 Mekko 图需要先对数据做透视(pivot),这个数字 id 在透视之后就丢失了
任何帮助都将不胜感激。非常感谢
下面是一个示例数据文件:https://www.dropbox.com/s/lgf7oyrir33lpd4/sample_data.csv?dl=0
def chartMekko(excel, stack, colors=colors):
    """Build a Mekko (Marimekko) chart from long-format frequency data.

    The input is pivoted so that every ``topic_label_group`` becomes one
    variable-width stacked bar and every distinct value of ``stack`` becomes
    one segment (one plotly trace) inside each bar.

    Side effect: sets the default plotly renderer to ``iframe_connected``.

    :param DataFrame excel: must contain the columns ``topic_label_group``,
        ``cluster_frequency`` and the column named by ``stack``.
    :param str stack: name of the column whose values become the segments.
    :param colors: one color per segment; a single color string, a sequence
        of colors (repeated when there are more segments than colors), or
        None for plotly's default palette.
    :return: the plotly figure.
    """
    # Rows: one per topic_label_group; columns: one per `stack` value.
    # The string 'sum' is the spelling pandas recommends over np.sum.
    pivot_data = excel.pivot_table(
        index=['topic_label_group'],
        columns=[stack],
        values='cluster_frequency',
        aggfunc='sum',
    ).fillna(0)

    pio.renderers.default = 'iframe_connected'

    def stacked_bar_width_plot(df, value_cols, width_col, colors=colors, **subplot):
        """A stacked column plot with variable bar width.

        :param df
        :param list value_cols: columns of `df`, already normalized (sum=1 for every line).
        :param str width_col: column of `df`, unbounded, used (i) as label (ii) to compute width.
        :param colors: color of each value column; None for default palette.
        :param dict subplot: optional figure/row/col
        """
        categories = df.index.to_list()
        width = df[width_col]
        # Left edge of each bar is the running total of the widths before it.
        x = np.cumsum([0] + list(width.iloc[:-1]))

        # Normalize `colors` to a list with at least one entry per column.
        # A None entry lets plotly pick from its default palette.
        if colors is None:
            palette = [None]
        elif isinstance(colors, str):
            # A bare string would otherwise be zipped character by character.
            palette = [colors]
        else:
            palette = list(colors) or [None]
        palette = palette * len(value_cols)

        figure = subplot.pop('figure', go.Figure())
        for colname, color in zip(value_cols, palette):
            label = str(colname)  # column labels are not guaranteed to be str
            hover = "<br>".join([
                "<b>" + label + "</b>",
                "# of verbatims: %{width}",
                "Pct in " + label + ": %{y}",
                "<extra></extra>",
            ])
            figure.add_trace(
                go.Bar(
                    name=colname,
                    x=x,
                    y=df[colname],
                    width=width,
                    offset=0,
                    marker_color=color,
                    customdata=categories,
                    hovertemplate=hover,
                ),
                **subplot
            )

        # Ticks sit at the horizontal centre of each bar.
        figure.update_xaxes(
            tickvals=x + np.array(width) / 2,
            range=[0, np.sum(width)],
            ticktext=categories,
            **subplot
        )
        figure.update_yaxes(range=[0, 1], row=subplot.get('row'), col=subplot.get('col'))
        return figure.update_layout(
            barmode='stack',
            bargap=0,
            showlegend=False,
            margin=dict(l=5, r=25, t=20, b=10),
        )

    def mekko_plot(df, unit_name=None, colors=colors, **subplot):
        """A mekko plot is a normalized stacked column plot with variable bar width.

        :param DataFrame df: already indexed by category, with only numeric columns:
            there will be one stacked-bar per line (X), labeled after the index, with one bar per column.
        :param str unit_name: used to populate hover.
        :param list colors: color of each column. None for default palette (blue, red, ...).
        The rest (title..) is easily added afterwards.
        """
        # Normalize then defer to stacked_bar_width_plot plot.
        value_cols = df.columns
        # Row totals become the bar widths; each row is then scaled to sum to 1.
        w = pd.DataFrame({unit_name: df.sum(axis='columns')})
        w[value_cols] = df.div(w[unit_name], axis='index')
        return stacked_bar_width_plot(w, value_cols, width_col=unit_name, colors=colors, **subplot)

    return mekko_plot(pivot_data, unit_name='cluster_frequency', colors=colors)
目前没有回答
相关问题 更多 >
编程相关推荐