AutoVenn图表文本呈现

2024-07-03 06:52:58 发布

您现在位置:Python中文网/ 问答频道 /正文

我脑子里对某个东西有一个构想,一直在尝试弄清楚如何在R中把它实现出来,但进展磕磕绊绊(也许R并不是最合适的工具),所以想征求大家的意见。

想法如下:

(1)我每天都有两份列表,分别是民主党和共和党最常用的30个单词,列表中的每一项都包含两个信息:[单词, 频率]。

(2)我想画一个类似维恩图或欧拉图的图表:(A) 把单词以文本形式呈现,字体大小与其频率成比例;(B) 自动将两党都使用的单词放在图表的中间(交集)部分,并将民主党或共和党各自独有的单词放在各自的部分。

到目前为止,我一直在尝试VennDiagram、Vennerable和Venneuler包,但没有一个完全符合要求,文本显示和自动调整大小的问题我也一直没能解决。有一些在线工具比较接近(http://bioinfogp.cnb.csic.es/tools/venny/),但我想要的是一种可以每天自动更新的方案。

有什么想法吗?


Tags: 文本信息编程图表中心单词词汇频率
1条回答
网友
1楼 · 发布于 2024-07-03 06:52:58

我觉得无聊了:

enter image description here

import numpy as np
import matplotlib.pyplot as plt

# Figure size passed as `figsize` both to the final drawing and to the
# throw-away figures that are created to measure the extent of each word.
FIG_SIZE = (10,6)

class word_list_venn_diagram(object):
    """
    Two-circle Venn diagram whose three regions are filled with words.

    Words are stacked vertically inside the region selected by their
    polarity (-1: left-only, 0: intersection, +1: right-only) and rendered
    with the given font sizes. Constructing an instance lays the words out
    and immediately draws the figure; the result is available as
    ``self.fig`` and ``self.ax``.
    """

    def __init__(self, words, fontsizes, polarities, scale=1.):
        """
        Lay out the words and draw the diagram.

        Arguments:

            words: [str 1, ... str N]
                iterable of strings
            fontsizes: [float 1, ... float N]
                corresponding (relative) fontsizes
            polarities: [-1, 0, 1, ..., 0, 1]
                corresponding area designations;
                polarity of 0 corresponds to intersection;
                polarities -1 and 1 correspond to the disjoint sets
            scale: float
                scales the size of the circles with respect to the text
                (w.r.t. the maximum joint height of the bounding boxes of
                the 3 word lists); values below 1 make the tallest word
                stack taller than the circles

        Returns:

            None

        Raises:

            ValueError
                if the three inputs do not have the same length

        """

        # Materialise the inputs first: plain lists, arrays and one-shot
        # iterables such as dict views must all behave the same. In
        # particular, comparing a plain list with a scalar would yield a
        # single bool instead of an element-wise mask.
        self.words = np.array(list(words))
        self.fontsizes = np.array(list(fontsizes))
        polarities = np.array(list(polarities))

        if not (len(self.words) == len(self.fontsizes) == len(polarities)):
            raise ValueError(
                "words, fontsizes and polarities must have the same length "
                "(got %d, %d and %d)"
                % (len(self.words), len(self.fontsizes), len(polarities)))

        # get bounding boxes of text
        self.bboxes = [self._get_bbox(word, size)
                       for word, size in zip(self.words, self.fontsizes)]
        heights = np.array([bbox.height for bbox in self.bboxes], dtype=float)

        # indices of the words belonging to each region
        groups = [np.flatnonzero(polarities == polarity)
                  for polarity in np.unique(polarities)]

        # the tallest stack of words determines the minimum circle diameter
        diameter = max([0.] + [heights[idx].sum() for idx in groups])
        self.radius = scale * diameter / 2.

        # arrange bboxes vertically; the positions are attached to the bbox
        # objects as ad-hoc `x` / `y` attributes, which draw() reads back
        for idx in groups:
            stacked = idx[self._argsort(self.fontsizes[idx])]
            centers = self._vertical_centers(heights[stacked])
            for ii, y in zip(stacked, centers):
                self.bboxes[ii].y = y

        # arrange bboxes horizontally
        # NB: slightly cheeky use of polarity argument: multiplying by
        # -1 / 0 / +1 mirrors the offset to the left, collapses it onto the
        # centre line, or keeps it on the right
        for bbox, polarity in zip(self.bboxes, polarities):
            bbox.x = polarity * self._get_shift(bbox.y, self.radius)

        # draw
        self.fig, self.ax = self.draw()

        return

    def draw(self):
        """
        Draws the Venn diagram.

        Returns:

            (fig, ax): the newly created matplotlib figure and axes
        """

        fig, ax = plt.subplots(1, 1, figsize=FIG_SIZE)

        # draw circles; their centres are one radius apart
        circle_left = plt.Circle((-0.5*self.radius, 0), self.radius, color='b', fill=False, axes=ax, linewidth=5)
        circle_right = plt.Circle((+0.5*self.radius, 0), self.radius, color='r', fill=False, axes=ax, linewidth=5)
        ax.add_artist(circle_left)
        ax.add_artist(circle_right)

        # draw words, centred on the positions computed in __init__
        for word, bbox, fontsize in zip(self.words, self.bboxes, self.fontsizes):
            ax.text(bbox.x, bbox.y, word,
                    horizontalalignment='center',
                    verticalalignment='center',
                    fontsize=fontsize,
                    bbox=dict(pad=0., facecolor='none', edgecolor='none')
            )

        # update data limits as circles are not registered automatically
        corners = (-1.5*self.radius, -self.radius), (1.5*self.radius, self.radius)
        ax.update_datalim(corners)
        ax.autoscale_view()

        # make figure pretty-ish
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_aspect('equal')
        fig.set_facecolor('w')
        ax.set_frame_on(False)
        fig.canvas.draw()

        return fig, ax

    def _get_bbox(self, word, fontsize):
        """
        Get the bounding box for each word.
        Unfortunately, the bbox is dependent on the renderer,
        so a figure has to be created.

        The scratch figure is always closed again, even if measuring fails.
        """
        fig = plt.figure(figsize=FIG_SIZE)
        try:
            renderer = fig.canvas.get_renderer()
            text = plt.text(0.5, 0.5, word,
                            fontsize=fontsize,
                            bbox=dict(pad=0., facecolor='none', edgecolor='red'))
            bbox = text.get_window_extent(renderer=renderer)
        finally:
            plt.close(fig)
        return bbox

    def _vertical_centers(self, heights):
        """
        Returns the y-coordinate of the centre of each box when boxes of the
        given heights are stacked bottom-to-top without gaps and the whole
        stack is centred on y = 0.

        Centres (rather than bottom edges) are returned because draw()
        renders every word with verticalalignment='center'.
        """
        heights = np.asarray(heights, dtype=float)
        tops = np.cumsum(heights)
        return tops - heights / 2. - heights.sum() / 2.

    def _argsort(self, arr):
        """
        Returns indices to create a sorted array.
        Entries are sorted in such a way that the largest element is in the middle,
        and the size of the elements falls off towards the ends.
        """
        order = np.argsort(arr)
        # every other element ascending, then the skipped ones descending
        order = np.r_[order[::2], order[1::2][::-1]]
        return order

    def _get_shift(self, y, r):
        """
        Get point along midline of a waxing moon formed by two overlapping
        circles of radius r as a function of y.

        For |y| > r there is no moon at that height; the half chord is
        clipped to zero there instead of producing NaN.
        """
        half_chord = np.sqrt(np.maximum(r**2 - y**2, 0.))
        x1 = half_chord + r/2. # right circle
        x2 = half_chord - r/2. # left circle
        x = x2 + (x1 - x2)/2. # midpoint
        return x

def test():
    """
    Smoke test: build a word Venn diagram from a Lorem-ipsum text.

    Font sizes are proportional to the word counts (the most frequent word
    gets `max_fontsize`), and every word is assigned to one of the three
    regions at random, so the layout differs between runs.

    Returns:

        None
    """
    from collections import Counter

    test_string = "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."

    # get a word list with non alphanumeric characters removed;
    # tokens that consist of punctuation only would become empty and are dropped
    tokens = (''.join(ch for ch in token if ch.isalnum())
              for token in test_string.split())
    words = [token for token in tokens if token]

    # count occurrences; remove duplicates
    # (dict views are turned into real sequences so that numpy builds proper
    # 1-d arrays from them instead of 0-d object arrays)
    counter = Counter(words)
    words = list(counter)
    counts = np.array(list(counter.values()), dtype=float)

    # convert counts to reasonable fontsizes
    max_fontsize = 25
    fontsizes = counts / counts.max() * max_fontsize

    # assign random polarities
    polarities = np.random.choice([-1, 0, 1], len(words))

    word_list_venn_diagram(words, fontsizes, polarities, scale=1.5)

    return

相关问题 更多 >