在Python/Django中将HTML转换为DraftJS ContentState

2024-10-02 22:27:10 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在从HTML所见即所得编辑器(CKEditor)迁移到DraftJS,并希望能够在Django迁移中将现有的HTML数据转换为DraftJS ContentState,但我一直找不到实现这一点的方法

这可能吗?或者说,这是正确的做法吗?


Tags: 数据 django 方法 ckeditor html js 编辑器 中将
1条回答
网友
1楼 · 发布于 2024-10-02 22:27:10

我也面临同样的情况,我的目标是使用BeautifulSoup递归解析HTML文档,并使用一些帮助函数将块构建为Python对象(您可以使用DraftJS的Python库来代替此步骤)。通过这种方式,您将100%控制转换,并且可以完全控制DraftJS文档,同时仔细考虑自定义块渲染和元素映射

只要肯动手,这其实并不难。以前遇到这种没有现成库可用的问题时我会发怵,但现在我完全不介意借此机会,避免为这样的简单任务再引入一个额外的依赖项

编辑

进一步思考后,你应该考虑在NodeJS中使用现成的 HTML -> DraftJS 转换库来完成这个过程

我没有时间用它创建一个库,但是这里有一个使用BeautifulSoup实现这一点的代码的初始版本,应该可以让您开始:

import uuid

from bs4 import BeautifulSoup
from bs4.element import NavigableString


# Tag names treated as inline styles: text found below one of these
# ancestors extends an entry of the block's 'inlineStyleRanges'.
INLINE_TAGNAMES = [
    'b',
    'strong',
    'i',
    'em',
    'u',
]
# Tag names treated as inline entities: text found below one of these
# ancestors extends an entry of the block's 'entityRanges'.
INLINE_ENTITIES = [
    'a',
]
def add_html_to_body(html, body=None):
    """Convert ``html`` and add the result to a DraftJS body.

    Expects a body object (which is probably stored and used as JSON),
    i.e. a dict with an ``'entityMap'`` dict and a ``'blocks'`` list.

    Args:
        html: HTML markup to convert.
        body: Optional existing body to extend. It is never modified; the
            conversion works on a deep copy. When omitted (or empty) a new
            body is started.

    Returns:
        A new body dict holding the existing content plus whatever was
        converted from ``html``.
    """
    # Imported locally so this function does not rely on a module-level
    # import being present.
    import copy

    if body:
        # A shallow copy would still share the 'blocks' list and the
        # 'entityMap' dict with the caller, so the conversion below would
        # silently modify the caller's data.
        body = copy.deepcopy(body)
        body.setdefault('entityMap', {})
        body.setdefault('blocks', [])
    else:
        body = {'entityMap': {}, 'blocks': []}

    soup = BeautifulSoup(html, 'lxml')
    # The parser may not produce a <body> at all (e.g. for empty input);
    # there is nothing to convert in that case.
    if soup.body is not None:
        _element_to_block([soup.body], body['blocks'], body['entityMap'])

    return body


def _element_to_block(parent_els, blocks, entity_map):
    """Append the children of ``parent_els[-1]`` to the DraftJS ``blocks``.

    Text nodes are appended to the current block. ``b``/``strong``,
    ``i``/``em`` and ``u`` become inline style ranges, ``a`` becomes a LINK
    entity, and ``div``/``p`` start a new block (an still-empty trailing
    block is reused instead of leaving a blank one behind). Any other
    element is skipped together with its content; add more branches for
    the elements you expect (ul, li, h1, table, 'atomic' entities, ...).

    Args:
        parent_els: Chain of ancestor elements, outermost first; the
            children of the last one are converted. Inline ancestors in
            the chain are matched to the most recent ranges of the last
            block, innermost ancestor first.
        blocks: List of block dicts, extended in place.
        entity_map: Dict of entities keyed by entity key, extended in
            place.
    """
    if not blocks:
        # Text needs a block to land in, so seed an empty one.
        blocks.append(_create_block('', 'unstyled', [], []))

    block = blocks[-1]
    _append_children(
        parent_els[-1], block, blocks, entity_map,
        _ranges_of_ancestors(parent_els, block))


# Inline tag name -> DraftJS inline style applied to the text inside it.
_STYLE_BY_TAGNAME = {
    'b': 'BOLD',
    'strong': 'BOLD',
    'i': 'ITALIC',
    'em': 'ITALIC',
    'u': 'UNDERLINE',
}
# Tag names that start a new block.
_BLOCK_TAGNAMES = ('div', 'p')


def _ranges_of_ancestors(parent_els, block):
    """Return the ranges of ``block`` that belong to inline ancestors."""
    # Work on copies of the lists (the range dicts themselves are shared)
    # so that popping does not remove anything from the block.
    unclaimed = {
        'inlineStyleRanges': list(block['inlineStyleRanges']),
        'entityRanges': list(block['entityRanges']),
    }
    open_ranges = []
    for ancestor in reversed(parent_els):
        if ancestor.name in INLINE_TAGNAMES:
            candidates = unclaimed['inlineStyleRanges']
        elif ancestor.name in INLINE_ENTITIES:
            candidates = unclaimed['entityRanges']
        else:
            continue
        # The innermost ancestor owns the most recently added range.
        if candidates:
            open_ranges.append(candidates.pop())
    return open_ranges


def _is_blank_block(block):
    """Return True when ``block`` holds neither text nor ranges."""
    return not (
        block['text']
        or block['inlineStyleRanges']
        or block['entityRanges']
    )


def _append_children(parent_el, block, blocks, entity_map, open_ranges):
    """Convert the children of ``parent_el``, writing text into ``block``.

    ``open_ranges`` holds the range dicts of every enclosing inline element;
    each of them grows by the length of every text node that is appended.
    """
    for el in parent_el.contents:
        # Outside of any inline element, keep writing to the newest block:
        # a nested <p>/<div> may have started one. Inside an inline element
        # stay on the block that owns the open ranges, so offsets and
        # lengths keep describing the text they were created for.
        if not open_ranges:
            block = blocks[-1]

        # Exact type match on purpose: Comment, CData and friends subclass
        # NavigableString but are not visible text.
        if type(el) is NavigableString:
            text = el.string
            for open_range in open_ranges:
                open_range['length'] += len(text)
            block['text'] += text

        elif el.name in _STYLE_BY_TAGNAME:
            style_range = {
                'offset': len(block['text']),
                'length': 0,
                'style': _STYLE_BY_TAGNAME[el.name],
            }
            block['inlineStyleRanges'].append(style_range)
            _append_children(
                el, block, blocks, entity_map, open_ranges + [style_range])

        elif el.name in _BLOCK_TAGNAMES:
            # Start empty: the text is added while converting the children,
            # so seeding it here would duplicate it.
            if not _is_blank_block(blocks[-1]):
                blocks.append(_create_block('', 'unstyled', [], []))
            # Inline ranges do not carry across a block boundary.
            _append_children(el, blocks[-1], blocks, entity_map, [])

        elif el.name == 'a':
            # Create entity here.
            entity_key = str(uuid.uuid4())[:8]
            # Length starts at 0 and grows as the link text is appended,
            # exactly like the inline style ranges.
            entity_range = {
                'offset': len(block['text']),
                'length': 0,
                'key': entity_key,
            }
            block['entityRanges'].append(entity_range)
            entity_map[entity_key] = {
                'type': 'LINK',
                'mutability': 'IMMUTABLE',
                'data': {'url': el.get('href')},
            }
            _append_children(
                el, block, blocks, entity_map, open_ranges + [entity_range])

        # More branches based on the kind of elements you expect to see like
        # ul, li, h1, h2, h3, table, etc. and entities with 'atomic' stuff.


def _create_block(text, type, inline_style_ranges, entity_ranges, data={}):
    return {
        'text': text,
        'type': type,
        'inlineStyleRanges': inline_style_ranges,
        'entityRanges': entity_ranges,
        'depth': 0,
        'data': data,
        'key': str(uuid.uuid4())[:8]
    }

相关问题 更多 >