如何按括号顺序将字符串分成块？

Input: (7+y+(66+7)+(32+(78*19-(32-0)))+(32-9))+8+9+(9-10)-(9/7)-10 Output: {'5$10': '66+7', '23$28': '32-0', '16$29': '78*19-[23$28', '12$30': '32+[16$29]))+8+9+', '32$37': '32-9', '0$38': '7+y+(66+7)+(32+[16$29][23$28]]37]])+8', '44$49': '9-10', '51$55': '9/7', '11:0': '', '31:0': '', '39:44': '+8+9+', '50:51': '-'}

def findall(sstr, substr): gen = sstr.find(substr) while gen != -1: yield gen gen = sstr.find(substr, gen + 1) def findclosest(l: list, el: list): # find closest string from L to string from EL j = el[ 1 ] minimum = j min_index = 0 for i in range(len(l)): if l[ i ][ 0 ] - j < minimum: minimum = l[ i ][ 0 ] - j min_index = l[ i ][ 0 ] return min_index def breakInChunks(temp_s): # main list_of_additions = [ ] list_of_opened = list(findall(temp_s, '(')) list_of_closed = list(findall(temp_s, ')')) if sum(list_of_opened) < sum(list_of_closed) and len(list_of_opened) == len( list_of_closed): n = 0 # <WHILE> while len( list_of_closed) != 0: # read strings-expressions from the most inner ones to the most outer ones minimum = list_of_closed[ len(list_of_closed) - 1 ] j = list_of_closed.pop(0) for i in range(len(list_of_opened)): # find the closest opening bracket to the most inner closing one diff = j - list_of_opened[ i ] if diff > 0: if diff <= minimum: pop_index = i minimum = j - list_of_opened[ i ] else: break starting_index = list_of_opened.pop(pop_index) # start filling KEYPAIRS if len(keypairs) == 0: # if KEYPAIRS is empty keypairs[ f'{starting_index}${j}' ] = temp_s[ starting_index + 1:j ] else: # if KEYPAIRS has at least one key-value pair keys = [ key.split('$') for key in keypairs.keys() ] # reading and unpacking key-value pairs (reading indecies) innerSeq = temp_s min_index_i = None min_index_j = None prevExtracted_i = 0 prevExtracted_j = 0 for p in range(len(keys) - 1, -1, -1): k = keys[ p ] extracted_i, extracted_j = int(k[ 0 ]), int(k[ 1 ]) if starting_index < extracted_i: # if the chunk we are checking contains another one, we are checking if it's in fact the closest one to the chunk we are checking if ( extracted_i < prevExtracted_i and prevExtracted_j < extracted_j) or prevExtracted_i == 0: min_index_i = extracted_i min_index_j = extracted_j if prevExtracted_i == 0: if extracted_i > int(keys[ p - 1 ][ 0 ]) and extracted_j < int(keys[ p - 1 ][ 1 ]): pass else: innerSeq = innerSeq[ :extracted_i ] + f'[{extracted_i}${extracted_j}]' + innerSeq[ extracted_j + 1: ] else: if min_index_i is not None: innerSeq = innerSeq[ :min_index_i ] + f'[{min_index_i}${min_index_j}]' + innerSeq[ min_index_j + 1: ] min_index_i = None min_index_j = None else: innerSeq = innerSeq[ :prevExtracted_i ] + f'[{prevExtracted_i}${prevExtracted_j}]' + innerSeq[ prevExtracted_j + 1: ] prevExtracted_i = extracted_i prevExtracted_j = extracted_j n += 1 keypairs[ f'{starting_index}${j}' ] = innerSeq[ starting_index + 1:j ] # </WHILE> # checking if there are any strings outside parentheses left temp = [ [ int(key.split('$')[ 0 ]), int(key.split('$')[ 1 ]) ] for key in sorted(keypairs.keys(), key=lambda el: int( el.split('$')[ 1 ])) ] # sort from the most inner to the most outer for i in range(len(temp) - 1): if temp[ i ][ 1 ] < temp[ i + 1 ][ 0 ]: # if there is a gap between parentheses # find the closest difference in order to find actual string outside chunks with the help of findclosest() # add new chunk to LIST_OF_ADDITIONS list_of_additions.append([ temp[ i ][ 1 ] + 1, findclosest(temp[ i + 1: ], temp[ i ]) ]) if len(list_of_additions) > 0: # if something is inside LIST_OF_ADDITIONS # add remaining strings to KEYPAIRS for addition in list_of_additions: keypairs[ f'{addition[ 0 ]}:{addition[ 1 ]}' ] = self.s[ addition[ 0 ]:addition[ 1 ] ] return keypairs # return KEYPAIRS else: raise RuntimeError(f'Amount of closing and opening brackets does not match')

2条回答

网友

1楼 · 编辑于 2024-10-01 00:18:10

我将使用正则表达式标记输入，并使用递归处理标记：

import re

def breakInChunks(s):
    chunks = dict()
    tokens = re.finditer(r"([^()]+|.?)", s)

    def recur(start):
        result = ""
        for match in tokens:
            if match[0] in ")":
                key = f"{start}${match.start()}"
                chunks[key] = result
                return key
            result += f"[{recur(match.start())}]" if match[0] == "(" else match[0]
        
    recur(0)
    return chunks

例如：

s = "(7+x+8*(9+10(11+12)+14))-(2*(34))"
chunks = breakInChunks(s)

chunks将是：

{
    '12$18': '11+12',
    '7$22': '9+10[12$18]+14',
    '0$23': '7+x+8*[7$22]',
    '28$31': '34', '25$32':
    '2*[28$31]',
    '0$33': '[0$23]-[25$32]'
}

请注意，最后一个条目不是您问题中给出的'24:25': '-'，而是'0$33': '[0$23]-[25$32]'，这与应用于其他条目的逻辑更为一致

对于更复杂的示例：

s = "(7+y+(66+7)+(32+(78*19-(32-0)))+(32-9))+8+9+(9-10)-(9/7)-10"
chunks = breakInChunks(s)

chunks现在是：

{
    '5$10': '66+7',
    '23$28': '32-0',
    '16$29': '78*19-[23$28]',
    '12$30': '32+[16$29]',
    '32$37': '32-9',
    '0$38': '7+y+[5$10]+[12$30]+[32$37]',
    '44$49': '9-10',
    '51$55': '9/7',
    '0$59': '[0$38]+8+9+[44$49]-[51$55]-10'
}

网友

2楼 · 编辑于 2024-10-01 00:18:10

使用堆栈在嵌套括号的每个级别累积子表达式是一种常见的方法。在每个级别存储左括号的位置和累积表达式字符串。遇到左括号时添加标高。当遇到右括号时，弹出当前级别并将其添加到结果中。此时，替换标记将添加到上一级别的表达式（成为当前级别）

def parGroups(S):
    result = dict()
    stack  = [[0,""]]*2           # parenthesis position, expression
    for i,c in enumerate(S+")"):  # extra ")" to force out main expression
        if c=="(":                
            stack.append([i,""])  # stack up new group
            continue
        if c==")":
            start,expr      = stack.pop(-1)    # unstack current group
            c               = f"[{start}${i}]" # token
            result[c[1:-1]] = expr             # build result
        stack[-1][-1] += c        # accumulate expression in current group
    return result

输出：

S = "(7+y+(66+7)+(32+(78*19-(32-0)))+(32-9))+8+9+(9-10)-(9/7)-10"
print(parGroups(S))

{'5$10' : '66+7',
 '23$28': '32-0',
 '16$29': '78*19-[23$28]',
 '12$30': '32+[16$29]',
 '32$37': '32-9',
 '0$38' : '7+y+[5$10]+[12$30]+[32$37]',
 '44$49': '9-10',
 '51$55': '9/7',
 '0$59' : '[0$38]+8+9+[44$49]-[51$55]-10'}

相关问题更多 >

编程相关推荐

热门问题

热门文章