<p>这是一个使用Regex和字符串操作的解决方案,希望注释能解释发生了什么:</p>
<pre><code>import re
def convert_to_tuple(text):
    """Convert a prefix-call expression string into nested tuples.

    'neg(or(p,q))' --> ('neg', ('or', 'p', 'q'))

    Innermost calls (those whose parentheses contain no further '(') are
    repeatedly cut out of the string, stored in a list and replaced by a
    '#index#' placeholder, until no call is left.  The stored flat tuples
    are then rebuilt recursively by resolving each placeholder back into
    the tuple it stands for.

    Args:
        text: the expression, e.g. 'imp(neg(r),neg(q))'.  Whitespace
            around arguments is ignored.  The text must not itself
            contain '#digits#' sequences, which are reserved for the
            internal placeholders.

    Returns:
        A nested tuple (name, arg1, arg2, ...) for the call that was
        extracted last.  If text contains no call at all (a bare atom
        such as 'p'), the whitespace-stripped text is returned as a str.
    """
    # matches a call whose argument list holds no '(' (an innermost call)
    call_pattern = re.compile(r'(\w+)\(([^\(]*?)\)')
    # matches the '#index#' placeholder left behind by an extracted call
    placeholder_pattern = re.compile(r'#(\d+)#')
    # every extracted call as a flat tuple, in extraction order
    expressions = []

    def add_exp(match):
        """Store the matched call as a tuple and return its placeholder."""
        name, args = match.groups()
        # split(',') yields a single item when there is no comma, so the
        # one-argument case needs no special handling
        expressions.append((name, *(arg.strip() for arg in args.split(','))))
        return '#{}#'.format(len(expressions) - 1)

    def fix_expression(expression):
        """Recursively replace placeholders with the tuples they refer to."""
        if isinstance(expression, str):
            found = placeholder_pattern.search(expression)
            if found:
                return fix_expression(expressions[int(found.group(1))])
            # a plain atom stays as it is
            return expression
        return tuple(fix_expression(sub_exp) for sub_exp in expression)

    # peel off one nesting level per pass until no call remains
    code = text
    while call_pattern.search(code):
        code = call_pattern.sub(add_exp, code)

    if not expressions:
        # nothing to extract: the text is a bare atom
        return text.strip()

    # A call can only be extracted once everything nested inside it has
    # been replaced by placeholders, so the call extracted last is never
    # nested inside another one: it is the outermost expression.
    return fix_expression(expressions[-1])
# Demo: convert each batch of sample expressions and print one list per batch.
# Expected output:
#   [('neg', ('or', 'p', 'q'))]
#   [('imp', 'p', 'q'), ('imp', ('neg', 'r'), ('neg', 'q'))]
#   [('and', ('and', 'p', 'q'), 'r')]
for samples in (
    ['neg(or(p,q))'],
    ['imp(p,q)', 'imp(neg(r),neg(q))'],
    ['and(and(p,q),r)'],
):
    print(list(map(convert_to_tuple, samples)))
</code></pre>
<p>使用<code>Regex</code>处理嵌套表达式的技巧是:先用<code>re.sub</code>把每个非嵌套(最内层)的表达式提取到一个列表中,并用它在列表中的索引(形如<code>#0#</code>)替换原文中的该表达式。重复这一步,直到没有表达式可提取为止,然后再处理提取出的表达式列表,完成所需的工作。</p>
<p>查看我对类似问题的回答,以便对这一技巧有一个清晰的认识:</p>
<p><a href="https://stackoverflow.com/questions/58237331/extract-all-variables-from-a-string-of-python-code-regex-or-ast/58240053#58240053">Answer for Extract all variables from a string of Python code (regex or AST)</a></p>