<p>编辑:我已经学会了一些编程技巧,并重新回答了这个问题。你知道吗</p>
<p>回答我的问题,你不需要任何特殊功能。如果你想要一个相对容易编码的版本,请在下面寻找不同的答案。与下面的解决方案相比,此解决方案的文档也较少,但它使用动态编程和记忆体,因此它应该比上一个解决方案更快,并且占用更少的内存。它还正确处理正则表达式字符(例如<code>|</code>)。(以下解决方案不适用。)</p>
<pre><code>def solution(fixed_strings, target_string):
def get_middle_matches(s, fixed_strings):
'''
Gets the fixed strings matches without the first and last first strings
Example the parameter tuple ("ABCBD", ["B"]) should give back [["A", "B", "CBD"], ["ABC", "B", "D"]]
'''
# in the form {(s, s_index, fixed_string_index, fixed_character_index): return value of recursive_get_middle_matches called with those parameters}
lookup = {}
def memoized_get_middle_matches(*args):
'''memoize the recursive function'''
try:
ans = lookup[args]
return ans
except KeyError:
ans = recursive_get_middle_matches(*args)
lookup[args] = ans
return ans
def recursive_get_middle_matches(s, s_index, fixed_string_index, fixed_character_index):
'''
Takes a string, an index into that string, a index into the list of middle fixed strings,
...and an index into that middle fixed string.
Returns what fixed_string_matches(s, fixed_strings[fixed_string_index:-1]) would return, and deals with edge cases.
'''
# base case: there's no fixed strings left to match
try:
fixed_string = fixed_strings[fixed_string_index]
except IndexError:
# we just finished matching the last fixed string, but there's some stuff left over
return [[s]]
# recursive case: we've finished matching a fixed string
# note that this needs to go before the end of the string base case
# ...because otherwise the matched fixed string may not be added to the answer,
# ...since getting to the end of the main string will short-circuit it
try:
fixed_character = fixed_string[fixed_character_index]
except IndexError:
# finished matching this fixed string
upper_slice = s_index
lower_slice = upper_slice - len(fixed_string)
prefix = s[:lower_slice]
match = s[lower_slice:upper_slice]
postfix = s[upper_slice:]
match_ans = [prefix, match]
recursive_answers = memoized_get_middle_matches(postfix, 0, fixed_string_index + 1, 0)
if fixed_string == '' and s_index < len(s):
recursive_skip_answers = memoized_get_middle_matches(s, s_index + 1, fixed_string_index, fixed_character_index)
return [match_ans + recursive_ans for recursive_ans in recursive_answers] + recursive_skip_answers
else:
return [match_ans + recursive_ans for recursive_ans in recursive_answers]
# base cases: we've reached the end of the string
try:
character = s[s_index]
except IndexError:
# nothing left to match in the main string
if fixed_string_index >= len(fixed_strings):
# it completed matching everything it needed to
return [[""]]
else:
# it didn't finish matching everything it needed to
return []
# recursive cases: either we match this character or we don't
character_matched = (character == fixed_character)
starts_fixed_string = (fixed_character_index == 0)
if starts_fixed_string:
# if this character starts the fixed string, we're still searching for this same fixed string
recursive_skip_answers = memoized_get_middle_matches(s, s_index + 1, fixed_string_index, fixed_character_index)
if character_matched:
recursive_take_answers = memoized_get_middle_matches(s, s_index + 1, fixed_string_index, fixed_character_index + 1)
if starts_fixed_string:
# we have the option to either take the character as a match, or skip over it
return recursive_skip_answers + recursive_take_answers
else:
# this character is past the start of the fixed string; we can no longer match this fixed string
# since we can't match one of the fixed strings, this is a failed path if we don't match this character
# thus, we're forced to take this character as a match
return recursive_take_answers
else:
if starts_fixed_string:
# we can't match it here, so we skip over and continue
return recursive_skip_answers
else:
# this character is past the start of the fixed string; we can no longer match this fixed string
# since we can't match one of the fixed strings, there are no possible matches here
return []
## main code
return memoized_get_middle_matches(s, 0, 0, 0)
## main code
# doing the one fixed string case first because it happens a lot
if len(fixed_strings) == 1:
# if it matches, then there's just that one match, otherwise, there's none.
if target_string == fixed_strings[0]:
return [target_string]
else:
return []
if len(fixed_strings) == 0:
# there's no matches because there are no fixed strings
return []
# separate the first and last from the middle
first_fixed_string = fixed_strings[0]
middle_fixed_strings = fixed_strings[1:-1]
last_fixed_string = fixed_strings[-1]
prefix = target_string[:len(first_fixed_string)]
middle = target_string[len(first_fixed_string):len(target_string)-len(last_fixed_string)]
postfix = target_string[len(target_string)-len(last_fixed_string):]
# make sure the first and last fixed strings match the target string
# if not, the target string does not match
if not (prefix == first_fixed_string and postfix == last_fixed_string):
return []
else:
# now, do the check for the middle fixed strings
return [[prefix] + middle + [postfix] for middle in get_middle_matches(middle, middle_fixed_strings)]
print(solution(["I like ", " and ", " because ", "do"],
"I like lettuce and carrots and onions because I do"))
print([("I like ", "lettuce", " and ", "carrots and onions", " because ", "I ", "do"),
("I like ", "lettuce and carrots", " and ", "onions", " because ", "I ", "do")])
print()
print(solution(["take ", " to the park"], "take Alice to the park"))
print([("take ", "Alice", " to the park")])
print()
# Courtesy of @ktzr
print(solution(["I like ", " because "],
"I don't like cheese because I'm lactose-intolerant"))
print([])
print()
print(solution(["I", "want", "or", "done"],
"I want my sandwich or I want my pizza or salad done"))
print([("I", " ", "want", " my sandwich ", "or", " I want my pizza or salad ", "done"),
("I", " ", "want", " my sandwich or I want my pizza ", "or", " salad ", "done"),
("I", " want my sandwich or I", "want", " my pizza ", "or", " salad ", "done")])
</code></pre>
<p><strong>上一个答案:</strong></p>
<p>回答我的问题,<strong><code>itertools.product</code></strong>函数和带有<code>overlapped</code>参数的<strong><code>regex.finditer</code></strong>是这个解决方案的两个关键函数。我想我应该包括我的最终代码,以防它在类似情况下帮助其他人。你知道吗</p>
<p>我真的很关心我的代码是否具有超可读性,所以我最终基于@ktzr的解决方案编写了自己的解决方案。(谢谢!)你知道吗</p>
<p>我的解决方案使用了一些奇怪的东西。你知道吗</p>
<p>首先,它使用一个<code>overlapped</code>参数,该参数只能通过<code>regex</code>模块使用,并且必须安装(很可能是通过<code>pip install regex</code>)。然后,用<code>import regex as re</code>将其包含在顶部。这使得在字符串中搜索重叠的匹配项变得很容易。你知道吗</p>
<p>第二,我的解决方案使用了一个没有显式包含在库中的itertools函数,您必须这样定义它:</p>
<pre><code>import itertools
def itertools_pairwise(iterable):
'''s -> (s0,s1), (s1,s2), (s2, s3), ...'''
a, b = itertools.tee(iterable)
next(b, None)
return zip(a, b)
</code></pre>
<p>这个函数只允许我成对地遍历一个列表,确保列表中的每个元素(除了第一个和最后一个)都遇到两次。你知道吗</p>
<p>有了这两件事,我的解决方案是:</p>
<pre><code>def solution(fixed_strings, target_string):
# doing the one fixed string case first because it happens a lot
if len(fixed_strings) == 1:
# if it matches, then there's just that one match, otherwise, there's none.
if target_string == fixed_strings[0]:
return [target_string]
else:
return []
# make sure the first and last fixed strings match the target string
# if not, the target string does not match
if not (target_string.startswith(fixed_strings[0]) and target_string.endswith(fixed_strings[-1])):
return []
# get the fixed strings in the middle that it now needs to search for in the middle of the target string
middle_fixed_strings = fixed_strings[1:-1]
# where in the target string it found the middle fixed strings.
# middle_fixed_strings_placements is in the form: [[where it found the 1st middle fixed string], ...]
# [where it found the xth middle fixed string] is in the form: [(start index, end index), ...]
middle_fixed_strings_placements = [[match.span() for match in re.finditer(string, target_string, overlapped=True)]
for string in middle_fixed_strings]
# if any of the fixed strings couldn't be found in the target string, there's no matches
if [] in middle_fixed_strings_placements:
return []
# get all of the possible ways each of the middle strings could be found once within the target string
all_placements = itertools.product(*middle_fixed_strings_placements)
# remove the cases where the middle strings overlap or are out of order
good_placements = [placement for placement in all_placements
if not (True in [placement[index][1] > placement[index + 1][0]
for index in range(len(placement) - 1)])]
# create a list of all the possible final matches
matches = []
target_string_len = len(target_string) # cache for later
# save the start and end spans which are predetermined by their length and placement
start_span = (0, len(fixed_strings[0]))
end_span = (target_string_len - len(fixed_strings[-1]), target_string_len)
for placement in good_placements:
placement = list(placement)
# add in the spans for the first and last fixed strings
# this makes it so each placement is in the form: [1st fixed string span, ..., last fixed string span]
placement.insert(0, start_span)
placement.append(end_span)
# flatten the placements list to get the places where we need to cut up the string.
# we want to cut the string at the span values to get out the fixed strings
cuts = [cut for span in placement for cut in span]
match = []
# go through the cuts and make them to create the list
for start_cut, end_cut in itertools_pairwise(cuts):
match.append(target_string[start_cut:end_cut])
matches.append(match)
return matches
</code></pre>