匹配python中可能最长的字符集

'470470574704705747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B57470470574704705747047057B2727875377AA0577AA0577AA0577AA0577AA0577AA059959959959952257777225' ('1368A','470470574704705747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B5747047057570570574704705727027B57470470574704705747047057B2727'),('','8'),('1468B','75377AA0577AA0577AA0577AA0577AA0577AA059959959959952257777225')

# Tonality detection script: for every track of a MIDI file, split each
# channel's note sequence into runs of notes that all fit one major key
# (equivalently its relative minor) and print the keys found per channel.
import sys
import re
# NOTE(review): midplay is not part of the standard library; its API is
# assumed from the way MidiFile / NoteOn are used below.
from midplay import MidiFile, NoteOn
from collections import deque

# Pitch-class names, and the single character that encodes each pitch class
# (index 0-11) in the per-channel note strings built below.
notes = ("C", "C#", "D", "Eb", "E", "F", "F#", "G", "G#", "A", "Bb", "B")
noteshex = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B')

# Pitch classes that belong to the major / minor scale rooted at x.
major = lambda x: ((x) % 12, (x + 2) % 12, (x + 4) % 12, (x + 5) % 12, (x + 7) % 12, (x + 9) % 12, (x + 11) % 12,)
minor = lambda x: ((x) % 12, (x + 2) % 12, (x + 3) % 12, (x + 5) % 12, (x + 7) % 12, (x + 8) % 12, (x + 10) % 12,)
# Pitch classes that do NOT belong to the major / minor scale rooted at x.
nomajor = lambda x: {(x + 1) % 12, (x + 3) % 12, (x + 6) % 12, (x + 8) % 12, (x + 10) % 12}
nominor = lambda x: {(x + 1) % 12, (x + 4) % 12, (x + 6) % 12, (x + 9) % 12, (x + 11) % 12}

# One compiled pattern per major key: a run of characters none of which is an
# out-of-key pitch class, i.e. a run of notes that fits that key.
nomajortonelist = [
    re.compile('([^' + ''.join([noteshex[note] for note in nomajor(tonality)]) + ']+)')
    for tonality in range(12)
]
# Same patterns rotated by three, so a pattern's index here is three
# semitones below its index in nomajortonelist (index 0 -> 9, "C" -> "A").
nominortonelist = nomajortonelist[3:] + nomajortonelist[:3]

if len(sys.argv) != 2:
    # The backslash is escaped: '\p' is an invalid escape sequence and
    # triggers a warning on current Python versions. The text is unchanged.
    sys.exit('usage: py tonalitydetect.py [C:\\path]filename.mid')

midi = MidiFile(sys.argv[1])
for num, track in enumerate(midi):
    print('Track:', num, 'messages:', len(track))
    # Per-channel note string and per-channel ordered (pattern, run) results.
    channelnotes = [''] * 16
    channeltonality = [deque() for _ in range(16)]

    # Encode every NoteOn as one character per note, grouped by channel.
    for msg in track:
        if isinstance(msg, NoteOn):
            channelnotes[msg.channel] += (noteshex[msg.note % 12])

    for chnum, channel in enumerate(channelnotes):
        # Repeatedly cut the longest single-key run out of the remaining
        # pieces until nothing is left.
        tomatch = [channel]
        matches = []
        while ''.join(tomatch) != '':
            curchanmaxmatch = deque()
            for string in tomatch:
                for exp in nomajortonelist:
                    # Longest run of this key in this piece ('' if none).
                    curchanmaxmatch.append((exp, max(exp.findall(string) + [''], key=len)))
            matches.append(max(curchanmaxmatch + deque([('', '',)]), key=lambda x: len(x[1])))

            # Remove the first occurrence of the chosen run; keep what is
            # left of it and right of it as new pieces.
            newmatch = []
            found = 0
            for x in tomatch:
                if not found:
                    match = x.split(matches[-1][1], 1)
                    if len(match) > 1:
                        found = 1
                    newmatch.extend(match)
                else:
                    newmatch.append(x)
            tomatch = [x for x in newmatch if x != '']

        # Put the runs back into the order in which they occur in the
        # channel by repeatedly taking a run that prefixes the remainder
        # (shortest candidates are tried first).
        matches = sorted(matches, key=lambda x: len(x[1]))
        toseek = channel
        while matches:
            for idx, match in enumerate(matches):
                if toseek.startswith(match[1]):
                    channeltonality[chnum].append(match)
                    toseek = toseek[len(match[1]):]
                    del matches[idx]
                    break
            else:
                # No remaining run prefixes the rest of the string (a shorter
                # run with the same leading text was consumed earlier).
                # Stop instead of looping forever.
                break

    for chnum, channel in enumerate(channeltonality):
        print('Channel', chnum, ':', [notes[nomajortonelist.index(x[0])] + ' major, ' + notes[nominortonelist.index(x[0])] + ' minor' for x in channel])

1条回答

网友

1楼 · 发布于 2024-10-05 14:32:31

编辑：有关显示最长匹配位置的解决方案，请参见下文。

最适合您的问题的内置工具是 re.findall：“以字符串列表的形式返回字符串中模式的所有非重叠匹配。”

对于您的情况，一个问题是不同的匹配可能相互重叠——但 findall 只返回不重叠的匹配。例如，输入字符串 2B001AA 包含两个不同的匹配项：2B00 和 001AA。re.findall 函数会找到并返回第一个匹配项 2B00，然后从上次停止的位置继续搜索——因此下一个匹配只返回 1AA。

您可以通过将regexp分解为一个接一个匹配的片段来解决此问题：

import re
# One character class per key: each pattern matches a maximal run of
# characters that avoids the five characters listed inside the brackets.
patterns = [
    r'[^1368A]+',
    r'[^2479B]+',
    r'[^0358A]+',
    r'[^1469B]+',
    r'[^0257A]+',
    r'[^1368B]+',
    r'[^02479]+',
    r'[^1358A]+',
    r'[^2469B]+',
    r'[^0357A]+',
    r'[^1468B]+',
    r'[^02579]+',
]


def match_patterns(string):
    """Yield the text of every match of every pattern in *string*.

    Patterns are tried one after another, so matches from different
    patterns may overlap; within one pattern they are non-overlapping and
    come in left-to-right order.
    """
    for regex in patterns:
        for hit in re.finditer(regex, string):
            # The patterns contain no groups, so group(0) is exactly what
            # re.findall would have returned for this match.
            yield hit.group(0)

函数 match_patterns 会返回所有匹配项（但不一定按它们在字符串中出现的顺序）。在 Python 3 中，这个函数可以写得更短：

def match_patterns(string):
    for pattern in patterns:
        yield from re.findall(pattern, string)

在任何情况下，都可以使用内置函数max提取最长匹配：

def find_longest_match(string):
    """Return the longest match of any pattern in *string*.

    When several matches share the maximum length, the first one produced
    by ``match_patterns`` wins. If nothing matches at all (for example an
    empty input string) the empty string is returned; without ``default``
    ``max`` would raise ``ValueError`` on the empty iterator.
    """
    return max(match_patterns(string), key=len, default='')

# Demo: the longest single-pattern run in this input is 'A34B3', which is
# produced by the last pattern, [^02579]+.
print(find_longest_match('12A34B32A43')) # prints: A34B3

如果您还想获得最长匹配的位置，请使用 re.finditer：“返回一个迭代器，该迭代器对字符串中 RE 模式的所有非重叠匹配产生匹配对象（match objects）。”对于每个返回的 match，match.start() 给出匹配的起始位置，match.group(0) 给出匹配的文本。

import re
# One character class per key: each pattern matches a maximal run of
# characters that avoids the five characters listed inside the brackets.
patterns=[
    r'[^1368A]+', r'[^2479B]+', r'[^0358A]+', r'[^1469B]+',
    r'[^0257A]+', r'[^1368B]+', r'[^02479]+', r'[^1358A]+',
    r'[^2469B]+', r'[^0357A]+', r'[^1468B]+', r'[^02579]+'
]


def match_patterns(string):
    """Yield a match object for every match of every pattern in *string*.

    Patterns are tried one after another, so matches from different
    patterns may overlap each other.
    """
    for pattern in patterns:
        yield from re.finditer(pattern, string)


def find_longest_match(string):
    """Return ``(start, text)`` of the longest match, or ``None``.

    When several matches share the maximum length, the first one produced
    by ``match_patterns`` wins. ``None`` is returned when no pattern matches
    anywhere (for example for an empty string).
    """
    # default=None keeps max() from raising ValueError on an empty iterator;
    # without it the "no match" branch below could never be reached.
    match = max(
        match_patterns(string),
        key=lambda m: len(m.group(0)),
        default=None,
    )
    if match is None:
        return None
    return match.start(), match.group(0)

# Demo: the longest single-pattern run starts at index 2 and is 'A34B3'
# (produced by the last pattern, [^02579]+).
print(find_longest_match('12A34B32A43')) # prints: (2, 'A34B3')

相关问题更多 >

编程相关推荐

热门问题

热门文章