用标签提取所有出现的正则表达式模式

import re


def replace_entities(example):
    """Label the first date, e-mail address, URL and number in *example*.

    Each category is looked up with ``re.search``, so at most one hit per
    category is reported.

    Args:
        example: Text to scan.

    Returns:
        One ``"<match> : <LABEL>"`` line per category that matched, in the
        order DATESTR, EMAILIDSTR, URLSTR, NUMSTR, joined by newlines; an
        empty string when nothing matched.
    """
    # Raw strings keep the regex backslashes out of Python's own escape
    # handling (``"\d"`` in a plain string is an invalid escape sequence).
    labelled_patterns = (
        # dd mm yyyy / dd-mm-yyyy / dd/mm/yyyy
        # NOTE: {1,31} and {1,12} are repetition counts, not value ranges,
        # so any run of 1-31 (resp. 1-12) digits is accepted here.
        ("DATESTR", r"(\d{1,31}(:? |\-|\/)\d{1,12}(:? |\-|\/)\d{4})"),
        # email id
        ("EMAILIDSTR", r"[\w\.-]+@[\w\.-]+"),
        # URL
        (
            "URLSTR",
            r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*,]"
            r"|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
        ),
        # NUMBERS
        ("NUMSTR", r"\d+"),
    )

    lines = []
    for label, pattern in labelled_patterns:
        m = re.search(pattern, example)
        if m:
            lines.append("{} : {}".format(m.group(), label))
    return "\n".join(lines)


print(replace_entities('My name is ali, Date is 21/08/2018 Total amount is chandanpatil@yahoo.com euros 10,2018/13/09 saylijawale@gmail.com. https://imarticus.com Account number is 123456'))

2条回答

网友

1楼 · 编辑于 2024-09-26 18:06:57

使用findall获取所有电子邮件ID，并对每个ID进行迭代。

对于NUMSTR，原代码只会匹配example中的第一个数字。如果输入格式固定不变，可以改为获取字符串末尾的数字。

import re

def replace_entities(example):
    """Extract dates, e-mail addresses, URLs and the trailing number.

    Every date, e-mail address and URL occurrence in *example* is reported.
    For numbers, only a digit run that ends the text is reported.

    Args:
        example: Text to scan.

    Returns:
        A string with one ``"<match> : <LABEL>"`` line per hit, grouped in
        the order DATESTR, EMAILIDSTR, URLSTR, NUMSTR and joined by
        newlines; an empty string when nothing matched.
    """
    lines = []

    # dd mm yyyy, dd-mm-yyyy or dd/mm/yyyy.  Day and month are one or two
    # digits; \b stops the pattern from matching inside longer digit runs.
    lines.extend(
        "{} : DATESTR".format(m.group())
        for m in re.finditer(r"\b\d{1,2}[ /-]\d{1,2}[ /-]\d{4}\b", example)
    )

    # email id -- the character class also accepts '.', so a sentence-ending
    # period right after an address is captured; strip it again.
    lines.extend(
        "{} : EMAILIDSTR".format(address.rstrip("."))
        for address in re.findall(r"[\w.-]+@[\w.-]+", example)
    )

    # URL
    url_pattern = (
        r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]"
        r"|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    )
    lines.extend(
        "{} : URLSTR".format(m.group())
        for m in re.finditer(url_pattern, example)
    )

    # NUMBERS -- only the digit run at the very end of the text.  Using
    # search() anchored with '$' (instead of match() with a '.*?' prefix)
    # keeps this working when the text contains newlines.
    m = re.search(r"[0-9]+$", example)
    if m:
        lines.append("{} : NUMSTR".format(m.group()))

    return "\n".join(lines)

print(replace_entities('My name is ali, Date is 21/08/2018 Total amount is chandanpatil@yahoo.com euros 10,2018/13/09  saylijawale@gmail.com. https://imarticus.com   Account number is 123456'))

'''
21/08/2018 : DATESTR
chandanpatil@yahoo.com : EMAILIDSTR 
saylijawale@gmail.com : EMAILIDSTR
https://imarticus.com : URLSTR           
123456 : NUMSTR 
 '''

网友

2楼 · 编辑于 2024-09-26 18:06:57

您可以编写一个小型生成器函数，其中包含正则表达式中的替代项：

import re

data = """My name is ali, Date is 21/08/2018 Total amount is chandanpatil@yahoo.com euros 10,2018/13/09  saylijawale@gmail.com. https://imarticus.com   Account number is 123456"""

# (label, pattern) pairs.  Order is the alternation priority: at any given
# position the first pattern that matches wins.  URLSTR is tried before
# EMAILIDSTR so that a URL containing '@' is not labelled as an e-mail.
_TOKENS = (
    ('DATESTR', r'\d{2}/\d{2}/\d{4}'),
    ('URLSTR', r'https?://\S+'),
    ('EMAILIDSTR', r'\S+@\S+'),
    ('NUMSTR', r'\d+'),
)

# One named group per token, joined into a single alternation; built once
# instead of on every call.
_TOKEN_RX = re.compile(
    "|".join('(?P<{}>{})'.format(label, pattern) for label, pattern in _TOKENS)
)

# \S+ also swallows sentence punctuation that directly follows an address
# or URL, so it is stripped again for these labels.
_PUNCT_STRIPPED = frozenset(('EMAILIDSTR', 'URLSTR'))
_TRAILING_PUNCT = '.,;:!?'


def finder(string=None):
    """Yield ``(value, token)`` pairs for every entity found in *string*.

    Matches are produced left to right in the order they occur in the text.
    ``token`` is one of ``'DATESTR'``, ``'URLSTR'``, ``'EMAILIDSTR'`` or
    ``'NUMSTR'``.

    Args:
        string: Text to scan.  ``None`` or an empty string yields nothing.

    Yields:
        Tuples of the matched text (trailing sentence punctuation removed
        for e-mail addresses and URLs) and its token label.
    """
    if not string:
        return

    for match in _TOKEN_RX.finditer(string):
        # Exactly one named alternative matched; lastgroup is its label.
        token = match.lastgroup
        value = match.group(token)
        if token in _PUNCT_STRIPPED:
            value = value.rstrip(_TRAILING_PUNCT)
        yield (value, token)


# iterate over the found tokens
for value, token in finder(data):
    print("Value: {}, Token: {}".format(value, token))

输出结果：

Value: 21/08/2018, Token: DATESTR
Value: chandanpatil@yahoo.com, Token: EMAILIDSTR
Value: 10, Token: NUMSTR
Value: 2018, Token: NUMSTR
Value: 13, Token: NUMSTR
Value: 09, Token: NUMSTR
Value: saylijawale@gmail.com, Token: EMAILIDSTR
Value: https://imarticus.com, Token: URLSTR
Value: 123456, Token: NUMSTR

参见 regex101.com 上该表达式的演示。

相关问题更多 >

编程相关推荐

热门问题

热门文章