使用 Python 正则表达式提取出现在字符串不同位置的数字

4 bedrooms 2 bathrooms 3 carparks 3 bedroom house Bedrooms 2, beds 5, Bedrooms 1, 2 bedrooms, 1 bathroom, Four bedrooms home, double garage Four bedrooms home Three double bedrooms home, garage Three bedrooms home, 2 bedroom home unit with single carport. Garage car spaces: 2, Bathrooms: 4, Bedrooms: 7,

def get_bedroom_num(s):
    """Return the digits found next to a bedroom keyword in *s*.

    The pattern is chosen from the punctuation present in *s*:

    * contains ``:``  -> text after ``Bedrooms:`` / ``Bedroom:``
    * contains ``,``  -> text after ``bedrooms`` / ``bedroom`` / ``beds``
    * otherwise       -> text before ``bedrooms`` / ``bedroom``

    Matching is case-insensitive. Every digit character in the captured
    text is kept and concatenated, so captured text that holds several
    numbers yields them run together.

    Args:
        s: A free-text property description.

    Returns:
        A string of digits; ``''`` when the keyword is absent or the
        captured text contains no digits.
    """
    if ':' in s:
        pattern = r'(?:Bedrooms:|Bedroom:)(.*)'
    elif ',' in s:
        pattern = r'(?:bedrooms|bedroom|beds)(.*)'
    else:
        pattern = r'(.*)(?:bedrooms|bedroom).*'

    match = re.search(pattern, s, re.I)
    if match is None:
        # No bedroom keyword in the expected form: report "no number"
        # instead of failing on ``None.group``.
        return ''

    # Build a real string; on Python 3 ``filter`` would return a lazy
    # iterator rather than the filtered text.
    return ''.join(ch for ch in match.group(1) if ch.isdigit())

# Convert word to number
def text2int(textnum, numwords={}):
    """Replace spelled-out English numbers in *textnum* with digits.

    Runs of number words ("twenty one", "one hundred and five", "fourth")
    are collapsed into a single decimal number; every other word is copied
    through unchanged. Number words are recognised case-insensitively.
    Hyphens anywhere in the input are replaced by spaces before splitting.

    Args:
        textnum: The text to convert.
        numwords: Mapping of lowercase number word -> ``(scale, increment)``.
            The mutable default is deliberate: it is filled on the first
            call and then reused as a cache by later calls.

    Returns:
        The converted text. Words are separated by single spaces, and a
        trailing space is present whenever the text ends with a
        non-number word.
    """
    if not numwords:
        units = [
            "zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen",
            "fourteen", "fifteen", "sixteen", "seventeen", "eighteen",
            "nineteen",
        ]
        tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty",
                "seventy", "eighty", "ninety"]
        scales = ["hundred", "thousand", "million", "billion", "trillion"]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units):
            numwords[word] = (1, idx)
        for idx, word in enumerate(tens):
            # The empty placeholders only keep the indices aligned; they
            # must not become keys, or a stripped "th" would count as 10.
            if word:
                numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales):
            # hundred -> 10**2, thousand -> 10**3, million -> 10**6, ...
            numwords[word] = (10 ** (idx * 3 or 2), 0)

    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5,
                     'eighth': 8, 'ninth': 9, 'twelfth': 12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    textnum = textnum.replace('-', ' ')

    current = result = 0
    pieces = []
    onnumber = False
    for word in textnum.split():
        key = word.lower()

        if key in ordinal_words:
            current += ordinal_words[key]
            onnumber = True
            continue

        if key not in numwords:
            # Accept a regular ordinal ("fourth", "twentieth") only when
            # stripping the suffix really yields a number word, so that
            # ordinary words such as "with" or "bath" are left intact.
            for ending, replacement in ordinal_endings:
                if key.endswith(ending):
                    candidate = key[:-len(ending)] + replacement
                    if candidate in numwords:
                        key = candidate
                        break

        # "and" only joins parts of a number that has already started;
        # on its own it is an ordinary word.
        is_plain_word = key not in numwords or (key == "and" and not onnumber)

        if is_plain_word:
            if onnumber:
                pieces.append(str(result + current) + " ")
            pieces.append(word + " ")
            result = current = 0
            onnumber = False
        else:
            scale, increment = numwords[key]
            current = current * scale + increment
            if scale > 100:
                # thousand and above close the current group
                result += current
                current = 0
            onnumber = True

    if onnumber:
        pieces.append(str(result + current))

    return "".join(pieces)

1条回答

网友

1楼 · 发布于 2024-05-19 15:53:34

你可以使用下面的正则表达式来匹配所有不同的写法组合，并提取卧室数量信息：

(\w+\+?) *(?:double +)?bed(?:room)?s?|bed(?:room)?s?:? *(\d+\+?)

Online Demo

由于该正则表达式使用了分支（alternation），捕获到的信息可能位于 group1 或 group2 中，因此下面的 Python 代码演示了如何从实际匹配到的分组中提取数据：

import re

def getRoomInfo(s):
    """Return the first bedroom count found in *s*, or ``None``.

    The pattern has two alternatives: a word (optionally followed by
    ``+``) placed before the bedroom keyword, captured in group 1, or
    digits (optionally followed by ``+``) placed after it, captured in
    group 2. Whichever group took part in the match is returned as a
    string; matching is case-insensitive.
    """
    found = re.search(
        r'(\w+\+?) *(?:double +)?bed(?:room)?s?|bed(?:room)?s?:? *(\d+\+?)',
        s,
        re.IGNORECASE,
    )
    if not found:
        return None

    before_keyword, after_keyword = found.groups()
    if before_keyword is not None:
        return before_keyword
    return after_keyword


# Sample property descriptions used to exercise getRoomInfo.
arr = [
    '4 bedrooms 2 bathrooms 3 carparks',
    '3 bedroom house',
    'Bedrooms 2, ',
    'beds 5,',
    'Bedrooms 1, ',
    '2 bedrooms, 1 bathroom, ',
    'Four bedrooms home, double garage',
    'Four bedrooms home',
    'Three double bedrooms home, garage',
    'Three bedrooms home,',
    '2 bedroom home unit with single carport.',
    'Garage car spaces: 2, Bathrooms: 4, Bedrooms: 7,\\\\',
    # A single string: the double quotes inside it are literal characters,
    # so this one entry holds several room descriptions.
    'Three bedroom bungalow with conservatory and", "One bedroom unit","4+ bedroom(s), 2 bathroom(s), 2 garage(s)',
]

# Print each description next to the first value extracted from it.
for s in arr:
    print(s, '  > ', getRoomInfo(s))

打印以下内容

4 bedrooms 2 bathrooms 3 carparks   >  4
3 bedroom house   >  3
Bedrooms 2,    >  2
beds 5,   >  5
Bedrooms 1,    >  1
2 bedrooms, 1 bathroom,    >  2
Four bedrooms home, double garage   >  Four
Four bedrooms home   >  Four
Three double bedrooms home, garage   >  Three
Three bedrooms home,   >  Three
2 bedroom home unit with single carport.   >  2
Garage car spaces: 2, Bathrooms: 4, Bedrooms: 7,\\   >  7
Three bedroom bungalow with conservatory and", "One bedroom unit","4+ bedroom(s), 2 bathroom(s), 2 garage(s)   >  Three

Edit2:Python代码的另一个版本，它捕获字符串中的所有匹配项，并以列表的形式返回结果

import re

def getRoomInfoAll(s):
    """Return every bedroom count found in *s* as a list of strings.

    Each match contributes the text of whichever capture group took part:
    group 1 (a word before the bedroom keyword) or group 2 (digits after
    it). Matches are listed in the order they occur; the list is empty
    when nothing matches. Matching is case-insensitive.
    """
    pattern = re.compile(
        r'(\w+\+?) *(?:double +)?bed(?:room)?s?|bed(?:room)?s?:? *(\d+\+?)',
        re.IGNORECASE,
    )
    # Prefer group 1, fall back to group 2; empty or missing captures
    # are dropped.
    captured = (hit.group(1) or hit.group(2) for hit in pattern.finditer(s))
    return [value for value in captured if value]


# Sample property descriptions used to exercise getRoomInfoAll.
arr = [
    '4 bedrooms 2 bathrooms 3 carparks',
    '3 bedroom house',
    'Bedrooms 2, ',
    'beds 5,',
    'Bedrooms 1, ',
    '2 bedrooms, 1 bathroom, ',
    'Four bedrooms home, double garage',
    'Four bedrooms home',
    'Three double bedrooms home, garage',
    'Three bedrooms home,',
    '2 bedroom home unit with single carport.',
    'Garage car spaces: 2, Bathrooms: 4, Bedrooms: 7,\\\\',
    # A single string: the double quotes inside it are literal characters,
    # so this one entry holds several room descriptions.
    'Three bedroom bungalow with conservatory and", "One bedroom unit","4+ bedroom(s), 2 bathroom(s), 2 garage(s)',
]

# Print each description next to the list of all values extracted from it.
for s in arr:
    print(s, '     > ', getRoomInfoAll(s))

在这里，它会打印字符串中找到的所有匹配项。

4 bedrooms 2 bathrooms 3 carparks      >  ['4']
3 bedroom house      >  ['3']
Bedrooms 2,       >  ['2']
beds 5,      >  ['5']
Bedrooms 1,       >  ['1']
2 bedrooms, 1 bathroom,       >  ['2']
Four bedrooms home, double garage      >  ['Four']
Four bedrooms home      >  ['Four']
Three double bedrooms home, garage      >  ['Three']
Three bedrooms home,      >  ['Three']
2 bedroom home unit with single carport.      >  ['2']
Garage car spaces: 2, Bathrooms: 4, Bedrooms: 7,\\      >  ['7']
Three bedroom bungalow with conservatory and", "One bedroom unit","4+ bedroom(s), 2 bathroom(s), 2 garage(s)      >  ['Three', 'One', '4+']

相关问题更多 >

编程相关推荐

热门问题

热门文章