将字符串拆分为不同的行长度

import re #Dictionary containing the line number as key and the max line length lineLengths = { 1:9, 2:11, 3:12, 4:14, 5:14 } inputStr = "THIS IS A LONG DESC 7X7 NEEDS SPLITTING" #Test string, should be split on the spaces and around the "X" splitted = re.split("(?:\s|((?<=\d)X(?=\d)))",inputStr) #splits inputStr on white space and where X is surrounded by numbers eg. dimensions lineNum = 1 #initialises the line number at 1 lineStr1 = "" #initialises each line as a string lineStr2 = "" lineStr3 = "" lineStr4 = "" lineStr5 = "" #Dictionary creating dynamic line variables lineNumDict = { 1:lineStr1, 2:lineStr2, 3:lineStr3, 4:lineStr4, 5:lineStr5 } if len(inputStr) > 40: print "The short description is longer than 40 characters" else: while lineNum <= 5: for word in splitted: if word != None: if len(lineNumDict[lineNum]+word) <= lineLengths[lineNum]: lineNumDict[lineNum] += word else: lineNum += 1 else: if len(lineNumDict[lineNum])+1 <= lineLengths[lineNum]: lineNumDict[lineNum] += " " else: lineNum += 1 lineOut1 = lineStr1.strip() lineOut2 = lineStr2.strip() lineOut3 = lineStr3.strip() lineOut4 = lineStr4.strip() lineOut5 = lineStr5.strip()

3条回答

网友

1楼 · 编辑于 2024-06-28 19:04:14

我想知道一个正确注释的正则表达式是否更容易理解？在

lineLengths = {1:9,2:11,3:12,4:14,5:14}
inputStr = "THIS IS A LONG DESC 7X7 NEEDS SPLITTING" 
import re
pat = """
(?:                     # non-capture around the line as we want to drop leading spaces
    \s*                 # drop leading spaces
    (.{{1,{max_len}}})  # up to max_len characters, will be added through 'format'
    (?=[\b\sX]|$)       # and using word breaks, X and string ending as terminators
                        # but without capturing as we need X to go into the next match
)?                      # and ignoring missing matches if not all lines are necessary
"""

# build a pattern matching up to 5 lines with the corresponding max lengths
pattern = ''.join(pat.format(max_len=x) for x in lineLengths.values())

re.match(pattern, inputStr, re.VERBOSE).groups()
#  Out: ('THIS IS A', 'LONG DESC 7', '7 NEEDS', 'SPLITTING', None)

另外，使用dict来表示行长度并没有实际意义，列表就可以了。在

网友

2楼 · 编辑于 2024-06-28 19:04:14

for word in splitted:
    ...
    lineNum += 1

您的代码将lineNum增加splitted中的字数，即16倍。在

网友

3楼 · 编辑于 2024-06-28 19:04:14

它不起作用，因为在循环中使用lineLen条件的for words在分割的循环中。你必须这样做：

    if len(inputStr) > 40:
        print "The short description is longer than 40 characters"
    else:
        for word in splitted:
            if lineNum > 5:
                 break
            if word != None:
                if len(lineNumDict[lineNum]+word) <= lineLengths[lineNum]:
                    lineNumDict[lineNum] += word
                else:
                    lineNum += 1
            else:
                if len(lineNumDict[lineNum])+1 <= lineLengths[lineNum]:
                    lineNumDict[lineNum] += " "
                else:
                    lineNum += 1

而且lineStr1、lineStr2等不会更改，您必须直接访问dict（字符串是不可变的）。我试了一下，结果奏效了：

^{pr2}$

给出：

    Lines: {1: 'THIS IS A', 2: 'LONG DESC 7', 3: '7 NEEDS ', 4: '', 5: ''}

相关问题更多 >

编程相关推荐

热门问题

热门文章