pyparsing:将 OneOrMore 嵌套在另一个 OneOrMore 中

2024-10-02 20:29:42 发布

您现在位置:Python中文网/ 问答频道 /正文

这是我第一次尝试使用 pyparsing。我的解析器没有按我期望的方式工作,有人能帮忙看看哪里有问题吗?我想在一个 OneOrMore 中嵌套另一个 OneOrMore,我认为这应该可以正常工作,但事实并非如此。

以下是整个代码:

import pyparsing

# Sample report text: two sections, each made of a "sale number" line,
# a column-header line (NAME ... START/STOP) and two data rows.
status = """
    sale number       : 11/7 
    NAME               ID    PAWN    PRICE    TIME         %C     STATE     START/STOP
    cross-cu-1       1055       1    106284K  07:49:36.19  25.05%   run          1d01h
    cross-cu-2        918       1    104708K  07:38:19.08  24.02%   run          1d01h
    sale number       : 11/8 
    NAME               ID    PAWN    PRICE    TIME         %C     STATE     START/STOP
    cross-cu-3       1055       1    106284K  07:49:36.19  25.05%   run          1d01h
    cross-cu-4        918       1    104708K  07:38:19.08  24.02%   run          1d01h
    """

# Numeric tokens: the parse actions convert the matched text to int / float.
integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda toks: int(toks[0]))
decimal = pyparsing.Word(pyparsing.nums + ".").setParseAction(lambda toks: float(toks[0]))
# Tokens that must match but are suppressed from the parse results.
wordSuppress = pyparsing.Suppress(pyparsing.Word(pyparsing.alphas))
endOfLine = pyparsing.LineEnd().suppress()
colon = pyparsing.Suppress(":")

# Section header: the sale number itself (two digits, "/", one digit), the
# NAME ... STOP column-header line, and the whole "sale number : NN/N" line.
saleNumber = pyparsing.Regex("\d{2}\/\d{1}").setResultsName("saleNumber")
lineSuppress = pyparsing.Regex("NAME.*STOP") + endOfLine
saleRow = wordSuppress + wordSuppress + colon + saleNumber + endOfLine

# One named element per column of a data row; the "K" and "%" suffixes are
# matched as separate literal tokens after the number.
name = pyparsing.Regex("cross-cu-\d").setResultsName("name")
id = integer.setResultsName("id")
pawn = integer.setResultsName("pawn")
price = integer.setResultsName("price") + "K"
time = pyparsing.Regex("\d{2}:\d{2}:\d{2}.\d{2}").setResultsName("time")
c = decimal.setResultsName("c") + "%"
state = pyparsing.Word(pyparsing.alphas).setResultsName("state")
startStop = pyparsing.Word(pyparsing.alphanums).setResultsName("startStop")
row = name + id + pawn + price + time + c + state + startStop + endOfLine

# Outer OneOrMore: one group per section (sale line, header line, rows), or
# skip ahead to the next sale line. Inner OneOrMore: one group per data row,
# or skip ahead to the next row.
# NOTE(review): the inner SkipTo(row) can presumably also skip over the next
# "sale number" section to reach its rows, so all rows may be absorbed by the
# first group -- confirm against pyparsing's SkipTo behavior.
# NOTE(review): the row groups carry no results name, which likely explains
# why asDict() below reports only 'saleNumber' -- confirm.
table = pyparsing.OneOrMore(pyparsing.Group(saleRow + lineSuppress.suppress() + (pyparsing.OneOrMore(pyparsing.Group(row) | pyparsing.SkipTo(row).suppress())) ) | pyparsing.SkipTo(saleRow).suppress())

# Convert each top-level group to a dict of its named results and print them.
resultDic = [x.asDict() for x in table.parseString(status)]
print resultDic

它只返回 [{'saleNumber': '11/7'}],而我希望得到这样的字典列表:

^{pr2}$

感谢任何帮助! 请不要建议其他实现此输出的方法!我也在努力学习pyparsing!


Tags: run name integer pyparsing regex word stop cross
2条回答

在这种情况下,pyparsing可能有点过头了。为什么不直接逐行读取文件,然后解析结果呢?

代码如下所示:

编辑:我已经更新了代码,以便更接近您的示例。

from collections import defaultdict

# Sample report text: each section is a "sale number" line, a column-header
# line (NAME ...) and the data rows that belong to that sale.
status = """
sale number       : 11/7
NAME               ID    PAWN    PRICE    TIME         %C     STATE     START/STOP
cross-cu-1       1055       1    106284K  07:49:36.19  25.05%   run          1d01h
cross-cu-2        918       1    104708K  07:38:19.08  24.02%   run          1d01h
sale number       : 11/8
NAME               ID    PAWN    PRICE    TIME         %C     STATE     START/STOP
cross-cu-3       1055       1    106284K  07:49:36.19  25.05%   run          1d01h
cross-cu-4        918       1    104708K  07:38:19.08  24.02%   run          1d01h
"""

# Sale number of the section currently being read; it stays '' until the
# first "sale number" line has been seen.
sale_number = ''

# Maps each sale number to the list of its rows; a row is the list of
# whitespace-separated column strings of one data line.
sales = defaultdict(list)

for line in status.split('\n'):
    line = line.strip()
    # Blank lines and the column-header line carry no data.
    if not line or line.startswith("NAME"):
        continue
    if line.startswith("sale number"):
        # Everything after the first colon is the sale number, e.g. "11/7".
        sale_number = line.split(':', 1)[1].strip()
        continue
    # Data row (you can also use a regular expression here).
    sales[sale_number].append(line.split())

for sale in sales:
    # A single pre-formatted argument prints identically on Python 2 and 3.
    print("%s %s" % (sale, sales[sale]))

这样可以吗?

import pyparsing

# Sample report text: each section is a "sale number" line, a column-header
# line (NAME ... START/STOP) and the data rows that belong to that sale.
status = """
sale number       : 11/7
NAME               ID    PAWN    PRICE    TIME         %C     STATE     START/STOP
cross-cu-1       1055       1    106284K  07:49:36.19  25.05%   run          1d01h
cross-cu-2        918       1    104708K  07:38:19.08  24.02%   run          1d01h
sale number       : 11/8
NAME               ID    PAWN    PRICE    TIME         %C     STATE     START/STOP
cross-cu-3       1055       1    106284K  07:49:36.19  25.05%   run          1d01h
cross-cu-4        918       1    104708K  07:38:19.08  24.02%   run          1d01h
"""

# Numeric tokens: the parse actions convert the matched text to int / float.
integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda toks: int(toks[0]))
decimal = pyparsing.Word(pyparsing.nums + ".").setParseAction(lambda toks: float(toks[0]))
# Tokens that must match but are suppressed from the parse results.
wordSuppress = pyparsing.Suppress(pyparsing.Word(pyparsing.alphas))
endOfLine = pyparsing.LineEnd().suppress()
colon = pyparsing.Suppress(":")

# Section header pieces. The regex patterns are raw strings so that "\d" and
# "\." reach the regex engine untouched by Python's string-escape handling.
saleNumber = pyparsing.Regex(r"\d{2}/\d").setResultsName("saleNumber")
lineSuppress = pyparsing.Regex("NAME.*STOP") + endOfLine
saleRow = wordSuppress + wordSuppress + colon + saleNumber + endOfLine

# One named element per column of a data row; the "K" and "%" suffixes are
# matched as separate literal tokens after the number.
name = pyparsing.Regex(r"cross-cu-\d").setResultsName("name")
id = integer.setResultsName("id")
pawn = integer.setResultsName("pawn")
price = integer.setResultsName("price") + "K"
# The dot before the fractional seconds is escaped so it only matches ".".
time = pyparsing.Regex(r"\d{2}:\d{2}:\d{2}\.\d{2}").setResultsName("time")
c = decimal.setResultsName("c") + "%"
state = pyparsing.Word(pyparsing.alphas).setResultsName("state")
startStop = pyparsing.Word(pyparsing.alphanums).setResultsName("startStop")

# Each data row is its own group; the rows of one section are collected under
# the results name "rows" so that asDict() exposes them. The row group itself
# carries no results name: rows are reached by position inside "rows".
row = pyparsing.Group(name + id + pawn + price + time + c + state + startStop + endOfLine)
rows = pyparsing.OneOrMore(row).setResultsName("rows")

# One group per section: sale line, (suppressed) column-header line, rows.
table = pyparsing.OneOrMore(pyparsing.Group(saleRow + lineSuppress.suppress() + rows))

# Convert each section group to a dict of its named results and print them.
# Single-argument print() behaves the same on Python 2 and Python 3.
resultDic = [x.asDict() for x in table.parseString(status)]
print(resultDic)

相关问题 更多 >