在Python中从多个文本文件中定位和提取字符串

import os
import glob
import csv


def check(filename):
    """Print whether the claim letter stored in *filename* was denied or approved.

    A file containing 'DELIVERY NOTIFICATION' is reported as denied, one
    containing 'Dear Customer:' as approved; anything else is reported as
    unknown. The denied marker is tested first, so it wins when both appear.

    Args:
        filename: Path of the text file to inspect.

    Returns:
        None. The verdict is only printed.
    """
    # Read the file exactly once inside a context manager: the previous
    # version opened it up to twice per call and never closed the handles.
    with open(filename) as claim_file:
        contents = claim_file.read()

    if 'DELIVERY NOTIFICATION' in contents:
        isDenied = True
        print("This claim was Denied")
        print(isDenied)
    elif 'Dear Customer:' in contents:
        isDenied = False
        print("This claim was Approved")
        print(isDenied)
    else:
        print("I don't know if this is approved or denied")


def iterate():
    """Run check() on every ``*.txt`` file located directly inside ``text/``."""
    path = 'text/'
    for infile in glob.glob(os.path.join(path, '*.txt')):
        print('current file is:' + infile)
        check(infile)


iterate()

Shipper Number............................577140Pickup Date....................................06/27/17 Number of Parcels........................1Weight.............................................1 LBS Shipper Invoice Number..............30057010Tracking Identification Number...1Z000000YW00000000 Merchandise..................................1 S NIKE EQUALS EVERYWHERE T BK B WE HAVE BEEN UNABLE TO PROVIDE SATISFACTORY PROOF OF DELIVERY FOR THE ABOVE SHIPMENT. WE APOLOGIZE FOR THE INCONVENIENCE THIS CAUSES. NPT8AEQ:000A0000LDI 07 ----------------Page (1) Break----------------

import csv
import os
import glob

# Results collected by check(): one entry per processed claim file.
arrayDenied = []
# Was never defined in the previous version, which raised NameError as soon
# as an approved claim was found (and again when writing Approved.csv).
arrayApproved = []


def _text_after(text, marker, length):
    """Return the *length* characters of *text* that directly follow *marker*.

    Raises:
        ValueError: if *marker* does not occur in *text*.
    """
    start = text.index(marker) + len(marker)
    return text[start:start + length]


def iterate():
    """Run check() on every ``*.txt`` file located directly inside ``text/``."""
    path = 'text/'
    for infile in glob.glob(os.path.join(path, '*.txt')):
        print('current file is:' + infile)
        check(infile)


def check(filename):
    """Classify one claim file and record its identifiers.

    Denied claims (text contains 'DELIVERY NOTIFICATION') append the
    18-character tracking number to ``arrayDenied``. Approved claims (text
    contains 'Dear Customer:') append ``"<tracking> - <claim number>"`` to
    ``arrayApproved``, where the claim number is the 11 characters following
    'Claim Number '. Files matching neither marker are only reported.

    Args:
        filename: Path of the text file to inspect.

    Raises:
        ValueError: if a matched file lacks the expected
            'Tracking Identification Number...' or 'Claim Number ' label.
    """
    with open(filename, 'rt') as file_contents:
        myText = file_contents.read()

    if 'DELIVERY NOTIFICATION' in myText:
        myNumber = _text_after(myText, "Tracking Identification Number...", 18)
        print("Denied: " + myNumber)
        arrayDenied.append(myNumber)
    # Reuse the text already in memory; the previous version re-opened the
    # file here and leaked the handle.
    elif 'Dear Customer:' in myText:
        print("This claim was Approved")
        myNumber = _text_after(myText, "Tracking Identification Number...", 18)
        myClaimNumber = _text_after(myText, "Claim Number ", 11)
        arrayApproved.append(myNumber + " - " + myClaimNumber)
    else:
        print("I don't know if this is approved or denied")


iterate()

# Export each result list as a single-column CSV file.
with open('Approved.csv', "w") as output:
    writer = csv.writer(output, lineterminator='\n')
    for val in arrayApproved:
        writer.writerow([val])

with open('Denied.csv', "w") as output:
    writer = csv.writer(output, lineterminator='\n')
    for val in arrayDenied:
        writer.writerow([val])

print(arrayDenied)
print(arrayApproved)

3条回答

网友

1楼 · 编辑于 2024-06-28 16:13:29

如果您的目标只是找到 "Tracking Identification Number..." 这个字符串以及它后面紧跟的18个字符，那么只需找到该字符串的起始索引，加上它的长度得到其结尾位置，再从该位置开始切片，取出随后的18个字符即可。

# Load the whole file first; the handle is released before any searching.
with open(filename, 'rt') as source:
    myText = source.read()

if 'DELIVERY NOTIFICATION' in myText:
    # The tracking number is the 18 characters right after this label.
    label = "Tracking Identification Number..."
    start = myText.index(label) + len(label)
    myNumber = myText[start:start + 18]
    arrayDenied.append(myNumber)

您还可以将append这一行修改为arrayDenied.append(myText + ' ' + myNumber)或类似的内容。

网友

2楼 · 编辑于 2024-06-28 16:13:29

我认为这可以解决你的问题，你只需把它封装成一个函数即可。

import re

string = 'Tracking Identification Number...1Z000000YW00000000'

# Remove every dot so the filler between the label and the number disappears.
# A raw string keeps `\.` from being read as an invalid string escape.
no_dots = re.sub(r'\.', '', string)

# Capture everything that follows the label; `^` requires the label to be at
# the very start of the string.
matchObj = re.search(r'^Tracking Identification Number(.*)', no_dots)

# re.search returns None when nothing matches. Test for that directly rather
# than relying on a bare `except:`, which would also hide unrelated errors.
if matchObj is not None:
    print(matchObj.group(1))
else:
    print("No match!")

如果您想阅读文档，请点击这里：https://docs.python.org/3/library/re.html#re.search

网友

3楼 · 编辑于 2024-06-28 16:13:29

正则表达式是完成这个任务的合适方法。下面是一种修改代码以按模式进行搜索的方法。

import re
pattern = r"(?<=Tracking Identification Number)(?:(\.+))[A-Z-a-z0-9]{18}"


def check(filename):
    """Print the verdict for one claim file and, if denied, its tracking numbers.

    A file containing 'DELIVERY NOTIFICATION' is reported as denied and every
    occurrence of ``pattern`` in it is printed as a tracking number. A file
    containing 'Dear Customer:' is reported as approved; anything else is
    reported as unknown.

    Args:
        filename: Path of the text file to inspect.

    Returns:
        None. All results are only printed.
    """
    # Use a context manager so the file handle is closed after reading.
    with open(filename, 'r') as claim_file:
        file_contents = claim_file.read()

    if 'DELIVERY NOTIFICATION' in file_contents:
        isDenied = True
        print("This claim was Denied")
        print(isDenied)
        # Search the text that was just read; the previous version passed an
        # undefined name (`test_str`) here and raised NameError.
        matches = re.finditer(pattern, file_contents)
        for match in matches:
            # The match still starts with the filler dots; strip them off.
            print("Tracking Number = %s" % match.group().strip("."))
    elif 'Dear Customer:' in file_contents:
        isDenied = False
        print("This claim was Approved")
        print(isDenied)
    else:
        print("I don't know if this is approved or denied")

解释：

r"(?<=Tracking Identification Number)(?:(\.+))[A-Z-a-z0-9]{18}"

(?<=Tracking Identification Number)是肯定式后行断言（positive lookbehind）：它要求匹配位置的前面紧挨着字符串“Tracking Identification Number”，但该字符串本身不会包含在匹配结果中
(?:(\.+))匹配一个或多个点（.）（我们在后面去掉这些点）
[A-Z-a-z0-9]{18}匹配恰好18个字符，每个字符可以是大写或小写字母、数字或连字符（-）；注意字符类中位于A-Z和a-z之间的“-”会被当作字面量连字符

更多关于正则表达式（Regex）的内容。

相关问题更多 >

编程相关推荐

热门问题

热门文章