用Fuzzywuzzy比较一个CSV与Excel,并根据Excel文件中大致数据从CSV文件复制数据的Python

2024-09-30 01:34:36 发布

您现在位置:Python中文网/ 问答频道 /正文

我有以下代码,将数据从csv文件复制并粘贴到excel文件(它比较员工的姓名并复制日期)。唯一的问题是它只复制精确匹配的日期(William对William,而不是Will to William)。有没有使用fuzzyfuzzy使它也从近似匹配中复制数据。代码如下所示。在

from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import csv
import openpyxl
import datetime


training_updates = {};
with open('from.csv') as frombook:
    reader = csv.DictReader(frombook)
    for row in reader:
        if int(row["ViewTotalScore"]) > 70:
            training_updates[(row["ViewerInfo"].strip("[]").lower())] = (row["DateTime"])[:-8].rstrip()

wb = openpyxl.load_workbook('to.xlsm', data_only=True, keep_vba=True)
wb2 = openpyxl.load_workbook('to.xlsm', keep_vba=True)
sheetcheck = wb.get_sheet_by_name(wb.get_sheet_names()[0])
sheetwrite = wb2.get_sheet_by_name(wb2.get_sheet_names()[0])
for rowNum in range(2, sheetcheck.max_row):
    if sheetcheck.cell(row=rowNum, column=3).value is not None:
        employee = str(sheetcheck.cell(row=rowNum, column=3).value).lower()
        if employee in training_updates:
            xldate = datetime.datetime.strptime(training_updates[employee], "%Y-%m-%d").strftime("%m/%d/%Y")
            sheetwrite.cell(row=rowNum, column=5).value = xldate
            print("Updated record for " + employee)

wb2.save('updatedto.xlsm')

我也曾使用fuzzyfuzzy extractOne来尝试拉取它们,但是对于fuzzy匹配没有任何结果:

^{pr2}$

Tags: csvtofromimportgettrainingemployeesheet
1条回答
网友
1楼 · 发布于 2024-09-30 01:34:36

我能够接近我想要做的事情:

import re
import csv
import openpyxl
import datetime

# define our dictionaries
training_updates = {}
update_exceptions = {}

training_name =  ""
regex = " \([A-Z0-9]*\)"

# create our counter variables
counttotal = 0
countupdated = 0
exception_count = 1

# open the csv we want to pull our info from
with open('from.csv') as frombook:
    reader = csv.DictReader(frombook)
    for row in reader:
        training_name = re.sub(r'\([^)]*\)', '', row["Sort Title"])
        print(training_name)
        break

    # loop through the rows and check if the user's total score is greater than 70
    for row in reader:
        if int(row["ViewTotalScore"]) > 70:
            # if it is, add 1 to our total count and add the record to the update dictionary
            training_updates[(row["ViewerInfo"].strip("[]").lower())] = (row["DateTime"])[:-8].rstrip()

counttotal = len(training_updates)
wb = openpyxl.load_workbook('to.xlsm', data_only=True, keep_vba=True)
wb2 = openpyxl.load_workbook('to.xlsm', keep_vba=True)
wbexception = openpyxl.load_workbook('template.xlsx')

sheetcheck = wb.get_sheet_by_name(wb.get_sheet_names()[0])
sheetwrite = wb2.get_sheet_by_name(wb2.get_sheet_names()[0])
sheetexception = wbexception.get_sheet_by_name(wbexception.get_sheet_names()[0])

for rowNum in range(1, sheetcheck.max_row):
    if sheetcheck.cell(row=rowNum, column=3).value is not None:     
        employee = str(sheetcheck.cell(row=rowNum, column=3).value).lower()        
        empname = employee.split(" ")
        if len(empname) >= 2:
            empsearchname = empname[0] + " " + empname[1]
            if empsearchname in training_updates:
                xldate = datetime.datetime.strptime(training_updates[empsearchname], "%Y-%m-%d").strftime("%m/%d/%Y")
                sheetwrite.cell(row=rowNum, column=14).value = xldate
                print("Updated record for " + employee)
                countupdated+=1
                del training_updates[empsearchname]

print(str(countupdated)+" of "+str(counttotal)+" updated.")
print(str(len(training_updates))+" records still need to be updated.")

wb2.save('updatedto.xlsm')

for key, value in training_updates.items():
    exception_count+=1
    xldate = datetime.datetime.strptime(value, "%Y-%m-%d").strftime("%m/%d/%Y")
    sheetexception.cell(row=exception_count, column = 1).value = key
    sheetexception.cell(row=exception_count, column = 2).value = value
    print("Noting exception for "+key)

exception_count-=1
print("Report generated for "+str(exception_count)+" exceptions. "+str(exception_count)+" + "+str(countupdated)+" = "+str(counttotal))
wbexception.save('update_exceptions.xlsx')

相关问题 更多 >

    热门问题