如何将转换后的文件写入另一个目录

# Batch-convert every .html file in the input folder into an .xlsx file
# with the same base name inside the output folder.
from html2excel import ExcelParser
import os

cwd = os.getcwd()  # Get the current working directory (cwd)
files = os.listdir(cwd)  # Get all the files in that directory
print("Files in %r: %s" % (cwd, files))

os.chdir(r'C:\Users\Ahmed_Abdelmuniem\Desktop\Afternoon')
cwd = os.getcwd()  # Get the current working directory (cwd)
files = os.listdir(cwd)  # Get all the files in that directory
print("Files in %r: %s" % (cwd, files))

inputFolder = r'C:\Users\Ahmed_Abdelmuniem\Desktop\Afternoon'  # Change here the input folder
outputFolder = r'C:\Users\Ahmed_Abdelmuniem\Desktop\Evening'  # Change here the output folder

# Make sure the destination exists before the first workbook is saved.
os.makedirs(outputFolder, exist_ok=True)

for file in os.listdir(inputFolder):
    # Skip everything that is not an HTML file.
    if not file.endswith(".html"):
        continue

    # Use full paths so the conversion does not depend on the current
    # working directory.
    input_path = os.path.join(inputFolder, file)

    # Keep the original base name, swap the extension for .xlsx, and place
    # the result in the output folder. (Previously the workbook was written
    # back onto the .html input name and outputFolder was never used.)
    base_name = os.path.splitext(file)[0]
    output_path = os.path.join(outputFolder, base_name + ".xlsx")

    parser = ExcelParser(input_path)
    parser.to_excel(output_path)

Traceback (most recent call last): File "C:\Users\Ahmed_Abdelmuniem\PycharmProjects\Batch HTML to Excel\main.py", line 19, in <module> parser = ExcelParser(file) File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\html2excel\excel\parser.py", line 7, in __init__ super().__init__(file_path) File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\html2excel\base\parser.py", line 13, in __init__ self.load_workbook() File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\html2excel\excel\parser.py", line 30, in load_workbook self._write_cell(i, j, col_data) File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\html2excel\base\parser.py", line 47, in _write_cell self.ws.cell(row=row, column=col).value = val File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\openpyxl\cell\cell.py", line 215, in value self._bind_value(value) File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\openpyxl\cell\cell.py", line 194, in _bind_value value = self.check_string(value) File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\openpyxl\cell\cell.py", line 162, in check_string raise IllegalCharacterError openpyxl.utils.exceptions.IllegalCharacterError

from html2excel import ExcelParser

# Full paths of the HTML source and of the Excel workbook to create.
input_file = r'C:\Users\Ahmed_Abdelmuniem\Desktop\088TE QTIF-794.html'
output_file = r'C:\Users\Ahmed_Abdelmuniem\Desktop\088TE QTIF-794.xlsx'

# Build the parser from the input file and save the result to the output path.
ExcelParser(input_file).to_excel(output_file)

2条回答

网友

1楼 · 编辑于 2024-09-28 23:23:50

类似下面这样的代码，稍作调整后应该就可以使用：

from pathlib import Path

# Source and destination folders; point these at your real directories.
afternoon_folder = Path("afternoon")
evening_folder = Path("evening")

for aft_file_path in afternoon_folder.iterdir():
    # Every entry of the afternoon folder is visited. The printed labels
    # repeat the variable names so the trace can be matched to the code.
    print(f"aft_file_path {aft_file_path}")

    # File extension, including the leading dot.
    ext = aft_file_path.suffix
    print(f"    ext {ext}")

    # Only HTML files are of interest; report anything else and move on.
    if ext != ".html":
        print(f"    skipping {aft_file_path}")
        continue

    # Same location with an Excel extension (still inside the afternoon folder).
    excel_path_afternoon = aft_file_path.with_suffix(".xlsx")
    print(f"    excel_path_afternoon {excel_path_afternoon}")

    # Keep only the file name and re-root it under the evening folder.
    excel_path_evening = evening_folder.joinpath(excel_path_afternoon.name)
    print(f"    excel_path_evening {excel_path_evening}")

    # aft_file_path (input) and excel_path_evening (output) are now ready
    # for the conversion step, for example:
    #     parser = ExcelParser(aft_file_path)
    #     parser.to_excel(excel_path_evening)

我测试了此文件夹结构：

.
├── afternoon
│  ├── input00.html
│  ├── input01.html
│  ├── input02.html
│  └── skipthis.txt
├── evening
└── snippet.py

这是输出：

aft_file_path afternoon/input00.html
    ext .html
    excel_path_afternoon afternoon/input00.xlsx
    excel_path_evening evening/input00.xlsx
aft_file_path afternoon/input02.html
    ext .html
    excel_path_afternoon afternoon/input02.xlsx
    excel_path_evening evening/input02.xlsx
aft_file_path afternoon/skipthis.txt
    ext .txt
    skipping afternoon/skipthis.txt
aft_file_path afternoon/input01.html
    ext .html
    excel_path_afternoon afternoon/input01.xlsx
    excel_path_evening evening/input01.xlsx

显然，我无法测试代码中的解析部分，因为我不知道你的 HTML 文件里有什么内容。

干杯

网友

2楼 · 编辑于 2024-09-28 23:23:50

首先，感谢皮埃特罗指出我的错误

其次，我已经让它基本跑通了，但还不是 100%：它能把所有文件转换为 Excel，但我无法让输出文件保留原始文件名——当我尝试保留原始名称时，程序既不做任何事情，也不会报错。

# Batch-convert every .html file in the input folder into an .xlsx file
# that keeps the original base name, written to the output folder.
from html2excel import ExcelParser
import os

inputFolder = r'C:\Users\Ahmed_Abdelmuniem\Desktop\Afternoon'  # Change here the input folder
outputFolder = r'C:/users/Ahmed_Abdelmuniem/Desktop/Evening'  # Change here the output folder

os.chdir(inputFolder)
cwd = os.getcwd()  # Get the current working directory (cwd)
files = os.listdir(cwd)  # Get all the files in that directory
print("Files in %r: %s" % (cwd, files))

# Make sure the destination exists before the first workbook is saved.
os.makedirs(outputFolder, exist_ok=True)

suffix = ".xlsx"

for file in os.listdir(inputFolder):
    # Skip everything that is not an HTML file.
    if not file.endswith(".html"):
        continue

    # Drop the ".html" extension with splitext rather than a running
    # counter, so "report.html" becomes "report.xlsx" instead of "1.xlsx".
    base_name = os.path.splitext(file)[0]
    out = os.path.join(outputFolder, base_name + suffix)
    print(out)

    # Pass the full input path so the conversion does not rely on the
    # current working directory.
    parser = ExcelParser(os.path.join(inputFolder, file))
    parser.to_excel(out)

相关问题更多 >

编程相关推荐

热门问题

热门文章