在文件夹中搜索文件名

2024-05-17 11:14:18 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个文件夹中的ts文件列表。我尝试从XML中提取内容id,它是不带扩展名的文件名。我需要搜索与内容id匹配的ts文件。由于某些原因,它失败了。我附上下面的代码。我还附上了ts文件的截图

import glob
import lxml.etree as et
import os, csv


# Directory that main() searches for asset metadata files (*.xml).
ASSET_METADATA_PATH = '/Users/roradhak/eVision/failed_assets/'
# Directory that main() searches for movie/trailer files (*.ts).
TS_PATH = '/Users/roradhak/eVision/ts_check/'

def _prop_text(program, title):
    """Return the text of the first ``props`` child titled *title*, or None."""
    matches = program.xpath('props/*[@title="{0}"]'.format(title))
    return matches[0].text if matches else None


def parse_file(path):
    """Extract content metadata from an asset XML file.

    Args:
        path: Path of the XML file to parse.

    Returns:
        A 4-tuple ``(content_id, content_name, has_trailer, trailer_id)``.
        Each of the first three is the text of the matching property of the
        first ``Program`` element, or None when that property is absent.
        ``trailer_id`` is ``content_id`` with its first 'M' replaced by 'T'
        when ``has_trailer`` is "Y", otherwise the empty string. When the
        document has no ``Program`` element the result is
        ``(None, None, None, "")``.
    """
    root = et.parse(path).getroot()
    programs = root.xpath('Program[@title="Program"]')
    if not programs:
        # Keep the same arity as the normal return so callers can always
        # unpack four values.
        return None, None, None, ""
    # TODO - Are multiple programs expected? If so, the function should
    # return a list of tuples.
    program = programs[0]

    content_id = _prop_text(program, "Content ID")
    has_trailer = _prop_text(program, "Has_Trailer")
    content_name = _prop_text(program, "Name")

    trailer_id = ""
    # A trailer id can only be derived when there is a content id to derive
    # it from; a missing Has_Trailer property is treated as "no trailer".
    if has_trailer == "Y" and content_id is not None:
        trailer_id = content_id.replace('M', 'T', 1)
    return content_id, content_name, has_trailer, trailer_id

def main():
    """Report missing movie/trailer .ts files for every asset metadata XML.

    For each ``*.xml`` file under ASSET_METADATA_PATH the metadata is parsed
    with parse_file() and printed; "No Movie" is printed when
    ``<content_id>.ts`` is not present under TS_PATH, and "No trailer" when
    the asset has a trailer but ``<trailer_id>.ts`` is not present.
    """
    asset_metadata = glob.glob(os.path.join(ASSET_METADATA_PATH, u'*.xml'))
    # glob returns full paths, so reduce them to bare file names; otherwise
    # '<id>.ts' can never match. A set gives constant-time membership tests.
    movies = {
        os.path.basename(m)
        for m in glob.glob(os.path.join(TS_PATH, u'*.ts'))
    }

    for p in asset_metadata:
        print(u'Processing: {p}'.format(p=p).encode('utf-8'))
        # Parse first: the values below do not exist until parse_file runs.
        content_id, content_name, has_trailer, trailer_id = parse_file(p)
        print(u'{0} {1} {2} {3}'.format(
            content_id, content_name, has_trailer, trailer_id).encode('utf-8'))
        # Look the file name up among the files found, not in the directory
        # path string (which was only a substring test).
        if u'{c}.ts'.format(c=content_id) not in movies:
            print("No Movie")
        if has_trailer == "Y":
            if u'{c}.ts'.format(c=trailer_id) not in movies:
                print("No trailer")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

输出如下

enter image description here

/Users/roradhak/IVPGET_Local/venv/bin/python /Users/roradhak/Downloads/validate_xml.py
Processing: /Users/roradhak/eVision/failed_assets/E30000001557115265_2019_08_29T11_20_08Z.xml
MD009232 Ep 143 - Cool look Hair style N 
No Movie
Processing: /Users/roradhak/eVision/failed_assets/10000000717960000_2019_10_09T15_04_20Z.xml
MZ008931 Aan: Men At Work Y TZ008931
No Movie
No trailer
Processing: /Users/roradhak/eVision/failed_assets/E30000001557537308_2019_08_09T19_15_22Z.xml
MZ010564 EP29 - Episode 29 - Raheem S1 Y TZ010564
No Movie
No trailer

Process finished with exit code 0

Tags: pathnonamenoneidifxmlcontent
1条回答
网友
1楼 · 发布于 2024-05-17 11:14:18

下面是使用 `pathlib` 的写法(`pathlib` 需要 Python 3.4+,示例中的 f-string 需要 Python 3.6+):

from pathlib import Path

# Folder that main() searches for asset metadata files (*.xml).
failed_assets_folder = Path('/Users/roradhak/eVision/failed_assets')
# Folder in which main() looks for the matching movie/trailer .ts files.
ts_folder = Path('/Users/roradhak/eVision/ts_check')


def main():
    """Report missing movie/trailer .ts files for every failed-asset XML.

    Each ``*.xml`` file in ``failed_assets_folder`` is parsed with
    parse_file(); "No Movie" is printed when ``<content_id>.ts`` does not
    exist in ``ts_folder``, and "No trailer" when the asset has a trailer
    but ``<trailer_id>.ts`` does not exist there.
    """
    for failed_asset in failed_assets_folder.glob('*.xml'):
        print(f'Processing: {failed_asset.name}')

        # Pass the full path: the bare file name would only resolve if the
        # current working directory happened to be failed_assets_folder.
        content_id, content_name, has_trailer, trailer_id = parse_file(str(failed_asset))
        print(f'{content_id}, {content_name}, {has_trailer}, {trailer_id}')

        if not (ts_folder / f'{content_id}.ts').exists():
            print('No Movie')

        if has_trailer == 'Y':
            if not (ts_folder / f'{trailer_id}.ts').exists():
                print('No trailer')

它只实现了文件搜索部分,但没有经过测试

相关问题 更多 >