我有一个文件夹中的ts文件列表。我尝试从XML中提取内容id,它是不带扩展名的文件名。我需要搜索与内容id匹配的ts文件。由于某些原因,它失败了。我附上下面的代码。我还附上了ts文件的截图
import glob
import lxml.etree as et
import os, csv
ASSET_METADATA_PATH = '/Users/roradhak/eVision/failed_assets/'
TS_PATH = '/Users/roradhak/eVision/ts_check/'
def parse_file(path):
tree = et.parse(path)
root = tree.getroot()
trailer_id = ""
programs = root.xpath('Program[@title="Program"]')
if len(programs) == 0:
return None, None, None
program = programs[0] # TODO - Are multiple programs expected? If so, the function should return a list of tuples
# Get the Content ID
c_id = program.xpath('props/*[@title="Content ID"]')
if len(c_id) == 0:
content_id = None
else:
content_id = c_id[0].text
# Get the has_trailer attribute
has_t = program.xpath('props/*[@title="Has_Trailer"]')
has_trailer = has_t[0].text
if has_t[0].text =="Y":
trailer_id = content_id.replace('M','T',1)
# Get the content name
n = program.xpath('props/*[@title="Name"]')
if len(n) == 0:
content_name = None
else:
content_name = n[0].text
return content_id, content_name, has_trailer, trailer_id
def main():
asset_metadata = glob.glob(os.path.join(ASSET_METADATA_PATH, u'*.xml'))
movies = glob.glob(os.path.join(TS_PATH, u'*.ts'))
for p in asset_metadata:
print(u'Processing: {p}'.format(p=p).encode('utf-8'))
print content_id, content_name, has_trailer, trailer_id
content_id, content_name, has_trailer, trailer_id= parse_file(p)
if u'{c}.ts'.format(c=content_id) not in TS_PATH:
print "No Movie"
if has_trailer =="Y":
if u'{c}.ts'.format(c=trailer_id) not in movies:
print "No trailer"
if __name__ == '__main__':
main()
输出如下
/Users/roradhak/IVPGET_Local/venv/bin/python /Users/roradhak/Downloads/validate_xml.py
Processing: /Users/roradhak/eVision/failed_assets/E30000001557115265_2019_08_29T11_20_08Z.xml
MD009232 Ep 143 - Cool look Hair style N
No Movie
Processing: /Users/roradhak/eVision/failed_assets/10000000717960000_2019_10_09T15_04_20Z.xml
MZ008931 Aan: Men At Work Y TZ008931
No Movie
No trailer
Processing: /Users/roradhak/eVision/failed_assets/E30000001557537308_2019_08_09T19_15_22Z.xml
MZ010564 EP29 - Episode 29 - Raheem S1 Y TZ010564
No Movie
No trailer
Process finished with exit code 0
下面是如何使用^{} 和python3.4+:
它只实现了文件搜索部分,但没有经过测试
相关问题 更多 >
编程相关推荐