import csv
import re
# Replace <img> source URLs found in the HTML column of a topic/reply CSV
# with the values looked up from a second CSV file.

# Column of the topic/reply CSV that holds the HTML content.
CONTENT_COLUMN = 8
# Columns of the lookup CSV: the image URL and the value that replaces it
# (presumably the image's ID -- confirm against the real export format).
LOOKUP_URL_COLUMN = 1
LOOKUP_ID_COLUMN = 7

# Captures the double-quoted src attribute of an <img> tag in three groups
# (prefix, URL, closing quote) so the URL can be swapped without touching
# the rest of the tag. Requiring whitespace before "src" keeps attributes
# such as data-src from matching, and "[^>]" keeps the match inside one tag
# so a src belonging to <script> or <iframe> is never picked up.
_IMG_SRC_RE = re.compile(r'(<img\b[^>]*?\ssrc=")([^"]*)(")', re.IGNORECASE)


def extract_image_sources(content):
    """Return the src URL of every <img> tag in *content*, in document order.

    Tags whose src attribute is not terminated by a closing double quote are
    ignored instead of being scanned forever.
    """
    return [match.group(2) for match in _IMG_SRC_RE.finditer(content)]


def build_replacements(lookup_rows):
    """Map each image URL to its replacement value.

    *lookup_rows* is an iterable of CSV rows. Rows that are too short to
    contain both columns are skipped. When a URL appears more than once the
    first row wins, matching a top-to-bottom scan that stops at the first hit.
    """
    replacements = {}
    for row in lookup_rows:
        if len(row) > max(LOOKUP_URL_COLUMN, LOOKUP_ID_COLUMN):
            replacements.setdefault(row[LOOKUP_URL_COLUMN], row[LOOKUP_ID_COLUMN])
    return replacements


def replace_image_sources(content, replacements):
    """Return *content* with every known <img> src URL replaced.

    URLs that are not keys of *replacements* are left unchanged.
    """
    def _swap(match):
        src = match.group(2)
        return match.group(1) + replacements.get(src, src) + match.group(3)

    return _IMG_SRC_RE.sub(_swap, content)


def rewrite_row(row, replacements):
    """Return a copy of *row* with the image URLs in its content column replaced.

    Rows that have no content column are copied through unchanged. The input
    row is never mutated.
    """
    updated = list(row)
    if len(updated) > CONTENT_COLUMN:
        updated[CONTENT_COLUMN] = replace_image_sources(
            updated[CONTENT_COLUMN], replacements
        )
    return updated


def main(topics_path='rad.csv', lookup_path='new.csv'):
    """Rewrite image URLs in *topics_path* using the table in *lookup_path*.

    Prints each image URL found, followed by either "Done Match Found" and
    the replacement value, or "No Match". Returns the list of rewritten rows
    so the caller can persist them; no file is written here.
    """
    # Load the lookup table once. A csv.reader is a one-shot iterator, so
    # re-scanning it for every image would only ever work for the first one.
    with open(lookup_path, newline='') as lookup_file:
        replacements = build_replacements(csv.reader(lookup_file))

    updated_rows = []
    with open(topics_path, newline='') as topics_file:
        for row in csv.reader(topics_file):
            if len(row) > CONTENT_COLUMN:
                for image_src in extract_image_sources(row[CONTENT_COLUMN]):
                    print(image_src)
                    if image_src in replacements:
                        print("Done Match Found")
                        print(replacements[image_src])
                    else:
                        print("No Match")
            updated_rows.append(rewrite_row(row, replacements))
    return updated_rows


if __name__ == "__main__":
    main()
如何在包含大量链接的CSV文件列中查找img标记的url,将该链接与另一个CSV文件中的相同链接进行比较,然后用对应的id替换该链接?
我建议使用BeautifulSoup来解析HTML,而不是用字符串操作手工解析。在下面的示例中,我假设HTML条目中没有逗号。
一旦得到每个文件的图像源列表,就可以对两个CSV文件的列表进行比较。
相关问题 更多 >
编程相关推荐