在循环中添加到站点的新产品不会被bot[Python]检测到

2024-09-28 20:51:10 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个网络垃圾机器人,它工作得很好。唯一的问题是,当新产品出现在网站上时,bot无法检测到它。我必须重新启动该文件才能找到它。你有什么解决办法吗

import requests
import time
import datetime

from discord import send_hook
from datetime import datetime
from colorama import Fore, Back, Style
from bs4 import BeautifulSoup

from colorama import init
init(autoreset=True)

headers = {
    'user-agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36', "Referer": "https://www.ldlc.com/"
}

a=0
def liens():
     
    # for x in range (1,5):
    url = f'https://www.ldlc.com/recherche/1660/+fcat-4684.html?sort=1'
    reqs = requests.get(url,headers=headers, timeout=None, allow_redirects=False)
    soup = BeautifulSoup(reqs.text, 'html.parser')

    urls = []
    productlist = soup.find_all('h3',{'class':'title-3'})
    for item in productlist:
        for link in item.find_all('a', href=True):
            urls.append("https://www.ldlc.com"+link['href'])
    
    print(f"Le nombre de request URL's : {a}")

    return urls 

prix = 400
wait_time = 259200
timed_pid = {p: 0 for p in liens()} 


while True:

    try:
        liens()
        a=a+1
        print(f"Le nombre de request : {x}")
        x=x+1
        nombre_total_liens = len(liens())
        print(f'Le nombre de liens trouvé : {nombre_total_liens}')
    except:
        print("Connection refused by the server..")
        print("Let me sleep for 5 seconds")
        print("ZZzzzz...")
        time.sleep(5)
        print("Was a nice sleep, now let me continue...")
        continue

    time.sleep(15)


    for p in timed_pid:
        if (time.time() - timed_pid[p]) < wait_time:
            continue
        
        url = "{}".format(p)

        response = url
        while response == url:
            try:
                response = requests.get(url, headers=headers, timeout=None, allow_redirects=False)
                break
            except:
                print("Connection refused by the server..")
                print("Let me sleep for 5 seconds")
                print("ZZzzzz...")
                time.sleep(5)
                print("Was a nice sleep, now let me continue...")
                continue

        soup = BeautifulSoup(response.text, 'html.parser')

        try:
            title = soup.find('h1', class_='title-1').get_text().strip()
            product_title = ':flag_fr:'+title
        except:
            print('Impossible de lire le titre du produit! Restart....')
            continue
        
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        time_now = now.strftime("%d/%m/%Y - %H:%M:%S")

        try:
            image_url = soup.find('img', {'id':'ctl00_cphMainContent_ImgProduct'})['src']
        except:
            continue

        try:    
            stock_statut = soup.find('div', class_='content').get_text().strip()            
            price = soup.find_all('div', class_='price')[3].get_text(strip=True).replace(',','.').replace(' ','').replace('€','.').replace('&nbsp;','').replace('\xa0','')
            
            if stock_statut == 'Rupture':
                print ('[{}]'.format(current_time), Fore.GREEN + Style.RESET_ALL, Fore.YELLOW + '[LDLC]' + Style.RESET_ALL,'|', Fore.RED + 'Rupture' + Style.RESET_ALL, Fore.CYAN + product_title + Style.RESET_ALL)
            else:
                f = float(price)
                if f < prix:
                    print('[{}]'.format(current_time), Fore.GREEN + Style.RESET_ALL, Fore.YELLOW + '[LDLC]' + Style.RESET_ALL,'|', Fore.GREEN + 'En stock' + Style.RESET_ALL, Fore.CYAN + product_title + Style.RESET_ALL)
                    timed_pid[p] = time.time()
                    send_hook(product_title, url, image_url, price, stock_statut, time_now)
        except:
            continue

偶尔,如果你在速度或其他方面有所提高,我会接受

如果你能帮助我,我会很高兴的。我先谢谢你


Tags: importurlfortimetitlestylesleepall