Creating a timed loop in a Discord bot script to reload a web page (web-scraper bot)

Posted on 2024-06-26 13:52:26


I am currently designing a Discord bot that scrapes a web page which is continuously updated with patches related to the PBE server. I have the bot successfully running through Heroku. The problem I'm running into is that I want to create an automated (timed-loop) refresh that reloads the website I requested. Currently, it only loads one instance of the website, and if that site changes or updates, none of my content updates, because I am still using the "old" request of the site.

Is there a way to wrap the code in a function so I can create a timed loop, or do I just need to create a loop around my website request? What would that look like? Thanks.

from bs4 import BeautifulSoup
from urllib.request import urlopen
from discord.ext import commands
import discord

# what I want the commands to start with
bot = commands.Bot(command_prefix='!')

# instantiating discord client
token = "************************************"
client = discord.Client()

# begin the scraping of passed in web page
URL = "*********************************"
page = urlopen(URL)
soup = BeautifulSoup(page, 'html.parser')
pbe_titles = soup.find_all('h1', attrs={'class': 'news-title'})  # using soup to find all header tags with the news-title
                                                                 # class and storing them in pbe_titles
linksAndTitles = []
counter = 0

# finding tags that start with 'a' as in a href and appending those titles/links
for tag in pbe_titles:
    for anchor in tag.find_all('a'):
        linksAndTitles.append(tag.text.strip())
        linksAndTitles.append(anchor['href'])

# counts number of lines stored inside linksAndTitles list
for i in linksAndTitles:
    counter = counter + 1
print(counter)

# separates list by line so that it looks nice when printing
allPatches = '\n'.join(str(line) for line in linksAndTitles[:counter])
# stores the first two lines in list which is the current pbe patch title and link
currPatch = '\n'.join(str(line) for line in linksAndTitles[:2])


# command that allows user to type in exactly what patch they want to see information for based off date
@bot.command(name='patch')
async def pbe_patch(ctx, *, arg):
    if any(item.startswith(arg) for item in linksAndTitles):
        await ctx.send(arg + " exists!")
    else:
        await ctx.send('The date you entered: ' + '"' + arg + '"' + ' does not have a patch associated with it or that patch expired.')


# command that displays the current, most up to date, patch
@bot.command(name='current')
async def current_patch(ctx):
    response = currPatch
    await ctx.send(response)


bot.run(token)

I have played around with

while True:

but whenever I nest anything inside that loop, I can't access the rest of the code anywhere else.
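A bare `while True:` around a blocking call like `urlopen` never yields control back to discord.py's event loop, which is why nothing else in the script can run. A minimal sketch of the asyncio-friendly alternative (with a placeholder instead of real scraping, and a bounded loop so the example terminates):

```python
import asyncio

results = []

async def scrape_loop(interval, repeats):
    # awaiting asyncio.sleep() yields control back to the event loop,
    # so other coroutines (e.g. the bot's command handlers) keep running
    for _ in range(repeats):
        results.append('scraped')  # placeholder for the real request/parse
        await asyncio.sleep(interval)

asyncio.run(scrape_loop(0.01, 3))
print(results)
```

In a real bot you would schedule such a coroutine on the bot's loop instead of calling `asyncio.run()` yourself; the answer below shows the decorator discord.py provides for exactly this.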


1 Answer

Answered on 2024-06-26 13:52:26

discord.py has a special decorator, tasks.loop, to run some code periodically:

from discord.ext import tasks

@tasks.loop(seconds=5.0)
async def scrape():
    ...  # your scraping code


# ... your commands ...


scrape.start()
bot.run(token)

It will repeat the function scrape every 5 seconds.


Documentation: tasks


On Linux I would eventually use the standard service cron to run a script periodically. That script could scrape the data and save it to a file or database, and the Discord bot could read from that file or database. But cron checks its tasks every minute, so it cannot run a task more often than that.
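The hand-off between a cron-driven scraper and the bot can be sketched with a shared JSON file. The crontab line and the file path here are assumptions, purely for illustration:

```python
import json
import os
import tempfile

# hypothetical crontab entry to run the scraper script every minute:
# * * * * * /usr/bin/python3 /home/user/scrape_patches.py

PATCH_FILE = os.path.join(tempfile.gettempdir(), 'patches.json')

def save_patches(patches):
    # the cron-driven scraper writes its latest results here
    with open(PATCH_FILE, 'w') as f:
        json.dump(patches, f)

def load_patches():
    # the Discord bot reads the most recent scrape on demand
    try:
        with open(PATCH_FILE) as f:
            return json.load(f)
    except FileNotFoundError:
        return []  # scraper has not run yet

save_patches([{'title': 'Patch 10.13', 'link': 'http://example.com/10-13'}])
print(load_patches())
```

A database (e.g. sqlite3) would work the same way; the key point is that the scraper and the bot only share data through storage, never through in-process variables.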


EDIT:

Minimal working code

I use the page http://books.toscrape.com, which was created for practising scraping.

I changed a few elements. When you have a bot, you don't need to create a client, because a bot is a special kind of client.

I keep title and link together as a dictionary

            {
                'title': tag.text.strip(),
                'link': url + anchor['href'],
            }

so that later it is easier to create text like this:

title: A Light in the ...
link: http://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html
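That dictionary-to-text step can be shown in isolation, using a hypothetical sample entry in the same shape the scraper produces:

```python
# sample data in the same {'title': ..., 'link': ...} shape the scraper builds
links_and_titles = [
    {'title': 'A Light in the ...',
     'link': 'http://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html'},
]

# one f-string per entry turns each dictionary into a two-line display string
items = [f"title: {x['title']}\nlink: {x['link']}" for x in links_and_titles]
print(items[0])
```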

import os
import discord
from discord.ext import commands, tasks
from bs4 import BeautifulSoup
from urllib.request import urlopen

# default value at start (before `scrape` will assign new value)
# because some function may try to use these variables before `scrape` will create them
links_and_titles = []   # PEP8: lower_case_names
counter = 0
items = []

bot = commands.Bot(command_prefix='!')

@tasks.loop(seconds=5)
async def scrape():
    global links_and_titles
    global counter
    global items

    url = "http://books.toscrape.com/"
    page = urlopen(url)
    soup = BeautifulSoup(page, 'html.parser')
    #pbe_titles = soup.find_all('h1', attrs={'class': 'news-title'})  
    pbe_titles = soup.find_all('h3')  

    # remove previous content
    links_and_titles = []

    for tag in pbe_titles:
        for anchor in tag.find_all('a'):
            links_and_titles.append({
                'title': tag.text.strip(),
                'link': url + anchor['href'],
            })

    counter = len(links_and_titles)
    print('counter:', counter)
    items = [f"title: {x['title']}\nlink: {x['link']}" for x in links_and_titles]

@bot.command(name='patch')
async def pbe_patch(ctx, *, arg=None):
    if arg is None:
        await ctx.send('Use: !patch date')
    elif any(item['title'].startswith(arg) for item in links_and_titles):        
        await ctx.send(arg + " exists!")
    else:
        await ctx.send(f'The date you entered: "{arg}" does not have a patch associated with it or that patch expired.')

@bot.command(name='current')
async def current_patch(ctx, *, number: int = 1):  # annotation makes discord.py convert the argument to int
    if items:
        responses = items[:number]
        text = '\n  \n'.join(responses)
        await ctx.send(text)
    else:
        await ctx.send('no patches')

scrape.start()

token = os.getenv('DISCORD_TOKEN')
bot.run(token)

PEP 8 Style Guide for Python Code
