My spider.py file
import scrapy
from atlasdatas.items import AtlasdatasItem
class Atlasdatas(scrapy.Spider):
    """Spider that scrapes program details from a yokatlas.yok.gov.tr page.

    For every ``table.table-bordered`` element in the response, the text of
    its ``td.text-center`` cells is read in document order and mapped onto
    the fields of a fresh ``AtlasdatasItem``.
    """

    name = "atlas"

    # Item field names, in the order their values appear among the
    # ``td.text-center`` cells of a table.
    _FIELD_ORDER = (
        "OSYM_KODU",
        "ÜNİVERSİTE_TÜRÜ",
        "ÜNİVERSİTE_İSMİ",
        "FAKÜLTE_YÜKSEKOKUL",
        "PUAN_TÜRÜ",
        "BURS_TÜRÜ",
    )

    def start_requests(self):
        """Yield one request per start URL, handled by :meth:`parse`."""
        urls = ['https://yokatlas.yok.gov.tr/content/lisans-dynamic/1000_1.php?y=100110027']
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Yield one ``AtlasdatasItem`` per sufficiently populated table.

        Tables that contain fewer centered cells than there are fields are
        skipped instead of raising ``IndexError``.
        """
        # NOTE: PROGRAM_İSMİ is not extracted yet; its field is disabled.
        for table in response.css("table.table-bordered"):
            # Run the selector once per table instead of once per field.
            cells = table.css("td.text-center::text").getall()
            if len(cells) < len(self._FIELD_ORDER):
                self.logger.debug(
                    "Skipping table with %d cells (expected at least %d)",
                    len(cells),
                    len(self._FIELD_ORDER),
                )
                continue

            # Build a new item per table so yielded items never share state.
            item = AtlasdatasItem()
            for field, value in zip(self._FIELD_ORDER, cells):
                item[field] = value
            yield item
My items.py file
import scrapy
class AtlasdatasItem(scrapy.Item):
    """Container for the values scraped from one program table."""

    # NOTE: PROGRAM_İSMİ is intentionally left disabled for now.
    # PROGRAM_İSMİ = scrapy.Field()

    # Program identification.
    OSYM_KODU = scrapy.Field()

    # University details.
    ÜNİVERSİTE_TÜRÜ = scrapy.Field()
    ÜNİVERSİTE_İSMİ = scrapy.Field()
    FAKÜLTE_YÜKSEKOKUL = scrapy.Field()

    # Score and scholarship type.
    PUAN_TÜRÜ = scrapy.Field()
    BURS_TÜRÜ = scrapy.Field()
My pipelines.py file
import sqlite3
class AtlasDB:
    """Scrapy item pipeline that stores scraped items in a SQLite database.

    On construction the pipeline opens the ``atlasdb`` database file and
    recreates the ``GENERAL_INF`` table; every processed item is inserted as
    one row and committed immediately.
    """

    # Item keys, in the column order of the GENERAL_INF table.
    _COLUMNS = (
        "OSYM_KODU",
        "ÜNİVERSİTE_TÜRÜ",
        "ÜNİVERSİTE_İSMİ",
        "FAKÜLTE_YÜKSEKOKUL",
        "PUAN_TÜRÜ",
        "BURS_TÜRÜ",
    )

    def __init__(self):
        self.create_connection()
        self.create_table()

    def create_connection(self):
        """Open the SQLite database file and create a cursor on it."""
        self.conn = sqlite3.connect("atlasdb")
        self.curr = self.conn.cursor()

    def create_table(self):
        """Drop any existing GENERAL_INF table and create it afresh."""
        self.curr.execute("""DROP TABLE IF EXISTS GENERAL_INF""")
        self.curr.execute("""CREATE TABLE GENERAL_INF(
            OSYM_KODU INT PRIMARY KEY NOT NULL,
            ÜNİVERSİTE_TÜRÜ TEXT,
            ÜNİVERSİTE_İSMİ TEXT,
            FAKÜLTE_YÜKSEKOKUL TEXT,
            PUAN_TÜRÜ TEXT,
            BURS_TÜRÜ TEXT
            )""")

    def process_item(self, item, spider):
        """Store ``item`` in the database and pass it on unchanged.

        Scrapy looks this hook up by the exact name ``process_item``; under
        the previous spelling the pipeline was never invoked.
        """
        self.store_db(item)
        return item

    # Backward-compatible alias for the original (misspelled) method name.
    process_ıtems = process_item

    def close_spider(self, spider):
        """Close the database connection when the spider finishes."""
        self.conn.close()

    @staticmethod
    def _scalar(value):
        """Return ``value`` itself, or its first element if it is a list.

        Indexing ``[0]`` on a plain string would keep only its first
        character, so strings are passed through untouched.
        """
        if isinstance(value, (list, tuple)):
            return value[0] if value else None
        return value

    def store_db(self, item):
        """Insert one row built from ``item`` into GENERAL_INF and commit."""
        row = tuple(self._scalar(item[column]) for column in self._COLUMNS)
        self.curr.execute(
            """INSERT INTO
            GENERAL_INF values (?,?,?,?,?,?)""",
            row,
        )
        self.conn.commit()
And finally, my settings.py file. I have included only the settings that are enabled.
# Name of the Scrapy project.
BOT_NAME = "atlasdatas"

# Package holding the spiders; it is both the location for newly generated
# spiders and the only module searched for existing ones.
NEWSPIDER_MODULE = "atlasdatas.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Respect the target site's robots.txt rules.
ROBOTSTXT_OBEY = True

# Enabled item pipelines, mapped to their order value.
ITEM_PIPELINES = {"atlasdatas.pipelines.AtlasDB": 300}
我该怎么办?请帮帮我,先谢谢了。
目前没有回答
相关问题 更多 >
编程相关推荐