我在一个项目中使用scrapy和selenium在python中构建一个screen scraper,然后使用xlsxwriter获取screen scraper的输出并将其写入excel文件。然而,我的脏东西似乎总是空着。我不知道现在发生了什么,所以我很感激你的帮助。只是要注意,这里有一些网址,目录,和一些其他敏感信息的脚本之前,张贴在这里。所以有些链接和目录看起来很奇怪。在
excel输出屏幕截图:
Screen Scraper File Output
蜘蛛:
import os
import time
from datetime import date
from ScreenScraper.items import *
from scrapy import *
from scrapy.http import FormRequest
from scrapy.loader import ItemLoader
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from xlsxwriter import *
class CquentiaSpider(Spider):
name = 'cquentia'
allowed_domains = ['linktotable.com']
start_urls = ['www.linktotable.com/login'] #had to strip directories ,urls, usernames, and passwords from script.
login_user = 'Example'
login_pass = 'Example'
accnt_ID_1 = 'Example'
phantomjspath = r'C:\Users\[User]\Documents\Visual Studio 2015\Projects\ScreenScraper\Scraper\selenium\webdriver\phantomjs\bin\phantomjs.exe'
def __init__(self, name = None, **kwargs):
self.browser = webdriver.PhantomJS(executable_path=self.phantomjspath)
return super(CquentiaSpider, self).__init__(name, **kwargs)
def parse(self, response):
self.browser.get(response.url)
username = self.browser.find_element_by_name('username')
password = self.browser.find_element_by_name('password')
login = self.browser.find_element_by_name('submit')
username.send_keys(self.login_user)
password.send_keys(self.login_pass)
login.click()
time.sleep(1.5)
self.browser.get('www.linktotable.com/search')
accnt_id = self.browser.find_element_by_name('accnId')
search = self.browser.find_element_by_name('accnSrch')
accnt_id.send_keys(self.accnt_ID_1)
search.click()
time.sleep(1.5)
select = Selector(text=self.browser.page_source)
get_table_count = count()
get_table_count['row_count'] = select.xpath('//*[@id="otTable"]/thead/tr/td[1]/text()').extract() [0]
count_final = int(get_table_count['row_count']) + 1
patient = cquentiaPatientItems()
patient['Pt_First_Name'] = select.xpath('//*[@id="ptFNm"]/text()').extract()
patient['Pt_Last_Name'] = select.xpath('//*[@id="ptLNm"]/text()').extract()
patient['Client_ID_Name'] = select.xpath('//*[@id="clnNm"]/text()').extract()
patient['DOS'] = select.xpath('//*[@id="dos"]/text()').extract()
patient_First_Name = str(patient['Pt_First_Name'])
patient_Last_Name = str(patient['Pt_Last_Name'])
patient_Client_ID = str(patient['Client_ID_Name'])
patient_DOS = str(patient['DOS'])
header_layout = ['First Name:', 'Last Name:', 'Client Name:', 'DOS:']
header_data = [patient_First_Name, patient_Last_Name, patient_Client_ID, patient_DOS]
table_header_layout = ['Test ID', 'Name', 'Mod 1', 'Mod 2', 'Mod 3', 'Mod 4', 'Proc Code', 'Name','Units Billed $', 'Billed $', 'Gross $', 'Expect $', 'Price Method', 'Payor ID', 'POS', 'Rendering Phys']
workbook = Workbook('%s_spider.xlsx' % (self.name))
worksheet = workbook.add_worksheet()
row = 0
col = 0
for value in header_layout:
worksheet.write(row, 0, value)
row = 1 + row
row = 0
for value in header_data:
worksheet.write(row, 1, value)
row = 1 + row
row = 6
for value in table_header_layout:
worksheet.write(row, col, value)
col = 1 + col
col = 0
row = 7
for number in range(0, count_final):
table = Table()
table['Test_ID'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[2]/text()' % (number)).extract()
table['Name_1'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[3]/text()' % (number)).extract()
table['Name_2'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[9]/text()' % (number)).extract()
table['Mod_1'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[4]/text()' % (number)).extract()
table['Mod_2'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[5]/text()' % (number)).extract()
table['Mod_3'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[6]/text()' % (number)).extract()
table['Mod_4'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[7]/text()' % (number)).extract()
table['Proc_Code'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[8]/text()' % (number)).extract()
table['Units_Billed'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[10]/text()' % (number)).extract()
table['Billed'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[11]/text()' % (number)).extract()
table['Gross'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[12]/text()' % (number)).extract()
table['Expect'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[13]/text()' % (number)).extract()
table['Price_Method'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[15]/text()' % (number)).extract()
table['Payor_ID'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[16]/text()' % (number)).extract()
table['POS'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[22]/text()' % (number)).extract()
table['Rendering_Phys'] = select.xpath('//*[@id="otTable"]/tbody/tr[%s]/td[23]/text()' % (number)).extract()
table_Test_ID = str(table['Test_ID'])
table_Name_1 = str(table['Name_1'])
table_Name_2 = str(table['Name_2'])
table_Mod_1 = str(table['Mod_1'])
table_Mod_2 = str(table['Mod_2'])
table_Mod_3 = str(table['Mod_3'])
table_Mod_4 = str(table['Mod_4'])
table_Proc_Code = str(table['Proc_Code'])
table_Units_Billed = str(table['Units_Billed'])
table_Billed = str(table['Billed'])
table_Gross = str(table['Gross'])
table_Expect = str(table['Expect'])
table_Price_Method = str(table['Price_Method'])
table_Payor_ID = str(table['Payor_ID'])
table_POS = str(table['POS'])
table_Rendering_Phys = str(table['Rendering_Phys'])
table_data = [table_Test_ID, table_Name_1, table_Mod_1, table_Mod_2, table_Mod_3, table_Mod_4, table_Proc_Code, table_Name_2, table_Units_Billed, table_Billed, table_Gross, table_Expect, table_Price_Method, table_Payor_ID, table_POS, table_Rendering_Phys]
for text in table_data:
worksheet.write(row, col, text)
col = 1 + col
row = 1 + row
col = 0
workbook.close()
项目:
^{pr2}$
目前没有回答
相关问题 更多 >
编程相关推荐