Web scraping:返回消息“此应用程序已过期,请单击浏览器上的刷新按钮”

2024-05-19 22:11:07 发布

您现在位置:Python中文网/ 问答频道 /正文

我想从以下网站上搜集一些数据:https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract

我希望自动化的步骤包括:

  1. 选择“Opcinski sud/ZK odjel”。例如,选择“Zemljišnoknjižni odjel Benkovac”
  2. 选择“Glavna knjiga”。例如,选择“BENKOVAC”
  3. 输入“Broj kat.čestice:”。例如,输入576/2
  4. 在“Povijesni pregled”中选择“Da”(最后一行,将“Broj ZK uloska”留空)
  5. 单击“Pregledaj”并解决验证码问题
  6. 刮取出现的html

在 web 浏览器中打开开发者工具(inspector)并跟踪网络请求之后,我尝试用 Python 的 requests 库重现上述步骤。页面上有很多请求,所以我把代码分为几个步骤。首先是启动会话,并发出页面加载时的那些请求:

import json
import os
import re
import shutil
import uuid
from pathlib import Path

import numpy as np
import pandas as pd
import requests
from twocaptcha import TwoCaptcha


# start session
# Load the search form once so the server can issue a JSESSIONID cookie, then
# replay the static/GWT bootstrap requests the browser makes on page load.
url = 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract'
session = requests.Session()
session.get(url)
jid = session.cookies.get_dict().get('JSESSIONID')
if jid is None:
    # Fail with a readable message instead of a bare KeyError.
    raise RuntimeError('No JSESSIONID cookie was set by ' + url)

public_base = 'https://oss.uredjenazemlja.hr/public/'
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'

# some requests on the start of the page (probably redundant)
headers = {
    'Referer': url,
    'User-Agent': user_agent,
}
for asset in (
    'js/libs/modernizr-2.5.3.min.js',
    'js/libs/jquery-1.7.1.min.js',
    'js/script.js',
):
    session.get(public_base + asset, headers=headers)

# GWT bootstrap script (*.nocache.js): it contains the name of the
# "<hash>.cache.html" permutation that is requested further below.
headers = {
    'Cookie': 'ossprivatelang=hr_HR; gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; JSESSIONID=' + jid,
    "Connection": "keep-alive",
    'Host': 'oss.uredjenazemlja.hr',
    'Referer': url,
    "sec-ch-ua": '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
    "sec-ch-ua-mobile": "?0",
    "Sec-Fetch-Dest": "script",
    "Sec-Fetch-Mode": "no-cors",
    "Sec-Fetch-Site": "same-origin",
    'User-Agent': user_agent,
}
res = session.get(public_base + 'gwt/hr.ericsson.oss.ui.pia.OssPiaModule.nocache.js', headers=headers)
# [^']+ keeps the capture inside one quoted literal; the previous greedy ".*"
# could run across several literals in the minified script.
cache_match = re.search(r"bc='([^']+\.cache\.html)',C", res.text)
if cache_match is None:
    raise RuntimeError('Could not find the *.cache.html name in OssPiaModule.nocache.js')
cache_html = cache_match.group(1)
# e.g. cache_html == "1F6C776DEF6D55F56C900B938F84D726.cache.html"

# some more requests on the start of the page (probably redundant)
headers = {
    'Referer': url,
    'User-Agent': user_agent,
}
for asset in (
    'gwt/tiny_mce_editor/tiny_mce_src.js',
    'gwt/js/common.js',
    'gwt/js/blueimp_tmpl.js',
):
    session.get(public_base + asset, headers=headers)

# GWT permutation page (<hash>.cache.html), loaded by the browser in an iframe
headers = {
    "DNT": "1",
    'Referer': url,
    "sec-ch-ua": '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'Sec-Fetch-Dest': 'iframe',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': user_agent,
}
session.get(public_base + 'gwt/' + cache_html, headers=headers)

然后,我请求执行上述步骤1和2:

# Steps 1 and 2: replay the four commonRPCService calls the form makes when
# "Opcinski sud/ZK odjel" and "Glavna knjiga" are chosen.
rpc_url = 'https://oss.uredjenazemlja.hr/rpc/commonRPCService'

# Headers shared by every GWT-RPC POST below (and reused by later steps).
headers = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
    'Connection': 'keep-alive',
    'Content-Type': 'text/x-gwt-rpc; charset=UTF-8',
    'Cookie': 'gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; __utma=79801043.802441445.1616788486.1616788486.1616788486.1; __utmz=79801043.1616788486.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided); x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=' + jid,
    "DNT": "1",
    'Host': 'oss.uredjenazemlja.hr',
    'Origin': 'https://oss.uredjenazemlja.hr',
    'Referer': 'https://oss.uredjenazemlja.hr/public/gwt/' + cache_html,
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
}

# getMainBook is sent twice: once before each of the two list requests.
get_main_book = '5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBook|1|2|3|4|0|'
get_lr_institutions = '5|0|18|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getLrInstitutions|com.extjs.gxt.ui.client.data.BaseModel|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|dirty|java.lang.Boolean/476441737|new|deleted|resourceCode|java.lang.Integer/3438268394|elementSelected|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.Institution|name||1|2|3|4|1|5|6|7|7|8|0|9|-2|10|-2|11|12|0|13|-2|14|15|16|17|15|18|'
# This payload contains non-ASCII text, so it is encoded to UTF-8 bytes up front.
get_main_books = ('5|0|34|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBooks|com.extjs.gxt.ui.client.data.BaseModel|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|dirty|java.lang.Boolean/476441737|new|deleted|resourceCode|java.lang.Integer/3438268394|elementSelected|cadastralMunicipality|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|hr.ericsson.jis.domain.admin.MainBook|institution|institutionId|parentInstitution|name|Općinski sud u Zadru|hr.ericsson.jis.domain.admin.Institution|institutionType|institutionTypeId|hr.ericsson.jis.domain.admin.InstitutionType|source|superviseInstitutionId|Zemljišnoknjižni odjel Benkovac|place|BENKOVAC|preconditionsRequired||1|2|3|4|1|5|6|10|7|8|0|9|-2|10|-2|11|12|0|13|-2|14|6|1|15|16|17|15|16|18|19|6|13|7|-2|9|-2|20|12|500|21|6|8|7|-2|9|-2|10|-2|20|12|605|11|12|0|13|-2|22|16|23|15|16|24|25|6|7|7|-2|9|-2|10|-2|26|12|14|11|-11|13|-2|15|16|27|28|12|1|10|-2|29|-10|11|-11|13|-2|22|16|30|31|16|32|15|-13|33|-2|22|16|34|').encode("utf-8")

# (payload, echo): only the two getMainBook responses are printed.
rpc_calls = [
    (get_main_book, True),         # opcinski sud 1
    (get_lr_institutions, False),  # opcinski sud 2
    (get_main_book, True),         # glavna knjiga 1
    (get_main_books, False),       # glavna knjiga 2
]
for payload, echo in rpc_calls:
    res = session.post(rpc_url, data=payload, headers=headers)
    if echo:
        print(res.text)

然后我解决验证码:

# Step 5: ask the server whether the captcha is disabled, then download the
# captcha image, have 2captcha solve it and submit the answer for validation.
payload = ('5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|isCaptchaDisabled|1|2|3|4|0|').encode('utf-8')
res = session.post(
    'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
    data=payload,
    headers=headers
)
print(res.text)

# get and save captcha
TWO_CAPTCHA_APY_KEY = "myapikey"
solver = TwoCaptcha(TWO_CAPTCHA_APY_KEY)
save_path = 'D:/zkrh/captchas'
os.makedirs(save_path, exist_ok=True)  # the target directory may not exist yet
p = session.get('https://oss.uredjenazemlja.hr/servlets/kaptcha.jpg?1617088523212',
                headers=headers)
captcha_path = os.path.join(Path(save_path), 'captcha' + ".jpg")
with open(captcha_path, 'wb') as out_file:
    # p.content is the decoded body; p.raw would bypass content decoding,
    # which matters because the request advertises gzip/deflate/br.
    out_file.write(p.content)

# solve captcha
result = solver.normal(captcha_path, minLength=5, maxLength=5)
payload = ('5|0|6|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|validateCaptcha|java.lang.String|' +
            result['code'] + '|1|2|3|4|1|5|6|').encode('utf-8')
# Validate through the same session as every other call; this request was
# previously sent with a bare requests.post(), outside the session.
res = session.post(
    'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
    data=payload,
    headers=headers
)

if res.text.startswith("//OK"):
    # Keep the solved image under its answer. os.replace overwrites an existing
    # file, whereas os.rename raises on Windows when the target already exists.
    os.replace(captcha_path, os.path.join(Path(save_path), result['code'] + ".jpg"))
else:
    print("Kriva captcha. Rijesi!")

现在是最重要的请求,我无法从中获得正确的输出。它应该返回很多数字,其中最重要的是一个 7 位数字(匹配 \d{7};应该是返回的这些数字中的一个或多个)。我可以在最后一步中使用该数字来获取 html。这是我的尝试:

# Key request: GWT-RPC call to getLrUnitsByMainBookAndParcel on the interface
# hr.ericsson.oss.ui.pia.client.rpc.IOssPublicRPCService, with the parcel
# number "576/2" embedded in the payload's string table.
# NOTE(review): unlike the earlier payloads, this one names IOssPublicRPCService
# (not ICommonRPCService) and carries a different second field (0EAC9F40...
# instead of 957F3F03...), yet it is posted to /rpc/commonRPCService below.
# The server reply quoted in this post says that interface is not implemented
# by CommonRPCService, so this call presumably belongs to a different /rpc/...
# endpoint -- TODO confirm the exact request URL in the browser's network trace.
payload = ('5|0|40|https://oss.uredjenazemlja.hr/public/gwt/|0EAC9F40996251FDB21FF254E1600E83|hr.ericsson.oss.ui.pia.client.rpc.IOssPublicRPCService|getLrUnitsByMainBookAndParcel|com.extjs.gxt.ui.client.data.BaseModel|java.lang.String|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|date|java.sql.Date/3996530531|dirty|java.lang.Boolean/476441737|new|cadastralMunicipality|id|java.lang.Integer/3438268394|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|cadastralMunicipalityId|source|creationDate|formatedName|BENKOVAC|userId|cadInstitution|deleted|institutionId|resourceCode|elementSelected|name|Odjel za katastar nekretnina Benkovac|hr.ericsson.jis.domain.admin.Institution|institution|Zemljišnoknjižni odjel Benkovac|place|sidMainBook|java.lang.Long/4227064769|hr.ericsson.jis.domain.admin.MainBook|status|576/2|1|2|3|4|2|5|6|7|18|8|9|115|10|21|10|11|0|12|-3|13|7|3|14|15|98|16|17|18|19|-5|20|15|1|21|9|116|0|1|22|17|23|24|15|-9999|25|7|8|10|-3|12|-3|26|-3|27|15|117|28|15|0|29|-3|30|17|31|16|17|32|33|7|9|10|-3|12|-3|26|-3|27|15|500|28|-13|29|-3|30|17|34|35|-9|16|-15|26|-3|28|15|0|29|-3|30|-9|36|37|4730091|0|14|15|30857|16|17|38|39|-19|40|').encode('utf-8')

# Sent with the same session and GWT-RPC headers as the commonRPCService calls.
res = session.post(
    'https://oss.uredjenazemlja.hr/rpc/commonRPCService',
    data=payload,
    headers=headers
)
# Printed raw; the surrounding text expects a list of numbers here.
print(res.text)

它返回:

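//EX[2,1,["com.google.gwt.user.client.rpc.IncompatibleRemoteServiceException/3936916533","This application is out of date, please click the refresh button on your browser. ( Blocked attempt to access interface 'hr.ericsson.oss.ui.pia.client.rpc.IOssPublicRPCService', which is not implemented by 'hr.ericsson.oss.ui.common.server.core.rpc.CommonRPCService'; this is either misconfiguration or a hack attempt )"],0,5]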

而不是我之前解释的数字

然后,在最后一步中,我应该使用7位数字作为lrUnitNumber参数

# Final step: POST the report form to PublicReportServlet to get the extract
# as HTML (export_type=html).
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    # No hard-coded Content-Length: requests derives it from the encoded form
    # body, so a copied '169' would only misdescribe the request.
    'Content-Type': 'application/x-www-form-urlencoded',
    'Cookie': 'ossprivatelang=hr_HR; gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; JSESSIONID=' + jid,
    'Host': 'oss.uredjenazemlja.hr',
    'Origin': 'https://oss.uredjenazemlja.hr',
    'Referer': 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
    'Sec-Fetch-Dest': 'iframe',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
}
# Form fields of the report request.
# TODO: lrUnitNumber (and presumably lrunitID) are hard-coded sample values;
# per the surrounding text, lrUnitNumber should be the 7-digit number returned
# by the getLrUnitsByMainBookAndParcel call.
dataFrom = {
    'pia': 1,
    'report_type_id': 4,
    'report_type_name': 'bzp_izvadak_oss',
    'source': 1,
    'institutionID': 500,
    'mainBookId': 30857,
    'lrUnitNumber': 5509665,
    'lrunitID': 5799992,
    'status': '0,1',
    'footer': '',
    'export_type': 'html',
}
res = session.post(
    'https://oss.uredjenazemlja.hr/servlets/PublicReportServlet',
    data=dataFrom,
    headers=headers,
)
# A bare `res` expression shows nothing when run as a script; print it so the
# response status is visible in both a script and a REPL.
print(res)

我也在提供R代码。也许有R和网络抓取知识的人可以帮助:

  library(httr)
  library(rvest)
  library(stringr)
  library(reticulate)
  twocaptcha <- reticulate::import("twocaptcha")
  
  
  # 2captcha solver, taken from the Python package imported through reticulate
  TWO_CAPTCHA_APY_KEY <- ".."
  solver <- twocaptcha$TwoCaptcha(TWO_CAPTCHA_APY_KEY)

  # Open the search form, remember the session cookie value, then fetch the GWT
  # bootstrap script and the "<hash>.cache.html" page it points to.
  url <- 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract'
  session <- GET(url)
  jid <- cookies(session)$value
  # The same Referer/User-Agent pair is used for both GET requests below.
  headers_cache <- c(
    'Referer'= 'https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract',
    'User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
  )
  nocache_url <- 'https://oss.uredjenazemlja.hr/public/gwt/hr.ericsson.oss.ui.pia.OssPiaModule.nocache.js'
  session <- rvest:::request_GET(content(session), nocache_url, add_headers(headers_cache))
  # Extract the bc='....cache.html',C fragment, then strip the wrapper around the name.
  cache_html <- gsub(
    ".*=\\'|\\'.C", "",
    str_extract(session$response, "bc=\\'(.*\\.cache.html)\\',C")
  )
  cache_page_url <- paste0('https://oss.uredjenazemlja.hr/public/gwt/', cache_html)
  session <- rvest:::request_GET(session, cache_page_url, add_headers(headers_cache))
  
  # meta: endpoint and headers shared by the GWT-RPC calls
  commonRPCServiceUrl <- "https://oss.uredjenazemlja.hr/rpc/commonRPCService"
  headers <- c(
    'Accept'= '*/*',
    'Accept-Encoding'= 'gzip, deflate, br',
    'Accept-Language'= 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
    'Connection'= 'keep-alive',
    'Content-Type'= 'text/x-gwt-rpc; charset=UTF-8',
    'Cookie'= paste0('gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=', jid),
    'Host'= 'oss.uredjenazemlja.hr',
    'Origin'= 'https://oss.uredjenazemlja.hr',
    'Referer'= paste0('https://oss.uredjenazemlja.hr/public/gwt/', cache_html),
    'Sec-Fetch-Dest'= 'empty',
    'Sec-Fetch-Mode'= 'cors',
    'Sec-Fetch-Site'= 'same-origin',
    'User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
  )

  # POST one payload, print the raw response bytes and their text form, and
  # return the updated session object.
  post_rpc <- function(sess, body) {
    sess <- rvest:::request_POST(sess, commonRPCServiceUrl, body = body, add_headers(headers))
    print(sess$response$content)
    print(readBin(sess$response$content, character(), endian = "little"))
    sess
  }

  # getMainBook
  payload <- "5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBook|1|2|3|4|0|"
  session <- post_rpc(session, payload)

  # getMainBooks (VELIKA GORICA)
  payload <- "5|0|22|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|getMainBooks|com.extjs.gxt.ui.client.data.BaseModel|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|dirty|java.lang.Boolean/476441737|new|deleted|resourceCode|java.lang.Integer/3438268394|elementSelected|cadastralMunicipality|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|hr.ericsson.jis.domain.admin.MainBook|institution|preconditionsRequired|name|VELIKA GORICA|1|2|3|4|1|5|6|10|7|8|0|9|-2|10|-2|11|12|0|13|-2|14|6|1|15|16|17|15|16|18|19|0|20|-2|21|16|22|"
  session <- post_rpc(session, payload)
  
  # captcha: ask whether the captcha is disabled, download the image, solve it
  # with 2captcha and submit the answer for validation.
  payload <- "5|0|4|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|isCaptchaDisabled|1|2|3|4|0|"
  session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
  session$response$content
  readBin(session$response$content, character(), endian = "little")

  # Image request headers; the session cookie is passed explicitly because the
  # image is fetched with a plain GET() rather than through `session`.
  headers_captcha <- c(
    "Accept"= "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
    "Accept-Encoding"= "gzip, deflate, br",
    "Accept-Language"=" hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7",
    "Connection"= "keep-alive",
    "Cookie"= paste0("gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=", jid),
    "DNT"= "1",
    "Host"= "oss.uredjenazemlja.hr",
    "Referer"= "https://oss.uredjenazemlja.hr/public/lrServices.jsp?action=publicLdbExtract",
    "sec-ch-ua"= '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
    "sec-ch-ua-mobile"= "?0",
    "Sec-Fetch-Dest"= "image",
    "Sec-Fetch-Mode"= "no-cors",
    "Sec-Fetch-Site"= "same-origin",
    "User-Agent"= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"
  )
  captcha <- GET("https://oss.uredjenazemlja.hr/servlets/kaptcha.jpg?1617286122160", add_headers(headers_captcha))
  # `captcha` is an httr response, so the image bytes are in captcha$content;
  # the former captcha$response$content lookup had no such field and was dropped.
  captcha$content
  writeBin(captcha$content, "D:/zkrh/captchas/test.jpg")
  result <- solver$normal("D:/zkrh/captchas/test.jpg", minLength=5, maxLength=5)

  payload <- paste0("5|0|6|https://oss.uredjenazemlja.hr/public/gwt/|957F3F03E95E97ABBDE314DFFCCEF4BC|hr.ericsson.oss.ui.common.client.core.rpc.ICommonRPCService|validateCaptcha|java.lang.String|",
                    result$code, "|1|2|3|4|1|5|6|")
  session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
  session$response$content
  # Read the validation reply from `session`; `p` was never defined in this
  # script (it is the name of the captcha response in the Python version).
  readBin(session$response$content, character(), endian = "little")
  
  # Key request: GWT-RPC call to getLrUnitByMainBook on the interface
  # hr.ericsson.oss.ui.pia.client.rpc.IOssPublicRPCService, whose reply is
  # expected to contain the land-registry unit ID.
  # NOTE(review): the payload names IOssPublicRPCService, but it is posted to
  # commonRPCServiceUrl (/rpc/commonRPCService) like the ICommonRPCService
  # calls above. The Python attempt in this post gets "not implemented by
  # CommonRPCService" for the same combination, so this call presumably needs
  # a different /rpc/... endpoint -- TODO confirm the URL in the browser's
  # network trace.
  headers = c(
    'Accept'= '*/*',
    'Accept-Encoding'= 'gzip, deflate, br',
    'Accept-Language'= 'hr-HR,hr;q=0.9,en-US;q=0.8,en;q=0.7',
    'Connection'= 'keep-alive',
    # 'Content-Length'= '166',
    'Content-Type'= 'text/x-gwt-rpc; charset=UTF-8',
    'Cookie'= paste0('gxtTheme=m%3Aid%7Cs%3Agray%2Cfile%7Cs%3Axtheme-gray.css; ossprivatelang=hr_HR; x-auto-31=m%3Acollapsed%7Cb%3Atrue; JSESSIONID=', jid),
    'DNT' = '1',
    'Host'= 'oss.uredjenazemlja.hr',
    'Origin'= 'https://oss.uredjenazemlja.hr',
    'Referer'= paste0('https://oss.uredjenazemlja.hr/public/gwt/', cache_html),
    'sec-ch-ua' = '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
    'sec-ch-ua-mobile' = "?0",
    'Sec-Fetch-Dest'= 'empty',
    'Sec-Fetch-Mode'= 'cors',
    'Sec-Fetch-Site'= 'same-origin',
    'User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
  )
  # Payload for the VELIKA GORICA main book; its second field (0EAC9F40...)
  # differs from the 957F3F03... value used in the commonRPCService payloads.
  payload <- paste0("5|0|40|https://oss.uredjenazemlja.hr/public/gwt/|0EAC9F40996251FDB21FF254E1600E83|hr.ericsson.oss.ui.pia.client.rpc.IOssPublicRPCService|getLrUnitByMainBook|com.extjs.gxt.ui.client.data.BaseModel|java.lang.String|hr.ericsson.oss.ui.common.client.core.data.RpcModel/2891266824|date|java.sql.Date/3996530531|dirty|java.lang.Boolean/476441737|new|cadastralMunicipality|id|java.lang.Integer/3438268394|class|java.lang.String/2004016611|hr.ericsson.jis.domain.admin.CadastralMunicipality|cadastralMunicipalityId|source|creationDate|formatedName|VELIKA GORICA|userId|cadInstitution|deleted|institutionId|resourceCode|elementSelected|name|Odjel za katastar nekretnina Velika Gorica|hr.ericsson.jis.domain.admin.Institution|institution|Zemljišnoknjižni odjel Velika Gorica|place|sidMainBook|java.lang.Long/4227064769|hr.ericsson.jis.domain.admin.MainBook|status|1|1|2|3|4|2|5|6|7|18|8|9|114|1|21|10|11|0|12|-3|13|7|3|14|15|102844|16|17|18|19|-5|20|15|1|21|9|116|0|1|22|17|23|24|15|-20|25|7|8|10|-3|12|-3|26|-3|27|15|32|28|15|0|29|-3|30|17|31|16|17|32|33|7|9|10|-3|12|-3|26|-3|27|15|277|28|-13|29|-3|30|17|34|35|-9|16|-15|26|-3|28|-7|29|-3|30|-9|36|37|286610893|17179869184|14|15|21921|16|17|38|39|15|0|40|")
  # Encoding(payload) <- "UTF-8"
  # payload <- RCurl::curlEscape(payload)
  session <- rvest:::request_POST(session, commonRPCServiceUrl, body = payload, add_headers(headers))
  # Show the raw response bytes, then the same bytes read as text.
  session$response$content
  readBin(session$response$content, character())
  

Tags: httpsclientuilangsessionhrossjava