def on_response(self, response):
    """Handle a page ``response`` event.

    Prints the URL of every response. When the URL contains ``/loginIn``
    and the status is 200, the response body is read as text and handed to
    ``self.parse``.

    Args:
        response: The response object delivered by the page's ``response``
            event; it must expose ``url``, ``status`` and ``text()``.
    """
    print(response.url)
    # Only the successful login call is of interest; the body of every
    # other response is never read.
    if "/loginIn" in response.url and response.status == 200:
        text = response.text()
        self.parse(text)
def spider_detail(self, playwright: Playwright) -> None:
    """Attach to an already-open Chromium over CDP and listen for responses.

    Connects to the browser at ``http://127.0.0.1:9222``, reuses its first
    context and first page, navigates that page to ``self.url`` and then
    registers ``self.on_response`` as the page's ``response`` handler.
    The browser connection is always closed on the way out, even if a
    step fails.

    Args:
        playwright: The Playwright driver used to reach Chromium.
    """
    # Connect to a browser that is already running; make sure the port
    # matches the one it was started with.
    self.browser = playwright.chromium.connect_over_cdp("http://127.0.0.1:9222")
    try:
        # Reuse the existing context and tab; note that this is deliberately
        # not browser.new_page().
        self.context = self.browser.contexts[0]
        self.page = self.context.pages[0]
        self.page.goto(self.url)
        self.page.wait_for_timeout(1000)
        # Start listening for responses. The handler is attached after the
        # initial navigation, so responses from goto() itself are not seen.
        self.page.on('response', self.on_response)
        ...
    finally:
        # Only the browser connection is closed. Closing the page and the
        # context first was observed to make response.text() in the
        # handler crash, so those two calls are intentionally left out.
        self.browser.close()
细节:
# self.page.close()
# self.context.close()
self.browser.close()
前两个 close() 并不是忘了写,而是一加上就会出问题:`text = response.text()` 会直接闪退,只保留关闭浏览器这一句则一切正常。具体原因我还没有完全弄清楚,推测是页面或上下文被关闭后,尚未读取完的响应体就无法再获取了。