Advanced web scraping

Author

Tony Duan

Code
from seleniumbase import SB

# Demo: run a Google search for the SeleniumBase repository, follow the
# GitHub link in the results, and print the title of the page that loads.
with SB(uc=True, test=True) as browser:
    browser.open("https://google.com/ncr")
    # NOTE(review): the trailing "\n" presumably submits the search form.
    browser.type('[title="Search"]', "SeleniumBase GitHub page\n")
    browser.click('[href*="github.com/seleniumbase/"]')
    # Screenshot goes to ./latest_logs/
    browser.save_screenshot_to_logs()
    landing_title = browser.get_page_title()
    print(landing_title)
Code
from seleniumbase import SB

# Demo: open the Whiskybase listing page, save a screenshot of it, and
# print the page title. No search or click step is performed here.
with SB(uc=True, test=True) as browser:
    browser.open("https://www.whiskybase.com/whiskies/")
    # Screenshot goes to ./latest_logs/
    browser.save_screenshot_to_logs()
    listing_title = browser.get_page_title()
    print(listing_title)
Code
from seleniumbase import BaseCase, Driver
class Scraper(BaseCase):
    """Test case that fetches a single Whiskybase whisky page."""

    def test_bypass_bot_protection(self):
        """Open the whisky page, click the captcha, and print the page HTML.

        A separate ``Driver(uc=True)`` browser is created for this test and
        is always closed afterwards, even when one of the steps raises.
        """
        driver = Driver(uc=True)
        try:
            driver.open(
                "https://www.whiskybase.com/whiskies/whisky/268484/2009-ud"
            )
            driver.uc_gui_click_captcha()
            # NOTE(review): page_html is local to this method; code outside
            # the class that needs the HTML has to obtain it separately.
            page_html = driver.get_page_source()
            print(page_html)
        finally:
            # Close the browser on both the success and the failure path.
            driver.quit()
Code
from bs4 import BeautifulSoup
# Parse the HTML held in page_html using the "html.parser" parser, then
# print the prettified rendering of the parsed document.
soup = BeautifulSoup(page_html, features="html.parser")
pretty_html = soup.prettify()
print(pretty_html)
Code
# Print the text content of the parsed document.
page_text = soup.get_text()
print(page_text)
Code
# select() returns a list-like collection of matches, which has no .text
# attribute; select_one() returns the first matching tag, or None when
# nothing matches the selector.
rating_tag = soup.select_one(".votes-rating-current")
# Guard the no-match case so a missing element yields None instead of an
# AttributeError.
rating_text = rating_tag.text if rating_tag is not None else None
print(rating_text)

h1

1 References

https://github.com/seleniumbase/SeleniumBase

https://medium.com/@datajournal/web-scraping-with-seleniumbase-e3ead6aebe7f

https://github.com/ultrafunkamsterdam/undetected-chromedriver

Back to top