Skip to content

Refresh UC Mode and dependencies #2685

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<h1>SeleniumBase</h1>

<p align="center"><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb2.png" alt="SeleniumBase" title="SeleniumBase" width="350" /></a></p>
<p align="center"><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb3.png" alt="SeleniumBase" title="SeleniumBase" width="350" /></a></p>


<p align="center" class="hero__title"><b>All-in-one Browser Automation Framework:<br />Web Crawling / Testing / Scraping / Stealth</b></p>
Expand Down Expand Up @@ -102,7 +102,7 @@ pytest test_demo_site.py

--------

<p align="left"><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb2.png" alt="SeleniumBase" title="SeleniumBase" width="232" /></a></p>
<p align="left"><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb3.png" alt="SeleniumBase" title="SeleniumBase" width="232" /></a></p>

<blockquote>
<p dir="auto"><strong>Explore the README:</strong></p>
Expand Down Expand Up @@ -1371,7 +1371,7 @@ pytest --reruns=1 --reruns-delay=1

<p><div><b><a href="https://github.com/mdmintz">https://github.com/mdmintz</a></b></div></p>

<div><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb2.png" title="SeleniumBase" width="240" /></a></div>
<div><a href="https://github.com/seleniumbase/SeleniumBase/"><img src="https://seleniumbase.github.io/cdn/img/super_logo_sb3.png" title="SeleniumBase" width="240" /></a></div>
<div><a href="https://seleniumbase.io"><img src="https://img.shields.io/badge/docs-seleniumbase.io-11BBAA.svg" alt="SeleniumBase Docs" /></a></div> <div><a href="https://github.com/seleniumbase/SeleniumBase"><img src="https://img.shields.io/badge/tested%20with-SeleniumBase-04C38E.svg" alt="Tested with SeleniumBase" /></a></div> <div><a href="https://github.com/seleniumbase/SeleniumBase/blob/master/LICENSE"><img src="https://img.shields.io/badge/license-MIT-22BBCC.svg" title="SeleniumBase" /></a> <a href="https://gitter.im/seleniumbase/SeleniumBase" target="_blank"><img src="https://img.shields.io/gitter/room/seleniumbase/SeleniumBase.svg" alt="Gitter chat"/></a></div>
<div><a href="https://pepy.tech/project/seleniumbase" target="_blank"><img src="https://static.pepy.tech/badge/seleniumbase" alt="SeleniumBase PyPI downloads" /></a></div>
<div><a href="https://github.com/seleniumbase/SeleniumBase/stargazers"><img src="https://img.shields.io/github/stars/seleniumbase/seleniumbase.svg?color=19A57B" title="Stargazers" /></a></div>
Expand Down
1 change: 0 additions & 1 deletion examples/raw_ahrefs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from seleniumbase import SB


with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://ahrefs.com/website-authority-checker"
input_field = 'input[placeholder="Enter domain"]'
Expand Down
3 changes: 2 additions & 1 deletion examples/raw_form_turnstile.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from seleniumbase import SB

with SB(uc=True, test=True) as sb:
sb.driver.uc_open_with_reconnect("seleniumbase.io/apps/form_turnstile", 3)
url = "seleniumbase.io/apps/form_turnstile"
sb.driver.uc_open_with_reconnect(url, 2)
sb.press_keys("#name", "SeleniumBase")
sb.press_keys("#email", "test@test.test")
sb.press_keys("#phone", "1-555-555-5555")
Expand Down
2 changes: 1 addition & 1 deletion examples/raw_nopecha.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from seleniumbase import SB

with SB(uc=True, test=True) as sb:
sb.driver.uc_open_with_reconnect("nopecha.com/demo/turnstile", 3.4)
sb.driver.uc_open_with_reconnect("nopecha.com/demo/turnstile", 4)
if sb.is_element_visible("#example-container0 iframe"):
sb.switch_to_frame("#example-container0 iframe")
if not sb.is_element_visible("circle.success-circle"):
Expand Down
11 changes: 11 additions & 0 deletions examples/raw_order_tickets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from seleniumbase import SB

with SB(uc=True, test=True, ad_block_on=True) as sb:
url = "https://www.thaiticketmajor.com/concert/"
sb.driver.uc_open_with_reconnect(url, 5.5)
sb.driver.uc_click("button.btn-signin", 4)
sb.switch_to_frame('iframe[title*="Cloudflare"]')
sb.assert_element("div#success svg#success-icon")
sb.switch_to_default_content()
sb.set_messenger_theme(location="top_center")
sb.post_message("SeleniumBase wasn't detected!")
5 changes: 2 additions & 3 deletions examples/raw_turnstile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@


def open_the_turnstile_page(sb):
sb.driver.uc_open_with_reconnect(
"seleniumbase.io/apps/turnstile", reconnect_time=3,
)
url = "seleniumbase.io/apps/turnstile"
sb.driver.uc_open_with_reconnect(url, reconnect_time=2)


def click_turnstile_and_verify(sb):
Expand Down
69 changes: 62 additions & 7 deletions help_docs/uc_mode.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,21 @@
from seleniumbase import Driver

driver = Driver(uc=True)
driver.uc_open_with_reconnect("https://gitlab.com/users/sign_in", 3)
url = "https://gitlab.com/users/sign_in"
driver.uc_open_with_reconnect(url, 3)
driver.quit()
```

<img src="https://seleniumbase.github.io/other/gitlab_bypass.png" title="SeleniumBase" width="370">

👤 Here's an example with the <b><code translate="no">SB</code></b> manager (which has more methods and functionality than the <b><code translate="no">Driver</code></b> format):

```python
from seleniumbase import SB

with SB(uc=True) as sb:
sb.driver.uc_open_with_reconnect("https://gitlab.com/users/sign_in", 3)
url = "https://gitlab.com/users/sign_in"
sb.driver.uc_open_with_reconnect(url, 3)
```

👤 Here's a longer example, which includes a retry if the CAPTCHA isn't bypassed on the first attempt:
Expand All @@ -55,9 +59,8 @@ with SB(uc=True, test=True) as sb:
from seleniumbase import SB

def open_the_turnstile_page(sb):
sb.driver.uc_open_with_reconnect(
"https://seleniumbase.io/apps/turnstile", reconnect_time=3,
)
url = "seleniumbase.io/apps/turnstile"
sb.driver.uc_open_with_reconnect(url, reconnect_time=2)

def click_turnstile_and_verify(sb):
sb.switch_to_frame("iframe")
Expand All @@ -77,6 +80,46 @@ with SB(uc=True, test=True) as sb:

<img src="https://seleniumbase.github.io/other/turnstile_click.jpg" title="SeleniumBase" width="440">

👤 Here's an example <b>where the CAPTCHA appears after submitting a form</b>:

```python
from seleniumbase import SB

with SB(uc=True, test=True, locale_code="en") as sb:
url = "https://ahrefs.com/website-authority-checker"
input_field = 'input[placeholder="Enter domain"]'
submit_button = 'span:contains("Check Authority")'
sb.driver.uc_open_with_reconnect(url, 1) # The bot-check is later
sb.type(input_field, "github.com/seleniumbase/SeleniumBase")
sb.driver.reconnect(0.1)
sb.driver.uc_click(submit_button, reconnect_time=4)
sb.wait_for_text_not_visible("Checking", timeout=10)
sb.highlight('p:contains("github.com/seleniumbase/SeleniumBase")')
sb.highlight('a:contains("Top 100 backlinks")')
sb.set_messenger_theme(location="bottom_center")
sb.post_message("SeleniumBase wasn't detected!")
```

<img src="https://seleniumbase.github.io/other/ahrefs_bypass.png" title="SeleniumBase" width="540">

👤 Here, <b>the CAPTCHA appears after clicking to go to the sign-in screen</b>:

```python
from seleniumbase import SB

with SB(uc=True, test=True, ad_block_on=True) as sb:
url = "https://www.thaiticketmajor.com/concert/"
sb.driver.uc_open_with_reconnect(url, 5.5)
sb.driver.uc_click("button.btn-signin", 4)
sb.switch_to_frame('iframe[title*="Cloudflare"]')
sb.assert_element("div#success svg#success-icon")
sb.switch_to_default_content()
sb.set_messenger_theme(location="top_center")
sb.post_message("SeleniumBase wasn't detected!")
```

<img src="https://seleniumbase.github.io/other/ttm_bypass.png" title="SeleniumBase" width="540">

--------

👤 In <b translate="no">UC Mode</b>, <code translate="no">driver.get(url)</code> has been modified from its original version: If anti-bot services are detected from a <code translate="no">requests.get(url)</code> call that's made before navigating to the website, then <code translate="no">driver.uc_open_with_reconnect(url)</code> will be used instead. To open a URL normally in <b translate="no">UC Mode</b>, use <code translate="no">driver.default_get(url)</code>.
Expand Down Expand Up @@ -247,7 +290,7 @@ Here are the 3 primary things that <b translate="no">UC Mode</b> does to make bo

For example, if the <b translate="no">Chrome DevTools Console</b> variables aren't renamed, you can expect to find them easily when using <b><code translate="no">selenium</code></b> for browser automation:

<img src="https://seleniumbase.github.io/other/cdc_args.png" title="SeleniumBase" width="380">
<img src="https://seleniumbase.github.io/other/cdc_args.png" title="SeleniumBase" width="390">

(If those variables are still there, then websites can easily detect your bots.)

Expand Down Expand Up @@ -278,7 +321,7 @@ The above JS method is used within the <b><code translate="no">SeleniumBase</cod

🏆 <b>Choosing the right CAPTCHA service</b> for your business / website:

<img src="https://seleniumbase.github.io/other/me_se_conf.jpg" title="SeleniumBase" width="340">
<img src="https://seleniumbase.github.io/other/me_se_conf.jpg" title="SeleniumBase" width="370">

As an ethical hacker / cybersecurity researcher who builds bots that bypass CAPTCHAs for sport, <b>the CAPTCHA service that I personally recommend</b> for keeping bots out is <b translate="no">Google's reCAPTCHA</b>:

Expand All @@ -288,6 +331,18 @@ Since Google makes Chrome, Google's own <b translate="no">reCAPTCHA</b> service

--------

⚖️ <b>Legal implications of web-scraping</b>:

According to the following article, https://nubela.co/blog/meta-lost-the-scraping-legal-battle-to-bright-data/ (which outlines a court case in which the social-networking company Meta lost a legal battle to the data-scraping company Bright Data), web scraping was determined to be 100% legal in the eyes of the courts as long as:
1. The scraping is only done with <b>public data</b> and <b>not private data</b>.
2. The scraping isn’t done while logged in on the site being scraped.

If the above criteria are met, then (according to the article) scrape away!

(Note: I'm not a lawyer, so I can't officially offer legal advice, but I can direct people to existing articles online where people can find their own answers.)

--------

<img src="https://seleniumbase.github.io/cdn/img/sb_text_f.png" alt="SeleniumBase" title="SeleniumBase" align="center" width="335">

<div><a href="https://github.com/seleniumbase/SeleniumBase"><img src="https://seleniumbase.github.io/cdn/img/sb_logo_gs.png" alt="SeleniumBase" title="SeleniumBase" width="335" /></a></div>
2 changes: 1 addition & 1 deletion mkdocs_build/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

regex>=2023.12.25
pymdown-extensions>=10.7.1
pipdeptree>=2.17.0
pipdeptree>=2.18.0
python-dateutil>=2.8.2
Markdown==3.6
markdown2==2.4.13
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ wheel>=0.43.0;python_version>="3.8"
attrs>=23.2.0
certifi>=2024.2.2
filelock>=3.12.2;python_version<"3.8"
filelock>=3.13.3;python_version>="3.8"
filelock>=3.13.4;python_version>="3.8"
platformdirs>=4.0.0;python_version<"3.8"
platformdirs>=4.2.0;python_version>="3.8"
typing-extensions>=4.11.0;python_version>="3.8"
parse>=1.20.1
parse-type>=0.6.2
pyyaml>=6.0.1
six==1.16.0
idna==3.6
idna==3.7
chardet==5.2.0
charset-normalizer==3.3.2
urllib3>=1.26.18,<2;python_version<"3.10"
Expand All @@ -35,7 +35,7 @@ cssselect==1.2.0
sortedcontainers==2.4.0
fasteners==0.19
execnet==2.0.2;python_version<"3.8"
execnet==2.1.0;python_version>="3.8"
execnet==2.1.1;python_version>="3.8"
iniconfig==2.0.0
pluggy==1.2.0;python_version<"3.8"
pluggy==1.4.0;python_version>="3.8"
Expand Down
2 changes: 1 addition & 1 deletion seleniumbase/__version__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# seleniumbase package
__version__ = "4.25.2"
__version__ = "4.25.3"
17 changes: 17 additions & 0 deletions seleniumbase/fixtures/base_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -4134,6 +4134,23 @@ def get_new_driver(
self.__dont_record_open = True
self.open(new_start_page)
self.__dont_record_open = False
if undetectable:
if hasattr(new_driver, "uc_open"):
self.uc_open = new_driver.uc_open
if hasattr(new_driver, "uc_open_with_tab"):
self.uc_open_with_tab = new_driver.uc_open_with_tab
if hasattr(new_driver, "uc_open_with_reconnect"):
self.uc_open_with_reconnect = new_driver.uc_open_with_reconnect
if hasattr(new_driver, "reconnect"):
self.reconnect = new_driver.reconnect
if hasattr(new_driver, "disconnect"):
self.disconnect = new_driver.disconnect
if hasattr(new_driver, "connect"):
self.connect = new_driver.connect
if hasattr(new_driver, "uc_click"):
self.uc_click = new_driver.uc_click
if hasattr(new_driver, "uc_switch_to_frame"):
self.uc_switch_to_frame = new_driver.uc_switch_to_frame
return new_driver

def switch_to_driver(self, driver):
Expand Down
2 changes: 1 addition & 1 deletion seleniumbase/undetected/webelement.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def uc_click(
):
if driver and selector and by:
delayed_click = False
if tag_name == "span" or tag_name == "button" or tag_name == "div":
if tag_name in ["span", "button", "div", "a"]:
delayed_click = True
if delayed_click and ":contains" not in selector:
selector = js_utils.convert_to_css_selector(selector, by)
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,15 @@
'attrs>=23.2.0',
"certifi>=2024.2.2",
'filelock>=3.12.2;python_version<"3.8"',
'filelock>=3.13.3;python_version>="3.8"',
'filelock>=3.13.4;python_version>="3.8"',
'platformdirs>=4.0.0;python_version<"3.8"',
'platformdirs>=4.2.0;python_version>="3.8"',
'typing-extensions>=4.11.0;python_version>="3.8"',
'parse>=1.20.1',
'parse-type>=0.6.2',
'pyyaml>=6.0.1',
"six==1.16.0",
"idna==3.6",
"idna==3.7",
'chardet==5.2.0',
'charset-normalizer==3.3.2',
'urllib3>=1.26.18,<2;python_version<"3.10"',
Expand All @@ -183,7 +183,7 @@
"sortedcontainers==2.4.0",
'fasteners==0.19',
'execnet==2.0.2;python_version<"3.8"',
'execnet==2.1.0;python_version>="3.8"',
'execnet==2.1.1;python_version>="3.8"',
'iniconfig==2.0.0',
'pluggy==1.2.0;python_version<"3.8"',
'pluggy==1.4.0;python_version>="3.8"',
Expand Down