Skip to content

Commit 46161bf

Browse files
committed
test: Add coverage improvement test for scrapegraph-py/tests/test_smartscraper.py
1 parent f80fbaa commit 46161bf

File tree

1 file changed

+78
-0
lines changed

1 file changed

+78
-0
lines changed
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import pytest
2+
from pydantic import BaseModel, ValidationError
3+
from scrapegraph_py.models.smartscraper import SmartScraperRequest, GetSmartScraperRequest
4+
5+
# Define a dummy schema to test the output_schema conversion in model_dump
6+
class DummySchema(BaseModel):
    """Minimal Pydantic model passed as ``output_schema`` in the tests below.

    It declares a single integer field, ``a``, whose default value is 1.
    """

    a: int = 1
9+
10+
def test_model_dump_with_output_schema_conversion():
    """
    Check that ``SmartScraperRequest.model_dump`` emits ``output_schema`` as the
    JSON schema dict of the model class that was supplied.
    """
    # Keyword arguments for a valid request that carries the dummy schema class.
    request_kwargs = {
        "user_prompt": "Extract information about the company",
        "website_url": "https://scrapegraphai.com/",
        "output_schema": DummySchema,
    }
    dumped = SmartScraperRequest(**request_kwargs).model_dump()
    # The dumped value must equal the schema class's own JSON schema.
    assert dumped.get("output_schema") == DummySchema.model_json_schema()
25+
26+
def test_model_dump_without_output_schema():
    """
    Check that ``SmartScraperRequest.model_dump`` still reports an
    ``output_schema`` key, holding ``None``, when the request was built
    without a schema.
    """
    # A valid request that deliberately omits output_schema.
    schema_free_request = SmartScraperRequest(
        user_prompt="Extract some meaningful data",
        website_url="https://scrapegraphai.com/",
    )
    dumped = schema_free_request.model_dump()
    # The key has to exist, and its value has to be exactly None.
    assert "output_schema" in dumped, "Output schema key should be present even if None"
    assert dumped["output_schema"] is None, "Output schema should be None when not provided"
42+
43+
def test_invalid_get_smartscraper_request_id():
    """
    Check that building a ``GetSmartScraperRequest`` with a ``request_id`` that
    is not a UUID raises a ``ValueError`` mentioning the UUID requirement.
    """
    malformed_id = "invalid-uuid"
    with pytest.raises(ValueError, match="request_id must be a valid UUID"):
        GetSmartScraperRequest(request_id=malformed_id)
50+
51+
def test_invalid_url_in_smartscraper_request():
    """
    Check that building a ``SmartScraperRequest`` with an ``ftp://`` website_url
    raises a ``ValueError`` whose message matches "Invalid URL".
    """
    unsupported_url = "ftp://invalid-url"
    with pytest.raises(ValueError, match="Invalid URL"):
        SmartScraperRequest(user_prompt="Extract data", website_url=unsupported_url)
61+
62+
def test_invalid_user_prompt_empty_and_non_alnum():
    """
    Check that ``SmartScraperRequest`` rejects unusable user prompts with a
    ``ValueError``: first a whitespace-only prompt, then a prompt that has no
    alphanumeric characters.
    """
    target_url = "https://scrapegraphai.com/"
    # Each rejected prompt is paired with the error text it should trigger,
    # and the cases run in the order listed.
    rejected_prompts = (
        (" ", "User prompt cannot be empty"),
        ("!!!", "User prompt must contain a valid prompt"),
    )
    for bad_prompt, expected_message in rejected_prompts:
        with pytest.raises(ValueError, match=expected_message):
            SmartScraperRequest(user_prompt=bad_prompt, website_url=target_url)

0 commit comments

Comments
 (0)