|
| 1 | +import pytest |
| 2 | +from pydantic import BaseModel, ValidationError |
| 3 | +from scrapegraph_py.models.smartscraper import SmartScraperRequest, GetSmartScraperRequest |
| 4 | + |
# Minimal stand-in model: gives the tests below a concrete Pydantic class to
# pass as output_schema so the schema conversion in model_dump can be checked.
class DummySchema(BaseModel):
    """A dummy schema to simulate a Pydantic model with JSON schema conversion."""

    # One defaulted integer field keeps the generated JSON schema small.
    a: int = 1
| 9 | + |
def test_model_dump_with_output_schema_conversion():
    """
    Check that dumping a SmartScraperRequest built with a Pydantic class as
    output_schema yields that class's JSON schema under "output_schema".
    """
    # The reference value is whatever Pydantic itself generates for the class.
    schema_from_pydantic = DummySchema.model_json_schema()

    dumped = SmartScraperRequest(
        user_prompt="Extract information about the company",
        website_url="https://scrapegraphai.com/",
        output_schema=DummySchema,
    ).model_dump()

    assert dumped.get("output_schema") == schema_from_pydantic
| 25 | + |
def test_model_dump_without_output_schema():
    """
    Check that a SmartScraperRequest built without an output_schema still dumps
    an "output_schema" key, and that the value under that key is None.
    """
    request_fields = {
        "user_prompt": "Extract some meaningful data",
        "website_url": "https://scrapegraphai.com/",
    }
    dumped = SmartScraperRequest(**request_fields).model_dump()

    # Presence and value are asserted separately so a failure says which one broke.
    assert "output_schema" in dumped, "Output schema key should be present even if None"
    assert dumped["output_schema"] is None, "Output schema should be None when not provided"
| 42 | + |
def test_invalid_get_smartscraper_request_id():
    """
    Check that building a GetSmartScraperRequest from a string that is not a
    UUID fails with a ValueError whose message mentions the request_id rule.
    """
    bad_request_id = "invalid-uuid"
    expected_failure = pytest.raises(
        ValueError, match="request_id must be a valid UUID"
    )
    with expected_failure:
        GetSmartScraperRequest(request_id=bad_request_id)
| 50 | + |
def test_invalid_url_in_smartscraper_request():
    """
    Check that SmartScraperRequest rejects a website_url using the ftp scheme,
    failing with a ValueError whose message contains "Invalid URL".
    """
    request_fields = {
        "user_prompt": "Extract data",
        "website_url": "ftp://invalid-url",
    }
    with pytest.raises(ValueError, match="Invalid URL"):
        SmartScraperRequest(**request_fields)
| 61 | + |
def test_invalid_user_prompt_empty_and_non_alnum():
    """
    Check that SmartScraperRequest rejects two kinds of unusable user_prompt,
    each with its own ValueError message: a whitespace-only prompt, and a
    prompt made up solely of non-alphanumeric characters.
    """
    # (prompt, expected error fragment) pairs, checked in this order:
    # whitespace-only first, then punctuation-only.
    rejected_prompts = (
        (" ", "User prompt cannot be empty"),
        ("!!!", "User prompt must contain a valid prompt"),
    )
    for prompt, expected_message in rejected_prompts:
        with pytest.raises(ValueError, match=expected_message):
            SmartScraperRequest(
                user_prompt=prompt,
                website_url="https://scrapegraphai.com/",
            )
0 commit comments