diff --git a/codebeaver.yml b/codebeaver.yml
new file mode 100644
index 0000000..a22a7b7
--- /dev/null
+++ b/codebeaver.yml
@@ -0,0 +1,7 @@
+workspaces:
+- from: jest
+ name: scrapegraph-js
+ path: scrapegraph-js
+- from: pytest
+ name: scrapegraph-py
+ path: scrapegraph-py
diff --git a/scrapegraph-py/tests/test_localscraper.py b/scrapegraph-py/tests/test_localscraper.py
new file mode 100644
index 0000000..ce54e5a
--- /dev/null
+++ b/scrapegraph-py/tests/test_localscraper.py
@@ -0,0 +1,90 @@
+import pytest
+from pydantic import BaseModel
+from scrapegraph_py.models.localscraper import LocalScraperRequest, GetLocalScraperRequest
+
+# Create a dummy output schema to test the conversion in model_dump.
+class DummySchema(BaseModel):
+ test_field: str
+
+def test_output_schema_conversion():
+ """
+ Test that when an output_schema is provided in a LocalScraperRequest,
+ model_dump returns a dictionary where the output_schema key holds the JSON schema
+ of the provided Pydantic model.
+ """
+ user_prompt = "Extract company details"
+    website_html = "<html><body><div>Content</div></body></html>"
+ # Create a LocalScraperRequest with a dummy output_schema.
+ request = LocalScraperRequest(user_prompt=user_prompt, website_html=website_html, output_schema=DummySchema)
+ dumped = request.model_dump()
+ # Verify that output_schema is converted properly in the dumped dictionary.
+ assert "output_schema" in dumped
+ assert dumped["output_schema"] == DummySchema.model_json_schema()
+
+def test_invalid_website_html_structure():
+ """
+ Test that LocalScraperRequest raises a ValueError when the website_html provided
+ has no parseable HTML tags. This ensures the HTML content validation catches
+ non-HTML input.
+ """
+ # This string has no HTML tags so BeautifulSoup.find() should return None.
+ invalid_html = "Just some random text"
+ with pytest.raises(ValueError, match="Invalid HTML - no parseable content found"):
+ LocalScraperRequest(user_prompt="Extract info about the company", website_html=invalid_html)
+
+def test_invalid_user_prompt_non_alnum():
+ """
+ Test that LocalScraperRequest raises a ValueError when the user_prompt
+ does not contain any alphanumeric characters.
+ """
+ with pytest.raises(ValueError, match="User prompt must contain a valid prompt"):
+ LocalScraperRequest(
+ user_prompt="!!!",
+        website_html="<html><body><h1>Valid Content</h1></body></html>"
+ )
+
+def test_get_localscraper_request_invalid_uuid():
+ """
+ Test that GetLocalScraperRequest raises a ValueError when an invalid UUID is provided.
+ This ensures that the model correctly validates the request_id as a proper UUID.
+ """
+ invalid_uuid = "not-a-valid-uuid"
+ with pytest.raises(ValueError, match="request_id must be a valid UUID"):
+ GetLocalScraperRequest(request_id=invalid_uuid)
+
+def test_website_html_exceeds_maximum_size():
+ """
+ Test that LocalScraperRequest raises a ValueError when the website_html content
+ exceeds the maximum allowed size of 2MB. The generated HTML is valid but too large.
+ """
+ # Calculate the number of characters needed to exceed 2MB when encoded in UTF-8.
+ max_size_bytes = 2 * 1024 * 1024
+ # Create a valid HTML string that exceeds 2MB.
+    base_html_prefix = "<html><body><p>"
+    base_html_suffix = "</p></body></html>"
+ repeated_char_length = max_size_bytes - len(base_html_prefix.encode("utf-8")) - len(base_html_suffix.encode("utf-8")) + 1
+ oversized_content = "a" * repeated_char_length
+ oversized_html = f"{base_html_prefix}{oversized_content}{base_html_suffix}"
+
+ with pytest.raises(ValueError, match="Website HTML content exceeds maximum size of 2MB"):
+ LocalScraperRequest(user_prompt="Extract info", website_html=oversized_html)
+
+def test_website_html_exactly_maximum_size():
+ """
+ Test that LocalScraperRequest accepts website_html content exactly 2MB in size.
+ This ensures that the size validation correctly allows content on the boundary.
+ """
+ user_prompt = "Extract info with exact size HTML"
+    prefix = "<html><body><p>"
+    suffix = "</p></body></html>"
+ # Calculate the length of the content needed to exactly reach 2MB when combined with prefix and suffix.
+ max_size_bytes = 2 * 1024 * 1024
+ content_length = max_size_bytes - len(prefix.encode("utf-8")) - len(suffix.encode("utf-8"))
+ valid_content = "a" * content_length
+ html = prefix + valid_content + suffix
+
+ # Attempt to create a valid LocalScraperRequest.
+ request = LocalScraperRequest(user_prompt=user_prompt, website_html=html)
+
+ # Verify that the HTML content is exactly 2MB in size when encoded in UTF-8.
+ assert len(request.website_html.encode("utf-8")) == max_size_bytes
diff --git a/scrapegraph-py/tests/test_markdownify.py b/scrapegraph-py/tests/test_markdownify.py
new file mode 100644
index 0000000..80c6e32
--- /dev/null
+++ b/scrapegraph-py/tests/test_markdownify.py
@@ -0,0 +1,59 @@
+import pytest
+from scrapegraph_py.models.markdownify import MarkdownifyRequest, GetMarkdownifyRequest
+
+def test_markdownify_request_invalid_url_scheme():
+ """
+ Test that MarkdownifyRequest raises a ValueError when the website_url does not
+ start with either 'http://' or 'https://'.
+ """
+ with pytest.raises(ValueError, match="Invalid URL"):
+ MarkdownifyRequest(website_url="ftp://example.com")
+
+def test_markdownify_request_empty_url():
+ """
+ Test that MarkdownifyRequest raises a ValueError when the website_url is empty or contains only whitespace.
+ """
+ with pytest.raises(ValueError, match="Website URL cannot be empty"):
+ MarkdownifyRequest(website_url=" ")
+
+def test_markdownify_request_valid_url():
+ """
+ Test that MarkdownifyRequest properly creates an instance when provided with a valid URL.
+ This covers the scenario where the input URL meets all validation requirements.
+ """
+ valid_url = "https://example.com"
+ req = MarkdownifyRequest(website_url=valid_url)
+ assert req.website_url == valid_url
+
+def test_markdownify_request_untrimmed_url():
+ """
+ Test that MarkdownifyRequest raises a ValueError when the website_url contains leading or trailing whitespace.
+ Although the stripped URL would be valid, the actual value is not processed further, causing the check
+ for the proper URL scheme to fail.
+ """
+ # The URL has leading whitespace, so it does not start directly with "https://"
+ with pytest.raises(ValueError, match="Invalid URL"):
+ MarkdownifyRequest(website_url=" https://example.com")
+
+def test_get_markdownify_request_invalid_uuid():
+ """
+ Test that GetMarkdownifyRequest raises a ValueError when the request_id is not a valid UUID.
+ """
+ with pytest.raises(ValueError, match="request_id must be a valid UUID"):
+ GetMarkdownifyRequest(request_id="invalid_uuid")
+
+def test_get_markdownify_request_valid_uuid():
+ """
+ Test that GetMarkdownifyRequest properly creates an instance when provided with a valid UUID.
+ """
+ valid_uuid = "123e4567-e89b-12d3-a456-426614174000"
+ req = GetMarkdownifyRequest(request_id=valid_uuid)
+ assert req.request_id == valid_uuid
+
+def test_get_markdownify_request_untrimmed_uuid():
+ """
+ Test that GetMarkdownifyRequest raises a ValueError when the request_id
+ contains leading or trailing whitespace, despite the trimmed UUID being valid.
+ """
+ with pytest.raises(ValueError, match="request_id must be a valid UUID"):
+ GetMarkdownifyRequest(request_id=" 123e4567-e89b-12d3-a456-426614174000 ")
diff --git a/scrapegraph-py/tests/test_smartscraper.py b/scrapegraph-py/tests/test_smartscraper.py
new file mode 100644
index 0000000..3d8b598
--- /dev/null
+++ b/scrapegraph-py/tests/test_smartscraper.py
@@ -0,0 +1,78 @@
+import pytest
+from pydantic import BaseModel, ValidationError
+from scrapegraph_py.models.smartscraper import SmartScraperRequest, GetSmartScraperRequest
+
+# Define a dummy schema to test the output_schema conversion in model_dump
+class DummySchema(BaseModel):
+ """A dummy schema to simulate a Pydantic model with JSON schema conversion."""
+ a: int = 1
+
+def test_model_dump_with_output_schema_conversion():
+ """
+ Test that model_dump on SmartScraperRequest converts the provided output_schema into a JSON schema dict.
+ """
+ # Create a request with a valid user prompt, website URL, and a dummy output_schema.
+ request = SmartScraperRequest(
+ user_prompt="Extract information about the company",
+ website_url="https://scrapegraphai.com/",
+ output_schema=DummySchema
+ )
+ # Get the dump dict from the model.
+ output = request.model_dump()
+ # The model_dump should include the 'output_schema' converted to its JSON schema representation.
+ expected_schema = DummySchema.model_json_schema()
+ assert output.get("output_schema") == expected_schema
+
+def test_model_dump_without_output_schema():
+ """
+ Test that model_dump on SmartScraperRequest returns output_schema as None
+ when no output_schema is provided. This ensures that the conversion logic is only
+ applied when output_schema is not None.
+ """
+ # Create a valid SmartScraperRequest without providing an output_schema.
+ request = SmartScraperRequest(
+ user_prompt="Extract some meaningful data",
+ website_url="https://scrapegraphai.com/"
+ )
+ # Get the dumped dictionary from the model.
+ output = request.model_dump()
+ # Ensure that the output contains the key "output_schema" and its value is None.
+ assert "output_schema" in output, "Output schema key should be present even if None"
+ assert output["output_schema"] is None, "Output schema should be None when not provided"
+
+def test_invalid_get_smartscraper_request_id():
+ """
+ Test that GetSmartScraperRequest raises a ValueError when provided with an invalid UUID.
+ This test ensures that the request_id field is validated correctly.
+ """
+ with pytest.raises(ValueError, match="request_id must be a valid UUID"):
+ GetSmartScraperRequest(request_id="invalid-uuid")
+
+def test_invalid_url_in_smartscraper_request():
+ """
+ Test that SmartScraperRequest raises a ValueError when provided with a website_url
+ that does not start with 'http://' or 'https://'. This ensures the URL validation works.
+ """
+ with pytest.raises(ValueError, match="Invalid URL"):
+ SmartScraperRequest(
+ user_prompt="Extract data",
+ website_url="ftp://invalid-url"
+ )
+
+def test_invalid_user_prompt_empty_and_non_alnum():
+ """
+ Test that SmartScraperRequest raises a ValueError when the user_prompt is either empty (or only whitespace)
+ or when it contains no alphanumeric characters. This ensures the user prompt validator is working correctly.
+ """
+ # Test with a user_prompt that is empty (only whitespace)
+ with pytest.raises(ValueError, match="User prompt cannot be empty"):
+ SmartScraperRequest(
+ user_prompt=" ",
+ website_url="https://scrapegraphai.com/"
+ )
+ # Test with a user_prompt that contains no alphanumeric characters
+ with pytest.raises(ValueError, match="User prompt must contain a valid prompt"):
+ SmartScraperRequest(
+ user_prompt="!!!",
+ website_url="https://scrapegraphai.com/"
+ )