From 9b3daf6881eece5eefcf8e923d34bd9f52c61a26 Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Tue, 14 Jun 2022 10:56:10 -0400
Subject: [PATCH 01/17] new: test cases

---
 tests/test_document.py |  8 ++++++++
 tests/test_graph.py    | 28 ++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/tests/test_document.py b/tests/test_document.py
index 95446da1..65875622 100644
--- a/tests/test_document.py
+++ b/tests/test_document.py
@@ -1832,6 +1832,14 @@ def test_document_import_bulk(col, bad_col, docs):
         assert col[doc_key]["loc"] == doc["loc"]
     empty_collection(col)
 
+    # Test import bulk with batch_size
+    results = col.import_bulk(docs, batch_size=len(docs) // 2)
+    assert type(results) is list
+    assert len(results) == 2
+
+    result = col.import_bulk(docs, batch_size=len(docs) * 2)
+    assert type(result) is dict
+
     # Test import bulk on_duplicate actions
     doc = docs[0]
     doc_key = doc["_key"]
diff --git a/tests/test_graph.py b/tests/test_graph.py
index de440642..34215661 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -57,6 +57,34 @@ def test_graph_properties(graph, bad_graph, db):
     assert isinstance(properties["revision"], str)
 
 
+def test_graph_provision(graph, db):
+    vertices = [{"foo": i} for i in range(1, 101)]
+    edges = [
+        {"_from": f"numbers/{j}", "_to": f"numbers/{i}", "result": j / i}
+        for i in range(1, 101)
+        for j in range(1, 101)
+        if j % i == 0
+    ]
+    e_d = [
+        {
+            "edge_collection": "is_divisible_by",
+            "from_vertex_collections": ["numbers"],
+            "to_vertex_collections": ["numbers"],
+        }
+    ]
+    graph = db.create_graph(
+        name="DivisibilityGraph",
+        edge_definitions=e_d,
+        collections={
+            "numbers": {"docs": vertices, "options": {"batch_size": 5}},
+            "is_divisible_by": {"docs": edges, "options": {"sync": True}},
+        },
+    )
+
+    assert graph.vertex_collection("numbers").count() == len(vertices)
+    assert graph.edge_collection("is_divisible_by").count() == len(edges)
+
+
 def test_graph_management(db, bad_db):
     # Test create graph
     graph_name = generate_graph_name()

From 0a61e9cb3474cae8118460e19414c40e207f15c2 Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Tue, 14 Jun 2022 10:58:56 -0400
Subject: [PATCH 02/17] initial feature commit

---
 arango/collection.py | 45 +++++++++++++++++++++++++++++------------
 arango/database.py   | 48 ++++++++++++++++++++++++++++++++++++++++----
 arango/utils.py      | 26 +++++++++++++++++++++++-
 3 files changed, 101 insertions(+), 18 deletions(-)

diff --git a/arango/collection.py b/arango/collection.py
index 4fd848fa..8e819619 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -42,7 +42,7 @@
 from arango.response import Response
 from arango.result import Result
 from arango.typings import Fields, Headers, Json, Params
-from arango.utils import get_doc_id, is_none_or_int, is_none_or_str
+from arango.utils import get_batches, get_doc_id, is_none_or_int, is_none_or_str
 
 
 class Collection(ApiGroup):
@@ -187,11 +187,15 @@ def _ensure_key_in_body(self, body: Json) -> Json:
             return body
         raise DocumentParseError('field "_key" or "_id" required')
 
-    def _ensure_key_from_id(self, body: Json) -> Json:
+    def _ensure_key_from_id(self, body: Json, index: Optional[int] = None) -> Json:
         """Return the body with "_key" field if it has "_id" field.
+            If it has neither, set the "_key" value to i, where i
+            is the document's index position in the sequence.
 
         :param body: Document body.
         :type body: dict
+        :param index: Document index value in the original list of documents.
+        :param index: int | None
         :return: Document body with "_key" field if it has "_id" field.
         :rtype: dict
         """
@@ -199,6 +203,11 @@ def _ensure_key_from_id(self, body: Json) -> Json:
             doc_id = self._validate_id(body["_id"])
             body = body.copy()
             body["_key"] = doc_id[len(self._id_prefix) :]
+
+        if "_id" not in body and "_key" not in body:
+            body = body.copy()
+            body["_key"] = str(index)
+
         return body
 
     @property
@@ -1934,7 +1943,8 @@ def import_bulk(
         overwrite: Optional[bool] = None,
         on_duplicate: Optional[str] = None,
         sync: Optional[bool] = None,
-    ) -> Result[Json]:
+        batch_size: Optional[int] = None,
+    ) -> Union[Result[Json], List[Result[Json]]]:
         """Insert multiple documents into the collection.
 
         .. note::
@@ -1984,11 +1994,16 @@ def import_bulk(
         :type on_duplicate: str
         :param sync: Block until operation is synchronized to disk.
         :type sync: bool | None
+        :param batch_size: Max number of documents to import at once. If
+            unspecified, will import all documents at once.
+        :type batch_size: int | None
         :return: Result of the bulk import.
         :rtype: dict
         :raise arango.exceptions.DocumentInsertError: If import fails.
         """
-        documents = [self._ensure_key_from_id(doc) for doc in documents]
+        documents = [
+            self._ensure_key_from_id(doc, i) for i, doc in enumerate(documents, 1)
+        ]
 
         params: Params = {"type": "array", "collection": self.name}
         if halt_on_error is not None:
@@ -2006,21 +2021,25 @@ def import_bulk(
         if sync is not None:
             params["waitForSync"] = sync
 
-        request = Request(
-            method="post",
-            endpoint="/_api/import",
-            data=documents,
-            params=params,
-            write=self.name,
-        )
-
         def response_handler(resp: Response) -> Json:
             if resp.is_success:
                 result: Json = resp.body
                 return result
             raise DocumentInsertError(resp, request)
 
-        return self._execute(request, response_handler)
+        result = []
+        for batch in get_batches(documents, batch_size):
+            request = Request(
+                method="post",
+                endpoint="/_api/import",
+                data=batch,
+                params=params,
+                write=self.name,
+            )
+
+            result.append(self._execute(request, response_handler))
+
+        return result[0] if len(result) == 1 else result
 
 
 class StandardCollection(Collection):
diff --git a/arango/database.py b/arango/database.py
index ea9ac058..89e92955 100644
--- a/arango/database.py
+++ b/arango/database.py
@@ -1170,6 +1170,7 @@ def create_graph(
         shard_count: Optional[int] = None,
         replication_factor: Optional[int] = None,
         write_concern: Optional[int] = None,
+        collections: Optional[Json] = None,
     ) -> Result[Graph]:
         """Create a new graph.
 
@@ -1217,18 +1218,49 @@ def create_graph(
             parameter cannot be larger than that of **replication_factor**.
             Default value is 1. Used for clusters only.
         :type write_concern: int
+        :param collections: A list collection data objects to provision
+            the graph with. See below for example.
+        :type collections: dict | None
         :return: Graph API wrapper.
         :rtype: arango.graph.Graph
         :raise arango.exceptions.GraphCreateError: If create fails.
 
         Here is an example entry for parameter **edge_definitions**:
 
+        .. code-block:: python
+
+            [
+                {
+                    'edge_collection': 'teach',
+                    'from_vertex_collections': ['teachers'],
+                    'to_vertex_collections': ['lectures']
+                }
+            ]
+
+        Here is an example entry for parameter **collections**:
+        TODO: Rework **collections** data structure?
         .. code-block:: python
 
             {
-                'edge_collection': 'teach',
-                'from_vertex_collections': ['teachers'],
-                'to_vertex_collections': ['lectures']
+                'teachers': {
+                    'docs': teacher_vertices_to_insert
+                    'options': {
+                        'overwrite' = True,
+                        'sync' = True,
+                        'batch_size' = 50
+                    }
+                },
+                'lectures': {
+                    'docs': lecture_vertices_to_insert
+                    'options': {
+                        'overwrite' = False,
+                        'sync' = False,
+                        'batch_size' = 4
+                    }
+                },
+                'teach': {
+                    'docs': teach_edges_to_insert
+                }
             }
         """
         data: Json = {"name": name, "options": dict()}
@@ -1263,7 +1295,15 @@ def response_handler(resp: Response) -> Graph:
                 return Graph(self._conn, self._executor, name)
             raise GraphCreateError(resp, request)
 
-        return self._execute(request, response_handler)
+        graph = self._execute(request, response_handler)
+
+        if collections is not None:
+            for name, data in collections.items():
+                self.collection(name).import_bulk(
+                    data["docs"], **data.get("options", {})
+                )
+
+        return graph
 
     def delete_graph(
         self,
diff --git a/arango/utils.py b/arango/utils.py
index 42d7fff3..2b48d94b 100644
--- a/arango/utils.py
+++ b/arango/utils.py
@@ -8,7 +8,7 @@
 
 import logging
 from contextlib import contextmanager
-from typing import Any, Iterator, Union
+from typing import Any, Iterator, List, Optional, Union
 
 from arango.exceptions import DocumentParseError
 from arango.typings import Json
@@ -82,3 +82,27 @@ def is_none_or_str(obj: Any) -> bool:
     :rtype: bool
     """
     return obj is None or isinstance(obj, str)
+
+
+def get_batches(
+    l: List[Any], batch_size: Optional[int] = None
+) -> Union[List[List[Any]], Iterator[List[Any]]]:
+    """Generator to split a list in batches
+        of (maximum) **batch_size** elements each.
+        If **batch_size** is invalid, return entire
+        list as one batch.
+
+    :param l: The list of elements.
+    :type l: list
+    :param batch_size: Number of elements per batch.
+    :type batch_size: int | None
+    """
+    if batch_size is None or batch_size <= 0 or batch_size >= len(l):
+        return [l]
+
+    def generator() -> Iterator[List[Any]]:
+        n = int(batch_size)  # type: ignore # (false positive)
+        for i in range(0, len(l), n):
+            yield l[i : i + n]
+
+    return generator()

From 247ed1c8c7defb28054edf90616c9c5625d2496e Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Tue, 14 Jun 2022 11:06:38 -0400
Subject: [PATCH 03/17] fix: empty_collection(col)

---
 tests/test_document.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_document.py b/tests/test_document.py
index 65875622..eacb407b 100644
--- a/tests/test_document.py
+++ b/tests/test_document.py
@@ -1836,9 +1836,11 @@ def test_document_import_bulk(col, bad_col, docs):
     results = col.import_bulk(docs, batch_size=len(docs) // 2)
     assert type(results) is list
     assert len(results) == 2
+    empty_collection(col)
 
     result = col.import_bulk(docs, batch_size=len(docs) * 2)
     assert type(result) is dict
+    empty_collection(col)
 
     # Test import bulk on_duplicate actions
     doc = docs[0]

From 87fb1c1021152e65a15a7497d4cd87051bfe3d4f Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Tue, 14 Jun 2022 15:22:20 -0400
Subject: [PATCH 04/17] attempt fix: tests

---
 arango/collection.py | 29 ++++++++++++++---------------
 arango/utils.py      |  8 ++++----
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/arango/collection.py b/arango/collection.py
index 8e819619..89697871 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -169,14 +169,18 @@ def _prep_from_doc(
             else:
                 return doc_id, doc_id, {"If-Match": rev}
 
-    def _ensure_key_in_body(self, body: Json) -> Json:
-        """Return the document body with "_key" field populated.
+    def _ensure_key_in_body(self, body: Json, index: Optional[int] = None) -> Json:
+        """Return the document body with "_key" field populated. If
+            neither "_key" or "_id" exist, set "_key" value to **index**,
+            where **index** is the document's position in the sequence.
 
         :param body: Document body.
         :type body: dict
+        :param index: Document index value in the original list of documents.
+        :param index: int | None
         :return: Document body with "_key" field.
         :rtype: dict
-        :raise arango.exceptions.DocumentParseError: On missing ID and key.
+        :raise arango.exceptions.DocumentParseError: On missing _key, _id, & index.
         """
         if "_key" in body:
             return body
@@ -185,17 +189,17 @@ def _ensure_key_in_body(self, body: Json) -> Json:
             body = body.copy()
             body["_key"] = doc_id[len(self._id_prefix) :]
             return body
+        elif index:
+            body = body.copy()
+            body["_key"] = str(index)
+            return body
+
         raise DocumentParseError('field "_key" or "_id" required')
 
-    def _ensure_key_from_id(self, body: Json, index: Optional[int] = None) -> Json:
+    def _ensure_key_from_id(self, body: Json) -> Json:
         """Return the body with "_key" field if it has "_id" field.
-            If it has neither, set the "_key" value to i, where i
-            is the document's index position in the sequence.
-
         :param body: Document body.
         :type body: dict
-        :param index: Document index value in the original list of documents.
-        :param index: int | None
         :return: Document body with "_key" field if it has "_id" field.
         :rtype: dict
         """
@@ -203,11 +207,6 @@ def _ensure_key_from_id(self, body: Json, index: Optional[int] = None) -> Json:
             doc_id = self._validate_id(body["_id"])
             body = body.copy()
             body["_key"] = doc_id[len(self._id_prefix) :]
-
-        if "_id" not in body and "_key" not in body:
-            body = body.copy()
-            body["_key"] = str(index)
-
         return body
 
     @property
@@ -2002,7 +2001,7 @@ def import_bulk(
         :raise arango.exceptions.DocumentInsertError: If import fails.
         """
         documents = [
-            self._ensure_key_from_id(doc, i) for i, doc in enumerate(documents, 1)
+            self._ensure_key_in_body(doc, i) for i, doc in enumerate(documents, 1)
         ]
 
         params: Params = {"type": "array", "collection": self.name}
diff --git a/arango/utils.py b/arango/utils.py
index 2b48d94b..4f9242b5 100644
--- a/arango/utils.py
+++ b/arango/utils.py
@@ -8,7 +8,7 @@
 
 import logging
 from contextlib import contextmanager
-from typing import Any, Iterator, List, Optional, Union
+from typing import Any, Iterator, List, Optional, Sequence, Union
 
 from arango.exceptions import DocumentParseError
 from arango.typings import Json
@@ -85,8 +85,8 @@ def is_none_or_str(obj: Any) -> bool:
 
 
 def get_batches(
-    l: List[Any], batch_size: Optional[int] = None
-) -> Union[List[List[Any]], Iterator[List[Any]]]:
+    l: Sequence[Json], batch_size: Optional[int] = None
+) -> Union[List[Sequence[Json]], Iterator[Sequence[Json]]]:
     """Generator to split a list in batches
         of (maximum) **batch_size** elements each.
         If **batch_size** is invalid, return entire
@@ -100,7 +100,7 @@ def get_batches(
     if batch_size is None or batch_size <= 0 or batch_size >= len(l):
         return [l]
 
-    def generator() -> Iterator[List[Any]]:
+    def generator() -> Iterator[Sequence[Json]]:
         n = int(batch_size)  # type: ignore # (false positive)
         for i in range(0, len(l), n):
             yield l[i : i + n]

From 918391e69be6eedc62fe767d7f8215bd1af5185c Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Tue, 14 Jun 2022 15:25:02 -0400
Subject: [PATCH 05/17] improve readability

---
 arango/collection.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arango/collection.py b/arango/collection.py
index 89697871..8f116ab9 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -189,7 +189,7 @@ def _ensure_key_in_body(self, body: Json, index: Optional[int] = None) -> Json:
             body = body.copy()
             body["_key"] = doc_id[len(self._id_prefix) :]
             return body
-        elif index:
+        elif index is not None:
             body = body.copy()
             body["_key"] = str(index)
             return body

From 3549999b217b64bb427bfbc79b5e3ef23955e33d Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Tue, 14 Jun 2022 15:42:37 -0400
Subject: [PATCH 06/17] cleanup: test_graph_provision

---
 tests/test_graph.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/test_graph.py b/tests/test_graph.py
index 34215661..14898459 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -72,8 +72,11 @@ def test_graph_provision(graph, db):
             "to_vertex_collections": ["numbers"],
         }
     ]
+
+    name = "divisibility-graph"
+    db.delete_graph(name, drop_collections=True, ignore_missing=True)
     graph = db.create_graph(
-        name="DivisibilityGraph",
+        name=name,
         edge_definitions=e_d,
         collections={
             "numbers": {"docs": vertices, "options": {"batch_size": 5}},

From 0d1b1b2728bd1238962ebb9724bddd78b6d52db5 Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Tue, 14 Jun 2022 21:02:58 -0400
Subject: [PATCH 07/17] whitespace

---
 arango/collection.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arango/collection.py b/arango/collection.py
index 8f116ab9..bc399800 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -198,6 +198,7 @@ def _ensure_key_in_body(self, body: Json, index: Optional[int] = None) -> Json:
 
     def _ensure_key_from_id(self, body: Json) -> Json:
         """Return the body with "_key" field if it has "_id" field.
+
         :param body: Document body.
         :type body: dict
         :return: Document body with "_key" field if it has "_id" field.

From 2095fdd71bd0d5f900aca05b214e5bdc125e2a33 Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Fri, 17 Jun 2022 10:48:55 -0400
Subject: [PATCH 08/17] fix: typo

---
 arango/collection.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arango/collection.py b/arango/collection.py
index bc399800..a04f6332 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -2027,7 +2027,7 @@ def response_handler(resp: Response) -> Json:
                 return result
             raise DocumentInsertError(resp, request)
 
-        result = []
+        results = []
         for batch in get_batches(documents, batch_size):
             request = Request(
                 method="post",
@@ -2037,9 +2037,9 @@ def response_handler(resp: Response) -> Json:
                 write=self.name,
             )
 
-            result.append(self._execute(request, response_handler))
+            results.append(self._execute(request, response_handler))
 
-        return result[0] if len(result) == 1 else result
+        return results[0] if len(results) == 1 else results
 
 
 class StandardCollection(Collection):

From 800fafdf2e80a7fc35066425bce2be322e2c1606 Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Fri, 17 Jun 2022 20:05:16 -0400
Subject: [PATCH 09/17] revert: _ensure_key_in_body changes

(no suitable for client-side functionality)
---
 arango/collection.py | 20 ++++----------------
 tests/test_graph.py  |  2 +-
 2 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/arango/collection.py b/arango/collection.py
index a04f6332..549710e2 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -169,18 +169,13 @@ def _prep_from_doc(
             else:
                 return doc_id, doc_id, {"If-Match": rev}
 
-    def _ensure_key_in_body(self, body: Json, index: Optional[int] = None) -> Json:
-        """Return the document body with "_key" field populated. If
-            neither "_key" or "_id" exist, set "_key" value to **index**,
-            where **index** is the document's position in the sequence.
-
+    def _ensure_key_in_body(self, body: Json) -> Json:
+        """Return the document body with "_key" field populated.
         :param body: Document body.
         :type body: dict
-        :param index: Document index value in the original list of documents.
-        :param index: int | None
         :return: Document body with "_key" field.
         :rtype: dict
-        :raise arango.exceptions.DocumentParseError: On missing _key, _id, & index.
+        :raise arango.exceptions.DocumentParseError: On missing ID and key.
         """
         if "_key" in body:
             return body
@@ -189,11 +184,6 @@ def _ensure_key_in_body(self, body: Json, index: Optional[int] = None) -> Json:
             body = body.copy()
             body["_key"] = doc_id[len(self._id_prefix) :]
             return body
-        elif index is not None:
-            body = body.copy()
-            body["_key"] = str(index)
-            return body
-
         raise DocumentParseError('field "_key" or "_id" required')
 
     def _ensure_key_from_id(self, body: Json) -> Json:
@@ -2001,9 +1991,7 @@ def import_bulk(
         :rtype: dict
         :raise arango.exceptions.DocumentInsertError: If import fails.
         """
-        documents = [
-            self._ensure_key_in_body(doc, i) for i, doc in enumerate(documents, 1)
-        ]
+        documents = [self._ensure_key_from_id(doc) for doc in documents]
 
         params: Params = {"type": "array", "collection": self.name}
         if halt_on_error is not None:
diff --git a/tests/test_graph.py b/tests/test_graph.py
index 14898459..7b9c8427 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -58,7 +58,7 @@ def test_graph_properties(graph, bad_graph, db):
 
 
 def test_graph_provision(graph, db):
-    vertices = [{"foo": i} for i in range(1, 101)]
+    vertices = [{"_key": str(i)} for i in range(1, 101)]
     edges = [
         {"_from": f"numbers/{j}", "_to": f"numbers/{i}", "result": j / i}
         for i in range(1, 101)

From 847ab74449d4e1b4ec352ab3d05216f3e5b4fe68 Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Fri, 17 Jun 2022 20:06:12 -0400
Subject: [PATCH 10/17] fix: whitespace

---
 arango/collection.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arango/collection.py b/arango/collection.py
index 549710e2..fb06fb60 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -171,6 +171,7 @@ def _prep_from_doc(
 
     def _ensure_key_in_body(self, body: Json) -> Json:
         """Return the document body with "_key" field populated.
+
         :param body: Document body.
         :type body: dict
         :return: Document body with "_key" field.

From dc6f87fa3abe3f08d4dfd6181f8b6e8d1408ec2d Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Tue, 21 Jun 2022 13:36:49 -0400
Subject: [PATCH 11/17] fix: address comment pt1

https://github.com/ArangoDB-Community/python-arango/pull/207#discussion_r901236443
---
 arango/collection.py |  2 +-
 arango/utils.py      | 25 +++++++------------------
 2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/arango/collection.py b/arango/collection.py
index fb06fb60..995ddb81 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -2017,7 +2017,7 @@ def response_handler(resp: Response) -> Json:
             raise DocumentInsertError(resp, request)
 
         results = []
-        for batch in get_batches(documents, batch_size):
+        for batch in get_batches(documents, batch_size or len(documents)):
             request = Request(
                 method="post",
                 endpoint="/_api/import",
diff --git a/arango/utils.py b/arango/utils.py
index 4f9242b5..f3e6702a 100644
--- a/arango/utils.py
+++ b/arango/utils.py
@@ -8,7 +8,7 @@
 
 import logging
 from contextlib import contextmanager
-from typing import Any, Iterator, List, Optional, Sequence, Union
+from typing import Any, Iterator, Sequence, Union
 
 from arango.exceptions import DocumentParseError
 from arango.typings import Json
@@ -84,25 +84,14 @@ def is_none_or_str(obj: Any) -> bool:
     return obj is None or isinstance(obj, str)
 
 
-def get_batches(
-    l: Sequence[Json], batch_size: Optional[int] = None
-) -> Union[List[Sequence[Json]], Iterator[Sequence[Json]]]:
+def get_batches(elements: Sequence[Json], batch_size: int) -> Iterator[Sequence[Json]]:
     """Generator to split a list in batches
         of (maximum) **batch_size** elements each.
-        If **batch_size** is invalid, return entire
-        list as one batch.
 
-    :param l: The list of elements.
-    :type l: list
+    :param elements: The list of elements.
+    :type elements: Sequence[Json]
     :param batch_size: Number of elements per batch.
-    :type batch_size: int | None
+    :type batch_size: int
     """
-    if batch_size is None or batch_size <= 0 or batch_size >= len(l):
-        return [l]
-
-    def generator() -> Iterator[Sequence[Json]]:
-        n = int(batch_size)  # type: ignore # (false positive)
-        for i in range(0, len(l), n):
-            yield l[i : i + n]
-
-    return generator()
+    for index in range(0, len(elements), batch_size):
+        yield elements[index : index + batch_size]

From 56d129fd337e7aa6eb97cf6c8dd33086827c4674 Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Tue, 21 Jun 2022 13:40:53 -0400
Subject: [PATCH 12/17] address comment pt2

https://github.com/ArangoDB-Community/python-arango/pull/207#discussion_r902156983
---
 arango/collection.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arango/collection.py b/arango/collection.py
index 995ddb81..e7cd0b96 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -1986,10 +1986,11 @@ def import_bulk(
         :param sync: Block until operation is synchronized to disk.
         :type sync: bool | None
         :param batch_size: Max number of documents to import at once. If
-            unspecified, will import all documents at once.
+            unspecified, will import all documents at once. Note that the
+            output type changes to list[dict] if **batch_size** is specified.
         :type batch_size: int | None
         :return: Result of the bulk import.
-        :rtype: dict
+        :rtype: dict | list[dict]
         :raise arango.exceptions.DocumentInsertError: If import fails.
         """
         documents = [self._ensure_key_from_id(doc) for doc in documents]

From 56d74fd898bc598561c6b44d06f64efd25286769 Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Tue, 21 Jun 2022 14:00:24 -0400
Subject: [PATCH 13/17] remove: graph provision functionality

https://github.com/ArangoDB-Community/python-arango/pull/207#discussion_r901241261
---
 arango/database.py  | 37 +------------------------------------
 tests/test_graph.py | 31 -------------------------------
 2 files changed, 1 insertion(+), 67 deletions(-)

diff --git a/arango/database.py b/arango/database.py
index 89e92955..ea3adc6a 100644
--- a/arango/database.py
+++ b/arango/database.py
@@ -1170,7 +1170,6 @@ def create_graph(
         shard_count: Optional[int] = None,
         replication_factor: Optional[int] = None,
         write_concern: Optional[int] = None,
-        collections: Optional[Json] = None,
     ) -> Result[Graph]:
         """Create a new graph.
 
@@ -1236,32 +1235,6 @@ def create_graph(
                     'to_vertex_collections': ['lectures']
                 }
             ]
-
-        Here is an example entry for parameter **collections**:
-        TODO: Rework **collections** data structure?
-        .. code-block:: python
-
-            {
-                'teachers': {
-                    'docs': teacher_vertices_to_insert
-                    'options': {
-                        'overwrite' = True,
-                        'sync' = True,
-                        'batch_size' = 50
-                    }
-                },
-                'lectures': {
-                    'docs': lecture_vertices_to_insert
-                    'options': {
-                        'overwrite' = False,
-                        'sync' = False,
-                        'batch_size' = 4
-                    }
-                },
-                'teach': {
-                    'docs': teach_edges_to_insert
-                }
-            }
         """
         data: Json = {"name": name, "options": dict()}
         if edge_definitions is not None:
@@ -1295,15 +1268,7 @@ def response_handler(resp: Response) -> Graph:
                 return Graph(self._conn, self._executor, name)
             raise GraphCreateError(resp, request)
 
-        graph = self._execute(request, response_handler)
-
-        if collections is not None:
-            for name, data in collections.items():
-                self.collection(name).import_bulk(
-                    data["docs"], **data.get("options", {})
-                )
-
-        return graph
+        return self._execute(request, response_handler)
 
     def delete_graph(
         self,
diff --git a/tests/test_graph.py b/tests/test_graph.py
index 7b9c8427..de440642 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -57,37 +57,6 @@ def test_graph_properties(graph, bad_graph, db):
     assert isinstance(properties["revision"], str)
 
 
-def test_graph_provision(graph, db):
-    vertices = [{"_key": str(i)} for i in range(1, 101)]
-    edges = [
-        {"_from": f"numbers/{j}", "_to": f"numbers/{i}", "result": j / i}
-        for i in range(1, 101)
-        for j in range(1, 101)
-        if j % i == 0
-    ]
-    e_d = [
-        {
-            "edge_collection": "is_divisible_by",
-            "from_vertex_collections": ["numbers"],
-            "to_vertex_collections": ["numbers"],
-        }
-    ]
-
-    name = "divisibility-graph"
-    db.delete_graph(name, drop_collections=True, ignore_missing=True)
-    graph = db.create_graph(
-        name=name,
-        edge_definitions=e_d,
-        collections={
-            "numbers": {"docs": vertices, "options": {"batch_size": 5}},
-            "is_divisible_by": {"docs": edges, "options": {"sync": True}},
-        },
-    )
-
-    assert graph.vertex_collection("numbers").count() == len(vertices)
-    assert graph.edge_collection("is_divisible_by").count() == len(edges)
-
-
 def test_graph_management(db, bad_db):
     # Test create graph
     graph_name = generate_graph_name()

From 3aeadecfa2db5ce9f65463d3037f4f01d84be100 Mon Sep 17 00:00:00 2001
From: Anthony Mahanna <43019056+aMahanna@users.noreply.github.com>
Date: Tue, 21 Jun 2022 15:25:30 -0400
Subject: [PATCH 14/17] Update database.py

---
 arango/database.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arango/database.py b/arango/database.py
index ea3adc6a..5e9dcfa8 100644
--- a/arango/database.py
+++ b/arango/database.py
@@ -1217,9 +1217,6 @@ def create_graph(
             parameter cannot be larger than that of **replication_factor**.
             Default value is 1. Used for clusters only.
         :type write_concern: int
-        :param collections: A list collection data objects to provision
-            the graph with. See below for example.
-        :type collections: dict | None
         :return: Graph API wrapper.
         :rtype: arango.graph.Graph
         :raise arango.exceptions.GraphCreateError: If create fails.

From 5db70551dc96de6b241bdff373121ea6b3111e46 Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Wed, 22 Jun 2022 08:33:24 -0400
Subject: [PATCH 15/17] address docstring comments

---
 arango/collection.py | 13 +++++++++----
 arango/utils.py      |  2 +-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arango/collection.py b/arango/collection.py
index e7cd0b96..01d35b73 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -1985,10 +1985,15 @@ def import_bulk(
         :type on_duplicate: str
         :param sync: Block until operation is synchronized to disk.
         :type sync: bool | None
-        :param batch_size: Max number of documents to import at once. If
-            unspecified, will import all documents at once. Note that the
-            output type changes to list[dict] if **batch_size** is specified.
-        :type batch_size: int | None
+        :param batch_size: Split up **documents** into batches of max length
+            **batch_size** and import them in a loop on the client side. If
+            **batch_size** is specified, the return type of this method
+            changes from a result object to a list of result objects.
+            IMPORTANT NOTE: this parameter may go through breaking changes
+            in the future where the return type may not be a list of result
+            objects anymore. Use it at your own risk, and avoid
+            depending on the return value if possible.
+        :type batch_size: int
         :return: Result of the bulk import.
         :rtype: dict | list[dict]
         :raise arango.exceptions.DocumentInsertError: If import fails.
diff --git a/arango/utils.py b/arango/utils.py
index f3e6702a..27d459ba 100644
--- a/arango/utils.py
+++ b/arango/utils.py
@@ -90,7 +90,7 @@ def get_batches(elements: Sequence[Json], batch_size: int) -> Iterator[Sequence[
 
     :param elements: The list of elements.
     :type elements: Sequence[Json]
-    :param batch_size: Number of elements per batch.
+    :param batch_size: Max number of elements per batch.
     :type batch_size: int
     """
     for index in range(0, len(elements), batch_size):

From f722effe4fa8136f49c72c47ab6e328d93cc82d0 Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Wed, 22 Jun 2022 09:15:14 -0400
Subject: [PATCH 16/17] adjust import_bulk() return behavior

(always return result in `list` format if `batch_size` is specified)
---
 arango/collection.py   | 2 +-
 tests/test_document.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arango/collection.py b/arango/collection.py
index 01d35b73..4c834efc 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -2034,7 +2034,7 @@ def response_handler(resp: Response) -> Json:
 
             results.append(self._execute(request, response_handler))
 
-        return results[0] if len(results) == 1 else results
+        return results[0] if batch_size is None else results
 
 
 class StandardCollection(Collection):
diff --git a/tests/test_document.py b/tests/test_document.py
index eacb407b..02c47bc4 100644
--- a/tests/test_document.py
+++ b/tests/test_document.py
@@ -1839,7 +1839,8 @@ def test_document_import_bulk(col, bad_col, docs):
     empty_collection(col)
 
     result = col.import_bulk(docs, batch_size=len(docs) * 2)
-    assert type(result) is dict
+    assert type(result) is list
+    assert len(result) == 1
     empty_collection(col)
 
     # Test import bulk on_duplicate actions

From c65af2add238347c12d990a70bb7fb733a0a0ed7 Mon Sep 17 00:00:00 2001
From: aMahanna <anthony.mahanna@gmail.com>
Date: Wed, 22 Jun 2022 20:43:57 -0400
Subject: [PATCH 17/17] address comments

---
 arango/collection.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/arango/collection.py b/arango/collection.py
index 4c834efc..3aea6f81 100644
--- a/arango/collection.py
+++ b/arango/collection.py
@@ -2022,19 +2022,29 @@ def response_handler(resp: Response) -> Json:
                 return result
             raise DocumentInsertError(resp, request)
 
-        results = []
-        for batch in get_batches(documents, batch_size or len(documents)):
+        if batch_size is None:
             request = Request(
                 method="post",
                 endpoint="/_api/import",
-                data=batch,
+                data=documents,
                 params=params,
                 write=self.name,
             )
 
-            results.append(self._execute(request, response_handler))
+            return self._execute(request, response_handler)
+        else:
+            results = []
+            for batch in get_batches(documents, batch_size):
+                request = Request(
+                    method="post",
+                    endpoint="/_api/import",
+                    data=batch,
+                    params=params,
+                    write=self.name,
+                )
+                results.append(self._execute(request, response_handler))
 
-        return results[0] if batch_size is None else results
+            return results
 
 
 class StandardCollection(Collection):