Commit 0a61e9c: "initial feature commit" (1 parent: 9b3daf6)

File tree: 3 files changed, 101 additions and 18 deletions

3 files changed

+101
-18
lines changed

arango/collection.py

Lines changed: 32 additions & 13 deletions
@@ -42,7 +42,7 @@
 from arango.response import Response
 from arango.result import Result
 from arango.typings import Fields, Headers, Json, Params
-from arango.utils import get_doc_id, is_none_or_int, is_none_or_str
+from arango.utils import get_batches, get_doc_id, is_none_or_int, is_none_or_str


 class Collection(ApiGroup):
@@ -187,18 +187,27 @@ def _ensure_key_in_body(self, body: Json) -> Json:
             return body
         raise DocumentParseError('field "_key" or "_id" required')

-    def _ensure_key_from_id(self, body: Json) -> Json:
+    def _ensure_key_from_id(self, body: Json, index: Optional[int] = None) -> Json:
         """Return the body with "_key" field if it has "_id" field.
+        If it has neither, set the "_key" value to the document's
+        index position in the original sequence.

         :param body: Document body.
         :type body: dict
+        :param index: Document index value in the original list of documents.
+        :type index: int | None
         :return: Document body with "_key" field if it has "_id" field.
         :rtype: dict
         """
         if "_id" in body and "_key" not in body:
             doc_id = self._validate_id(body["_id"])
             body = body.copy()
             body["_key"] = doc_id[len(self._id_prefix) :]
+
+        if "_id" not in body and "_key" not in body:
+            body = body.copy()
+            body["_key"] = str(index)
+
         return body

     @property
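A quick sketch of the new fallback behavior in this private helper (hypothetical values, assuming `col` is the Collection wrapper for a collection named "students"):

.. code-block:: python

    # "_id" present: "_key" is derived by stripping the collection id prefix.
    col._ensure_key_from_id({'_id': 'students/alice'})
    # {'_id': 'students/alice', '_key': 'alice'}

    # Neither "_id" nor "_key" present: the positional index becomes the key.
    col._ensure_key_from_id({'name': 'bob'}, index=2)
    # {'name': 'bob', '_key': '2'}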
@@ -1934,7 +1943,8 @@ def import_bulk(
         overwrite: Optional[bool] = None,
         on_duplicate: Optional[str] = None,
         sync: Optional[bool] = None,
-    ) -> Result[Json]:
+        batch_size: Optional[int] = None,
+    ) -> Union[Result[Json], List[Result[Json]]]:
         """Insert multiple documents into the collection.

         .. note::
@@ -1984,11 +1994,16 @@
         :type on_duplicate: str
         :param sync: Block until operation is synchronized to disk.
         :type sync: bool | None
+        :param batch_size: Max number of documents to import at once. If
+            unspecified, will import all documents at once.
+        :type batch_size: int | None
         :return: Result of the bulk import.
         :rtype: dict
         :raise arango.exceptions.DocumentInsertError: If import fails.
         """
-        documents = [self._ensure_key_from_id(doc) for doc in documents]
+        documents = [
+            self._ensure_key_from_id(doc, i) for i, doc in enumerate(documents, 1)
+        ]

         params: Params = {"type": "array", "collection": self.name}
         if halt_on_error is not None:
@@ -2006,21 +2021,25 @@
         if sync is not None:
             params["waitForSync"] = sync

-        request = Request(
-            method="post",
-            endpoint="/_api/import",
-            data=documents,
-            params=params,
-            write=self.name,
-        )
-
         def response_handler(resp: Response) -> Json:
             if resp.is_success:
                 result: Json = resp.body
                 return result
             raise DocumentInsertError(resp, request)

-        return self._execute(request, response_handler)
+        result = []
+        for batch in get_batches(documents, batch_size):
+            request = Request(
+                method="post",
+                endpoint="/_api/import",
+                data=batch,
+                params=params,
+                write=self.name,
+            )
+
+            result.append(self._execute(request, response_handler))
+
+        return result[0] if len(result) == 1 else result


 class StandardCollection(Collection):
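
A minimal usage sketch for the new **batch_size** parameter (assuming `db` is a StandardDatabase wrapper and a collection named "students" exists; names and values are illustrative):

.. code-block:: python

    students = db.collection('students')
    docs = [{'_key': str(i), 'value': i} for i in range(250)]

    # batch_size unspecified: one request, a single result dict is returned.
    result = students.import_bulk(docs)

    # batch_size=100: three requests (100 + 100 + 50 documents), so a
    # list of three result dicts is returned.
    results = students.import_bulk(docs, batch_size=100)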

arango/database.py

Lines changed: 44 additions & 4 deletions
@@ -1170,6 +1170,7 @@ def create_graph(
         shard_count: Optional[int] = None,
         replication_factor: Optional[int] = None,
         write_concern: Optional[int] = None,
+        collections: Optional[Json] = None,
     ) -> Result[Graph]:
         """Create a new graph.
@@ -1217,18 +1218,49 @@
             parameter cannot be larger than that of **replication_factor**.
             Default value is 1. Used for clusters only.
         :type write_concern: int
+        :param collections: A dict of collection data objects to provision
+            the graph with. See below for an example.
+        :type collections: dict | None
         :return: Graph API wrapper.
         :rtype: arango.graph.Graph
         :raise arango.exceptions.GraphCreateError: If create fails.

         Here is an example entry for parameter **edge_definitions**:

+        .. code-block:: python
+
+            [
+                {
+                    'edge_collection': 'teach',
+                    'from_vertex_collections': ['teachers'],
+                    'to_vertex_collections': ['lectures']
+                }
+            ]
+
+        Here is an example entry for parameter **collections**:
+        TODO: Rework **collections** data structure?
+
         .. code-block:: python

             {
-                'edge_collection': 'teach',
-                'from_vertex_collections': ['teachers'],
-                'to_vertex_collections': ['lectures']
+                'teachers': {
+                    'docs': teacher_vertices_to_insert,
+                    'options': {
+                        'overwrite': True,
+                        'sync': True,
+                        'batch_size': 50
+                    }
+                },
+                'lectures': {
+                    'docs': lecture_vertices_to_insert,
+                    'options': {
+                        'overwrite': False,
+                        'sync': False,
+                        'batch_size': 4
+                    }
+                },
+                'teach': {
+                    'docs': teach_edges_to_insert
+                }
             }
         """
         data: Json = {"name": name, "options": dict()}
@@ -1263,7 +1295,15 @@ def response_handler(resp: Response) -> Graph:
                 return Graph(self._conn, self._executor, name)
             raise GraphCreateError(resp, request)

-        return self._execute(request, response_handler)
+        graph = self._execute(request, response_handler)
+
+        if collections is not None:
+            for col_name, col_data in collections.items():
+                self.collection(col_name).import_bulk(
+                    col_data["docs"], **col_data.get("options", {})
+                )
+
+        return graph

     def delete_graph(
         self,
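
An end-to-end sketch of provisioning a graph through the new **collections** parameter (assuming `db` is a StandardDatabase wrapper; the document lists are illustrative):

.. code-block:: python

    teacher_vertices_to_insert = [{'_key': 'jon'}, {'_key': 'amy'}]
    lecture_vertices_to_insert = [{'_key': 'CSC101'}, {'_key': 'MAT223'}]
    teach_edges_to_insert = [{'_from': 'teachers/jon', '_to': 'lectures/CSC101'}]

    graph = db.create_graph(
        'school',
        edge_definitions=[{
            'edge_collection': 'teach',
            'from_vertex_collections': ['teachers'],
            'to_vertex_collections': ['lectures'],
        }],
        collections={
            'teachers': {
                'docs': teacher_vertices_to_insert,
                'options': {'overwrite': True, 'batch_size': 50},
            },
            'lectures': {'docs': lecture_vertices_to_insert},
            'teach': {'docs': teach_edges_to_insert},
        },
    )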

arango/utils.py

Lines changed: 25 additions & 1 deletion
@@ -8,7 +8,7 @@

 import logging
 from contextlib import contextmanager
-from typing import Any, Iterator, Union
+from typing import Any, Iterator, List, Optional, Union

 from arango.exceptions import DocumentParseError
 from arango.typings import Json
@@ -82,3 +82,27 @@ def is_none_or_str(obj: Any) -> bool:
     :rtype: bool
     """
     return obj is None or isinstance(obj, str)
+
+
+def get_batches(
+    l: List[Any], batch_size: Optional[int] = None
+) -> Union[List[List[Any]], Iterator[List[Any]]]:
+    """Split a list into batches of (at most) **batch_size** elements each.
+    If **batch_size** is invalid, return the entire list as one batch.
+
+    :param l: The list of elements.
+    :type l: list
+    :param batch_size: Number of elements per batch.
+    :type batch_size: int | None
+    """
+    if batch_size is None or batch_size <= 0 or batch_size >= len(l):
+        return [l]
+
+    def generator() -> Iterator[List[Any]]:
+        n = int(batch_size)  # type: ignore # (false positive)
+        for i in range(0, len(l), n):
+            yield l[i : i + n]
+
+    return generator()
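
A short illustration of the batching semantics (values chosen arbitrarily):

.. code-block:: python

    from arango.utils import get_batches

    items = [1, 2, 3, 4, 5]

    # A valid batch_size smaller than len(items) yields a generator of slices.
    list(get_batches(items, batch_size=2))   # [[1, 2], [3, 4], [5]]

    # batch_size of None, <= 0, or >= len(items) returns one batch.
    get_batches(items)                        # [[1, 2, 3, 4, 5]]
    get_batches(items, batch_size=99)         # [[1, 2, 3, 4, 5]]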
