Skip to content

Commit e5e8ef2

Browse files
FlorentinDbrs96orazve
committed
WIP use arrow endpoint
Co-authored-by: Brian Shi <brian.shi@neotechnology.com> Co-authored-by: Olga Razvenskaia <olga.razvenskaia@neo4j.com>
1 parent 1e8237e commit e5e8ef2

File tree

5 files changed

+58
-49
lines changed

5 files changed

+58
-49
lines changed

examples/python-runtime.ipynb

Lines changed: 12 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
{
44
"cell_type": "code",
55
"execution_count": null,
6-
"metadata": {
7-
"collapsed": true
8-
},
6+
"metadata": {},
97
"outputs": [],
108
"source": [
119
"DBID = \"beefbeef\"\n",
@@ -14,68 +12,46 @@
1412
"\n",
1513
"from graphdatascience import GraphDataScience\n",
1614
"\n",
17-
"gds = GraphDataScience(\n",
18-
" f\"neo4j+s://{DBID}-{ENVIRONMENT}.databases.neo4j-dev.io/\", auth=(\"neo4j\", PASSWORD)\n",
19-
")\n",
15+
"gds = GraphDataScience(f\"neo4j+s://{DBID}-{ENVIRONMENT}.databases.neo4j-dev.io/\", auth=(\"neo4j\", PASSWORD))\n",
2016
"gds.set_database(\"neo4j\")\n",
2117
"\n",
22-
"gds.gnn.nodeClassification.train(\"cora\", \"model\", [\"features\"], \"subject\", node_labels=[\"Paper\"])\n"
18+
"gds.gnn.nodeClassification.train(\"cora\", \"model\", [\"features\"], \"subject\", node_labels=[\"Paper\"])"
2319
]
2420
},
2521
{
2622
"cell_type": "code",
2723
"execution_count": null,
24+
"metadata": {},
2825
"outputs": [],
2926
"source": [
3027
"try:\n",
3128
" gds.graph.load_cora()\n",
3229
"except:\n",
33-
" pass\n"
34-
],
35-
"metadata": {
36-
"collapsed": false
37-
}
30+
" pass"
31+
]
3832
},
3933
{
4034
"cell_type": "code",
4135
"execution_count": null,
36+
"metadata": {},
4237
"outputs": [],
4338
"source": [
44-
"gds.gnn.nodeClassification.train(\"cora\", \"model\", [\"features\"], \"subject\", node_labels=[\"Paper\"])\n"
45-
],
46-
"metadata": {
47-
"collapsed": false
48-
}
39+
"gds.gnn.nodeClassification.train(\"cora\", \"model\", [\"features\"], \"subject\", node_labels=[\"Paper\"])"
40+
]
4941
},
5042
{
5143
"cell_type": "code",
5244
"execution_count": null,
45+
"metadata": {},
5346
"outputs": [],
5447
"source": [
5548
"gds.gnn.nodeClassification.predict(\"cora\", \"model\", [\"features\"], \"subject\", node_labels=[\"Paper\"])"
56-
],
57-
"metadata": {
58-
"collapsed": false
59-
}
49+
]
6050
}
6151
],
6252
"metadata": {
63-
"kernelspec": {
64-
"display_name": "Python 3",
65-
"language": "python",
66-
"name": "python3"
67-
},
6853
"language_info": {
69-
"codemirror_mode": {
70-
"name": "ipython",
71-
"version": 2
72-
},
73-
"file_extension": ".py",
74-
"mimetype": "text/x-python",
75-
"name": "python",
76-
"nbconvert_exporter": "python",
77-
"pygments_lexer": "ipython2",
78-
"version": "2.7.6"
54+
"name": "python"
7955
}
8056
},
8157
"nbformat": 4,

graphdatascience/endpoints.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333
"""
3434

3535

36-
class DirectEndpoints(DirectSystemEndpoints, DirectUtilEndpoints, GraphEndpoints, PipelineEndpoints, ModelEndpoints, GnnEndpoints):
36+
class DirectEndpoints(
37+
DirectSystemEndpoints, DirectUtilEndpoints, GraphEndpoints, PipelineEndpoints, ModelEndpoints, GnnEndpoints
38+
):
3739
def __init__(self, query_runner: QueryRunner, namespace: str, server_version: ServerVersion):
3840
super().__init__(query_runner, namespace, server_version)
3941

graphdatascience/gnn/gnn_endpoints.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
1-
from .gnn_nc_runner import GNNNodeClassificationRunner
21
from ..caller_base import CallerBase
32
from ..error.illegal_attr_checker import IllegalAttrChecker
43
from ..error.uncallable_namespace import UncallableNamespace
4+
from .gnn_nc_runner import GNNNodeClassificationRunner
5+
56

67
class GNNRunner(UncallableNamespace, IllegalAttrChecker):
78
@property
89
def nodeClassification(self) -> GNNNodeClassificationRunner:
9-
return GNNNodeClassificationRunner(self._query_runner, f"{self._namespace}.nodeClassification", self._server_version)
10+
return GNNNodeClassificationRunner(
11+
self._query_runner, f"{self._namespace}.nodeClassification", self._server_version
12+
)
13+
1014

1115
class GnnEndpoints(CallerBase):
1216
@property
1317
def gnn(self) -> GNNRunner:
1418
return GNNRunner(self._query_runner, f"{self._namespace}.gnn", self._server_version)
15-
16-
17-

graphdatascience/gnn/gnn_nc_runner.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,44 @@
1+
import json
12
from typing import Any, List
23

34
from ..error.illegal_attr_checker import IllegalAttrChecker
45
from ..error.uncallable_namespace import UncallableNamespace
5-
import json
66

77

88
class GNNNodeClassificationRunner(UncallableNamespace, IllegalAttrChecker):
9-
def train(self, graph_name: str, model_name: str, feature_properties: List[str], target_property: str,
10-
target_node_label: str = None, node_labels: List[str] = None) -> "Series[Any]":
9+
def train(
10+
self,
11+
graph_name: str,
12+
model_name: str,
13+
feature_properties: List[str],
14+
target_property: str,
15+
target_node_label: str = None,
16+
node_labels: List[str] = None,
17+
) -> "Series[Any]":
1118
configMap = {
1219
"featureProperties": feature_properties,
1320
"targetProperty": target_property,
1421
"job_type": "train",
1522
}
23+
1624
node_properties = feature_properties + [target_property]
1725
if target_node_label:
1826
configMap["targetNodeLabel"] = target_node_label
1927
mlTrainingConfig = json.dumps(configMap)
2028
# TODO query available node labels
2129
node_labels = ["Paper"] if not node_labels else node_labels
22-
self._query_runner.run_query(f"CALL gds.upload.graph('{graph_name}', {{mlTrainingConfig: '{mlTrainingConfig}', modelName: '{model_name}', nodeLabels: {node_labels}, nodeProperties: {node_properties}}})")
23-
30+
self._query_runner.run_query(
31+
f"CALL gds.upload.graph('{graph_name}', {{mlTrainingConfig: '{mlTrainingConfig}', modelName: '{model_name}', nodeLabels: {node_labels}, nodeProperties: {node_properties}}})"
32+
)
2433

25-
def predict(self, graph_name: str, model_name: str, feature_properties: List[str], target_node_label: str = None, node_labels: List[str] = None) -> "Series[Any]":
34+
def predict(
35+
self,
36+
graph_name: str,
37+
model_name: str,
38+
feature_properties: List[str],
39+
target_node_label: str = None,
40+
node_labels: List[str] = None,
41+
) -> "Series[Any]":
2642
configMap = {
2743
"featureProperties": feature_properties,
2844
"job_type": "predict",
@@ -33,4 +49,5 @@ def predict(self, graph_name: str, model_name: str, feature_properties: List[str
3349
# TODO query available node labels
3450
node_labels = ["Paper"] if not node_labels else node_labels
3551
self._query_runner.run_query(
36-
f"CALL gds.upload.graph('{graph_name}', {{mlTrainingConfig: '{mlTrainingConfig}', modelName: '{model_name}', nodeLabels: {node_labels}, nodeProperties: {feature_properties}}})")
52+
f"CALL gds.upload.graph('{graph_name}', {{mlTrainingConfig: '{mlTrainingConfig}', modelName: '{model_name}', nodeLabels: {node_labels}, nodeProperties: {feature_properties}}})"
53+
)

graphdatascience/query_runner/arrow_query_runner.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ def run_query(
129129
endpoint = "gds.beta.graph.relationships.stream"
130130

131131
return self._run_arrow_property_get(graph_name, endpoint, {"relationship_types": relationship_types})
132+
elif "gds.upload.graph" in query:
133+
self._run_arrow_upload_graph(params["config"])
132134

133135
return self._fallback_query_runner.run_query(query, params, database, custom_error)
134136

@@ -170,6 +172,17 @@ def _run_arrow_property_get(self, graph_name: str, procedure_name: str, configur
170172

171173
return result
172174

175+
def _run_arrow_upload_graph(self, meta_data: Dict[str, Any]) -> None:
176+
result = self._flight_client.do_action()
177+
# TODO : better name of the action -- INIT ML JOB ?
178+
result = self._flight_client().do_put(flight.Action("UPLOAD_GRAPH"), json.dumps(meta_data).encode("utf-8"))
179+
180+
# Consume result fully to sanity check and avoid cancelled streams
181+
collected_result = list(result)
182+
assert len(collected_result) == 1
183+
184+
print(collected_result[0])
185+
173186
def create_graph_constructor(
174187
self, graph_name: str, concurrency: int, undirected_relationship_types: Optional[List[str]]
175188
) -> GraphConstructor:

0 commit comments

Comments
 (0)