From 273dc7833a44ffa67e095165218eb9a3e0f2ba61 Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Wed, 9 Aug 2023 11:16:51 -0400 Subject: [PATCH 1/8] Updated all notebooks, and outputs of non-instascale --- .../hf_interactive.ipynb | 0 .../local_interactive.ipynb | 4 +- .../batch-job/batch_mnist_mcad.ipynb | 3377 ----------- .../batch-job/batch_mnist_ray.ipynb | 5250 ----------------- demo-notebooks/batch-job/mnist.py | 160 - demo-notebooks/batch-job/requirements.txt | 4 - demo-notebooks/guided-demos/0_basic_ray.ipynb | 7 +- .../guided-demos/1_basic_instascale.ipynb | 6 +- .../guided-demos/2_basic_jobs.ipynb | 6 +- .../guided-demos/3_basic_interactive.ipynb | 6 +- demo-notebooks/guided-demos/4_gpt.ipynb | 6 +- .../notebook-ex-outputs/0_basic_ray.ipynb | 113 +- .../1_basic_instascale.ipynb | 105 +- .../notebook-ex-outputs/2_basic_jobs.ipynb | 207 +- .../3_basic_interactive.ipynb | 2728 +-------- .../notebook-ex-outputs/4_gpt.ipynb | 160 +- .../notebook-ex-outputs/jobtest.yaml | 24 +- .../notebook-ex-outputs/raytest.yaml | 24 +- src/codeflare_sdk.egg-info/SOURCES.txt | 3 + 19 files changed, 297 insertions(+), 11893 deletions(-) rename demo-notebooks/{interactive => additional-interactive}/hf_interactive.ipynb (100%) rename demo-notebooks/{interactive => additional-interactive}/local_interactive.ipynb (98%) delete mode 100644 demo-notebooks/batch-job/batch_mnist_mcad.ipynb delete mode 100644 demo-notebooks/batch-job/batch_mnist_ray.ipynb delete mode 100644 demo-notebooks/batch-job/mnist.py delete mode 100644 demo-notebooks/batch-job/requirements.txt diff --git a/demo-notebooks/interactive/hf_interactive.ipynb b/demo-notebooks/additional-interactive/hf_interactive.ipynb similarity index 100% rename from demo-notebooks/interactive/hf_interactive.ipynb rename to demo-notebooks/additional-interactive/hf_interactive.ipynb diff --git a/demo-notebooks/interactive/local_interactive.ipynb b/demo-notebooks/additional-interactive/local_interactive.ipynb similarity index 98% rename from demo-notebooks/interactive/local_interactive.ipynb rename to demo-notebooks/additional-interactive/local_interactive.ipynb index d70c00df7..7533db3d8 100644 --- a/demo-notebooks/interactive/local_interactive.ipynb +++ b/demo-notebooks/additional-interactive/local_interactive.ipynb @@ -44,7 +44,7 @@ "cluster_name = \"hfgputest-1\"\n", "local_interactive = True\n", "\n", - "cluster = Cluster(ClusterConfiguration(local_interactive=local_interactive, namespace=namespace, name=cluster_name, min_worker=1, max_worker=1, min_cpus=1, max_cpus=1, min_memory=4, max_memory=4, gpu=0, instascale=False, machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))" + "cluster = Cluster(ClusterConfiguration(local_interactive=local_interactive, namespace=namespace, name=cluster_name, num_workers=1, min_cpus=1, max_cpus=1, min_memory=4, max_memory=4, num_gpus=0, instascale=False, machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))" ] }, { @@ -319,7 +319,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.8.17" }, "vscode": { "interpreter": { diff --git a/demo-notebooks/batch-job/batch_mnist_mcad.ipynb b/demo-notebooks/batch-job/batch_mnist_mcad.ipynb deleted file mode 100644 index 86d916af7..000000000 --- a/demo-notebooks/batch-job/batch_mnist_mcad.ipynb +++ /dev/null @@ -1,3377 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", - "metadata": {}, - "outputs": [], - "source": [ - "# Import pieces from codeflare-sdk\n", - "from codeflare_sdk.cluster.auth import TokenAuthentication\n", - "from codeflare_sdk.job.jobs import DDPJobDefinition" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "9de86658", - "metadata": {}, - "source": [ - "First, we begin by authenticating using the SDK." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "614daa0c", - "metadata": {}, - "outputs": [], - "source": [ - "# Create authentication object for oc user permissions\n", - "auth = TokenAuthentication(\n", - " token = \"XXXXX\",\n", - " server = \"XXXXX\",\n", - " skip_tls=True\n", - ")\n", - "auth.login()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "87d2c9b3", - "metadata": {}, - "source": [ - "Now that we are logged in, we can directly submit our batch job (model training on two workers with four gpus each) to MCAD via torchx." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "ced6ccd6-a17e-413a-a0e4-65004fc35463", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [] - } - ], - "source": [ - "jobdef = DDPJobDefinition(\n", - " name=\"mnistjob\",\n", - " script=\"mnist.py\",\n", - " scheduler_args={\"namespace\": \"default\"},\n", - " j=\"1x1\",\n", - " gpu=0,\n", - " cpu=1,\n", - " memMB=8000,\n", - " image=\"quay.io/michaelclifford/mnist-test:latest\"\n", - ")\n", - "job = jobdef.submit()" - ] - }, - { - "cell_type": "markdown", - "id": "ff065051", - "metadata": {}, - "source": [ - "Now we can go ahead and look at the status and logs of our batch job." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "e5c0b0da-c22e-4142-b096-407ac8aebe5e", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/meyceoz/Documents/sdktest/lib64/python3.8/site-packages/torchx/schedulers/kubernetes_mcad_scheduler.py:1047: UserWarning: Warning - MCAD does not report individual replica statuses, but overall task status. Replica id may not match status\n", - " warnings.warn(msg)\n" - ] - }, - { - "data": { - "text/plain": [ - "AppStatus:\n", - " msg: \n", - " num_restarts: -1\n", - " roles:\n", - " - replicas:\n", - " - hostname: ''\n", - " id: 0\n", - " role: mnist\n", - " state: !!python/object/apply:torchx.specs.api.AppState\n", - " - 3\n", - " structured_error_msg: \n", - " role: mnist\n", - " state: RUNNING (3)\n", - " structured_error_msg: \n", - " ui_url: null" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "job.status()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "264c1809-de72-4acf-b0f6-e67d345640f6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2023-04-04T21:07:18.216609937Z INFO:torch.distributed.launcher.api:Starting elastic_operator with launch configs:\n", - "2023-04-04T21:07:18.216609937Z entrypoint : mnist.py\n", - "2023-04-04T21:07:18.216609937Z min_nodes : 1\n", - "2023-04-04T21:07:18.216609937Z max_nodes : 1\n", - "2023-04-04T21:07:18.216609937Z nproc_per_node : 1\n", - "2023-04-04T21:07:18.216609937Z run_id : mnistjob-d6hz7lmsvx4scd\n", - "2023-04-04T21:07:18.216609937Z rdzv_backend : static\n", - "2023-04-04T21:07:18.216609937Z rdzv_endpoint : localhost:49782\n", - "2023-04-04T21:07:18.216609937Z rdzv_configs : {'rank': 0, 'timeout': 900}\n", - "2023-04-04T21:07:18.216609937Z max_restarts : 0\n", - "2023-04-04T21:07:18.216609937Z monitor_interval : 5\n", - "2023-04-04T21:07:18.216609937Z log_dir : None\n", - "2023-04-04T21:07:18.216609937Z metrics_cfg : {}\n", - "2023-04-04T21:07:18.216609937Z \n", - "2023-04-04T21:07:18.217434715Z INFO:torch.distributed.elastic.agent.server.local_elastic_agent:log directory set to: /tmp/torchelastic_5_zjjbf1/mnistjob-d6hz7lmsvx4scd_a107om0q\n", - "2023-04-04T21:07:18.217476057Z INFO:torch.distributed.elastic.agent.server.api:[] starting workers for entrypoint: python\n", - "2023-04-04T21:07:18.217498330Z INFO:torch.distributed.elastic.agent.server.api:[] Rendezvous'ing worker group\n", - "2023-04-04T21:07:18.218861938Z INFO:torch.distributed.elastic.agent.server.api:[] Rendezvous complete for workers. Result:\n", - "2023-04-04T21:07:18.218861938Z restart_count=0\n", - "2023-04-04T21:07:18.218861938Z master_addr=localhost\n", - "2023-04-04T21:07:18.218861938Z master_port=49782\n", - "2023-04-04T21:07:18.218861938Z group_rank=0\n", - "2023-04-04T21:07:18.218861938Z group_world_size=1\n", - "2023-04-04T21:07:18.218861938Z local_ranks=[0]\n", - "2023-04-04T21:07:18.218861938Z role_ranks=[0]\n", - "2023-04-04T21:07:18.218861938Z global_ranks=[0]\n", - "2023-04-04T21:07:18.218861938Z role_world_sizes=[1]\n", - "2023-04-04T21:07:18.218861938Z global_world_sizes=[1]\n", - "2023-04-04T21:07:18.218861938Z \n", - "2023-04-04T21:07:18.218890288Z INFO:torch.distributed.elastic.agent.server.api:[] Starting worker group\n", - "2023-04-04T21:07:18.219122325Z INFO:torch.distributed.elastic.agent.server.local_elastic_agent:Environment variable 'TORCHELASTIC_ENABLE_FILE_TIMER' not found. Do not start FileTimerServer.\n", - "2023-04-04T21:07:18.219220551Z INFO:torch.distributed.elastic.multiprocessing:Setting worker0 reply file to: /tmp/torchelastic_5_zjjbf1/mnistjob-d6hz7lmsvx4scd_a107om0q/attempt_0/0/error.json\n", - "2023-04-04T21:07:20.356947480Z [0]:GPU available: False, used: False\n", - "2023-04-04T21:07:20.356947480Z [0]:TPU available: False, using: 0 TPU cores\n", - "2023-04-04T21:07:20.356947480Z [0]:IPU available: False, using: 0 IPUs\n", - "2023-04-04T21:07:20.557348006Z [0]:\n", - "2023-04-04T21:07:20.657500757Z [0]: 0%| | 0/9912422 [00:00╭─────────────────────────╮\n", - "│ 🚀 Cluster Queue │\n", - "│ Status 🚀 │\n", - "│ +-----------+---------+ │\n", - "│ | Name | Status | │\n", - "│ +===========+=========+ │\n", - "│ | mnisttest | pending | │\n", - "│ | | | │\n", - "│ +-----------+---------+ │\n", - "╰─────────────────────────╯\n", - "\n" - ], - "text/plain": [ - "╭─────────────────────────╮\n", - "│ \u001b[3m \u001b[0m\u001b[1;3m 🚀 Cluster Queue\u001b[0m\u001b[3m \u001b[0m │\n", - "│ \u001b[3m \u001b[0m\u001b[1;3mStatus 🚀\u001b[0m\u001b[3m \u001b[0m │\n", - "│ +-----------+---------+ │\n", - "│ |\u001b[1m \u001b[0m\u001b[1mName \u001b[0m\u001b[1m \u001b[0m|\u001b[1m \u001b[0m\u001b[1mStatus \u001b[0m\u001b[1m \u001b[0m| │\n", - "│ +===========+=========+ │\n", - "│ |\u001b[36m \u001b[0m\u001b[36mmnisttest\u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35mpending\u001b[0m\u001b[35m \u001b[0m| │\n", - "│ |\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m| │\n", - "│ +-----------+---------+ │\n", - "╰─────────────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "(, False)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cluster.status()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "a99d5aff", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waiting for requested resources to be set up...\n", - "Requested cluster up and running!\n" - ] - } - ], - "source": [ - "cluster.wait_ready()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "df71c1ed", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                   🚀 CodeFlare Cluster Status 🚀                   \n",
-       "                                                                    \n",
-       " ╭────────────────────────────────────────────────────────────────╮ \n",
-       " │   Name                                                         │ \n",
-       " │   mnisttest                                        Active ✅   │ \n",
-       " │                                                                │ \n",
-       " │   URI: ray://mnisttest-head-svc.default.svc:10001              │ \n",
-       " │                                                                │ \n",
-       " │   Dashboard🔗                                                  │ \n",
-       " │                                                                │ \n",
-       " ╰────────────────────────────────────────────────────────────────╯ \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Status 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭────────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mName\u001b[0m │ \n", - " │ \u001b[1;4mmnisttest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://mnisttest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=790579;ray-dashboard-mnisttest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " ╰────────────────────────────────────────────────────────────────╯ \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "(, True)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cluster.status()" - ] - }, - { - "cell_type": "markdown", - "id": "b3a55fe4", - "metadata": {}, - "source": [ - "Let's quickly verify that the specs of the cluster are as expected." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "7fd45bc5-03c0-4ae5-9ec5-dd1c30f1a084", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                   🚀 CodeFlare Cluster Details 🚀                  \n",
-       "                                                                    \n",
-       " ╭────────────────────────────────────────────────────────────────╮ \n",
-       " │   Name                                                         │ \n",
-       " │   mnisttest                                        Active ✅   │ \n",
-       " │                                                                │ \n",
-       " │   URI: ray://mnisttest-head-svc.default.svc:10001              │ \n",
-       " │                                                                │ \n",
-       " │   Dashboard🔗                                                  │ \n",
-       " │                                                                │ \n",
-       " │                      Cluster Resources                         │ \n",
-       " │   ╭─ Workers ──╮  ╭───────── Worker specs(each) ─────────╮     │ \n",
-       " │   │  Min  Max  │  │  Memory      CPU         GPU         │     │ \n",
-       " │   │            │  │                                      │     │ \n",
-       " │   │  2    2    │  │  8~8         2           0           │     │ \n",
-       " │   │            │  │                                      │     │ \n",
-       " │   ╰────────────╯  ╰──────────────────────────────────────╯     │ \n",
-       " ╰────────────────────────────────────────────────────────────────╯ \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭────────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mName\u001b[0m │ \n", - " │ \u001b[1;4mmnisttest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://mnisttest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=84168;http://ray-dashboard-mnisttest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " │ \u001b[3m Cluster Resources \u001b[0m │ \n", - " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m2 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m8~8 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m0 \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰────────────────────────────────────────────────────────────────╯ \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "RayCluster(name='mnisttest', status=, min_workers=2, max_workers=2, worker_mem_min=8, worker_mem_max=8, worker_cpu=2, worker_gpu=0, namespace='default', dashboard='http://ray-dashboard-mnisttest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org')" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cluster.details()" - ] - }, - { - "cell_type": "markdown", - "id": "87d2c9b3", - "metadata": {}, - "source": [ - "Now that our resource cluster is ready, we can directly submit our batch job (model training on two workers with four gpus each) to the cluster via torchx." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "ced6ccd6-a17e-413a-a0e4-65004fc35463", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "The Ray scheduler does not support port mapping.\n" - ] - } - ], - "source": [ - "jobdef = DDPJobDefinition(\n", - " name=\"mnisttest\",\n", - " script=\"mnist.py\",\n", - " scheduler_args={\"requirements\": \"requirements.txt\"}\n", - ")\n", - "job = jobdef.submit(cluster)" - ] - }, - { - "cell_type": "markdown", - "id": "ff065051", - "metadata": {}, - "source": [ - "Now we can go ahead and look at the status and logs of our batch job." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "e5c0b0da-c22e-4142-b096-407ac8aebe5e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AppStatus:\n", - " msg: !!python/object/apply:ray.dashboard.modules.job.common.JobStatus\n", - " - RUNNING\n", - " num_restarts: -1\n", - " roles:\n", - " - replicas:\n", - " - hostname: \n", - " id: 0\n", - " role: ray\n", - " state: !!python/object/apply:torchx.specs.api.AppState\n", - " - 3\n", - " structured_error_msg: \n", - " role: ray\n", - " state: RUNNING (3)\n", - " structured_error_msg: \n", - " ui_url: null" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "job.status()" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "264c1809-de72-4acf-b0f6-e67d345640f6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[RayActor(name='mnist', command=['bash', '-c', \"python -m torch.distributed.run --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id 'mnist-zvm96dmvgkq5hc' --nnodes 2 --nproc_per_node 1 --node_rank '0' --tee 3 --role '' mnist.py\"], env={'LOGLEVEL': 'DEBUG', 'TORCH_DISTRIBUTED_DEBUG': 'DETAIL', 'TORCHX_JOB_ID': 'ray://torchx/mnist-zvm96dmvgkq5hc'}, num_cpus=2, num_gpus=0, min_replicas=2), RayActor(name='mnist', command=['bash', '-c', \"python -m torch.distributed.run --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id 'mnist-zvm96dmvgkq5hc' --nnodes 2 --nproc_per_node 1 --node_rank '1' --tee 3 --role '' mnist.py\"], env={'LOGLEVEL': 'DEBUG', 'TORCH_DISTRIBUTED_DEBUG': 'DETAIL', 'TORCHX_JOB_ID': 'ray://torchx/mnist-zvm96dmvgkq5hc'}, num_cpus=2, num_gpus=0, min_replicas=2)]\n", - "2023-04-03 14:55:18,399\tINFO worker.py:1230 -- Using address 10.129.0.91:6379 set in the environment variable RAY_ADDRESS\n", - "2023-04-03 14:55:18,399\tINFO worker.py:1342 -- Connecting to existing Ray cluster at address: 10.129.0.91:6379...\n", - "2023-04-03 14:55:18,404\tINFO worker.py:1519 -- Connected to Ray cluster. View the dashboard at \u001b[1m\u001b[32mhttp://10.129.0.91:8265 \u001b[39m\u001b[22m\n", - "Waiting for minimum placement group to start.\n", - "Successfully created placement groups\n", - "rdzv_endpoint set to 10.129.0.93 for actor d45df6656fb9ae4d54e9266f02000000\n", - "rdzv_endpoint set to 10.129.0.93 for actor 1e1ce76b4ad62b80f89134cb02000000\n", - "Successfully placed command actors\n", - "Entering main loop, start executing the script on worker nodes\n", - "running ray.wait on [ObjectRef(e082c90ab8422b00d45df6656fb9ae4d54e9266f0200000001000000), ObjectRef(ce868e48e2fa9a941e1ce76b4ad62b80f89134cb0200000001000000)]\n", - "running ray.wait on [ObjectRef(ce868e48e2fa9a941e1ce76b4ad62b80f89134cb0200000001000000), ObjectRef(f81ec6ff838b16dbd45df6656fb9ae4d54e9266f0200000001000000)]\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m INFO:torch.distributed.launcher.api:Starting elastic_operator with launch configs:\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m entrypoint : mnist.py\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m min_nodes : 2\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m max_nodes : 2\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m nproc_per_node : 1\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m run_id : mnist-zvm96dmvgkq5hc\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m rdzv_backend : static\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m rdzv_endpoint : 10.129.0.93:49782\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m rdzv_configs : {'rank': 0, 'timeout': 900}\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m max_restarts : 0\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m monitor_interval : 5\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m log_dir : None\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m metrics_cfg : {}\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m \n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m INFO:torch.distributed.elastic.agent.server.local_elastic_agent:log directory set to: /tmp/torchelastic_pj6rj6_8/mnist-zvm96dmvgkq5hc_c86x83a_\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] starting workers for entrypoint: python\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Rendezvous'ing worker group\n", - "running ray.wait on [ObjectRef(f81ec6ff838b16dbd45df6656fb9ae4d54e9266f0200000001000000), ObjectRef(32b0eec39cfa87ac1e1ce76b4ad62b80f89134cb0200000001000000)]\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m INFO:torch.distributed.launcher.api:Starting elastic_operator with launch configs:\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m entrypoint : mnist.py\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m min_nodes : 2\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m max_nodes : 2\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m nproc_per_node : 1\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m run_id : mnist-zvm96dmvgkq5hc\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m rdzv_backend : static\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m rdzv_endpoint : 10.129.0.93:49782\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m rdzv_configs : {'rank': 1, 'timeout': 900}\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m max_restarts : 0\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m monitor_interval : 5\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m log_dir : None\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m metrics_cfg : {}\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m \n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m INFO:torch.distributed.elastic.agent.server.local_elastic_agent:log directory set to: /tmp/torchelastic_l3wjw627/mnist-zvm96dmvgkq5hc_vpc42a2t\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] starting workers for entrypoint: python\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Rendezvous'ing worker group\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Rendezvous complete for workers. Result:\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m restart_count=0\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m master_addr=10.129.0.93\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m master_port=49782\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m group_rank=1\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m group_world_size=2\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m local_ranks=[0]\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m role_ranks=[1]\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m global_ranks=[1]\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m role_world_sizes=[2]\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m global_world_sizes=[2]\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m \n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Starting worker group\n", - "\u001b[2m\u001b[36m(CommandActor pid=358)\u001b[0m INFO:torch.distributed.elastic.multiprocessing:Setting worker0 reply file to: /tmp/torchelastic_l3wjw627/mnist-zvm96dmvgkq5hc_vpc42a2t/attempt_0/0/error.json\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Rendezvous complete for workers. Result:\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m restart_count=0\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m master_addr=10.129.0.93\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m master_port=49782\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m group_rank=0\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m group_world_size=2\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m local_ranks=[0]\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m role_ranks=[0]\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m global_ranks=[0]\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m role_world_sizes=[2]\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m global_world_sizes=[2]\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m \n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Starting worker group\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m INFO:torch.distributed.elastic.multiprocessing:Setting worker0 reply file to: /tmp/torchelastic_pj6rj6_8/mnist-zvm96dmvgkq5hc_c86x83a_/attempt_0/0/error.json\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:prior to running the trainer\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:MASTER_ADDR: is 10.129.0.93\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:MASTER_PORT: is 49782\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:GROUP: 2\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:LOCAL: 1\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Downloading MNIST dataset...\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Validation sanity check: 0it [00:00, ?it/s]\n", - "\u001b[2m\u001b[36m(CommandActor pid=145, ip=10.129.0.93)\u001b[0m [0]:Validation sanity check: 0%| | 0/2 [00:00, False)" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -160,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "a99d5aff", "metadata": {}, "outputs": [ @@ -179,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "df71c1ed", "metadata": {}, "outputs": [ @@ -194,7 +192,7 @@ " │ │ \n", " │ URI: ray://raytest-head-svc.default.svc:10001 │ \n", " │ │ \n", - " │ Dashboard🔗 │ \n", + " │ Dashboard🔗 │ \n", " │ │ \n", " ╰──────────────────────────────────────────────────────────────╯ \n", "\n" @@ -208,7 +206,7 @@ " │ │ \n", " │ \u001b[1mURI:\u001b[0m ray://raytest-head-svc.default.svc:10001 │ \n", " │ │ \n", - " │ \u001b]8;id=786870;ray-dashboard-raytest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", + " │ \u001b]8;id=630217;ray-dashboard-raytest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", " │ │ \n", " ╰──────────────────────────────────────────────────────────────╯ \n" ] @@ -222,7 +220,7 @@ "(, True)" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -232,7 +230,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "b3a55fe4", "metadata": {}, @@ -242,52 +239,52 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "7fd45bc5-03c0-4ae5-9ec5-dd1c30f1a084", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
                  🚀 CodeFlare Cluster Details 🚀                 \n",
-       "                                                                  \n",
-       " ╭──────────────────────────────────────────────────────────────╮ \n",
-       " │   Name                                                       │ \n",
-       " │   raytest                                        Active ✅   │ \n",
-       " │                                                              │ \n",
-       " │   URI: ray://raytest-head-svc.default.svc:10001              │ \n",
-       " │                                                              │ \n",
-       " │   Dashboard🔗                                                │ \n",
-       " │                                                              │ \n",
-       " │                      Cluster Resources                       │ \n",
-       " │   ╭─ Workers ──╮  ╭───────── Worker specs(each) ─────────╮   │ \n",
-       " │   │  Min  Max  │  │  Memory      CPU         GPU         │   │ \n",
-       " │   │            │  │                                      │   │ \n",
-       " │   │  2    2    │  │  4~4         1           0           │   │ \n",
-       " │   │            │  │                                      │   │ \n",
-       " │   ╰────────────╯  ╰──────────────────────────────────────╯   │ \n",
-       " ╰──────────────────────────────────────────────────────────────╯ \n",
+       "
                  🚀 CodeFlare Cluster Details 🚀                  \n",
+       "                                                                   \n",
+       " ╭───────────────────────────────────────────────────────────────╮ \n",
+       " │   Name                                                        │ \n",
+       " │   raytest                                        Active ✅    │ \n",
+       " │                                                               │ \n",
+       " │   URI: ray://raytest-head-svc.default.svc:10001               │ \n",
+       " │                                                               │ \n",
+       " │   Dashboard🔗                                                 │ \n",
+       " │                                                               │ \n",
+       " │                       Cluster Resources                       │ \n",
+       " │   ╭── Workers ──╮  ╭───────── Worker specs(each) ─────────╮   │ \n",
+       " │   │  # Workers  │  │  Memory      CPU         GPU         │   │ \n",
+       " │   │             │  │                                      │   │ \n",
+       " │   │  2          │  │  4~4         1           0           │   │ \n",
+       " │   │             │  │                                      │   │ \n",
+       " │   ╰─────────────╯  ╰──────────────────────────────────────╯   │ \n",
+       " ╰───────────────────────────────────────────────────────────────╯ \n",
        "
\n" ], "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭──────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mName\u001b[0m │ \n", - " │ \u001b[1;4mraytest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://raytest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=611457;http://ray-dashboard-raytest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " │ \u001b[3m Cluster Resources \u001b[0m │ \n", - " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m2 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m4~4 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m0 \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰──────────────────────────────────────────────────────────────╯ \n" + "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", + "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", + " ╭───────────────────────────────────────────────────────────────╮ \n", + " │ \u001b[1;37;42mName\u001b[0m │ \n", + " │ \u001b[1;4mraytest\u001b[0m Active ✅ │ \n", + " │ │ \n", + " │ \u001b[1mURI:\u001b[0m ray://raytest-head-svc.default.svc:10001 │ \n", + " │ │ \n", + " │ \u001b]8;id=623965;http://ray-dashboard-raytest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", + " │ │ \n", + " │ \u001b[3m Cluster Resources \u001b[0m │ \n", + " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", + " │ │ \u001b[1m \u001b[0m\u001b[1m# Workers\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m4~4 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m0 \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n", + " ╰───────────────────────────────────────────────────────────────╯ \n" ] }, "metadata": {}, @@ -296,10 +293,10 @@ { "data": { "text/plain": [ - "RayCluster(name='raytest', status=, min_workers=2, max_workers=2, worker_mem_min=4, worker_mem_max=4, worker_cpu=1, worker_gpu=0, namespace='default', dashboard='http://ray-dashboard-raytest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org')" + "RayCluster(name='raytest', status=, workers=2, worker_mem_min=4, worker_mem_max=4, worker_cpu=1, worker_gpu=0, namespace='default', dashboard='http://ray-dashboard-raytest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org')" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -318,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57", "metadata": {}, "outputs": [], @@ -339,7 +336,7 @@ ], "metadata": { "kernelspec": { - "display_name": "sdktest", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -353,11 +350,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.8.17" }, "vscode": { "interpreter": { - "hash": "ae72ec8d55aeb4773d9bab14ab14ec6c410f2dd8be83850b7c2732f479ead773" + "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac" } } }, diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb index ed5716d5c..8f8a6ed73 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "9865ee8c", "metadata": {}, @@ -11,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", "metadata": {}, "outputs": [], @@ -38,7 +37,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "bc27f84c", "metadata": {}, @@ -48,37 +46,27 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "0f4bc870-091f-4e11-9642-cba145710159", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Written to: instascaletest.yaml\n" - ] - } - ], + "outputs": [], "source": [ "# Create and configure our cluster object (and appwrapper)\n", "cluster = Cluster(ClusterConfiguration(\n", " name='instascaletest',\n", " namespace='default',\n", - " min_worker=2,\n", - " max_worker=2,\n", + " num_workers=2,\n", " min_cpus=2,\n", " max_cpus=2,\n", " min_memory=8,\n", " max_memory=8,\n", - " gpu=1,\n", + " num_gpus=1,\n", " instascale=True, # InstaScale now enabled, will scale OCP cluster to guarantee resource request\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Head, worker AWS machine types desired\n", "))" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "12eef53c", "metadata": {}, @@ -88,19 +76,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waiting for requested resources to be set up...\n", - "Requested cluster up and running!\n" - ] - } - ], + "outputs": [], "source": [ "# Bring up the cluster\n", "cluster.up()\n", @@ -108,7 +87,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "6abfe904", "metadata": {}, @@ -120,68 +98,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "7fd45bc5-03c0-4ae5-9ec5-dd1c30f1a084", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                     🚀 CodeFlare Cluster Details 🚀                     \n",
-       "                                                                         \n",
-       " ╭─────────────────────────────────────────────────────────────────────╮ \n",
-       " │   Name                                                              │ \n",
-       " │   instascaletest                                        Active ✅   │ \n",
-       " │                                                                     │ \n",
-       " │   URI: ray://instascaletest-head-svc.default.svc:10001              │ \n",
-       " │                                                                     │ \n",
-       " │   Dashboard🔗                                                       │ \n",
-       " │                                                                     │ \n",
-       " │                      Cluster Resources                              │ \n",
-       " │   ╭─ Workers ──╮  ╭───────── Worker specs(each) ─────────╮          │ \n",
-       " │   │  Min  Max  │  │  Memory      CPU         GPU         │          │ \n",
-       " │   │            │  │                                      │          │ \n",
-       " │   │  2    2    │  │  8~8         2           1           │          │ \n",
-       " │   │            │  │                                      │          │ \n",
-       " │   ╰────────────╯  ╰──────────────────────────────────────╯          │ \n",
-       " ╰─────────────────────────────────────────────────────────────────────╯ \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭─────────────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mName\u001b[0m │ \n", - " │ \u001b[1;4minstascaletest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://instascaletest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=164702;http://ray-dashboard-instascaletest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " │ \u001b[3m Cluster Resources \u001b[0m │ \n", - " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m2 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m8~8 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰─────────────────────────────────────────────────────────────────────╯ \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "RayCluster(name='instascaletest', status=, min_workers=2, max_workers=2, worker_mem_min=8, worker_mem_max=8, worker_cpu=2, worker_gpu=1, namespace='default', dashboard='http://ray-dashboard-instascaletest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org')" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cluster.details()" ] @@ -196,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57", "metadata": {}, "outputs": [], @@ -205,7 +125,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "c883caea", "metadata": {}, @@ -226,7 +145,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -240,7 +159,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.8.13" }, "vscode": { "interpreter": { diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_jobs.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_jobs.ipynb index 045b69b3c..3c33ed204 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_jobs.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_jobs.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", "metadata": {}, "outputs": [], @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "0f4bc870-091f-4e11-9642-cba145710159", "metadata": {}, "outputs": [ @@ -63,20 +63,19 @@ "cluster = Cluster(ClusterConfiguration(\n", " name='jobtest',\n", " namespace='default',\n", - " min_worker=2,\n", - " max_worker=2,\n", + " num_workers=2,\n", " min_cpus=1,\n", " max_cpus=1,\n", " min_memory=4,\n", " max_memory=4,\n", - " gpu=0,\n", + " num_gpus=0,\n", " instascale=False\n", "))" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200", "metadata": {}, "outputs": [ @@ -97,52 +96,52 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "df71c1ed", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
                  🚀 CodeFlare Cluster Details 🚀                 \n",
-       "                                                                  \n",
-       " ╭──────────────────────────────────────────────────────────────╮ \n",
-       " │   Name                                                       │ \n",
-       " │   jobtest                                        Active ✅   │ \n",
-       " │                                                              │ \n",
-       " │   URI: ray://jobtest-head-svc.default.svc:10001              │ \n",
-       " │                                                              │ \n",
-       " │   Dashboard🔗                                                │ \n",
-       " │                                                              │ \n",
-       " │                      Cluster Resources                       │ \n",
-       " │   ╭─ Workers ──╮  ╭───────── Worker specs(each) ─────────╮   │ \n",
-       " │   │  Min  Max  │  │  Memory      CPU         GPU         │   │ \n",
-       " │   │            │  │                                      │   │ \n",
-       " │   │  2    2    │  │  4~4         1           0           │   │ \n",
-       " │   │            │  │                                      │   │ \n",
-       " │   ╰────────────╯  ╰──────────────────────────────────────╯   │ \n",
-       " ╰──────────────────────────────────────────────────────────────╯ \n",
+       "
                  🚀 CodeFlare Cluster Details 🚀                  \n",
+       "                                                                   \n",
+       " ╭───────────────────────────────────────────────────────────────╮ \n",
+       " │   Name                                                        │ \n",
+       " │   jobtest                                        Active ✅    │ \n",
+       " │                                                               │ \n",
+       " │   URI: ray://jobtest-head-svc.default.svc:10001               │ \n",
+       " │                                                               │ \n",
+       " │   Dashboard🔗                                                 │ \n",
+       " │                                                               │ \n",
+       " │                       Cluster Resources                       │ \n",
+       " │   ╭── Workers ──╮  ╭───────── Worker specs(each) ─────────╮   │ \n",
+       " │   │  # Workers  │  │  Memory      CPU         GPU         │   │ \n",
+       " │   │             │  │                                      │   │ \n",
+       " │   │  2          │  │  4~4         1           0           │   │ \n",
+       " │   │             │  │                                      │   │ \n",
+       " │   ╰─────────────╯  ╰──────────────────────────────────────╯   │ \n",
+       " ╰───────────────────────────────────────────────────────────────╯ \n",
        "
\n" ], "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭──────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mName\u001b[0m │ \n", - " │ \u001b[1;4mjobtest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://jobtest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=793109;http://ray-dashboard-jobtest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " │ \u001b[3m Cluster Resources \u001b[0m │ \n", - " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m2 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m4~4 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m0 \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰──────────────────────────────────────────────────────────────╯ \n" + "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", + "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", + " ╭───────────────────────────────────────────────────────────────╮ \n", + " │ \u001b[1;37;42mName\u001b[0m │ \n", + " │ \u001b[1;4mjobtest\u001b[0m Active ✅ │ \n", + " │ │ \n", + " │ \u001b[1mURI:\u001b[0m ray://jobtest-head-svc.default.svc:10001 │ \n", + " │ │ \n", + " │ \u001b]8;id=366603;http://ray-dashboard-jobtest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", + " │ │ \n", + " │ \u001b[3m Cluster Resources \u001b[0m │ \n", + " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", + " │ │ \u001b[1m \u001b[0m\u001b[1m# Workers\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m4~4 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m0 \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n", + " ╰───────────────────────────────────────────────────────────────╯ \n" ] }, "metadata": {}, @@ -151,10 +150,10 @@ { "data": { "text/plain": [ - "RayCluster(name='jobtest', status=, min_workers=2, max_workers=2, worker_mem_min=4, worker_mem_max=4, worker_cpu=1, worker_gpu=0, namespace='default', dashboard='http://ray-dashboard-jobtest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org')" + "RayCluster(name='jobtest', status=, workers=2, worker_mem_min=4, worker_mem_max=4, worker_cpu=1, worker_gpu=0, namespace='default', dashboard='http://ray-dashboard-jobtest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org')" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -173,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "c7b4f232", "metadata": {}, "outputs": [], @@ -191,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "8c2c5138", "metadata": {}, "outputs": [ @@ -217,12 +216,12 @@ "id": "5b9ae53a", "metadata": {}, "source": [ - "Now we can take a look at the status of our submitted job, as well as the logs:" + "Now we can take a look at the status of our submitted job, as well as retrieve the full logs:" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "6e36c3d9", "metadata": {}, "outputs": [ @@ -231,7 +230,7 @@ "text/plain": [ "AppStatus:\n", " msg: !!python/object/apply:ray.dashboard.modules.job.common.JobStatus\n", - " - SUCCEEDED\n", + " - RUNNING\n", " num_restarts: -1\n", " roles:\n", " - replicas:\n", @@ -239,15 +238,15 @@ " id: 0\n", " role: ray\n", " state: !!python/object/apply:torchx.specs.api.AppState\n", - " - 4\n", + " - 3\n", " structured_error_msg: \n", " role: ray\n", - " state: SUCCEEDED (4)\n", + " state: RUNNING (3)\n", " structured_error_msg: \n", " ui_url: null" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -258,17 +257,17 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "834cfb5c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'[RayActor(name=\\'mnist\\', command=[\\'bash\\', \\'-c\\', \"torchrun --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id \\'mnisttest-f50nbxs5pnwwlc\\' --nnodes 2 --nproc_per_node 1 --node_rank \\'0\\' --tee 3 --role \\'\\' mnist.py\"], env={\\'TORCHX_TRACKING_EXPERIMENT_NAME\\': \\'default-experiment\\', \\'LOGLEVEL\\': \\'WARNING\\', \\'TORCHX_JOB_ID\\': \\'ray://torchx/mnisttest-f50nbxs5pnwwlc\\'}, num_cpus=1, num_gpus=0, min_replicas=2), RayActor(name=\\'mnist\\', command=[\\'bash\\', \\'-c\\', \"torchrun --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id \\'mnisttest-f50nbxs5pnwwlc\\' --nnodes 2 --nproc_per_node 1 --node_rank \\'1\\' --tee 3 --role \\'\\' mnist.py\"], env={\\'TORCHX_TRACKING_EXPERIMENT_NAME\\': \\'default-experiment\\', \\'LOGLEVEL\\': \\'WARNING\\', \\'TORCHX_JOB_ID\\': \\'ray://torchx/mnisttest-f50nbxs5pnwwlc\\'}, num_cpus=1, num_gpus=0, min_replicas=2)]\\n2023-04-26 11:30:03,092\\tINFO worker.py:1230 -- Using address 10.129.0.182:6379 set in the environment variable RAY_ADDRESS\\n2023-04-26 11:30:03,092\\tINFO worker.py:1342 -- Connecting to existing Ray cluster at address: 10.129.0.182:6379...\\n2023-04-26 11:30:03,097\\tINFO worker.py:1519 -- Connected to Ray cluster. View the dashboard at \\x1b[1m\\x1b[32mhttp://10.129.0.182:8265 \\x1b[39m\\x1b[22m\\nWaiting for minimum placement group to start.\\nSuccessfully created placement groups\\nrdzv_endpoint set to 10.129.0.182 for actor e0a0863650338de1363ea91502000000\\nrdzv_endpoint set to 10.129.0.182 for actor 5d5c64cf765c7982d83e6b3202000000\\nSuccessfully placed command actors\\nEntering main loop, start executing the script on worker nodes\\nrunning ray.wait on [ObjectRef(e082c90ab8422b00e0a0863650338de1363ea9150200000001000000), ObjectRef(ce868e48e2fa9a945d5c64cf765c7982d83e6b320200000001000000)]\\nrunning ray.wait on [ObjectRef(ce868e48e2fa9a945d5c64cf765c7982d83e6b320200000001000000), ObjectRef(f81ec6ff838b16dbe0a0863650338de1363ea9150200000001000000)]\\nrunning ray.wait on [ObjectRef(f81ec6ff838b16dbe0a0863650338de1363ea9150200000001000000), ObjectRef(32b0eec39cfa87ac5d5c64cf765c7982d83e6b320200000001000000)]\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:prior to running the trainer\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:MASTER_ADDR: is 10.129.0.182\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:MASTER_PORT: is 49782\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:GROUP: 2\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:LOCAL: 1\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Downloading MNIST dataset...\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Validation sanity check: 0it [00:00, ?it/s]\\n\\x1b[2m\\x1b[36m(CommandActor pid=327)\\x1b[0m [0]:Validation sanity check: 0%| | 0/2 [00:00\n", + " id: 0\n", + " role: ray\n", + " state: !!python/object/apply:torchx.specs.api.AppState\n", + " - 4\n", + " structured_error_msg: \n", + " role: ray\n", + " state: SUCCEEDED (4)\n", + " structured_error_msg: \n", + " ui_url: null" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "job.status()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, "id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57", "metadata": {}, "outputs": [], @@ -305,19 +369,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "496139cc", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/app-root/lib64/python3.8/site-packages/urllib3/connectionpool.py:1045: InsecureRequestWarning: Unverified HTTPS request is being made to host 'api.meyceoz-032023.psap.aws.rhperfscale.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "jobdef = DDPJobDefinition(\n", " name=\"mnistjob\",\n", @@ -342,10 +397,18 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "id": "3d18d42c", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/homebrew/lib/python3.8/site-packages/torchx/schedulers/kubernetes_mcad_scheduler.py:1105: UserWarning: Warning - MCAD does not report individual replica statuses, but overall task status. Replica id may not match status\n", + " warnings.warn(msg)\n" + ] + }, { "data": { "text/plain": [ @@ -358,7 +421,7 @@ " id: 0\n", " role: mnist\n", " state: !!python/object/apply:torchx.specs.api.AppState\n", - " - 3\n", + " - 2\n", " structured_error_msg: \n", " role: mnist\n", " state: RUNNING (3)\n", @@ -366,7 +429,7 @@ " ui_url: null" ] }, - "execution_count": 18, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -384,7 +447,7 @@ { "data": { "text/plain": [ - "'2023-04-26T18:33:22.956866570Z [0]:GPU available: False, used: False\\n2023-04-26T18:33:22.956866570Z [0]:TPU available: False, using: 0 TPU cores\\n2023-04-26T18:33:22.956866570Z [0]:IPU available: False, using: 0 IPUs\\n2023-04-26T18:33:23.057069471Z [0]:\\n2023-04-26T18:33:23.157240736Z [0]: 0%| | 0/9912422 [00:00 🚀 CodeFlare Cluster Details 🚀 \n", - " \n", - " ╭──────────────────────────────────────────────────────────────────────╮ \n", - " │ Name │ \n", - " │ interactivetest Active ✅ │ \n", - " │ │ \n", - " │ URI: ray://interactivetest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ Dashboard🔗 │ \n", - " │ │ \n", - " │ Cluster Resources │ \n", - " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ Min Max │ │ Memory CPU GPU │ │ \n", - " │ │ │ │ │ │ \n", - " │ │ 2 2 │ │ 8~8 2 1 │ │ \n", - " │ │ │ │ │ │ \n", - " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰──────────────────────────────────────────────────────────────────────╯ \n", - "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭──────────────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mName\u001b[0m │ \n", - " │ \u001b[1;4minteractivetest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://interactivetest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=789787;http://ray-dashboard-interactivetest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " │ \u001b[3m Cluster Resources \u001b[0m │ \n", - " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m2 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m8~8 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰──────────────────────────────────────────────────────────────────────╯ \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "RayCluster(name='interactivetest', status=, min_workers=2, max_workers=2, worker_mem_min=8, worker_mem_max=8, worker_cpu=2, worker_gpu=1, namespace='default', dashboard='http://ray-dashboard-interactivetest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cluster.details()" ] @@ -177,19 +101,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "c1719bca", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "http://ray-dashboard-interactivetest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org\n", - "ray://interactivetest-head-svc.default.svc:10001\n" - ] - } - ], + "outputs": [], "source": [ "ray_dashboard_uri = cluster.cluster_dashboard_uri()\n", "ray_cluster_uri = cluster.cluster_uri()\n", @@ -207,18 +122,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "300146dc", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ray cluster is up and running: True\n" - ] - } - ], + "outputs": [], "source": [ "#before proceeding make sure the cluster exists and the uri is not empty\n", "assert ray_cluster_uri, \"Ray cluster needs to be started and set before proceeding\"\n", @@ -248,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "1b36e0d9", "metadata": {}, "outputs": [], @@ -327,2609 +234,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "5901d958", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading builder script: 100%|██████████| 4.31k/4.31k [00:00<00:00, 4.22MB/s]\n", - "Downloading metadata: 100%|██████████| 2.17k/2.17k [00:00<00:00, 2.26MB/s]\n", - "Downloading readme: 100%|██████████| 7.59k/7.59k [00:00<00:00, 7.62MB/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2m\u001b[36m(train_fn pid=293)\u001b[0m Downloading and preparing dataset imdb/plain_text to /home/ray/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading data: 0%| | 0.00/84.1M [00:00 🚀 CodeFlare Cluster Details 🚀 \n", - " \n", - " ╭────────────────────────────────────────────────────────────────╮ \n", - " │ Name │ \n", - " │ gptfttest Active ✅ │ \n", - " │ │ \n", - " │ URI: ray://gptfttest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ Dashboard🔗 │ \n", - " │ │ \n", - " │ Cluster Resources │ \n", - " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ Min Max │ │ Memory CPU GPU │ │ \n", - " │ │ │ │ │ │ \n", - " │ │ 2 2 │ │ 8~8 2 1 │ │ \n", - " │ │ │ │ │ │ \n", - " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰────────────────────────────────────────────────────────────────╯ \n", - "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭────────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mName\u001b[0m │ \n", - " │ \u001b[1;4mgptfttest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://gptfttest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=618416;http://ray-dashboard-gptfttest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " │ \u001b[3m Cluster Resources \u001b[0m │ \n", - " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m2 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m8~8 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰────────────────────────────────────────────────────────────────╯ \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "RayCluster(name='gptfttest', status=, min_workers=2, max_workers=2, worker_mem_min=8, worker_mem_max=8, worker_cpu=2, worker_gpu=1, namespace='default', dashboard='http://ray-dashboard-gptfttest-default.apps.meyceoz-032023.psap.aws.rhperfscale.org')" - ] - }, - "execution_count": 662, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "cluster.details()" ] }, { "cell_type": "code", - "execution_count": 663, + "execution_count": null, "id": "9ed5bd75-4230-4c7c-a9e2-0f247890e62a", "metadata": {}, "outputs": [], @@ -167,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 664, + "execution_count": null, "id": "611d203a-35aa-4357-a748-1d01b022fcdb", "metadata": {}, "outputs": [], @@ -187,18 +111,10 @@ }, { "cell_type": "code", - "execution_count": 665, + "execution_count": null, "id": "8ac7c34f-e227-44c2-a4b1-a57c853ac3a7", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "The Ray scheduler does not support port mapping.\n" - ] - } - ], + "outputs": [], "source": [ "jobdef = DDPJobDefinition(\n", " name=\"gpttest\",\n", @@ -211,65 +127,27 @@ }, { "cell_type": "code", - "execution_count": 675, + "execution_count": null, "id": "1680d287-de46-45f8-b95a-02ba3c83912c", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AppStatus:\n", - " msg: !!python/object/apply:ray.dashboard.modules.job.common.JobStatus\n", - " - SUCCEEDED\n", - " num_restarts: -1\n", - " roles:\n", - " - replicas:\n", - " - hostname: \n", - " id: 0\n", - " role: ray\n", - " state: !!python/object/apply:torchx.specs.api.AppState\n", - " - 4\n", - " structured_error_msg: \n", - " role: ray\n", - " state: SUCCEEDED (4)\n", - " structured_error_msg: \n", - " ui_url: null" - ] - }, - "execution_count": 675, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "job.status()" ] }, { "cell_type": "code", - "execution_count": 674, + "execution_count": null, "id": "d25d6198-9941-47e8-857f-9811830cc854", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'[RayActor(name=\\'gpt_og\\', command=[\\'bash\\', \\'-c\\', \"torchrun --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id \\'gpttest-czbl5zhq5hnl7c\\' --nnodes 2 --nproc_per_node 1 --node_rank \\'0\\' --tee 3 --role \\'\\' gpt_og.py --model_name_or_path gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 2 --per_device_eval_batch_size 2 --do_train --do_eval --output_dir /tmp/test-clm --overwrite_output_dir\"], env={\\'TORCHX_TRACKING_EXPERIMENT_NAME\\': \\'default-experiment\\', \\'LOGLEVEL\\': \\'WARNING\\', \\'TORCHX_JOB_ID\\': \\'ray://torchx/gpttest-czbl5zhq5hnl7c\\'}, num_cpus=2, num_gpus=1, min_replicas=2), RayActor(name=\\'gpt_og\\', command=[\\'bash\\', \\'-c\\', \"torchrun --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id \\'gpttest-czbl5zhq5hnl7c\\' --nnodes 2 --nproc_per_node 1 --node_rank \\'1\\' --tee 3 --role \\'\\' gpt_og.py --model_name_or_path gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 2 --per_device_eval_batch_size 2 --do_train --do_eval --output_dir /tmp/test-clm --overwrite_output_dir\"], env={\\'TORCHX_TRACKING_EXPERIMENT_NAME\\': \\'default-experiment\\', \\'LOGLEVEL\\': \\'WARNING\\', \\'TORCHX_JOB_ID\\': \\'ray://torchx/gpttest-czbl5zhq5hnl7c\\'}, num_cpus=2, num_gpus=1, min_replicas=2)]\\n2023-04-26 07:31:11,126\\tINFO worker.py:1230 -- Using address 10.131.20.8:6379 set in the environment variable RAY_ADDRESS\\n2023-04-26 07:31:11,126\\tINFO worker.py:1342 -- Connecting to existing Ray cluster at address: 10.131.20.8:6379...\\n2023-04-26 07:31:11,133\\tINFO worker.py:1519 -- Connected to Ray cluster. View the dashboard at \\x1b[1m\\x1b[32mhttp://10.131.20.8:8265 \\x1b[39m\\x1b[22m\\nWaiting for minimum placement group to start.\\nSuccessfully created placement groups\\nrdzv_endpoint set to 10.129.22.18 for actor 937fc115f41dd3f37cce443a02000000\\nrdzv_endpoint set to 10.129.22.18 for actor 9c109cdc7c9e0dbb9d916c6702000000\\nSuccessfully placed command actors\\nEntering main loop, start executing the script on worker nodes\\nrunning ray.wait on [ObjectRef(e082c90ab8422b00937fc115f41dd3f37cce443a0200000001000000), ObjectRef(ce868e48e2fa9a949c109cdc7c9e0dbb9d916c670200000001000000)]\\nrunning ray.wait on [ObjectRef(ce868e48e2fa9a949c109cdc7c9e0dbb9d916c670200000001000000), ObjectRef(f81ec6ff838b16db937fc115f41dd3f37cce443a0200000001000000)]\\nrunning ray.wait on [ObjectRef(f81ec6ff838b16db937fc115f41dd3f37cce443a0200000001000000), ObjectRef(32b0eec39cfa87ac9c109cdc7c9e0dbb9d916c670200000001000000)]\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Found existing installation: protobuf 3.19.6\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Not uninstalling protobuf at /home/ray/anaconda3/lib/python3.8/site-packages, outside environment /tmp/ray/session_2023-04-26_07-13-38_751985_7/runtime_resources/pip/98fab9b6ed3822d73f034e53208a68b433de183d/virtualenv\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Can\\'t uninstall \\'protobuf\\'. No files were found to uninstall.\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting accelerate>=0.12.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading accelerate-0.18.0-py3-none-any.whl (215 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 215.3/215.3 kB 39.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting torch>=1.3\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading torch-2.0.0-cp38-cp38-manylinux1_x86_64.whl (619.9 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 619.9/619.9 MB 7.5 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting datasets>=1.8.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading datasets-2.11.0-py3-none-any.whl (468 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 468.7/468.7 kB 125.9 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting sentencepiece!=0.1.92\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading sentencepiece-0.1.98-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.3/1.3 MB 144.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting evaluate\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 81.4/81.4 kB 54.8 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting scikit-learn\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading scikit_learn-1.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.8/9.8 MB 121.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting transformers==4.28.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.0/7.0 MB 99.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting protobuf<=3.20.1,>=3.8.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading protobuf-3.20.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 131.4 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting regex!=2019.12.17\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading regex-2023.3.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (771 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 771.9/771.9 kB 159.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting packaging>=20.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading packaging-23.1-py3-none-any.whl (48 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 48.9/48.9 kB 35.8 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting tqdm>=4.27\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading tqdm-4.65.0-py3-none-any.whl (77 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 77.1/77.1 kB 48.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting numpy>=1.17\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading numpy-1.24.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 17.3/17.3 MB 141.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting filelock\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading filelock-3.12.0-py3-none-any.whl (10 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting huggingface-hub<1.0,>=0.11.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 224.5/224.5 kB 103.1 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting pyyaml>=5.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (701 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 701.2/701.2 kB 39.6 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting requests\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading requests-2.28.2-py3-none-any.whl (62 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.8/62.8 kB 48.5 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading tokenizers-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.8/7.8 MB 160.4 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting psutil\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading psutil-5.9.5-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (282 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 282.1/282.1 kB 121.5 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting nvidia-cudnn-cu11==8.5.0.96\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 557.1/557.1 MB 6.5 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting sympy\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading sympy-1.11.1-py3-none-any.whl (6.5 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.5/6.5 MB 149.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting nvidia-cuda-cupti-cu11==11.7.101\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl (11.8 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 11.8/11.8 MB 136.8 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting jinja2\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading Jinja2-3.1.2-py3-none-any.whl (133 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 133.1/133.1 kB 73.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting triton==2.0.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading triton-2.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (63.2 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 63.2/63.2 MB 57.9 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting nvidia-cuda-nvrtc-cu11==11.7.99\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 21.0/21.0 MB 132.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting nvidia-nvtx-cu11==11.7.91\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading nvidia_nvtx_cu11-11.7.91-py3-none-manylinux1_x86_64.whl (98 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 98.6/98.6 kB 59.4 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting nvidia-nccl-cu11==2.14.3\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading nvidia_nccl_cu11-2.14.3-py3-none-manylinux1_x86_64.whl (177.1 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 177.1/177.1 MB 24.7 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting nvidia-cusolver-cu11==11.4.0.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl (102.6 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 102.6/102.6 MB 39.7 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting nvidia-curand-cu11==10.2.10.91\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading nvidia_curand_cu11-10.2.10.91-py3-none-manylinux1_x86_64.whl (54.6 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 54.6/54.6 MB 61.8 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting networkx\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading networkx-3.1-py3-none-any.whl (2.1 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Found existing installation: protobuf 3.19.6\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Not uninstalling protobuf at /home/ray/anaconda3/lib/python3.8/site-packages, outside environment /tmp/ray/session_2023-04-26_07-13-38_751985_7/runtime_resources/pip/98fab9b6ed3822d73f034e53208a68b433de183d/virtualenv\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Can\\'t uninstall \\'protobuf\\'. No files were found to uninstall.\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting accelerate>=0.12.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading accelerate-0.18.0-py3-none-any.whl (215 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 215.3/215.3 kB 36.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting torch>=1.3\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading torch-2.0.0-cp38-cp38-manylinux1_x86_64.whl (619.9 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 619.9/619.9 MB 7.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting datasets>=1.8.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading datasets-2.11.0-py3-none-any.whl (468 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 468.7/468.7 kB 123.9 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting sentencepiece!=0.1.92\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading sentencepiece-0.1.98-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.3/1.3 MB 143.5 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting evaluate\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 81.4/81.4 kB 53.5 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting scikit-learn\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading scikit_learn-1.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.8/9.8 MB 150.7 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting transformers==4.28.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.0/7.0 MB 152.8 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting protobuf<=3.20.1,>=3.8.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading protobuf-3.20.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 166.1 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting numpy>=1.17\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading numpy-1.24.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 17.3/17.3 MB 117.5 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting requests\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading requests-2.28.2-py3-none-any.whl (62 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.8/62.8 kB 42.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting filelock\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading filelock-3.12.0-py3-none-any.whl (10 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting packaging>=20.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading packaging-23.1-py3-none-any.whl (48 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 48.9/48.9 kB 33.9 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting pyyaml>=5.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (701 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 701.2/701.2 kB 149.9 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading tokenizers-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.8/7.8 MB 162.4 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting tqdm>=4.27\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading tqdm-4.65.0-py3-none-any.whl (77 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 77.1/77.1 kB 48.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting huggingface-hub<1.0,>=0.11.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 224.5/224.5 kB 104.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting regex!=2019.12.17\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading regex-2023.3.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (771 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 771.9/771.9 kB 157.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting psutil\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading psutil-5.9.5-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (282 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 282.1/282.1 kB 115.6 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting nvidia-cusparse-cu11==11.7.4.91\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading nvidia_cusparse_cu11-11.7.4.91-py3-none-manylinux1_x86_64.whl (173.2 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 173.2/173.2 MB 23.6 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting triton==2.0.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading triton-2.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (63.2 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 63.2/63.2 MB 54.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting nvidia-cublas-cu11==11.10.3.66\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 317.1/317.1 MB 14.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting nvidia-cufft-cu11==10.9.0.58\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux1_x86_64.whl (168.4 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 168.4/168.4 MB 24.8 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting nvidia-nccl-cu11==2.14.3\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading nvidia_nccl_cu11-2.14.3-py3-none-manylinux1_x86_64.whl (177.1 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 177.1/177.1 MB 24.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting sympy\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading sympy-1.11.1-py3-none-any.whl (6.5 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.5/6.5 MB 168.8 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting nvidia-cudnn-cu11==8.5.0.96\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 557.1/557.1 MB 3.9 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting nvidia-curand-cu11==10.2.10.91\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading nvidia_curand_cu11-10.2.10.91-py3-none-manylinux1_x86_64.whl (54.6 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 54.6/54.6 MB 66.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting jinja2\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading Jinja2-3.1.2-py3-none-any.whl (133 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 133.1/133.1 kB 73.7 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting networkx\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading networkx-3.1-py3-none-any.whl (2.1 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 162.6 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting nvidia-nvtx-cu11==11.7.91\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading nvidia_nvtx_cu11-11.7.91-py3-none-manylinux1_x86_64.whl (98 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 177.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting nvidia-cublas-cu11==11.10.3.66\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 317.1/317.1 MB 13.8 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting nvidia-cufft-cu11==10.9.0.58\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux1_x86_64.whl (168.4 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 168.4/168.4 MB 25.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting typing-extensions\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading typing_extensions-4.5.0-py3-none-any.whl (27 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting nvidia-cusparse-cu11==11.7.4.91\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading nvidia_cusparse_cu11-11.7.4.91-py3-none-manylinux1_x86_64.whl (173.2 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 173.2/173.2 MB 23.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting nvidia-cuda-runtime-cu11==11.7.99\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 849.3/849.3 kB 162.7 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting wheel\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading wheel-0.40.0-py3-none-any.whl (64 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 64.5/64.5 kB 47.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting setuptools\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading setuptools-67.7.2-py3-none-any.whl (1.1 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 164.4 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting lit\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading lit-16.0.2.tar.gz (137 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 137.9/137.9 kB 83.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Preparing metadata (setup.py): started\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Preparing metadata (setup.py): finished with status \\'done\\'\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting cmake\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading cmake-3.26.3-py2.py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (24.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 24.0/24.0 MB 109.9 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting fsspec[http]>=2021.11.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading fsspec-2023.4.0-py3-none-any.whl (153 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 154.0/154.0 kB 90.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting pyarrow>=8.0.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading pyarrow-11.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 35.0/35.0 MB 86.1 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting pandas\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading pandas-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.3/12.3 MB 150.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting multiprocess\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading multiprocess-0.70.14-py38-none-any.whl (132 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 132.0/132.0 kB 80.7 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting responses<0.19\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading responses-0.18.0-py3-none-any.whl (38 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting aiohttp\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading aiohttp-3.8.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 172.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting xxhash\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading xxhash-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (213 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 213.0/213.0 kB 107.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting dill<0.3.7,>=0.3.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading dill-0.3.6-py3-none-any.whl (110 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 110.5/110.5 kB 45.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting threadpoolctl>=2.0.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting joblib>=1.1.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading joblib-1.2.0-py3-none-any.whl (297 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 298.0/298.0 kB 118.1 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting scipy>=1.3.2\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 34.5/34.5 MB 81.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting attrs>=17.3.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading attrs-23.1.0-py3-none-any.whl (61 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 61.2/61.2 kB 45.5 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting charset-normalizer<4.0,>=2.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (195 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 195.9/195.9 kB 99.1 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting multidict<7.0,>=4.5\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (121 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 121.3/121.3 kB 73.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting frozenlist>=1.1.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading frozenlist-1.3.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 161.3/161.3 kB 90.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting async-timeout<5.0,>=4.0.0a3\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting yarl<2.0,>=1.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading yarl-1.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (266 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 266.9/266.9 kB 113.6 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting aiosignal>=1.1.2\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting certifi>=2017.4.17\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading certifi-2022.12.7-py3-none-any.whl (155 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 155.3/155.3 kB 87.4 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting urllib3<1.27,>=1.21.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading urllib3-1.26.15-py2.py3-none-any.whl (140 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 140.9/140.9 kB 81.1 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting idna<4,>=2.5\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading idna-3.4-py3-none-any.whl (61 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 61.5/61.5 kB 43.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting MarkupSafe>=2.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting python-dateutil>=2.8.2\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading python_dateutil-2.8.2-py2.py3-none-any.whl (247 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 247.7/247.7 kB 112.9 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting tzdata>=2022.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 98.6/98.6 kB 62.9 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting nvidia-cuda-nvrtc-cu11==11.7.99\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 21.0/21.0 MB 114.8 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting nvidia-cusolver-cu11==11.4.0.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl (102.6 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 102.6/102.6 MB 39.6 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting nvidia-cuda-runtime-cu11==11.7.99\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 849.3/849.3 kB 146.6 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting typing-extensions\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading typing_extensions-4.5.0-py3-none-any.whl (27 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting nvidia-cuda-cupti-cu11==11.7.101\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading nvidia_cuda_cupti_cu11-11.7.101-py3-none-manylinux1_x86_64.whl (11.8 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 11.8/11.8 MB 167.6 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting wheel\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading wheel-0.40.0-py3-none-any.whl (64 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 64.5/64.5 kB 43.8 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting setuptools\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading setuptools-67.7.2-py3-none-any.whl (1.1 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 160.1 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting cmake\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading cmake-3.26.3-py2.py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (24.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 24.0/24.0 MB 119.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting lit\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading lit-16.0.2.tar.gz (137 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 137.9/137.9 kB 80.8 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Preparing metadata (setup.py): started\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Preparing metadata (setup.py): finished with status \\'done\\'\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting aiohttp\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading aiohttp-3.8.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 163.6 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting pyarrow>=8.0.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading pyarrow-11.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.0 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 35.0/35.0 MB 93.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting dill<0.3.7,>=0.3.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading dill-0.3.6-py3-none-any.whl (110 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 110.5/110.5 kB 71.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting multiprocess\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading multiprocess-0.70.14-py38-none-any.whl (132 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 132.0/132.0 kB 73.9 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting xxhash\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading xxhash-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (213 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 213.0/213.0 kB 102.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting fsspec[http]>=2021.11.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading fsspec-2023.4.0-py3-none-any.whl (153 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 154.0/154.0 kB 89.7 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting responses<0.19\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading responses-0.18.0-py3-none-any.whl (38 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting pandas\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading pandas-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.3/12.3 MB 139.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting joblib>=1.1.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading joblib-1.2.0-py3-none-any.whl (297 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 298.0/298.0 kB 119.1 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting threadpoolctl>=2.0.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting scipy>=1.3.2\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 34.5/34.5 MB 86.4 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting charset-normalizer<4.0,>=2.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (195 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 195.9/195.9 kB 100.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting frozenlist>=1.1.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading frozenlist-1.3.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 161.3/161.3 kB 88.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting aiosignal>=1.1.2\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting attrs>=17.3.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading attrs-23.1.0-py3-none-any.whl (61 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 61.2/61.2 kB 39.5 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting yarl<2.0,>=1.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading yarl-1.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (266 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 266.9/266.9 kB 113.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting multidict<7.0,>=4.5\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (121 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 121.3/121.3 kB 69.1 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting async-timeout<5.0,>=4.0.0a3\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting certifi>=2017.4.17\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading certifi-2022.12.7-py3-none-any.whl (155 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 155.3/155.3 kB 86.7 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting idna<4,>=2.5\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading idna-3.4-py3-none-any.whl (61 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 61.5/61.5 kB 41.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting urllib3<1.27,>=1.21.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading urllib3-1.26.15-py2.py3-none-any.whl (140 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 140.9/140.9 kB 79.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting MarkupSafe>=2.0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting pytz>=2020.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading pytz-2023.3-py2.py3-none-any.whl (502 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 502.3/502.3 kB 148.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting python-dateutil>=2.8.2\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading python_dateutil-2.8.2-py2.py3-none-any.whl (247 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 341.8/341.8 kB 126.0 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting pytz>=2020.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading pytz-2023.3-py2.py3-none-any.whl (502 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 502.3/502.3 kB 135.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting mpmath>=0.19\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 536.2/536.2 kB 144.7 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Collecting six>=1.5\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Downloading six-1.16.0-py2.py3-none-any.whl (11 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Building wheels for collected packages: lit\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Building wheel for lit (setup.py): started\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Building wheel for lit (setup.py): finished with status \\'done\\'\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Created wheel for lit: filename=lit-16.0.2-py3-none-any.whl size=88174 sha256=b5e3a34a26c7dff85807f4525c1e1713d7ef47e58d8312d3a82495d1d2ffbc61\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: Stored in directory: /home/ray/.cache/pip/wheels/db/ec/82/32d94d2fa45c0af452689a6c91046392b29d856fe2eebc8276\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Successfully built lit\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Installing collected packages: tokenizers, sentencepiece, pytz, mpmath, lit, cmake, xxhash, wheel, urllib3, tzdata, typing-extensions, tqdm, threadpoolctl, sympy, six, setuptools, regex, pyyaml, psutil, protobuf, packaging, nvidia-nccl-cu11, nvidia-cufft-cu11, nvidia-cuda-nvrtc-cu11, numpy, networkx, multidict, MarkupSafe, joblib, idna, fsspec, frozenlist, filelock, dill, charset-normalizer, certifi, attrs, async-timeout, yarl, scipy, requests, python-dateutil, pyarrow, nvidia-nvtx-cu11, nvidia-cusparse-cu11, nvidia-curand-cu11, nvidia-cuda-runtime-cu11, nvidia-cuda-cupti-cu11, nvidia-cublas-cu11, multiprocess, jinja2, aiosignal, scikit-learn, responses, pandas, nvidia-cusolver-cu11, nvidia-cudnn-cu11, huggingface-hub, aiohttp, transformers, datasets, evaluate, triton, torch, accelerate\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Successfully installed MarkupSafe-2.1.2 accelerate-0.18.0 aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 attrs-23.1.0 certifi-2022.12.7 charset-normalizer-3.1.0 cmake-3.26.3 datasets-2.11.0 dill-0.3.6 evaluate-0.4.0 filelock-3.12.0 frozenlist-1.3.3 fsspec-2023.4.0 huggingface-hub-0.14.1 idna-3.4 jinja2-3.1.2 joblib-1.2.0 lit-16.0.2 mpmath-1.3.0 multidict-6.0.4 multiprocess-0.70.14 networkx-3.1 numpy-1.24.3 nvidia-cublas-cu11-11.10.3.66 nvidia-cuda-cupti-cu11-11.7.101 nvidia-cuda-nvrtc-cu11-11.7.99 nvidia-cuda-runtime-cu11-11.7.99 nvidia-cudnn-cu11-8.5.0.96 nvidia-cufft-cu11-10.9.0.58 nvidia-curand-cu11-10.2.10.91 nvidia-cusolver-cu11-11.4.0.1 nvidia-cusparse-cu11-11.7.4.91 nvidia-nccl-cu11-2.14.3 nvidia-nvtx-cu11-11.7.91 packaging-23.1 pandas-2.0.1 protobuf-3.20.1 psutil-5.9.5 pyarrow-11.0.0 python-dateutil-2.8.2 pytz-2023.3 pyyaml-6.0 regex-2023.3.23 requests-2.28.2 responses-0.18.0 scikit-learn-1.2.2 scipy-1.10.1 sentencepiece-0.1.98 setuptools-67.7.2 six-1.16.0 sympy-1.11.1 threadpoolctl-3.1.0 tokenizers-0.13.3 torch-2.0.0 tqdm-4.65.0 transformers-4.28.1 triton-2.0.0 typing-extensions-4.5.0 tzdata-2023.3 urllib3-1.26.15 wheel-0.40.0 xxhash-3.2.0 yarl-1.9.2\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:04/26/2023 07:33:24 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:04/26/2023 07:33:24 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:_n_gpu=1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:adafactor=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:adam_beta1=0.9,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:adam_beta2=0.999,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:adam_epsilon=1e-08,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:auto_find_batch_size=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:bf16=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:bf16_full_eval=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:data_seed=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:dataloader_drop_last=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:dataloader_num_workers=0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:dataloader_pin_memory=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:ddp_bucket_cap_mb=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:ddp_find_unused_parameters=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:ddp_timeout=1800,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:debug=[],\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:deepspeed=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:disable_tqdm=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:do_eval=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:do_predict=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:do_train=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:eval_accumulation_steps=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:eval_delay=0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:eval_steps=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:evaluation_strategy=no,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:fp16=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:fp16_backend=auto,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:fp16_full_eval=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:fp16_opt_level=O1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:fsdp=[],\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:fsdp_config={\\'fsdp_min_num_params\\': 0, \\'xla\\': False, \\'xla_fsdp_grad_ckpt\\': False},\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:fsdp_min_num_params=0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:fsdp_transformer_layer_cls_to_wrap=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:full_determinism=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:gradient_accumulation_steps=1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:gradient_checkpointing=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:greater_is_better=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:group_by_length=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:half_precision_backend=auto,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:hub_model_id=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:hub_private_repo=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:hub_strategy=every_save,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:hub_token=,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:ignore_data_skip=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:include_inputs_for_metrics=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:jit_mode_eval=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:label_names=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:label_smoothing_factor=0.0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:learning_rate=5e-05,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:length_column_name=length,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:load_best_model_at_end=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:local_rank=0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:log_level=passive,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:log_level_replica=warning,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:log_on_each_node=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:logging_dir=/tmp/test-clm/runs/Apr26_07-33-20_gptfttest-worker-small-group-gptfttest-7fdjz,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:logging_first_step=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:logging_nan_inf_filter=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:logging_steps=500,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:logging_strategy=steps,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:lr_scheduler_type=linear,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:max_grad_norm=1.0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:max_steps=-1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:metric_for_best_model=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:mp_parameters=,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:no_cuda=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:num_train_epochs=3.0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:optim=adamw_hf,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:optim_args=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:output_dir=/tmp/test-clm,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:overwrite_output_dir=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:past_index=-1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:per_device_eval_batch_size=2,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:per_device_train_batch_size=2,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:prediction_loss_only=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:push_to_hub=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:push_to_hub_model_id=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:push_to_hub_organization=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:push_to_hub_token=,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:ray_scope=last,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:remove_unused_columns=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:report_to=[\\'tensorboard\\'],\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:resume_from_checkpoint=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:run_name=/tmp/test-clm,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:save_on_each_node=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:save_safetensors=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:save_steps=500,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:save_strategy=steps,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:save_total_limit=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:seed=42,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:sharded_ddp=[],\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:skip_memory_metrics=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:tf32=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:torch_compile=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:torch_compile_backend=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:torch_compile_mode=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:torchdynamo=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:tpu_metrics_debug=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:tpu_num_cores=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:use_ipex=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:use_legacy_prediction_loop=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:use_mps_device=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:warmup_ratio=0.0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:warmup_steps=0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:weight_decay=0.0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:xpu_backend=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:)\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/wikitext/resolve/main/wikitext.py not found in cache or force_download set to True, downloading to /home/ray/.cache/huggingface/datasets/downloads/tmp9__iv6ze\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/wikitext/resolve/main/wikitext.py in cache at /home/ray/.cache/huggingface/datasets/downloads/30cb21e192e211952c02572882251280460fb5247fe18b6c0fb69224e769f1e1.6a998136b3179c543fac19963253d25970e7fe6d053f2818edc7075627f64bad.py\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - creating metadata file for /home/ray/.cache/huggingface/datasets/downloads/30cb21e192e211952c02572882251280460fb5247fe18b6c0fb69224e769f1e1.6a998136b3179c543fac19963253d25970e7fe6d053f2818edc7075627f64bad.py\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/wikitext/resolve/main/dataset_infos.json not found in cache or force_download set to True, downloading to /home/ray/.cache/huggingface/datasets/downloads/tmp0_knks6x\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/wikitext/resolve/main/dataset_infos.json in cache at /home/ray/.cache/huggingface/datasets/downloads/87ea4775c52b60feb08a5087c68f4453d4533a02491172390b4d6a3f97ae44d1.d3aa47a864d0b5cf3b7ebcf51e45c9d8f96356ff8527fff02d3a4cae4c9f5b1e\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - creating metadata file for /home/ray/.cache/huggingface/datasets/downloads/87ea4775c52b60feb08a5087c68f4453d4533a02491172390b4d6a3f97ae44d1.d3aa47a864d0b5cf3b7ebcf51e45c9d8f96356ff8527fff02d3a4cae4c9f5b1e\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 247.7/247.7 kB 116.2 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting tzdata>=2022.1\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 341.8/341.8 kB 133.3 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting mpmath>=0.19\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 536.2/536.2 kB 150.4 MB/s eta 0:00:00\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Collecting six>=1.5\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Downloading six-1.16.0-py2.py3-none-any.whl (11 kB)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Building wheels for collected packages: lit\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Building wheel for lit (setup.py): started\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Building wheel for lit (setup.py): finished with status \\'done\\'\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Created wheel for lit: filename=lit-16.0.2-py3-none-any.whl size=88174 sha256=b5e3a34a26c7dff85807f4525c1e1713d7ef47e58d8312d3a82495d1d2ffbc61\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: Stored in directory: /home/ray/.cache/pip/wheels/db/ec/82/32d94d2fa45c0af452689a6c91046392b29d856fe2eebc8276\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Successfully built lit\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Installing collected packages: tokenizers, sentencepiece, pytz, mpmath, lit, cmake, xxhash, wheel, urllib3, tzdata, typing-extensions, tqdm, threadpoolctl, sympy, six, setuptools, regex, pyyaml, psutil, protobuf, packaging, nvidia-nccl-cu11, nvidia-cufft-cu11, nvidia-cuda-nvrtc-cu11, numpy, networkx, multidict, MarkupSafe, joblib, idna, fsspec, frozenlist, filelock, dill, charset-normalizer, certifi, attrs, async-timeout, yarl, scipy, requests, python-dateutil, pyarrow, nvidia-nvtx-cu11, nvidia-cusparse-cu11, nvidia-curand-cu11, nvidia-cuda-runtime-cu11, nvidia-cuda-cupti-cu11, nvidia-cublas-cu11, multiprocess, jinja2, aiosignal, scikit-learn, responses, pandas, nvidia-cusolver-cu11, nvidia-cudnn-cu11, huggingface-hub, aiohttp, transformers, datasets, evaluate, triton, torch, accelerate\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Successfully installed MarkupSafe-2.1.2 accelerate-0.18.0 aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 attrs-23.1.0 certifi-2022.12.7 charset-normalizer-3.1.0 cmake-3.26.3 datasets-2.11.0 dill-0.3.6 evaluate-0.4.0 filelock-3.12.0 frozenlist-1.3.3 fsspec-2023.4.0 huggingface-hub-0.14.1 idna-3.4 jinja2-3.1.2 joblib-1.2.0 lit-16.0.2 mpmath-1.3.0 multidict-6.0.4 multiprocess-0.70.14 networkx-3.1 numpy-1.24.3 nvidia-cublas-cu11-11.10.3.66 nvidia-cuda-cupti-cu11-11.7.101 nvidia-cuda-nvrtc-cu11-11.7.99 nvidia-cuda-runtime-cu11-11.7.99 nvidia-cudnn-cu11-8.5.0.96 nvidia-cufft-cu11-10.9.0.58 nvidia-curand-cu11-10.2.10.91 nvidia-cusolver-cu11-11.4.0.1 nvidia-cusparse-cu11-11.7.4.91 nvidia-nccl-cu11-2.14.3 nvidia-nvtx-cu11-11.7.91 packaging-23.1 pandas-2.0.1 protobuf-3.20.1 psutil-5.9.5 pyarrow-11.0.0 python-dateutil-2.8.2 pytz-2023.3 pyyaml-6.0 regex-2023.3.23 requests-2.28.2 responses-0.18.0 scikit-learn-1.2.2 scipy-1.10.1 sentencepiece-0.1.98 setuptools-67.7.2 six-1.16.0 sympy-1.11.1 threadpoolctl-3.1.0 tokenizers-0.13.3 torch-2.0.0 tqdm-4.65.0 transformers-4.28.1 triton-2.0.0 typing-extensions-4.5.0 tzdata-2023.3 urllib3-1.26.15 wheel-0.40.0 xxhash-3.2.0 yarl-1.9.2\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:04/26/2023 07:33:24 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:04/26/2023 07:33:24 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:_n_gpu=1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:adafactor=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:adam_beta1=0.9,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:adam_beta2=0.999,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:adam_epsilon=1e-08,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:auto_find_batch_size=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:bf16=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:bf16_full_eval=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:data_seed=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:dataloader_drop_last=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:dataloader_num_workers=0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:dataloader_pin_memory=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:ddp_bucket_cap_mb=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:ddp_find_unused_parameters=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:ddp_timeout=1800,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:debug=[],\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:deepspeed=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:disable_tqdm=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:do_eval=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:do_predict=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:do_train=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:eval_accumulation_steps=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:eval_delay=0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:eval_steps=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:evaluation_strategy=no,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:fp16=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:fp16_backend=auto,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:fp16_full_eval=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:fp16_opt_level=O1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:fsdp=[],\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:fsdp_config={\\'fsdp_min_num_params\\': 0, \\'xla\\': False, \\'xla_fsdp_grad_ckpt\\': False},\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:fsdp_min_num_params=0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:fsdp_transformer_layer_cls_to_wrap=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:full_determinism=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:gradient_accumulation_steps=1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:gradient_checkpointing=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:greater_is_better=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:group_by_length=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:half_precision_backend=auto,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:hub_model_id=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:hub_private_repo=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:hub_strategy=every_save,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:hub_token=,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:ignore_data_skip=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:include_inputs_for_metrics=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:jit_mode_eval=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:label_names=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:label_smoothing_factor=0.0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:learning_rate=5e-05,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:length_column_name=length,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:load_best_model_at_end=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:local_rank=0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:log_level=passive,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:log_level_replica=warning,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:log_on_each_node=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:logging_dir=/tmp/test-clm/runs/Apr26_07-33-23_gptfttest-worker-small-group-gptfttest-wxm29,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:logging_first_step=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:logging_nan_inf_filter=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:logging_steps=500,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:logging_strategy=steps,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:lr_scheduler_type=linear,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:max_grad_norm=1.0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:max_steps=-1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:metric_for_best_model=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:mp_parameters=,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:no_cuda=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:num_train_epochs=3.0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:optim=adamw_hf,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:optim_args=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:output_dir=/tmp/test-clm,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:overwrite_output_dir=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:past_index=-1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:per_device_eval_batch_size=2,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:per_device_train_batch_size=2,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:prediction_loss_only=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:push_to_hub=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:push_to_hub_model_id=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:push_to_hub_organization=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:push_to_hub_token=,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:ray_scope=last,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:remove_unused_columns=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:report_to=[\\'tensorboard\\'],\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:resume_from_checkpoint=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:run_name=/tmp/test-clm,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:save_on_each_node=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:save_safetensors=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:save_steps=500,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:save_strategy=steps,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:save_total_limit=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:seed=42,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:sharded_ddp=[],\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:skip_memory_metrics=True,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:tf32=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:torch_compile=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:torch_compile_backend=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:torch_compile_mode=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:torchdynamo=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:tpu_metrics_debug=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:tpu_num_cores=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:use_ipex=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:use_legacy_prediction_loop=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:use_mps_device=False,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:warmup_ratio=0.0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:warmup_steps=0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:weight_decay=0.0,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:xpu_backend=None,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:)\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/wikitext/resolve/main/wikitext.py not found in cache or force_download set to True, downloading to /home/ray/.cache/huggingface/datasets/downloads/tmpzmpwplcn\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/wikitext/resolve/main/wikitext.py in cache at /home/ray/.cache/huggingface/datasets/downloads/30cb21e192e211952c02572882251280460fb5247fe18b6c0fb69224e769f1e1.6a998136b3179c543fac19963253d25970e7fe6d053f2818edc7075627f64bad.py\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - creating metadata file for /home/ray/.cache/huggingface/datasets/downloads/30cb21e192e211952c02572882251280460fb5247fe18b6c0fb69224e769f1e1.6a998136b3179c543fac19963253d25970e7fe6d053f2818edc7075627f64bad.py\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/wikitext/resolve/main/dataset_infos.json not found in cache or force_download set to True, downloading to /home/ray/.cache/huggingface/datasets/downloads/tmpdadf8ql0\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/wikitext/resolve/main/dataset_infos.json in cache at /home/ray/.cache/huggingface/datasets/downloads/87ea4775c52b60feb08a5087c68f4453d4533a02491172390b4d6a3f97ae44d1.d3aa47a864d0b5cf3b7ebcf51e45c9d8f96356ff8527fff02d3a4cae4c9f5b1e\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:04/26/2023 07:33:25 - INFO - datasets.utils.file_utils - creating metadata file for /home/ray/.cache/huggingface/datasets/downloads/87ea4775c52b60feb08a5087c68f4453d4533a02491172390b4d6a3f97ae44d1.d3aa47a864d0b5cf3b7ebcf51e45c9d8f96356ff8527fff02d3a4cae4c9f5b1e\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:ERROR: pip\\'s dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:googleapis-common-protos 1.57.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 3.20.1 which is incompatible.\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:google-api-core 2.10.2 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 3.20.1 which is incompatible.\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:azure-cli-core 2.29.1 requires requests[socks]~=2.25.1, but you have requests 2.28.2 which is incompatible.\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[notice] A new release of pip available: 22.3.1 -> 23.1.2\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[notice] To update, run: pip install --upgrade pip\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Downloading builder script: 0%| | 0.00/8.48k [00:00> loading configuration file config.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:720] 2023-04-26 07:33:27,263 >> Model config GPT2Config {\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"_name_or_path\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"activation_function\": \"gelu_new\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"architectures\": [\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"GPT2LMHeadModel\"\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ],\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"attn_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"bos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"embd_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"eos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"initializer_range\": 0.02,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"layer_norm_epsilon\": 1e-05,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"model_type\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_ctx\": 1024,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_embd\": 768,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_head\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_inner\": null,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_layer\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_positions\": 1024,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"reorder_and_upcast_attn\": false,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"resid_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"scale_attn_by_inverse_layer_idx\": false,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"scale_attn_weights\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_activation\": null,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_first_dropout\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_proj_to_labels\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_type\": \"cls_index\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_use_proj\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"task_specific_params\": {\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"text-generation\": {\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"do_sample\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"max_length\": 50\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: }\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: },\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"transformers_version\": \"4.28.1\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"use_cache\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"vocab_size\": 50257\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:}\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|tokenization_auto.py:502] 2023-04-26 07:33:27,297 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:668] 2023-04-26 07:33:27,323 >> loading configuration file config.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:720] 2023-04-26 07:33:27,324 >> Model config GPT2Config {\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"_name_or_path\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"activation_function\": \"gelu_new\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"architectures\": [\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"GPT2LMHeadModel\"\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ],\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"attn_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"bos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"embd_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"eos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"initializer_range\": 0.02,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"layer_norm_epsilon\": 1e-05,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"model_type\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_ctx\": 1024,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_embd\": 768,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_head\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_inner\": null,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_layer\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_positions\": 1024,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"reorder_and_upcast_attn\": false,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"resid_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"scale_attn_by_inverse_layer_idx\": false,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"scale_attn_weights\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_activation\": null,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_first_dropout\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_proj_to_labels\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_type\": \"cls_index\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_use_proj\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"task_specific_params\": {\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"text-generation\": {\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"do_sample\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"max_length\": 50\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: }\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: },\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"transformers_version\": \"4.28.1\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"use_cache\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"vocab_size\": 50257\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:}\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Downloading (…)olve/main/vocab.json: 0%| | 0.00/1.04M [00:00> loading file vocab.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:1809] 2023-04-26 07:33:27,861 >> loading file merges.txt from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:1809] 2023-04-26 07:33:27,861 >> loading file tokenizer.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:1809] 2023-04-26 07:33:27,861 >> loading file added_tokens.json from cache at None\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:1809] 2023-04-26 07:33:27,861 >> loading file special_tokens_map.json from cache at None\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:1809] 2023-04-26 07:33:27,861 >> loading file tokenizer_config.json from cache at None\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:668] 2023-04-26 07:33:27,861 >> loading configuration file config.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:720] 2023-04-26 07:33:27,862 >> Model config GPT2Config {\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"_name_or_path\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"activation_function\": \"gelu_new\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"architectures\": [\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"GPT2LMHeadModel\"\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: ],\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"attn_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"bos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"embd_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"eos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"initializer_range\": 0.02,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"layer_norm_epsilon\": 1e-05,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"model_type\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_ctx\": 1024,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_embd\": 768,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_head\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_inner\": null,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:ERROR: pip\\'s dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:googleapis-common-protos 1.57.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 3.20.1 which is incompatible.\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:google-api-core 2.10.2 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 3.20.1 which is incompatible.\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:azure-cli-core 2.29.1 requires requests[socks]~=2.25.1, but you have requests 2.28.2 which is incompatible.\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[notice] A new release of pip available: 22.3.1 -> 23.1.2\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[notice] To update, run: pip install --upgrade pip\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Downloading builder script: 0%| | 0.00/8.48k [00:00> loading configuration file config.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:720] 2023-04-26 07:33:27,429 >> Model config GPT2Config {\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"_name_or_path\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"activation_function\": \"gelu_new\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"architectures\": [\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"GPT2LMHeadModel\"\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ],\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"attn_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"bos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"embd_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"eos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"initializer_range\": 0.02,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"layer_norm_epsilon\": 1e-05,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"model_type\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_ctx\": 1024,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_embd\": 768,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_head\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_inner\": null,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_layer\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_positions\": 1024,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"reorder_and_upcast_attn\": false,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"resid_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"scale_attn_by_inverse_layer_idx\": false,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"scale_attn_weights\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"summary_activation\": null,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"summary_first_dropout\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"summary_proj_to_labels\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"summary_type\": \"cls_index\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"summary_use_proj\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"task_specific_params\": {\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"text-generation\": {\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"do_sample\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"max_length\": 50\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: }\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: },\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"transformers_version\": \"4.28.1\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"use_cache\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"vocab_size\": 50257\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:}\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|tokenization_auto.py:502] 2023-04-26 07:33:27,475 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:668] 2023-04-26 07:33:27,521 >> loading configuration file config.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:720] 2023-04-26 07:33:27,522 >> Model config GPT2Config {\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"_name_or_path\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"activation_function\": \"gelu_new\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"architectures\": [\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"GPT2LMHeadModel\"\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ],\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"attn_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"bos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"embd_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"eos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"initializer_range\": 0.02,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"layer_norm_epsilon\": 1e-05,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"model_type\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_ctx\": 1024,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_embd\": 768,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_head\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_inner\": null,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_layer\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_positions\": 1024,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"reorder_and_upcast_attn\": false,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"resid_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"scale_attn_by_inverse_layer_idx\": false,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"scale_attn_weights\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"summary_activation\": null,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"summary_first_dropout\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"summary_proj_to_labels\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"summary_type\": \"cls_index\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"summary_use_proj\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"task_specific_params\": {\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"text-generation\": {\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"do_sample\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"max_length\": 50\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: }\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: },\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"transformers_version\": \"4.28.1\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"use_cache\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"vocab_size\": 50257\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:}\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Downloading (…)olve/main/vocab.json: 0%| | 0.00/1.04M [00:00> loading file vocab.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:1809] 2023-04-26 07:33:28,273 >> loading file merges.txt from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:1809] 2023-04-26 07:33:28,273 >> loading file tokenizer.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:1809] 2023-04-26 07:33:28,273 >> loading file added_tokens.json from cache at None\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:1809] 2023-04-26 07:33:28,273 >> loading file special_tokens_map.json from cache at None\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:1809] 2023-04-26 07:33:28,273 >> loading file tokenizer_config.json from cache at None\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:668] 2023-04-26 07:33:28,273 >> loading configuration file config.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:720] 2023-04-26 07:33:28,274 >> Model config GPT2Config {\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"_name_or_path\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"activation_function\": \"gelu_new\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"architectures\": [\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"GPT2LMHeadModel\"\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: ],\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"attn_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"bos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"embd_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"eos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"initializer_range\": 0.02,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"layer_norm_epsilon\": 1e-05,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"model_type\": \"gpt2\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_ctx\": 1024,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_embd\": 768,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_head\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_inner\": null,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"n_layer\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_layer\": 12,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"n_positions\": 1024,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"reorder_and_upcast_attn\": false,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"resid_pdrop\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"scale_attn_by_inverse_layer_idx\": false,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"scale_attn_weights\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_activation\": null,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_first_dropout\": 0.1,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_proj_to_labels\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_type\": \"cls_index\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"summary_use_proj\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"task_specific_params\": {\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"text-generation\": {\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"do_sample\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"max_length\": 50\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: }\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: },\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"transformers_version\": \"4.28.1\",\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"use_cache\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"vocab_size\": 50257\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:}\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Downloading pytorch_model.bin: 0%| | 0.00/548M [00:00> loading weights file pytorch_model.bin from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:575] 2023-04-26 07:33:31,079 >> Generate config GenerationConfig {\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"_from_model_config\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"bos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"eos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"transformers_version\": \"4.28.1\"\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:}\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|modeling_utils.py:3190] 2023-04-26 07:33:32,989 >> All model checkpoint weights were used when initializing GPT2LMHeadModel.\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|modeling_utils.py:3198] 2023-04-26 07:33:32,989 >> All the weights of GPT2LMHeadModel were initialized from the model checkpoint at gpt2.\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Downloading (…)neration_config.json: 0%| | 0.00/124 [00:00> loading configuration file generation_config.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/generation_config.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:575] 2023-04-26 07:33:33,094 >> Generate config GenerationConfig {\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"_from_model_config\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"bos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"eos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \"transformers_version\": \"4.28.1\"\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:}\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:Running tokenizer on dataset: 0%| | 0/4358 [00:00> loading weights file pytorch_model.bin from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:575] 2023-04-26 07:33:31,686 >> Generate config GenerationConfig {\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"_from_model_config\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"bos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"eos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"transformers_version\": \"4.28.1\"\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:}\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|modeling_utils.py:3190] 2023-04-26 07:33:33,605 >> All model checkpoint weights were used when initializing GPT2LMHeadModel.\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|modeling_utils.py:3198] 2023-04-26 07:33:33,605 >> All the weights of GPT2LMHeadModel were initialized from the model checkpoint at gpt2.\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Downloading (…)neration_config.json: 0%| | 0.00/124 [00:00> loading configuration file generation_config.json from cache at /home/ray/.cache/huggingface/hub/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/generation_config.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:575] 2023-04-26 07:33:33,714 >> Generate config GenerationConfig {\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"_from_model_config\": true,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"bos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"eos_token_id\": 50256,\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: \"transformers_version\": \"4.28.1\"\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:}\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:Running tokenizer on dataset: 0%| | 0/4358 [00:00> ***** Running training *****\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|trainer.py:1770] 2023-04-26 07:33:45,732 >> Num examples = 2,318\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|trainer.py:1771] 2023-04-26 07:33:45,732 >> Num Epochs = 3\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|trainer.py:1772] 2023-04-26 07:33:45,732 >> Instantaneous batch size per device = 2\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|trainer.py:1773] 2023-04-26 07:33:45,732 >> Total train batch size (w. parallel, distributed & accumulation) = 4\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|trainer.py:1774] 2023-04-26 07:33:45,733 >> Gradient Accumulation steps = 1\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|trainer.py:1775] 2023-04-26 07:33:45,733 >> Total optimization steps = 1,740\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|trainer.py:1776] 2023-04-26 07:33:45,733 >> Number of trainable parameters = 124,439,808\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 0%| | 0/1740 [00:00> ***** Running training *****\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|trainer.py:1770] 2023-04-26 07:33:45,729 >> Num examples = 2,318\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|trainer.py:1771] 2023-04-26 07:33:45,729 >> Num Epochs = 3\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|trainer.py:1772] 2023-04-26 07:33:45,729 >> Instantaneous batch size per device = 2\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|trainer.py:1773] 2023-04-26 07:33:45,729 >> Total train batch size (w. parallel, distributed & accumulation) = 4\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|trainer.py:1774] 2023-04-26 07:33:45,729 >> Gradient Accumulation steps = 1\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|trainer.py:1775] 2023-04-26 07:33:45,729 >> Total optimization steps = 1,740\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:[INFO|trainer.py:1776] 2023-04-26 07:33:45,729 >> Number of trainable parameters = 124,439,808\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 0%| | 0/1740 [00:00> Saving model checkpoint to /tmp/test-clm/checkpoint-500\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:457] 2023-04-26 07:42:32,569 >> Configuration saved in /tmp/test-clm/checkpoint-500/config.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|configuration_utils.py:362] 2023-04-26 07:42:32,569 >> Configuration saved in /tmp/test-clm/checkpoint-500/generation_config.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|modeling_utils.py:1847] 2023-04-26 07:42:33,212 >> Model weights saved in /tmp/test-clm/checkpoint-500/pytorch_model.bin\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:2171] 2023-04-26 07:42:33,212 >> tokenizer config file saved in /tmp/test-clm/checkpoint-500/tokenizer_config.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:[INFO|tokenization_utils_base.py:2178] 2023-04-26 07:42:33,213 >> Special tokens file saved in /tmp/test-clm/checkpoint-500/special_tokens_map.json\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 501/1740 [08:49<34:21, 1.66s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 502/1740 [08:50<30:28, 1.48s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 503/1740 [08:51<28:09, 1.37s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 504/1740 [08:52<26:28, 1.29s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 505/1740 [08:53<24:44, 1.20s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 506/1740 [08:54<23:23, 1.14s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 507/1740 [08:55<22:34, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 508/1740 [08:56<22:11, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 509/1740 [08:57<22:02, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 510/1740 [08:58<21:26, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 511/1740 [08:59<20:58, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 512/1740 [09:00<21:34, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 29%|██▉ | 513/1740 [09:01<21:46, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|██▉ | 514/1740 [09:02<21:45, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|██▉ | 515/1740 [09:03<20:52, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|██▉ | 516/1740 [09:04<20:44, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|██▉ | 517/1740 [09:05<21:02, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|██▉ | 518/1740 [09:06<21:18, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|██▉ | 519/1740 [09:08<21:09, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|██▉ | 520/1740 [09:09<20:54, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|██▉ | 521/1740 [09:09<20:34, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|███ | 522/1740 [09:11<21:54, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|███ | 523/1740 [09:12<21:27, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|███ | 524/1740 [09:13<21:37, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|███ | 525/1740 [09:14<21:46, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|███ | 526/1740 [09:15<22:20, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|███ | 527/1740 [09:16<21:23, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|███ | 528/1740 [09:17<21:43, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|███ | 529/1740 [09:18<21:18, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 30%|███ | 530/1740 [09:19<21:17, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 531/1740 [09:20<21:49, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 532/1740 [09:21<22:00, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 533/1740 [09:23<21:39, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 534/1740 [09:24<21:51, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 535/1740 [09:25<20:52, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 536/1740 [09:26<21:12, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 537/1740 [09:27<20:43, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▍ | 428/1740 [07:30<22:36, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▍ | 429/1740 [07:31<22:37, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▍ | 430/1740 [07:32<22:27, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▍ | 431/1740 [07:33<22:31, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▍ | 432/1740 [07:34<22:01, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▍ | 433/1740 [07:35<21:18, 1.02it/s][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▍ | 434/1740 [07:36<22:45, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▌ | 435/1740 [07:37<22:10, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▌ | 436/1740 [07:38<21:41, 1.00it/s][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▌ | 437/1740 [07:39<22:30, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▌ | 438/1740 [07:40<23:14, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▌ | 439/1740 [07:41<23:11, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▌ | 440/1740 [07:42<23:21, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▌ | 441/1740 [07:43<23:49, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▌ | 442/1740 [07:45<23:57, 1.11s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 25%|██▌ | 443/1740 [07:46<23:16, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 444/1740 [07:47<22:49, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 445/1740 [07:48<22:55, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 446/1740 [07:49<23:18, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 447/1740 [07:50<22:52, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 448/1740 [07:51<22:44, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 449/1740 [07:52<22:00, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 450/1740 [07:53<21:54, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 451/1740 [07:54<21:56, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 452/1740 [07:55<22:19, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 453/1740 [07:56<22:13, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 454/1740 [07:57<22:17, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 455/1740 [07:58<21:59, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▌ | 456/1740 [07:59<22:39, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▋ | 457/1740 [08:00<23:06, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▋ | 458/1740 [08:01<23:04, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▋ | 459/1740 [08:02<22:57, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▋ | 460/1740 [08:03<22:31, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 26%|██▋ | 461/1740 [08:05<23:03, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 462/1740 [08:06<22:42, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 463/1740 [08:06<21:54, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 464/1740 [08:08<22:04, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 465/1740 [08:09<22:21, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 466/1740 [08:10<21:42, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 467/1740 [08:11<22:28, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 468/1740 [08:12<22:26, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 469/1740 [08:13<22:33, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 470/1740 [08:14<22:17, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 471/1740 [08:15<23:02, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 472/1740 [08:16<22:43, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 473/1740 [08:17<22:32, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 474/1740 [08:18<22:41, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 475/1740 [08:19<22:07, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 476/1740 [08:20<22:37, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 477/1740 [08:21<22:00, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 27%|██▋ | 478/1740 [08:22<21:31, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 479/1740 [08:23<22:07, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 480/1740 [08:24<21:14, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 481/1740 [08:25<21:50, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 482/1740 [08:27<22:07, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 483/1740 [08:28<22:34, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 484/1740 [08:29<21:50, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 485/1740 [08:30<22:05, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 486/1740 [08:31<22:01, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 487/1740 [08:32<21:35, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 488/1740 [08:33<21:47, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 489/1740 [08:34<21:43, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 490/1740 [08:35<21:50, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 491/1740 [08:36<22:43, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 492/1740 [08:37<21:52, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 493/1740 [08:38<22:22, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 494/1740 [08:39<21:58, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 28%|██▊ | 495/1740 [08:40<22:40, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▊ | 496/1740 [08:42<22:56, 1.11s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▊ | 497/1740 [08:43<23:04, 1.11s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▊ | 498/1740 [08:44<22:12, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▊ | 499/1740 [08:45<22:39, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▊ | 500/1740 [08:46<21:57, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: \\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▊ | 500/1740 [08:46<21:57, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 501/1740 [08:49<34:20, 1.66s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 502/1740 [08:50<30:28, 1.48s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 503/1740 [08:51<28:09, 1.37s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 504/1740 [08:52<26:28, 1.29s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 505/1740 [08:53<24:44, 1.20s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 506/1740 [08:54<23:24, 1.14s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 507/1740 [08:55<22:34, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 508/1740 [08:56<22:11, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 509/1740 [08:57<22:03, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 510/1740 [08:58<21:26, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 511/1740 [08:59<20:58, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 512/1740 [09:00<21:34, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 29%|██▉ | 513/1740 [09:01<21:45, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|██▉ | 514/1740 [09:02<21:45, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|██▉ | 515/1740 [09:03<20:53, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|██▉ | 516/1740 [09:04<20:46, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|██▉ | 517/1740 [09:05<21:01, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|██▉ | 518/1740 [09:06<21:17, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|██▉ | 519/1740 [09:08<21:08, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|██▉ | 520/1740 [09:09<20:54, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|██▉ | 521/1740 [09:10<20:33, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|███ | 522/1740 [09:11<21:54, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|███ | 523/1740 [09:12<21:27, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|███ | 524/1740 [09:13<21:37, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|███ | 525/1740 [09:14<21:46, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|███ | 526/1740 [09:15<22:20, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|███ | 527/1740 [09:16<21:23, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|███ | 528/1740 [09:17<21:43, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|███ | 529/1740 [09:18<21:18, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 30%|███ | 530/1740 [09:19<21:17, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 531/1740 [09:20<21:49, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 532/1740 [09:21<22:00, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 533/1740 [09:23<21:39, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 534/1740 [09:24<21:51, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 535/1740 [09:25<20:52, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 536/1740 [09:26<21:12, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 537/1740 [09:27<20:43, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 538/1740 [09:28<20:31, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 539/1740 [09:29<20:37, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 540/1740 [09:30<20:25, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 541/1740 [09:31<20:48, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 542/1740 [09:32<21:04, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███ | 543/1740 [09:33<21:06, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███▏ | 544/1740 [09:34<21:21, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███▏ | 545/1740 [09:35<21:15, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███▏ | 546/1740 [09:36<20:38, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███▏ | 547/1740 [09:37<20:24, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 31%|███▏ | 548/1740 [09:38<20:28, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 549/1740 [09:39<21:00, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 550/1740 [09:40<21:25, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 538/1740 [09:28<20:31, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 539/1740 [09:29<20:37, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 540/1740 [09:30<20:25, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 541/1740 [09:31<20:47, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 542/1740 [09:32<21:05, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███ | 543/1740 [09:33<21:06, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███▏ | 544/1740 [09:34<21:21, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███▏ | 545/1740 [09:35<21:15, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███▏ | 546/1740 [09:36<20:39, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███▏ | 547/1740 [09:37<20:24, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 31%|███▏ | 548/1740 [09:38<20:28, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 549/1740 [09:39<21:01, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 550/1740 [09:40<21:25, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 551/1740 [09:41<20:49, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 552/1740 [09:42<20:14, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 553/1740 [09:43<20:22, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 554/1740 [09:44<20:14, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 555/1740 [09:45<20:15, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 556/1740 [09:46<20:07, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 557/1740 [09:47<20:31, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 558/1740 [09:49<20:25, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 559/1740 [09:50<20:24, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 560/1740 [09:51<20:54, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 561/1740 [09:52<20:39, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 562/1740 [09:53<20:55, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 563/1740 [09:54<20:42, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 564/1740 [09:55<19:57, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 32%|███▏ | 565/1740 [09:56<19:49, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 566/1740 [09:57<19:47, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 567/1740 [09:58<20:11, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 568/1740 [09:59<19:56, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 569/1740 [10:00<20:27, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 570/1740 [10:01<20:51, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 571/1740 [10:02<20:51, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 572/1740 [10:03<20:22, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 573/1740 [10:04<20:14, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 574/1740 [10:05<20:35, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 575/1740 [10:06<20:37, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 576/1740 [10:07<19:49, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 577/1740 [10:08<20:27, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 578/1740 [10:09<19:58, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 579/1740 [10:10<20:00, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 580/1740 [10:11<19:31, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 581/1740 [10:12<19:45, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 33%|███▎ | 582/1740 [10:13<19:49, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▎ | 583/1740 [10:14<19:52, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▎ | 584/1740 [10:16<19:53, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▎ | 585/1740 [10:17<19:41, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▎ | 586/1740 [10:18<19:46, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▎ | 587/1740 [10:19<19:51, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 588/1740 [10:20<19:29, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 589/1740 [10:21<19:28, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 590/1740 [10:22<19:25, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 591/1740 [10:23<20:17, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 592/1740 [10:24<20:16, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 593/1740 [10:25<19:42, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 594/1740 [10:26<19:50, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 595/1740 [10:27<20:15, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 596/1740 [10:28<20:23, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 597/1740 [10:29<20:42, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 598/1740 [10:30<20:43, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 599/1740 [10:31<20:27, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 34%|███▍ | 600/1740 [10:32<20:29, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▍ | 601/1740 [10:33<19:57, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▍ | 602/1740 [10:34<20:03, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▍ | 603/1740 [10:35<19:48, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▍ | 604/1740 [10:37<20:31, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▍ | 605/1740 [10:38<20:26, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▍ | 606/1740 [10:39<19:52, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▍ | 607/1740 [10:40<19:38, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▍ | 608/1740 [10:41<19:47, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▌ | 609/1740 [10:42<19:44, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▌ | 610/1740 [10:43<20:12, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▌ | 611/1740 [10:44<19:41, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▌ | 612/1740 [10:45<20:09, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▌ | 613/1740 [10:46<20:06, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▌ | 614/1740 [10:47<19:27, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▌ | 615/1740 [10:48<19:31, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▌ | 616/1740 [10:49<19:44, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 35%|███▌ | 617/1740 [10:50<20:07, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 618/1740 [10:52<20:35, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 619/1740 [10:53<20:06, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 620/1740 [10:54<19:53, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 621/1740 [10:55<19:27, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 622/1740 [10:56<19:55, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 623/1740 [10:57<19:45, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 624/1740 [10:58<20:03, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 625/1740 [10:59<19:24, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 626/1740 [11:00<19:35, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 627/1740 [11:01<20:01, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 628/1740 [11:02<20:08, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 629/1740 [11:03<19:50, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▌ | 630/1740 [11:04<19:21, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▋ | 631/1740 [11:05<19:36, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▋ | 632/1740 [11:06<19:20, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▋ | 633/1740 [11:07<19:02, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▋ | 634/1740 [11:08<18:52, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 36%|███▋ | 635/1740 [11:09<19:06, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 636/1740 [11:10<19:13, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 637/1740 [11:12<21:09, 1.15s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 638/1740 [11:13<20:42, 1.13s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 639/1740 [11:14<20:45, 1.13s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 640/1740 [11:15<20:53, 1.14s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 641/1740 [11:16<20:26, 1.12s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 642/1740 [11:17<20:07, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 643/1740 [11:18<19:36, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 644/1740 [11:19<19:32, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 645/1740 [11:20<19:28, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 646/1740 [11:22<20:09, 1.11s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 647/1740 [11:23<19:42, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 648/1740 [11:24<19:23, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 649/1740 [11:25<19:24, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 650/1740 [11:26<19:31, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 651/1740 [11:27<19:02, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 37%|███▋ | 652/1740 [11:28<19:45, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 653/1740 [11:29<19:22, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 654/1740 [11:30<19:16, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 655/1740 [11:31<19:44, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 656/1740 [11:32<19:10, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 657/1740 [11:33<19:10, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 658/1740 [11:34<19:04, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 659/1740 [11:35<18:34, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 551/1740 [09:41<20:50, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 552/1740 [09:42<20:13, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 553/1740 [09:43<20:22, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 554/1740 [09:44<20:14, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 555/1740 [09:45<20:14, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 556/1740 [09:46<20:07, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 557/1740 [09:47<20:31, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 558/1740 [09:49<20:25, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 559/1740 [09:50<20:24, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 560/1740 [09:51<20:54, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 561/1740 [09:52<20:39, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 562/1740 [09:53<20:55, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 563/1740 [09:54<20:42, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 564/1740 [09:55<19:56, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 32%|███▏ | 565/1740 [09:56<19:50, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 566/1740 [09:57<19:50, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 567/1740 [09:58<20:11, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 568/1740 [09:59<19:55, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 569/1740 [10:00<20:28, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 570/1740 [10:01<20:50, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 571/1740 [10:02<20:51, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 572/1740 [10:03<20:22, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 573/1740 [10:04<20:14, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 574/1740 [10:05<20:35, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 575/1740 [10:06<20:36, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 576/1740 [10:07<19:49, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 577/1740 [10:08<20:27, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 578/1740 [10:09<19:58, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 579/1740 [10:10<20:00, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 580/1740 [10:11<19:31, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 581/1740 [10:12<19:45, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 33%|███▎ | 582/1740 [10:13<19:49, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▎ | 583/1740 [10:14<19:52, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▎ | 584/1740 [10:16<19:53, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▎ | 585/1740 [10:17<19:41, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▎ | 586/1740 [10:18<19:46, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▎ | 587/1740 [10:19<19:51, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 588/1740 [10:20<19:29, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 589/1740 [10:21<19:28, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 590/1740 [10:22<19:26, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 591/1740 [10:23<20:18, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 592/1740 [10:24<20:15, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 593/1740 [10:25<19:43, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 594/1740 [10:26<19:50, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 595/1740 [10:27<20:15, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 596/1740 [10:28<20:23, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 597/1740 [10:29<20:43, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 598/1740 [10:30<20:43, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 599/1740 [10:31<20:27, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 34%|███▍ | 600/1740 [10:32<20:29, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▍ | 601/1740 [10:33<19:56, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▍ | 602/1740 [10:34<20:03, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▍ | 603/1740 [10:35<19:48, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▍ | 604/1740 [10:37<20:31, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▍ | 605/1740 [10:38<20:26, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▍ | 606/1740 [10:39<19:52, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▍ | 607/1740 [10:40<19:39, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▍ | 608/1740 [10:41<19:47, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▌ | 609/1740 [10:42<19:44, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▌ | 610/1740 [10:43<20:12, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▌ | 611/1740 [10:44<19:41, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▌ | 612/1740 [10:45<20:09, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▌ | 613/1740 [10:46<20:06, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▌ | 614/1740 [10:47<19:27, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▌ | 615/1740 [10:48<19:30, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▌ | 616/1740 [10:49<19:44, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 35%|███▌ | 617/1740 [10:50<20:07, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 618/1740 [10:52<20:34, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 619/1740 [10:53<20:05, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 620/1740 [10:54<19:53, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 621/1740 [10:55<19:27, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 622/1740 [10:56<19:54, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 623/1740 [10:57<19:45, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 624/1740 [10:58<20:03, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 625/1740 [10:59<19:24, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 626/1740 [11:00<19:35, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 627/1740 [11:01<20:01, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 628/1740 [11:02<20:08, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 629/1740 [11:03<19:50, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▌ | 630/1740 [11:04<19:21, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▋ | 631/1740 [11:05<19:35, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▋ | 632/1740 [11:06<19:20, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▋ | 633/1740 [11:07<19:02, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▋ | 634/1740 [11:08<18:52, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 36%|███▋ | 635/1740 [11:09<19:06, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 636/1740 [11:10<19:14, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 637/1740 [11:12<21:09, 1.15s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 638/1740 [11:13<20:42, 1.13s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 639/1740 [11:14<20:45, 1.13s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 640/1740 [11:15<20:52, 1.14s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 641/1740 [11:16<20:27, 1.12s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 642/1740 [11:17<20:07, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 643/1740 [11:18<19:36, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 644/1740 [11:19<19:33, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 645/1740 [11:20<19:28, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 646/1740 [11:22<20:08, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 647/1740 [11:23<19:42, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 648/1740 [11:24<19:23, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 649/1740 [11:25<19:24, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 650/1740 [11:26<19:31, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 651/1740 [11:27<19:02, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 37%|███▋ | 652/1740 [11:28<19:45, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 653/1740 [11:29<19:23, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 654/1740 [11:30<19:16, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 655/1740 [11:31<19:44, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 656/1740 [11:32<19:10, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 657/1740 [11:33<19:10, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 658/1740 [11:34<19:04, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 659/1740 [11:35<18:34, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 660/1740 [11:36<18:56, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 661/1740 [11:37<18:49, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 662/1740 [11:39<19:04, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 663/1740 [11:40<19:03, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 664/1740 [11:41<19:18, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 665/1740 [11:42<19:23, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 666/1740 [11:43<19:27, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 667/1740 [11:44<19:21, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 668/1740 [11:45<18:55, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 38%|███▊ | 669/1740 [11:46<18:18, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▊ | 670/1740 [11:47<18:06, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▊ | 671/1740 [11:48<18:18, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▊ | 672/1740 [11:49<18:30, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 660/1740 [11:36<18:56, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 661/1740 [11:37<18:49, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 662/1740 [11:39<19:04, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 663/1740 [11:40<19:03, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 664/1740 [11:41<19:18, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 665/1740 [11:42<19:23, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 666/1740 [11:43<19:28, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 667/1740 [11:44<19:21, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 668/1740 [11:45<18:55, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 38%|███▊ | 669/1740 [11:46<18:18, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▊ | 670/1740 [11:47<18:06, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▊ | 671/1740 [11:48<18:18, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▊ | 672/1740 [11:49<18:29, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▊ | 673/1740 [11:50<18:48, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▊ | 674/1740 [11:51<18:20, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 675/1740 [11:52<18:18, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 676/1740 [11:53<18:39, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 677/1740 [11:54<18:04, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 678/1740 [11:55<18:14, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 679/1740 [11:56<17:43, 1.00s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 680/1740 [11:57<17:33, 1.01it/s][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 681/1740 [11:58<18:07, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 682/1740 [11:59<18:01, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 683/1740 [12:00<18:23, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 684/1740 [12:01<18:24, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 685/1740 [12:03<18:54, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 686/1740 [12:04<18:10, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 39%|███▉ | 687/1740 [12:05<18:04, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|███▉ | 688/1740 [12:06<18:33, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|███▉ | 689/1740 [12:07<18:23, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|███▉ | 690/1740 [12:08<18:09, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|███▉ | 691/1740 [12:09<17:40, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|███▉ | 692/1740 [12:10<17:22, 1.01it/s][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|███▉ | 693/1740 [12:11<17:33, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|███▉ | 694/1740 [12:12<18:20, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|███▉ | 695/1740 [12:13<18:50, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|████ | 696/1740 [12:14<18:51, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|████ | 697/1740 [12:15<18:55, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|████ | 698/1740 [12:16<18:43, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|████ | 699/1740 [12:17<18:31, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|████ | 700/1740 [12:18<18:25, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|████ | 701/1740 [12:19<18:15, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|████ | 702/1740 [12:20<18:04, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|████ | 703/1740 [12:22<18:42, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 40%|████ | 704/1740 [12:23<19:19, 1.12s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 705/1740 [12:24<19:19, 1.12s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 706/1740 [12:25<19:01, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 707/1740 [12:26<18:37, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 708/1740 [12:27<18:33, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 709/1740 [12:28<17:57, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 710/1740 [12:29<17:50, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 711/1740 [12:30<17:36, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 712/1740 [12:31<18:15, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 713/1740 [12:32<18:33, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 714/1740 [12:33<18:35, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 715/1740 [12:34<17:55, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 716/1740 [12:35<18:08, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████ | 717/1740 [12:36<17:55, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████▏ | 718/1740 [12:38<18:32, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████▏ | 719/1740 [12:39<18:04, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████▏ | 720/1740 [12:40<17:42, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████▏ | 721/1740 [12:41<17:51, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 41%|████▏ | 722/1740 [12:42<17:32, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 723/1740 [12:43<17:52, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 724/1740 [12:44<18:00, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 725/1740 [12:45<18:08, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 726/1740 [12:46<18:14, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 727/1740 [12:47<17:59, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 728/1740 [12:48<17:49, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 729/1740 [12:49<17:40, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 730/1740 [12:50<17:41, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 731/1740 [12:51<17:24, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 732/1740 [12:52<17:53, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 733/1740 [12:53<17:38, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 734/1740 [12:55<17:53, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 735/1740 [12:56<18:06, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 736/1740 [12:57<17:56, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 737/1740 [12:58<18:00, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 738/1740 [12:59<17:12, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 42%|████▏ | 739/1740 [13:00<18:09, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 740/1740 [13:01<18:08, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 741/1740 [13:02<18:08, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 742/1740 [13:03<18:05, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 743/1740 [13:04<17:51, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 744/1740 [13:05<17:13, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 745/1740 [13:06<17:20, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 746/1740 [13:07<17:10, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 747/1740 [13:08<17:39, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 748/1740 [13:09<17:29, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 749/1740 [13:10<17:18, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 750/1740 [13:12<17:42, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 751/1740 [13:13<17:52, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 752/1740 [13:14<17:49, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 753/1740 [13:15<18:02, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 754/1740 [13:16<18:08, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 755/1740 [13:17<17:18, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 43%|████▎ | 756/1740 [13:18<17:02, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▎ | 757/1740 [13:19<16:52, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▎ | 758/1740 [13:20<16:38, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▎ | 759/1740 [13:21<16:41, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▎ | 760/1740 [13:22<17:01, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▎ | 761/1740 [13:23<18:16, 1.12s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 762/1740 [13:24<18:06, 1.11s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 763/1740 [13:26<18:01, 1.11s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 764/1740 [13:27<17:52, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 765/1740 [13:28<18:04, 1.11s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 766/1740 [13:29<17:20, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 767/1740 [13:30<17:23, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 768/1740 [13:31<17:22, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 769/1740 [13:32<17:13, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 770/1740 [13:33<17:21, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 771/1740 [13:34<17:06, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 772/1740 [13:35<16:53, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 773/1740 [13:36<17:01, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 44%|████▍ | 774/1740 [13:37<16:40, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 45%|████▍ | 775/1740 [13:38<17:34, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 45%|████▍ | 776/1740 [13:39<17:13, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 45%|████▍ | 777/1740 [13:41<17:19, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 45%|████▍ | 778/1740 [13:42<17:26, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=120, ip=10.129.22.18)\\x1b[0m [0]: 45%|████▍ | 779/1740 [13:43<17:12, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▊ | 673/1740 [11:50<18:49, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▊ | 674/1740 [11:51<18:20, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 675/1740 [11:52<18:18, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 676/1740 [11:53<18:39, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 677/1740 [11:54<18:04, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 678/1740 [11:55<18:14, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 679/1740 [11:56<17:42, 1.00s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 680/1740 [11:57<17:33, 1.01it/s][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 681/1740 [11:58<18:07, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 682/1740 [11:59<18:00, 1.02s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 683/1740 [12:00<18:23, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 684/1740 [12:01<18:24, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 685/1740 [12:03<18:54, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 686/1740 [12:04<18:11, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 39%|███▉ | 687/1740 [12:05<18:03, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|███▉ | 688/1740 [12:06<18:33, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|███▉ | 689/1740 [12:07<18:23, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|███▉ | 690/1740 [12:08<18:10, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|███▉ | 691/1740 [12:09<17:40, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|███▉ | 692/1740 [12:10<17:22, 1.01it/s][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|███▉ | 693/1740 [12:11<17:33, 1.01s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|███▉ | 694/1740 [12:12<18:20, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|███▉ | 695/1740 [12:13<18:52, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|████ | 696/1740 [12:14<18:50, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|████ | 697/1740 [12:15<18:55, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|████ | 698/1740 [12:16<18:46, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|████ | 699/1740 [12:17<18:32, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|████ | 700/1740 [12:18<18:24, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|████ | 701/1740 [12:19<18:15, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|████ | 702/1740 [12:20<18:04, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|████ | 703/1740 [12:22<18:41, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 40%|████ | 704/1740 [12:23<19:19, 1.12s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 705/1740 [12:24<19:19, 1.12s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 706/1740 [12:25<19:00, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 707/1740 [12:26<18:37, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 708/1740 [12:27<18:33, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 709/1740 [12:28<17:57, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 710/1740 [12:29<17:50, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 711/1740 [12:30<17:35, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 712/1740 [12:31<18:15, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 713/1740 [12:32<18:33, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 714/1740 [12:33<18:35, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 715/1740 [12:34<17:55, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 716/1740 [12:35<18:08, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████ | 717/1740 [12:36<17:55, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████▏ | 718/1740 [12:38<18:32, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████▏ | 719/1740 [12:39<18:04, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████▏ | 720/1740 [12:40<17:42, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████▏ | 721/1740 [12:41<17:52, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 41%|████▏ | 722/1740 [12:42<17:31, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 723/1740 [12:43<17:52, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 724/1740 [12:44<18:00, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 725/1740 [12:45<18:08, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 726/1740 [12:46<18:14, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 727/1740 [12:47<18:00, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 728/1740 [12:48<17:49, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 729/1740 [12:49<17:40, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 730/1740 [12:50<17:41, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 731/1740 [12:51<17:23, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 732/1740 [12:52<17:53, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 733/1740 [12:53<17:38, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 734/1740 [12:55<17:54, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 735/1740 [12:56<18:06, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 736/1740 [12:57<17:57, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 737/1740 [12:58<18:00, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 738/1740 [12:59<17:11, 1.03s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 42%|████▏ | 739/1740 [13:00<18:09, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 740/1740 [13:01<18:08, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 741/1740 [13:02<18:08, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 742/1740 [13:03<18:05, 1.09s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 743/1740 [13:04<17:50, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 744/1740 [13:05<17:13, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 745/1740 [13:06<17:20, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 746/1740 [13:07<17:10, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 747/1740 [13:08<17:40, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 748/1740 [13:09<17:29, 1.06s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 749/1740 [13:10<17:18, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 750/1740 [13:12<17:41, 1.07s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 751/1740 [13:13<17:51, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 752/1740 [13:14<17:49, 1.08s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 753/1740 [13:15<18:02, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 754/1740 [13:16<18:08, 1.10s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 755/1740 [13:17<17:18, 1.05s/it][0]:\\n\\x1b[2m\\x1b[36m(CommandActor pid=121, ip=10.128.22.18)\\x1b[0m [0]: 43%|████▎ | 756/1740 [13:18<17:02, 1.04s/it][0]:\\n\\x1b[2m\\x1b[36m\n", - " -- TRUNCATED FOR FILE SIZE" - ] - }, - "execution_count": 674, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "job.logs()" ] }, { "cell_type": "code", - "execution_count": 658, + "execution_count": null, "id": "beb1a6b9-d9b3-49b7-b036-09f1d3569b59", "metadata": {}, "outputs": [], diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/jobtest.yaml b/demo-notebooks/guided-demos/notebook-ex-outputs/jobtest.yaml index d08517256..05e7529f2 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/jobtest.yaml +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/jobtest.yaml @@ -62,7 +62,15 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP - image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 imagePullPolicy: Always lifecycle: preStop: @@ -88,7 +96,8 @@ spec: cpu: 2 memory: 8G nvidia.com/gpu: 0 - rayVersion: 1.12.0 + imagePullSecrets: [] + rayVersion: 2.1.0 workerGroupSpecs: - groupName: small-group-jobtest maxReplicas: 2 @@ -110,7 +119,15 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP - image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 lifecycle: preStop: exec: @@ -128,6 +145,7 @@ spec: cpu: 1 memory: 4G nvidia.com/gpu: 0 + imagePullSecrets: [] initContainers: - command: - sh diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/raytest.yaml b/demo-notebooks/guided-demos/notebook-ex-outputs/raytest.yaml index c3e804dd4..f3389a5fa 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/raytest.yaml +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/raytest.yaml @@ -62,7 +62,15 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP - image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 imagePullPolicy: Always lifecycle: preStop: @@ -88,7 +96,8 @@ spec: cpu: 2 memory: 8G nvidia.com/gpu: 0 - rayVersion: 1.12.0 + imagePullSecrets: [] + rayVersion: 2.1.0 workerGroupSpecs: - groupName: small-group-raytest maxReplicas: 2 @@ -110,7 +119,15 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP - image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 lifecycle: preStop: exec: @@ -128,6 +145,7 @@ spec: cpu: 1 memory: 4G nvidia.com/gpu: 0 + imagePullSecrets: [] initContainers: - command: - sh diff --git a/src/codeflare_sdk.egg-info/SOURCES.txt b/src/codeflare_sdk.egg-info/SOURCES.txt index 73e4e1243..cfea1dbff 100644 --- a/src/codeflare_sdk.egg-info/SOURCES.txt +++ b/src/codeflare_sdk.egg-info/SOURCES.txt @@ -8,11 +8,14 @@ src/codeflare_sdk.egg-info/dependency_links.txt src/codeflare_sdk.egg-info/top_level.txt src/codeflare_sdk/cluster/__init__.py src/codeflare_sdk/cluster/auth.py +src/codeflare_sdk/cluster/awload.py src/codeflare_sdk/cluster/cluster.py src/codeflare_sdk/cluster/config.py src/codeflare_sdk/cluster/model.py src/codeflare_sdk/job/__init__.py src/codeflare_sdk/job/jobs.py src/codeflare_sdk/utils/__init__.py +src/codeflare_sdk/utils/generate_cert.py src/codeflare_sdk/utils/generate_yaml.py +src/codeflare_sdk/utils/kube_api_helpers.py src/codeflare_sdk/utils/pretty_print.py From 55957f2162e6877fbd6fd699f0479f056df8e27f Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Wed, 9 Aug 2023 11:20:47 -0400 Subject: [PATCH 2/8] re-sync nbs --- .../guided-demos/2_basic_jobs.ipynb | 20 ++++++++++++++++++- .../preview_nbs/2_basic_jobs.ipynb | 20 ++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/demo-notebooks/guided-demos/2_basic_jobs.ipynb b/demo-notebooks/guided-demos/2_basic_jobs.ipynb index eb9247153..e86b28a9f 100644 --- a/demo-notebooks/guided-demos/2_basic_jobs.ipynb +++ b/demo-notebooks/guided-demos/2_basic_jobs.ipynb @@ -133,7 +133,7 @@ "id": "5b9ae53a", "metadata": {}, "source": [ - "Now we can take a look at the status of our submitted job, as well as the logs:" + "Now we can take a look at the status of our submitted job, as well as retrieve the full logs:" ] }, { @@ -156,6 +156,24 @@ "job.logs()" ] }, + { + "cell_type": "markdown", + "id": "c8267fb2", + "metadata": {}, + "source": [ + "You can also view organized logs, status, and other information directly through the Ray cluster's dashboard:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f3861d0", + "metadata": {}, + "outputs": [], + "source": [ + "cluster.cluster_dashboard_uri()" + ] + }, { "cell_type": "markdown", "id": "5af8cd32", diff --git a/demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb b/demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb index eb9247153..bb58f8078 100644 --- a/demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb @@ -133,7 +133,7 @@ "id": "5b9ae53a", "metadata": {}, "source": [ - "Now we can take a look at the status of our submitted job, as well as the logs:" + "Now we can take a look at the status of our submitted job, as well as retrieve the full logs:" ] }, { @@ -156,6 +156,24 @@ "job.logs()" ] }, + { + "cell_type": "markdown", + "id": "4067ef60", + "metadata": {}, + "source": [ + "You can also view organized logs, status, and other information directly through the Ray cluster's dashboard:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19055243", + "metadata": {}, + "outputs": [], + "source": [ + "cluster.cluster_dashboard_uri()" + ] + }, { "cell_type": "markdown", "id": "5af8cd32", From 60fc4f9f7981cf2318c2c388acda1f0d03e542cb Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Wed, 9 Aug 2023 12:30:48 -0400 Subject: [PATCH 3/8] Updated basic_instascale nb --- .../1_basic_instascale.ipynb | 93 +++++++++++++++++-- .../notebook-ex-outputs/instascaletest.yaml | 29 +++++- 2 files changed, 110 insertions(+), 12 deletions(-) diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb index 8f8a6ed73..e289b53f2 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", "metadata": {}, "outputs": [], @@ -46,10 +46,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "0f4bc870-091f-4e11-9642-cba145710159", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Written to: instascaletest.yaml\n" + ] + } + ], "source": [ "# Create and configure our cluster object (and appwrapper)\n", "cluster = Cluster(ClusterConfiguration(\n", @@ -76,10 +84,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Waiting for requested resources to be set up...\n", + "Requested cluster up and running!\n" + ] + } + ], "source": [ "# Bring up the cluster\n", "cluster.up()\n", @@ -98,10 +115,68 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "7fd45bc5-03c0-4ae5-9ec5-dd1c30f1a084", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
                     🚀 CodeFlare Cluster Details 🚀                     \n",
+       "                                                                         \n",
+       " ╭─────────────────────────────────────────────────────────────────────╮ \n",
+       " │   Name                                                              │ \n",
+       " │   instascaletest                                        Active ✅   │ \n",
+       " │                                                                     │ \n",
+       " │   URI: ray://instascaletest-head-svc.default.svc:10001              │ \n",
+       " │                                                                     │ \n",
+       " │   Dashboard🔗                                                       │ \n",
+       " │                                                                     │ \n",
+       " │                       Cluster Resources                             │ \n",
+       " │   ╭── Workers ──╮  ╭───────── Worker specs(each) ─────────╮         │ \n",
+       " │   │  # Workers  │  │  Memory      CPU         GPU         │         │ \n",
+       " │   │             │  │                                      │         │ \n",
+       " │   │  2          │  │  8~8         2           1           │         │ \n",
+       " │   │             │  │                                      │         │ \n",
+       " │   ╰─────────────╯  ╰──────────────────────────────────────╯         │ \n",
+       " ╰─────────────────────────────────────────────────────────────────────╯ \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", + "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", + " ╭─────────────────────────────────────────────────────────────────────╮ \n", + " │ \u001b[1;37;42mName\u001b[0m │ \n", + " │ \u001b[1;4minstascaletest\u001b[0m Active ✅ │ \n", + " │ │ \n", + " │ \u001b[1mURI:\u001b[0m ray://instascaletest-head-svc.default.svc:10001 │ \n", + " │ │ \n", + " │ \u001b]8;id=65933;http://ray-dashboard-instascaletest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", + " │ │ \n", + " │ \u001b[3m Cluster Resources \u001b[0m │ \n", + " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", + " │ │ \u001b[1m \u001b[0m\u001b[1m# Workers\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m8~8 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n", + " ╰─────────────────────────────────────────────────────────────────────╯ \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "RayCluster(name='instascaletest', status=, workers=2, worker_mem_min=8, worker_mem_max=8, worker_cpu=2, worker_gpu=1, namespace='default', dashboard='http://ray-dashboard-instascaletest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cluster.details()" ] @@ -116,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57", "metadata": {}, "outputs": [], @@ -159,7 +234,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.8.17" }, "vscode": { "interpreter": { diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/instascaletest.yaml b/demo-notebooks/guided-demos/notebook-ex-outputs/instascaletest.yaml index 2a40438cf..182acd724 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/instascaletest.yaml +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/instascaletest.yaml @@ -68,7 +68,20 @@ spec: values: - instascaletest containers: - - image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + - env: + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 imagePullPolicy: Always lifecycle: preStop: @@ -94,7 +107,8 @@ spec: cpu: 2 memory: 8G nvidia.com/gpu: 0 - rayVersion: 1.12.0 + imagePullSecrets: [] + rayVersion: 2.1.0 workerGroupSpecs: - groupName: small-group-instascaletest maxReplicas: 2 @@ -125,7 +139,15 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP - image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 lifecycle: preStop: exec: @@ -143,6 +165,7 @@ spec: cpu: 2 memory: 8G nvidia.com/gpu: 1 + imagePullSecrets: [] initContainers: - command: - sh From e7c57042b8b7723a4b17698059e52ad3c5d09458 Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Wed, 9 Aug 2023 16:50:34 -0400 Subject: [PATCH 4/8] Surrounding prep for final two notebook updates --- .../guided-demos/3_basic_interactive.ipynb | 4 +++- demo-notebooks/guided-demos/4_gpt.ipynb | 2 +- .../notebook-ex-outputs/gptfttest.yaml | 24 ++++++++++++++++--- .../notebook-ex-outputs/interactivetest.yaml | 24 ++++++++++++++++--- .../preview_nbs/3_basic_interactive.ipynb | 6 +++-- .../guided-demos/preview_nbs/4_gpt.ipynb | 2 +- 6 files changed, 51 insertions(+), 11 deletions(-) diff --git a/demo-notebooks/guided-demos/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/3_basic_interactive.ipynb index 73eff977a..edad5ff80 100644 --- a/demo-notebooks/guided-demos/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/3_basic_interactive.ipynb @@ -140,7 +140,9 @@ "#install additional libraries that will be required for model training\n", "runtime_env = {\"pip\": [\"transformers\", \"datasets\", \"evaluate\", \"pyarrow<7.0.0\", \"accelerate\"]}\n", "\n", - "ray.init(address=f'{ray_cluster_uri}', runtime_env=runtime_env)\n", + "# NOTE: This will work for in-cluster notebook servers (RHODS/ODH), but not for local machines\n", + "# To see how to connect from your laptop, go to demo-notebooks/additional-interactive/local_interactive.ipynb\n", + "ray.init(address=ray_cluster_uri, runtime_env=runtime_env)\n", "\n", "print(\"Ray cluster is up and running: \", ray.is_initialized())" ] diff --git a/demo-notebooks/guided-demos/4_gpt.ipynb b/demo-notebooks/guided-demos/4_gpt.ipynb index 919f8f0a8..94ddc9484 100644 --- a/demo-notebooks/guided-demos/4_gpt.ipynb +++ b/demo-notebooks/guided-demos/4_gpt.ipynb @@ -44,7 +44,7 @@ " min_memory=8,\n", " max_memory=8,\n", " num_gpus=1,\n", - " instascale=True,\n", + " instascale=True, #<---instascale enabled\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"],\n", "))" ] diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/gptfttest.yaml b/demo-notebooks/guided-demos/notebook-ex-outputs/gptfttest.yaml index 54799e10c..beac45ce3 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/gptfttest.yaml +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/gptfttest.yaml @@ -73,7 +73,15 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP - image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 imagePullPolicy: Always lifecycle: preStop: @@ -99,7 +107,8 @@ spec: cpu: 2 memory: 8G nvidia.com/gpu: 0 - rayVersion: 1.12.0 + imagePullSecrets: [] + rayVersion: 2.1.0 workerGroupSpecs: - groupName: small-group-gptfttest maxReplicas: 2 @@ -130,7 +139,15 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP - image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 lifecycle: preStop: exec: @@ -148,6 +165,7 @@ spec: cpu: 2 memory: 8G nvidia.com/gpu: 1 + imagePullSecrets: [] initContainers: - command: - sh diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/interactivetest.yaml b/demo-notebooks/guided-demos/notebook-ex-outputs/interactivetest.yaml index 5544dfe72..de06ee142 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/interactivetest.yaml +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/interactivetest.yaml @@ -73,7 +73,15 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP - image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 imagePullPolicy: Always lifecycle: preStop: @@ -99,7 +107,8 @@ spec: cpu: 2 memory: 8G nvidia.com/gpu: 0 - rayVersion: 1.12.0 + imagePullSecrets: [] + rayVersion: 2.1.0 workerGroupSpecs: - groupName: small-group-interactivetest maxReplicas: 2 @@ -130,7 +139,15 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP - image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103 + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 lifecycle: preStop: exec: @@ -148,6 +165,7 @@ spec: cpu: 2 memory: 8G nvidia.com/gpu: 1 + imagePullSecrets: [] initContainers: - command: - sh diff --git a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb index 73eff977a..6fb81afec 100644 --- a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb @@ -61,7 +61,7 @@ " min_memory=8,\n", " max_memory=8,\n", " num_gpus=1,\n", - " instascale=True,\n", + " instascale=True, #<---instascale enabled\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"]\n", " \n", "))" @@ -140,7 +140,9 @@ "#install additional libraries that will be required for model training\n", "runtime_env = {\"pip\": [\"transformers\", \"datasets\", \"evaluate\", \"pyarrow<7.0.0\", \"accelerate\"]}\n", "\n", - "ray.init(address=f'{ray_cluster_uri}', runtime_env=runtime_env)\n", + "# NOTE: This will work for in-cluster notebook servers (RHODS/ODH), but not for local machines\n", + "# To see how to connect from your laptop, go to demo-notebooks/additional-interactive/local_interactive.ipynb\n", + "ray.init(address=ray_cluster_uri, runtime_env=runtime_env)\n", "\n", "print(\"Ray cluster is up and running: \", ray.is_initialized())" ] diff --git a/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb b/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb index 919f8f0a8..94ddc9484 100644 --- a/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb @@ -44,7 +44,7 @@ " min_memory=8,\n", " max_memory=8,\n", " num_gpus=1,\n", - " instascale=True,\n", + " instascale=True, #<---instascale enabled\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"],\n", "))" ] From b70562c65d59bdf788c089085ad51af100252a36 Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Wed, 9 Aug 2023 20:08:37 -0400 Subject: [PATCH 5/8] Updated final two notebooks --- .../guided-demos/3_basic_interactive.ipynb | 2 +- demo-notebooks/guided-demos/4_gpt.ipynb | 36 + .../3_basic_interactive.ipynb | 1231 ++++++++++++++++- .../notebook-ex-outputs/4_gpt.ipynb | 231 +++- .../guided-demos/preview_nbs/4_gpt.ipynb | 36 + 5 files changed, 1498 insertions(+), 38 deletions(-) diff --git a/demo-notebooks/guided-demos/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/3_basic_interactive.ipynb index edad5ff80..6fb81afec 100644 --- a/demo-notebooks/guided-demos/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/3_basic_interactive.ipynb @@ -61,7 +61,7 @@ " min_memory=8,\n", " max_memory=8,\n", " num_gpus=1,\n", - " instascale=True,\n", + " instascale=True, #<---instascale enabled\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"]\n", " \n", "))" diff --git a/demo-notebooks/guided-demos/4_gpt.ipynb b/demo-notebooks/guided-demos/4_gpt.ipynb index 94ddc9484..4745751fa 100644 --- a/demo-notebooks/guided-demos/4_gpt.ipynb +++ b/demo-notebooks/guided-demos/4_gpt.ipynb @@ -135,6 +135,14 @@ "job.status()" ] }, + { + "cell_type": "markdown", + "id": "80bc1961", + "metadata": {}, + "source": [ + "Retrieve raw log output at anytime with:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -145,6 +153,34 @@ "job.logs()" ] }, + { + "cell_type": "markdown", + "id": "876b96b5", + "metadata": {}, + "source": [ + "View live updates for status, logs, and other information with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58f8a2e8", + "metadata": {}, + "outputs": [], + "source": [ + "cluster.cluster_dashboard_uri()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7c13eab", + "metadata": {}, + "outputs": [], + "source": [ + "job.status()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb index 73eff977a..5e719a592 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", "metadata": {}, "outputs": [], @@ -46,10 +46,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "0f4bc870-091f-4e11-9642-cba145710159", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Written to: interactivetest.yaml\n" + ] + } + ], "source": [ "# Create and configure our cluster object (and appwrapper)\n", "cluster = Cluster(ClusterConfiguration(\n", @@ -61,7 +69,7 @@ " min_memory=8,\n", " max_memory=8,\n", " num_gpus=1,\n", - " instascale=True,\n", + " instascale=True, #<---instascale enabled\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"]\n", " \n", "))" @@ -69,10 +77,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Waiting for requested resources to be set up...\n", + "Requested cluster up and running!\n" + ] + } + ], "source": [ "# Bring up the cluster\n", "cluster.up()\n", @@ -81,10 +98,68 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "df71c1ed", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
                      🚀 CodeFlare Cluster Details 🚀                     \n",
+       "                                                                          \n",
+       " ╭──────────────────────────────────────────────────────────────────────╮ \n",
+       " │   Name                                                               │ \n",
+       " │   interactivetest                                        Active ✅   │ \n",
+       " │                                                                      │ \n",
+       " │   URI: ray://interactivetest-head-svc.default.svc:10001              │ \n",
+       " │                                                                      │ \n",
+       " │   Dashboard🔗                                                        │ \n",
+       " │                                                                      │ \n",
+       " │                       Cluster Resources                              │ \n",
+       " │   ╭── Workers ──╮  ╭───────── Worker specs(each) ─────────╮          │ \n",
+       " │   │  # Workers  │  │  Memory      CPU         GPU         │          │ \n",
+       " │   │             │  │                                      │          │ \n",
+       " │   │  2          │  │  8~8         2           1           │          │ \n",
+       " │   │             │  │                                      │          │ \n",
+       " │   ╰─────────────╯  ╰──────────────────────────────────────╯          │ \n",
+       " ╰──────────────────────────────────────────────────────────────────────╯ \n",
+       "
\n" + ], + "text/plain": [ + "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", + "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", + " ╭──────────────────────────────────────────────────────────────────────╮ \n", + " │ \u001b[1;37;42mName\u001b[0m │ \n", + " │ \u001b[1;4minteractivetest\u001b[0m Active ✅ │ \n", + " │ │ \n", + " │ \u001b[1mURI:\u001b[0m ray://interactivetest-head-svc.default.svc:10001 │ \n", + " │ │ \n", + " │ \u001b]8;id=970589;http://ray-dashboard-interactivetest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", + " │ │ \n", + " │ \u001b[3m Cluster Resources \u001b[0m │ \n", + " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", + " │ │ \u001b[1m \u001b[0m\u001b[1m# Workers\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m8~8 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n", + " ╰──────────────────────────────────────────────────────────────────────╯ \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "RayCluster(name='interactivetest', status=, workers=2, worker_mem_min=8, worker_mem_max=8, worker_cpu=2, worker_gpu=1, namespace='default', dashboard='http://ray-dashboard-interactivetest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cluster.details()" ] @@ -101,10 +176,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "c1719bca", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "http://ray-dashboard-interactivetest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\n", + "ray://interactivetest-head-svc.default.svc:10001\n" + ] + } + ], "source": [ "ray_dashboard_uri = cluster.cluster_dashboard_uri()\n", "ray_cluster_uri = cluster.cluster_uri()\n", @@ -122,10 +206,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "300146dc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ray cluster is up and running: True\n" + ] + } + ], "source": [ "#before proceeding make sure the cluster exists and the uri is not empty\n", "assert ray_cluster_uri, \"Ray cluster needs to be started and set before proceeding\"\n", @@ -140,7 +232,9 @@ "#install additional libraries that will be required for model training\n", "runtime_env = {\"pip\": [\"transformers\", \"datasets\", \"evaluate\", \"pyarrow<7.0.0\", \"accelerate\"]}\n", "\n", - "ray.init(address=f'{ray_cluster_uri}', runtime_env=runtime_env)\n", + "# NOTE: This will work for in-cluster notebook servers (RHODS/ODH), but not for local machines\n", + "# To see how to connect from your laptop, go to demo-notebooks/additional-interactive/local_interactive.ipynb\n", + "ray.init(address=ray_cluster_uri, runtime_env=runtime_env)\n", "\n", "print(\"Ray cluster is up and running: \", ray.is_initialized())" ] @@ -155,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "1b36e0d9", "metadata": {}, "outputs": [], @@ -234,10 +328,1111 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "5901d958", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading builder script: 100%|██████████| 4.31k/4.31k [00:00<00:00, 20.9MB/s]\n", + "Downloading metadata: 100%|██████████| 2.17k/2.17k [00:00<00:00, 14.1MB/s]\n", + "Downloading readme: 100%|██████████| 7.59k/7.59k [00:00<00:00, 22.9MB/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Downloading and preparing dataset imdb/plain_text to /home/ray/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading data: 0%| | 0.00/84.1M [00:00 AllToAllOperator[RandomizeBlockOrder]\n", + "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,957\tINFO streaming_executor.py:92 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", + "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,958\tINFO streaming_executor.py:94 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", + "\u001b[2m\u001b[36m(HuggingFaceTrainer pid=196, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,969\tINFO streaming_executor.py:149 -- Shutting down .\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:51:58,912\tINFO config.py:86 -- Setting up process group for: env:// [rank=0, world_size=2]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m == Status ==\n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Current time: 2023-08-09 14:52:01 (running for 00:00:10.18)\n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Using FIFO scheduling algorithm.\n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Logical resource usage: 1.0/6 CPUs, 2.0/2 GPUs\n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2023-08-09_14-51-51\n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | Trial name | status | loc |\n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m |--------------------------------+----------+-----------------|\n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m | HuggingFaceTrainer_f2621_00000 | RUNNING | 10.130.4.19:196 |\n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m +--------------------------------+----------+-----------------+\n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n", + "\u001b[2m\u001b[36m(train_fn pid=425)\u001b[0m \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,262\tINFO streaming_executor.py:91 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[RandomizeBlockOrder]\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,262\tINFO streaming_executor.py:92 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,262\tINFO streaming_executor.py:94 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=235, ip=10.130.4.19)\u001b[0m 2023-08-09 14:52:01,274\tINFO streaming_executor.py:149 -- Shutting down .\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,252\tINFO streaming_executor.py:91 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[RandomizeBlockOrder]\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,252\tINFO streaming_executor.py:92 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=False, actor_locality_enabled=True, verbose_progress=False)\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,252\tINFO streaming_executor.py:94 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=232, ip=10.129.4.19)\u001b[0m 2023-08-09 14:52:01,263\tINFO streaming_executor.py:149 -- Shutting down .\n", + "Downloading (…)lve/main/config.json: 100%|██████████| 483/483 [00:00<00:00, 151kB/s]\n", + "Downloading (…)lve/main/config.json: 100%|██████████| 483/483 [00:00<00:00, 146kB/s]\n", + "Downloading model.safetensors: 0%| | 0.00/268M [00:00 🚀 CodeFlare Cluster Details 🚀 \n", + " \n", + " ╭────────────────────────────────────────────────────────────────╮ \n", + " │ Name │ \n", + " │ gptfttest Active ✅ │ \n", + " │ │ \n", + " │ URI: ray://gptfttest-head-svc.default.svc:10001 │ \n", + " │ │ \n", + " │ Dashboard🔗 │ \n", + " │ │ \n", + " │ Cluster Resources │ \n", + " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", + " │ │ # Workers │ │ Memory CPU GPU │ │ \n", + " │ │ │ │ │ │ \n", + " │ │ 2 │ │ 8~8 2 1 │ │ \n", + " │ │ │ │ │ │ \n", + " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n", + " ╰────────────────────────────────────────────────────────────────╯ \n", + "\n" + ], + "text/plain": [ + "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", + "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", + " ╭────────────────────────────────────────────────────────────────╮ \n", + " │ \u001b[1;37;42mName\u001b[0m │ \n", + " │ \u001b[1;4mgptfttest\u001b[0m Active ✅ │ \n", + " │ │ \n", + " │ \u001b[1mURI:\u001b[0m ray://gptfttest-head-svc.default.svc:10001 │ \n", + " │ │ \n", + " │ \u001b]8;id=476902;http://ray-dashboard-gptfttest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", + " │ │ \n", + " │ \u001b[3m Cluster Resources \u001b[0m │ \n", + " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", + " │ │ \u001b[1m \u001b[0m\u001b[1m# Workers\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m8~8 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", + " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n", + " ╰────────────────────────────────────────────────────────────────╯ \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "RayCluster(name='gptfttest', status=, workers=2, worker_mem_min=8, worker_mem_max=8, worker_cpu=2, worker_gpu=1, namespace='default', dashboard='http://ray-dashboard-gptfttest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cluster.details()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "9ed5bd75-4230-4c7c-a9e2-0f247890e62a", "metadata": {}, "outputs": [], @@ -91,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "611d203a-35aa-4357-a748-1d01b022fcdb", "metadata": {}, "outputs": [], @@ -111,10 +186,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "8ac7c34f-e227-44c2-a4b1-a57c853ac3a7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The Ray scheduler does not support port mapping.\n" + ] + } + ], "source": [ "jobdef = DDPJobDefinition(\n", " name=\"gpttest\",\n", @@ -127,27 +210,137 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "1680d287-de46-45f8-b95a-02ba3c83912c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "AppStatus:\n", + " msg: !!python/object/apply:ray.dashboard.modules.job.common.JobStatus\n", + " - RUNNING\n", + " num_restarts: -1\n", + " roles:\n", + " - replicas:\n", + " - hostname: \n", + " id: 0\n", + " role: ray\n", + " state: !!python/object/apply:torchx.specs.api.AppState\n", + " - 3\n", + " structured_error_msg: \n", + " role: ray\n", + " state: RUNNING (3)\n", + " structured_error_msg: \n", + " ui_url: null" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "job.status()" ] }, + { + "cell_type": "markdown", + "id": "8222e884-7091-4b74-bdcf-565f25abed11", + "metadata": {}, + "source": [ + "Retrieve raw log output at anytime with:" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "d25d6198-9941-47e8-857f-9811830cc854", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'[RayActor(name=\\'gpt_og\\', command=[\\'bash\\', \\'-c\\', \"torchrun --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id \\'gpttest-tg69zmd1xf19l\\' --nnodes 2 --nproc_per_node 1 --node_rank \\'0\\' --tee 3 --role \\'\\' gpt_og.py --model_name_or_path gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 2 --per_device_eval_batch_size 2 --do_train --do_eval --output_dir /tmp/test-clm --overwrite_output_dir\"], env={\\'TORCHX_TRACKING_EXPERIMENT_NAME\\': \\'default-experiment\\', \\'LOGLEVEL\\': \\'WARNING\\', \\'TORCHX_JOB_ID\\': \\'ray://torchx/gpttest-tg69zmd1xf19l\\'}, num_cpus=2, num_gpus=1, min_replicas=2), RayActor(name=\\'gpt_og\\', command=[\\'bash\\', \\'-c\\', \"torchrun --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id \\'gpttest-tg69zmd1xf19l\\' --nnodes 2 --nproc_per_node 1 --node_rank \\'1\\' --tee 3 --role \\'\\' gpt_og.py --model_name_or_path gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 2 --per_device_eval_batch_size 2 --do_train --do_eval --output_dir /tmp/test-clm --overwrite_output_dir\"], env={\\'TORCHX_TRACKING_EXPERIMENT_NAME\\': \\'default-experiment\\', \\'LOGLEVEL\\': \\'WARNING\\', \\'TORCHX_JOB_ID\\': \\'ray://torchx/gpttest-tg69zmd1xf19l\\'}, num_cpus=2, num_gpus=1, min_replicas=2)]\\n2023-08-09 15:39:16,608\\tINFO worker.py:1334 -- Using address 10.129.6.9:6379 set in the environment variable RAY_ADDRESS\\n2023-08-09 15:39:16,608\\tINFO worker.py:1452 -- Connecting to existing Ray cluster at address: 10.129.6.9:6379...\\n2023-08-09 15:39:16,667\\tINFO worker.py:1627 -- Connected to Ray cluster. View the dashboard at \\x1b[1m\\x1b[32mhttp://10.129.6.9:8265 \\x1b[39m\\x1b[22m\\nWaiting for minimum placement group to start.\\nSuccessfully created placement groups\\nrdzv_endpoint set to 10.128.6.19 for actor b557571d0e8b5ce27f34a7e802000000\\nrdzv_endpoint set to 10.128.6.19 for actor 8826a9b5f730773c7c99241102000000\\nSuccessfully placed command actors\\nEntering main loop, start executing the script on worker nodes\\nrunning ray.wait on [ObjectRef(e082c90ab8422b00b557571d0e8b5ce27f34a7e80200000001000000), ObjectRef(ce868e48e2fa9a948826a9b5f730773c7c9924110200000001000000)]\\nrunning ray.wait on [ObjectRef(ce868e48e2fa9a948826a9b5f730773c7c9924110200000001000000), ObjectRef(f81ec6ff838b16dbb557571d0e8b5ce27f34a7e80200000001000000)]\\nrunning ray.wait on [ObjectRef(f81ec6ff838b16dbb557571d0e8b5ce27f34a7e80200000001000000), ObjectRef(32b0eec39cfa87ac8826a9b5f730773c7c9924110200000001000000)]\\n'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "job.logs()" ] }, + { + "cell_type": "markdown", + "id": "92b0a8ec-c2ee-43ce-abd7-368c769abd57", + "metadata": {}, + "source": [ + "View live updates for status, logs, and other information with:" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, + "id": "36bfa974-d923-44d2-b079-4c42b66152f7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'http://ray-dashboard-gptfttest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cluster.cluster_dashboard_uri()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "d7b27d0a-b0e3-421c-a3f5-d9db97890f7c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AppStatus:\n", + " msg: !!python/object/apply:ray.dashboard.modules.job.common.JobStatus\n", + " - SUCCEEDED\n", + " num_restarts: -1\n", + " roles:\n", + " - replicas:\n", + " - hostname: \n", + " id: 0\n", + " role: ray\n", + " state: !!python/object/apply:torchx.specs.api.AppState\n", + " - 4\n", + " structured_error_msg: \n", + " role: ray\n", + " state: SUCCEEDED (4)\n", + " structured_error_msg: \n", + " ui_url: null" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "job.status()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, "id": "beb1a6b9-d9b3-49b7-b036-09f1d3569b59", "metadata": {}, "outputs": [], @@ -182,7 +375,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.8.17" } }, "nbformat": 4, diff --git a/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb b/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb index 94ddc9484..dd3a0cbf8 100644 --- a/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb @@ -135,6 +135,14 @@ "job.status()" ] }, + { + "cell_type": "markdown", + "id": "d310e6a8", + "metadata": {}, + "source": [ + "Retrieve raw log output at anytime with:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -145,6 +153,34 @@ "job.logs()" ] }, + { + "cell_type": "markdown", + "id": "cb27d5e0", + "metadata": {}, + "source": [ + "View live updates for status, logs, and other information with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2c51953", + "metadata": {}, + "outputs": [], + "source": [ + "cluster.cluster_dashboard_uri()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b14bbde1", + "metadata": {}, + "outputs": [], + "source": [ + "job.status()" + ] + }, { "cell_type": "code", "execution_count": null, From fa3d0257b63de1dcba55f481540feebad4eb4af1 Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Wed, 9 Aug 2023 20:36:34 -0400 Subject: [PATCH 6/8] Updated auth block --- demo-notebooks/guided-demos/0_basic_ray.ipynb | 4 +++- demo-notebooks/guided-demos/1_basic_instascale.ipynb | 4 +++- demo-notebooks/guided-demos/2_basic_jobs.ipynb | 4 +++- demo-notebooks/guided-demos/3_basic_interactive.ipynb | 4 +++- demo-notebooks/guided-demos/4_gpt.ipynb | 4 +++- .../guided-demos/notebook-ex-outputs/0_basic_ray.ipynb | 4 +++- .../guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb | 4 +++- .../guided-demos/notebook-ex-outputs/2_basic_jobs.ipynb | 4 +++- .../notebook-ex-outputs/3_basic_interactive.ipynb | 4 +++- demo-notebooks/guided-demos/notebook-ex-outputs/4_gpt.ipynb | 4 +++- demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb | 4 +++- .../guided-demos/preview_nbs/1_basic_instascale.ipynb | 4 +++- demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb | 4 +++- .../guided-demos/preview_nbs/3_basic_interactive.ipynb | 4 +++- demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb | 4 +++- 15 files changed, 45 insertions(+), 15 deletions(-) diff --git a/demo-notebooks/guided-demos/0_basic_ray.ipynb b/demo-notebooks/guided-demos/0_basic_ray.ipynb index b3040676f..dfe067331 100644 --- a/demo-notebooks/guided-demos/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/0_basic_ray.ipynb @@ -30,7 +30,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/1_basic_instascale.ipynb index 8f8a6ed73..d0faf5b9b 100644 --- a/demo-notebooks/guided-demos/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/1_basic_instascale.ipynb @@ -27,7 +27,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/2_basic_jobs.ipynb b/demo-notebooks/guided-demos/2_basic_jobs.ipynb index e86b28a9f..da74f9e54 100644 --- a/demo-notebooks/guided-demos/2_basic_jobs.ipynb +++ b/demo-notebooks/guided-demos/2_basic_jobs.ipynb @@ -27,7 +27,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/3_basic_interactive.ipynb index 6fb81afec..e824d340c 100644 --- a/demo-notebooks/guided-demos/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/3_basic_interactive.ipynb @@ -27,7 +27,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/4_gpt.ipynb b/demo-notebooks/guided-demos/4_gpt.ipynb index 4745751fa..0fdcec965 100644 --- a/demo-notebooks/guided-demos/4_gpt.ipynb +++ b/demo-notebooks/guided-demos/4_gpt.ipynb @@ -19,7 +19,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb index 9353daf1d..be4e8a5e2 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb @@ -30,7 +30,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb index e289b53f2..97a2b3822 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb @@ -27,7 +27,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_jobs.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_jobs.ipynb index 3c33ed204..9c6122670 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_jobs.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_jobs.ipynb @@ -27,7 +27,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb index 5e719a592..ea4da1739 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb @@ -27,7 +27,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/4_gpt.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/4_gpt.ipynb index 8976cedff..704f94f2b 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/4_gpt.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/4_gpt.ipynb @@ -19,7 +19,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb index b3040676f..dfe067331 100644 --- a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb @@ -30,7 +30,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb index 8f8a6ed73..d0faf5b9b 100644 --- a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb @@ -27,7 +27,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb b/demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb index bb58f8078..4ac4f00af 100644 --- a/demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb @@ -27,7 +27,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb index 6fb81afec..e824d340c 100644 --- a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb @@ -27,7 +27,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", diff --git a/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb b/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb index dd3a0cbf8..455bb9aa8 100644 --- a/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/4_gpt.ipynb @@ -19,7 +19,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", From 4ed3df9fc8e63cfdd8f163811d4c378534b40422 Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Wed, 9 Aug 2023 20:41:39 -0400 Subject: [PATCH 7/8] Update outdated interactive --- .../additional-interactive/hf_interactive.ipynb | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/demo-notebooks/additional-interactive/hf_interactive.ipynb b/demo-notebooks/additional-interactive/hf_interactive.ipynb index e80df90db..e5282cd39 100644 --- a/demo-notebooks/additional-interactive/hf_interactive.ipynb +++ b/demo-notebooks/additional-interactive/hf_interactive.ipynb @@ -53,11 +53,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Create authentication object for oc user permissions and login\n", + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "auth = TokenAuthentication(\n", " token = \"XXXX\",\n", " server = \"XXXX\",\n", - " skip_tls = True\n", + " skip_tls = False\n", ")\n", "auth.login()" ] @@ -88,13 +90,12 @@ "# Create our cluster and submit appwrapper\n", "cluster = Cluster(ClusterConfiguration(name='hfgputest', \n", " namespace=\"default\",\n", - " min_worker=1, \n", - " max_worker=1, \n", + " num_workers=1,\n", " min_cpus=8, \n", " max_cpus=8, \n", " min_memory=16, \n", " max_memory=16, \n", - " gpu=4,\n", + " num_gpus=4,\n", " image=\"quay.io/project-codeflare/ray:2.5.0-py38-cu116\",\n", " instascale=True, machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))" ] @@ -319,7 +320,7 @@ "#install additional libraries that will be required for this training\n", "runtime_env = {\"pip\": [\"transformers\", \"datasets\", \"evaluate\", \"pyarrow<7.0.0\", \"accelerate\"]}\n", "\n", - "ray.init(address=f'{ray_cluster_uri}', runtime_env=runtime_env)\n", + "ray.init(address=ray_cluster_uri, runtime_env=runtime_env)\n", "\n", "print(\"Ray cluster is up and running: \", ray.is_initialized())" ] From fe87f4b513a2e71e2a81cf3fb0aa51f2fdd27b9a Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Fri, 11 Aug 2023 10:07:20 -0400 Subject: [PATCH 8/8] Changed to generic additional demos --- .../hf_interactive.ipynb | 2 ++ .../local_interactive.ipynb | 0 demo-notebooks/guided-demos/3_basic_interactive.ipynb | 2 +- .../guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb | 2 +- .../guided-demos/preview_nbs/3_basic_interactive.ipynb | 2 +- 5 files changed, 5 insertions(+), 3 deletions(-) rename demo-notebooks/{additional-interactive => additional-demos}/hf_interactive.ipynb (99%) rename demo-notebooks/{additional-interactive => additional-demos}/local_interactive.ipynb (100%) diff --git a/demo-notebooks/additional-interactive/hf_interactive.ipynb b/demo-notebooks/additional-demos/hf_interactive.ipynb similarity index 99% rename from demo-notebooks/additional-interactive/hf_interactive.ipynb rename to demo-notebooks/additional-demos/hf_interactive.ipynb index e5282cd39..32e7be41e 100644 --- a/demo-notebooks/additional-interactive/hf_interactive.ipynb +++ b/demo-notebooks/additional-demos/hf_interactive.ipynb @@ -320,6 +320,8 @@ "#install additional libraries that will be required for this training\n", "runtime_env = {\"pip\": [\"transformers\", \"datasets\", \"evaluate\", \"pyarrow<7.0.0\", \"accelerate\"]}\n", "\n", + "# NOTE: This will work for in-cluster notebook servers (RHODS/ODH), but not for local machines\n", + "# To see how to connect from your laptop, go to demo-notebooks/additional-demos/local_interactive.ipynb\n", "ray.init(address=ray_cluster_uri, runtime_env=runtime_env)\n", "\n", "print(\"Ray cluster is up and running: \", ray.is_initialized())" diff --git a/demo-notebooks/additional-interactive/local_interactive.ipynb b/demo-notebooks/additional-demos/local_interactive.ipynb similarity index 100% rename from demo-notebooks/additional-interactive/local_interactive.ipynb rename to demo-notebooks/additional-demos/local_interactive.ipynb diff --git a/demo-notebooks/guided-demos/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/3_basic_interactive.ipynb index e824d340c..c8b2b1a0e 100644 --- a/demo-notebooks/guided-demos/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/3_basic_interactive.ipynb @@ -143,7 +143,7 @@ "runtime_env = {\"pip\": [\"transformers\", \"datasets\", \"evaluate\", \"pyarrow<7.0.0\", \"accelerate\"]}\n", "\n", "# NOTE: This will work for in-cluster notebook servers (RHODS/ODH), but not for local machines\n", - "# To see how to connect from your laptop, go to demo-notebooks/additional-interactive/local_interactive.ipynb\n", + "# To see how to connect from your laptop, go to demo-notebooks/additional-demos/local_interactive.ipynb\n", "ray.init(address=ray_cluster_uri, runtime_env=runtime_env)\n", "\n", "print(\"Ray cluster is up and running: \", ray.is_initialized())" diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb index ea4da1739..d6799161d 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb @@ -235,7 +235,7 @@ "runtime_env = {\"pip\": [\"transformers\", \"datasets\", \"evaluate\", \"pyarrow<7.0.0\", \"accelerate\"]}\n", "\n", "# NOTE: This will work for in-cluster notebook servers (RHODS/ODH), but not for local machines\n", - "# To see how to connect from your laptop, go to demo-notebooks/additional-interactive/local_interactive.ipynb\n", + "# To see how to connect from your laptop, go to demo-notebooks/additional-demos/local_interactive.ipynb\n", "ray.init(address=ray_cluster_uri, runtime_env=runtime_env)\n", "\n", "print(\"Ray cluster is up and running: \", ray.is_initialized())" diff --git a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb index e824d340c..c8b2b1a0e 100644 --- a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb @@ -143,7 +143,7 @@ "runtime_env = {\"pip\": [\"transformers\", \"datasets\", \"evaluate\", \"pyarrow<7.0.0\", \"accelerate\"]}\n", "\n", "# NOTE: This will work for in-cluster notebook servers (RHODS/ODH), but not for local machines\n", - "# To see how to connect from your laptop, go to demo-notebooks/additional-interactive/local_interactive.ipynb\n", + "# To see how to connect from your laptop, go to demo-notebooks/additional-demos/local_interactive.ipynb\n", "ray.init(address=ray_cluster_uri, runtime_env=runtime_env)\n", "\n", "print(\"Ray cluster is up and running: \", ray.is_initialized())"