diff --git a/custom-nb-image/Dockerfile b/custom-nb-image/Dockerfile
new file mode 100644
index 000000000..fdaeb96d1
--- /dev/null
+++ b/custom-nb-image/Dockerfile
@@ -0,0 +1,33 @@
+# Copyright 2022 IBM, Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM quay.io/thoth-station/s2i-minimal-py38-notebook:latest
+
+# Install torch (v1.12.1), ray (v2.1.0) and the other pinned dependencies from requirements.txt
+
+COPY requirements.txt requirements.txt
+
+RUN pip install -r requirements.txt
+
+RUN pip uninstall pickle5 -y
+
+# Pull notebooks in
+ADD codeflare/ /home/codeflare
+
+# Install the codeflare-sdk and other libraries
+RUN pip install codeflare-sdk==0.2.1 \
+ datasets==2.6.1 \
+ transformers==4.23.1 \
+ evaluate==0.3.0 \
+ git+https://github.com/MichaelClifford/torchx.git@ray2-patch
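Note: the ImageStream that follows imports quay.io/project-codeflare/notebook:latest, so an image built from this Dockerfile needs to be pushed to that repository for the import to succeed. A minimal sketch of the build-and-push step, assuming podman, a quay.io login, and a build context (custom-nb-image/) that contains the requirements.txt and codeflare/ directory referenced above -- none of which is part of this diff:

    # build the notebook image and push it to the registry the ImageStream points at
    podman build -t quay.io/project-codeflare/notebook:latest custom-nb-image/
    podman login quay.io
    podman push quay.io/project-codeflare/notebook:latest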
diff --git a/custom-nb-image/imagestream.yaml b/custom-nb-image/imagestream.yaml
new file mode 100644
index 000000000..10ded6b07
--- /dev/null
+++ b/custom-nb-image/imagestream.yaml
@@ -0,0 +1,39 @@
+# Copyright 2022 IBM, Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+kind: ImageStream
+apiVersion: image.openshift.io/v1
+metadata:
+  name: codeflare-notebook
+  labels:
+    opendatahub.io/notebook-image: 'true'
+  annotations:
+    opendatahub.io/notebook-image-name:
+      "CodeFlare Notebook"
+    opendatahub.io/notebook-image-desc: "Custom Jupyter notebook image with the CodeFlare SDK, Python 3.8, Ray 2.1.0 and PyTorch 1.12.1"
+spec:
+  lookupPolicy:
+    local: true
+  tags:
+    - annotations:
+        openshift.io/imported-from: quay.io/project-codeflare/notebook
+      name: latest
+      from:
+        kind: DockerImage
+        name: quay.io/project-codeflare/notebook:latest
+      name: "latest"
+      referencePolicy:
+        type: Source
+      importPolicy:
+        scheduled: true
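Note: for this image to appear as a selectable notebook image in Open Data Hub, the ImageStream (with its opendatahub.io/notebook-image: 'true' label) has to live in the namespace that the ODH dashboard watches for notebook images; which namespace that is depends on the installation and is an assumption here, not part of this diff. A minimal sketch with the oc CLI:

    # create the ImageStream and confirm the import (namespace is illustrative)
    oc apply -f custom-nb-image/imagestream.yaml -n opendatahub
    oc get imagestream codeflare-notebook -n opendatahub

With importPolicy.scheduled set to true, OpenShift periodically re-imports the latest tag from quay.io.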
diff --git a/custom-nb-image/requirements.txt b/custom-nb-image/requirements.txt
new file mode 100644
index 000000000..44938879e
--- /dev/null
+++ b/custom-nb-image/requirements.txt
@@ -0,0 +1,205 @@
+#
+# These requirements were autogenerated by pipenv
+# To regenerate from the project's Pipfile, run:
+#
+# pipenv lock --requirements
+#
+
+-i https://pypi.org/simple
+aiohttp-cors==0.7.0
+aiohttp==3.8.3
+aiorwlock==1.3.0
+aiosignal==1.2.0; python_version >= '3.6'
+anyio==3.6.1; python_full_version >= '3.6.2'
+argon2-cffi-bindings==21.2.0; python_version >= '3.6'
+argon2-cffi==21.3.0; python_version >= '3.6'
+asgiref==3.5.2; python_version >= '3.7'
+asttokens==2.0.8
+astunparse==1.6.3
+async-timeout==4.0.2; python_version >= '3.6'
+attrs==22.1.0; python_version >= '3.5'
+babel==2.10.3; python_version >= '3.6'
+backcall==0.2.0
+bcrypt==4.0.0; python_version >= '3.6'
+beautifulsoup4==4.11.1; python_version >= '3.6'
+black==22.8.0; python_full_version >= '3.6.2'
+bleach==5.0.1; python_version >= '3.7'
+blessed==1.19.1; python_version >= '2.7'
+boto3==1.17.11
+botocore==1.20.112; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
+cachetools==5.2.0; python_version ~= '3.7'
+certifi==2022.9.24; python_version >= '3.6'
+cffi==1.15.1
+charset-normalizer==2.1.1; python_version >= '3.6'
+click==8.0.4; python_version >= '3.6'
+cloudpickle==2.2.0; python_version >= '3.6'
+codeflare==0.1.2.dev0
+colorama==0.4.5; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
+colorful==0.5.4
+contourpy==1.0.5; python_version >= '3.7'
+cryptography==38.0.1; python_version >= '3.6'
+cycler==0.11.0; python_version >= '3.6'
+cython==0.29.32
+dask[array,dataframe]==2021.2.0
+dataclasses==0.6
+debugpy==1.6.3; python_version >= '3.7'
+decorator==5.1.1; python_version >= '3.5'
+defusedxml==0.7.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
+distlib==0.3.6
+entrypoints==0.4; python_version >= '3.6'
+executing==1.1.0
+fastapi==0.85.0
+fastjsonschema==2.16.2
+filelock==3.8.0; python_version >= '3.7'
+flatbuffers==22.9.24
+fonttools==4.37.3; python_version >= '3.7'
+frozenlist==1.3.1; python_version >= '3.7'
+fsspec==2022.8.2
+future==0.18.2; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
+gitdb==4.0.9; python_version >= '3.6'
+gitpython==3.1.27; python_version >= '3.7'
+google-api-core==2.10.1; python_version >= '3.6'
+google-auth==2.12.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
+googleapis-common-protos==1.56.4; python_version >= '3.7'
+gpustat==1.0.0
+graphviz==0.20.1; python_version >= '3.7'
+greenery==3.3.3
+grpcio==1.43.0; python_version >= '3.6'
+h11==0.14.0; python_version >= '3.7'
+hyperopt==0.2.5
+idna==3.4; python_version >= '3.5'
+importlib-metadata==4.12.0; python_version < '3.10'
+importlib-resources==5.9.0; python_version < '3.9'
+ipykernel==6.16.0; python_version >= '3.7'
+ipython-genutils==0.2.0
+ipython==8.5.0; python_version >= '3.8'
+ipywidgets==8.0.2
+iso8601==1.1.0; python_version < '4' and python_full_version >= '3.6.2'
+jedi==0.18.1; python_version >= '3.6'
+jinja2==3.1.2; python_version >= '3.7'
+jmespath==0.10.0; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
+joblib==1.2.0; python_version >= '3.7'
+json5==0.9.10
+jsonref==0.2
+jsonschema==4.16.0; python_version >= '3.7'
+jsonsubschema==0.0.6
+jupyter-client==7.3.5; python_version >= '3.7'
+jupyter-core==4.11.1; python_version >= '3.7'
+jupyter-server-mathjax==0.2.6; python_version >= '3.7'
+jupyter-server==1.19.1; python_version >= '3.7'
+jupyterlab-git==0.30.0
+jupyterlab-pygments==0.2.2; python_version >= '3.7'
+jupyterlab-s3-browser==0.10.1
+jupyterlab-server==2.15.2; python_version >= '3.7'
+jupyterlab-widgets==3.0.3; python_version >= '3.7'
+jupyterlab==3.4.7; python_version >= '3.7'
+kiwisolver==1.4.4; python_version >= '3.7'
+kopf==1.35.6
+kubernetes==24.2.0
+lale==0.6.19
+locket==1.0.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
+lxml==4.9.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
+markupsafe==2.1.1; python_version >= '3.7'
+matplotlib-inline==0.1.6; python_version >= '3.5'
+matplotlib==3.6.0
+memory-profiler==0.60.0
+mistune==2.0.4
+msgpack==1.0.4
+multidict==6.0.2; python_version >= '3.7'
+mypy-extensions==0.4.3
+nbclassic==0.4.3; python_version >= '3.7'
+nbclient==0.6.8; python_version >= '3.7'
+nbconvert==7.0.0; python_version >= '3.7'
+nbdime==3.1.1; python_version >= '3.6'
+nbformat==5.6.1; python_version >= '3.7'
+nest-asyncio==1.5.5; python_version >= '3.5'
+networkx==2.8.6; python_version >= '3.8'
+notebook-shim==0.1.0; python_version >= '3.7'
+notebook==6.4.12; python_version >= '3.7'
+numpy==1.23.3
+nvidia-ml-py==11.495.46
+oauthlib==3.2.1; python_version >= '3.6'
+opencensus-context==0.1.3
+opencensus==0.11.0
+openshift-client==1.0.18
+packaging==21.3
+pandas==1.5.0
+pandocfilters==1.5.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
+paramiko==2.11.0
+parso==0.8.3; python_version >= '3.6'
+partd==1.3.0
+pathspec==0.10.1; python_version >= '3.7'
+pexpect==4.8.0; sys_platform != 'win32'
+pickleshare==0.7.5
+pillow==9.2.0; python_version >= '3.7'
+pkgutil-resolve-name==1.3.10; python_version < '3.9'
+platformdirs==2.5.2; python_version >= '3.7'
+portion==2.3.0; python_version ~= '3.6'
+prometheus-client==0.13.1
+prompt-toolkit==3.0.31; python_full_version >= '3.6.2'
+protobuf==3.20.1; python_version >= '3.7'
+psutil==5.9.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
+ptyprocess==0.7.0; os_name != 'nt'
+pure-eval==0.2.2
+py-spy==0.3.14
+pyarrow==6.0.1
+pyasn1-modules==0.2.8
+pyasn1==0.4.8
+pycparser==2.21
+pydantic==1.10.2; python_version >= '3.7'
+pygments==2.13.0; python_version >= '3.6'
+pynacl==1.5.0; python_version >= '3.6'
+pyparsing==3.0.9; python_full_version >= '3.6.8'
+pyrsistent==0.18.1; python_version >= '3.7'
+python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
+python-dotenv==0.21.0
+python-json-logger==2.0.4; python_version >= '3.5'
+pytz==2022.2.1
+pyyaml==6.0; python_version >= '3.6'
+pyzmq==24.0.1; python_version >= '3.6'
+ray==2.1.0
+requests-oauthlib==1.3.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
+requests==2.28.1; python_version >= '3.7' and python_version < '4'
+rsa==4.9; python_version >= '3.6'
+s3fs==0.3.4
+s3transfer==0.3.7
+scikit-learn==1.1.1
+scipy==1.8.1
+send2trash==1.8.0
+singleton-decorator==1.0.0
+six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
+sklearn==0.0
+smart-open==6.2.0
+smmap==5.0.0; python_version >= '3.6'
+sniffio==1.3.0; python_version >= '3.7'
+sortedcontainers==2.4.0
+soupsieve==2.3.2.post1; python_version >= '3.6'
+stack-data==0.5.1
+starlette==0.20.4
+tabulate==0.8.10
+tensorboardx==2.5.1
+terminado==0.15.0; python_version >= '3.7'
+threadpoolctl==3.1.0; python_version >= '3.6'
+tinycss2==1.1.1; python_version >= '3.6'
+tomli==2.0.1; python_full_version < '3.11.0a7'
+toolz==0.12.0
+torch==1.12.1
+torchvision==0.13.1
+tornado==6.2; python_version >= '3.7'
+tqdm==4.64.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
+traitlets==5.4.0; python_version >= '3.7'
+tune-sklearn==0.4.3
+typing-extensions==4.3.0; python_version < '3.10'
+urllib3==1.26.12
+uvicorn==0.16.0
+virtualenv==20.16.5; python_version >= '3.6'
+wcwidth==0.2.5
+webencodings==0.5.1
+websocket-client==1.4.1; python_version >= '3.7'
+wheel==0.37.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
+widgetsnbextension==4.0.3; python_version >= '3.7'
+wrapt==1.14.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
+xgboost-ray==0.1.10
+xgboost==1.6.2
+yarl==1.8.1; python_version >= '3.7'
+zipp==3.8.1; python_version < '3.10'
diff --git a/demo-notebooks/batch-job/batch_mnist.ipynb b/demo-notebooks/batch-job/batch_mnist.ipynb
new file mode 100644
index 000000000..6512c9be1
--- /dev/null
+++ b/demo-notebooks/batch-job/batch_mnist.ipynb
@@ -0,0 +1,2025 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import pieces from codeflare-sdk\n",
+ "from codeflare_sdk.cluster.cluster import Cluster, ClusterConfiguration\n",
+ "from codeflare_sdk.cluster.auth import TokenAuthentication"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "614daa0c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create authentication object for oc user permissions\n",
+ "auth = TokenAuthentication(\n",
+ " token = \"XXXX\",\n",
+ " server = \"XXXX\",\n",
+ " skip_tls=True\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bc27f84c",
+ "metadata": {},
+ "source": [
+ "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding AppWrapper)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0f4bc870-091f-4e11-9642-cba145710159",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create our cluster and submit appwrapper\n",
+ "cluster = Cluster(ClusterConfiguration(name='mnisttest', min_worker=2, max_worker=2, min_cpus=8, max_cpus=8, min_memory=16, max_memory=16, gpu=4, instascale=True, machine_types=[\"m5.xlarge\", \"p3.8xlarge\"], auth=auth))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "12eef53c",
+ "metadata": {},
+ "source": [
+ "Next, we want to bring our cluster up, so we call the `up()` function below to submit our cluster AppWrapper yaml onto the MCAD queue, and begin the process of obtaining our resource cluster."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Bring up the cluster\n",
+ "cluster.up()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "657ebdfb",
+ "metadata": {},
+ "source": [
+ "Now, we want to check on the status of our resource cluster, until it is finally ready for use."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "3c1b4311-2e61-44c9-8225-87c2db11363d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
╭─────────────────────────╮\n",
+ "│ 🚀 List of CodeFlare │\n",
+ "│ clusters in queue🚀 │\n",
+ "│ +-----------+---------+ │\n",
+ "│ | Name | Status | │\n",
+ "│ +===========+=========+ │\n",
+ "│ | mnisttest | pending | │\n",
+ "│ | | | │\n",
+ "│ +-----------+---------+ │\n",
+ "╰─────────────────────────╯\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "╭─────────────────────────╮\n",
+ "│ \u001b[3m \u001b[0m\u001b[1;3m 🚀 List of CodeFlare\u001b[0m\u001b[3m \u001b[0m │\n",
+ "│ \u001b[3m \u001b[0m\u001b[1;3mclusters in queue🚀\u001b[0m\u001b[3m \u001b[0m │\n",
+ "│ +-----------+---------+ │\n",
+ "│ |\u001b[1m \u001b[0m\u001b[1mName \u001b[0m\u001b[1m \u001b[0m|\u001b[1m \u001b[0m\u001b[1mStatus \u001b[0m\u001b[1m \u001b[0m| │\n",
+ "│ +===========+=========+ │\n",
+ "│ |\u001b[36m \u001b[0m\u001b[36mmnisttest\u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35mpending\u001b[0m\u001b[35m \u001b[0m| │\n",
+ "│ |\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m| │\n",
+ "│ +-----------+---------+ │\n",
+ "╰─────────────────────────╯\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(False, )"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cluster.is_ready()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "7fd45bc5-03c0-4ae5-9ec5-dd1c30f1a084",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " 🚀 List of CodeFlare clusters 🚀 \n",
+ " \n",
+ " ╭────────────────────────────────────────────────────────────────╮ \n",
+ " │ Owner │ \n",
+ " │ mnisttest Active ✅ │ \n",
+ " │ │ \n",
+ " │ URI: ray://mnisttest-head-svc.default.svc:10001 │ \n",
+ " │ │ \n",
+ " │ Dashboard🔗 │ \n",
+ " │ │ \n",
+ " │ Cluster Resources │ \n",
+ " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n",
+ " │ │ Min Max │ │ Memory CPU GPU │ │ \n",
+ " │ │ │ │ │ │ \n",
+ " │ │ 2 2 │ │ 16G~16G 8 4 │ │ \n",
+ " │ │ │ │ │ │ \n",
+ " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n",
+ " ╰────────────────────────────────────────────────────────────────╯ \n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[3m \u001b[0m\u001b[1;3m 🚀 List of CodeFlare clusters 🚀\u001b[0m\u001b[3m \u001b[0m\n",
+ "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n",
+ " ╭────────────────────────────────────────────────────────────────╮ \n",
+ " │ \u001b[1;37;42mOwner\u001b[0m │ \n",
+ " │ \u001b[1;4mmnisttest\u001b[0m Active ✅ │ \n",
+ " │ │ \n",
+ " │ \u001b[1mURI:\u001b[0m ray://mnisttest-head-svc.default.svc:10001 │ \n",
+ " │ │ \n",
+ " │ \u001b]8;id=309861;ray-dashboard-mnisttest-default.apps.prepfullinstall.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n",
+ " │ │ \n",
+ " │ \u001b[3m Cluster Resources \u001b[0m │ \n",
+ " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n",
+ " │ │ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n",
+ " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n",
+ " │ │ \u001b[36m \u001b[0m\u001b[36m2 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m16G~16G \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m8 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m4 \u001b[0m\u001b[35m \u001b[0m │ │ \n",
+ " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n",
+ " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n",
+ " ╰────────────────────────────────────────────────────────────────╯ \n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cluster.status()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "87d2c9b3",
+ "metadata": {},
+ "source": [
+ "Now that our resource cluster is ready, we can directly submit our batch job (model training on two workers with four gpus each) to the cluster via torchx."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "3cc6183a-8f6e-4347-af91-d088ed422544",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "environemnt before exec ddp from torchx {'LOGLEVEL': 'DEBUG', 'TORCH_DISTRIBUTED_DEBUG': 'DETAIL'}\n",
+ "\u001b[34mtorchx\u001b[0m \u001b[2m2022-11-04 15:04:31 INFO \u001b[0m Checking for changes in workspace `file:///opt/app-root/src/codeflare/notebooks/jobs`...\n",
+ "\u001b[34mtorchx\u001b[0m \u001b[2m2022-11-04 15:04:31 INFO \u001b[0m To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n",
+ "\u001b[34mtorchx\u001b[0m \u001b[2m2022-11-04 15:04:31 INFO \u001b[0m Built new image `/tmp/torchx_workspace3c_d437b` based on original image `ghcr.io/pytorch/torchx:0.3.0dev0` and changes in workspace `file:///opt/app-root/src/codeflare/notebooks/jobs` for role[0]=mnist.\n",
+ "\u001b[34mtorchx\u001b[0m \u001b[2m2022-11-04 15:04:31 WARNING \u001b[0m The Ray scheduler does not support port mapping.\n",
+ "\u001b[34mtorchx\u001b[0m \u001b[2m2022-11-04 15:04:31 INFO \u001b[0m Uploading package gcs://_ray_pkg_ce2c3e935774455d.zip.\n",
+ "\u001b[34mtorchx\u001b[0m \u001b[2m2022-11-04 15:04:31 INFO \u001b[0m Creating a file package for local directory '/tmp/torchx_workspace3c_d437b'.\n",
+ "ray://torchx/mnisttest-head-svc.default.svc:8265-mnist-jlm13hx5g53mk\n",
+ "\u001b[34mtorchx\u001b[0m \u001b[2m2022-11-04 15:04:31 INFO \u001b[0m Launched app: ray://torchx/mnisttest-head-svc.default.svc:8265-mnist-jlm13hx5g53mk\n",
+ "\u001b[34mtorchx\u001b[0m \u001b[2m2022-11-04 15:04:31 INFO \u001b[0m AppStatus:\n",
+ " msg: PENDING\n",
+ " num_restarts: -1\n",
+ " roles:\n",
+ " - replicas:\n",
+ " - hostname: \n",
+ " id: 0\n",
+ " role: ray\n",
+ " state: !!python/object/apply:torchx.specs.api.AppState\n",
+ " - 2\n",
+ " structured_error_msg: \n",
+ " role: ray\n",
+ " state: PENDING (2)\n",
+ " structured_error_msg: \n",
+ " ui_url: null\n",
+ "\n",
+ "\u001b[34mtorchx\u001b[0m \u001b[2m2022-11-04 15:04:31 INFO \u001b[0m Job URL: None\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "! torchx run -s ray -cfg dashboard_address=mnisttest-head-svc.default.svc:8265,requirements=requirements.txt dist.ddp -j 2x4 --gpu 4 --script mnist.py"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ff065051",
+ "metadata": {},
+ "source": [
+ "Now we can go ahead and look at the status and logs of our batch job."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "ced6ccd6-a17e-413a-a0e4-65004fc35463",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[37mJob submission server address\u001b[39m: \u001b[1mhttp://mnisttest-head-svc.default.svc:8265\u001b[22m\n",
+ "{'mnist-jlm13hx5g53mk': JobInfo(status='SUCCEEDED', entrypoint='python3 ray_driver.py', message='Job finished successfully.', error_type=None, start_time=1667574271415, end_time=1667574616127, metadata={}, runtime_env={'working_dir': 'gcs://_ray_pkg_ce2c3e935774455d.zip', 'pip': {'packages': ['pytorch_lightning==1.5.10', 'ray_lightning', 'torchmetrics==0.9.1', 'torchvision==0.12.0'], 'pip_check': False}, '_ray_commit': 'e4ce38d001dbbe09cd21c497fedd03d692b2be3e'})}\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "cluster.list_jobs()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "e5c0b0da-c22e-4142-b096-407ac8aebe5e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[37mJob submission server address\u001b[39m: \u001b[1mhttp://mnisttest-head-svc.default.svc:8265\u001b[22m\n",
+ "\n",
+ "\u001b[32m-----------------------------------\u001b[39m\n",
+ "\u001b[32mJob 'mnist-jlm13hx5g53mk' succeeded\u001b[39m\n",
+ "\u001b[32m-----------------------------------\u001b[39m\n",
+ "\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "cluster.job_status(\"mnist-jlm13hx5g53mk\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "264c1809-de72-4acf-b0f6-e67d345640f6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[37mJob submission server address\u001b[39m: \u001b[1mhttp://mnisttest-head-svc.default.svc:8265\u001b[22m\n",
+ "acrtors: [RayActor(name='mnist', command=['bash', '-c', \"python -m torch.distributed.run --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id 'mnist-jlm13hx5g53mk' --nnodes 2 --nproc_per_node 4 --node_rank '0' --tee 3 --role '' mnist.py\"], env={'LOGLEVEL': 'DEBUG', 'TORCH_DISTRIBUTED_DEBUG': 'DETAIL'}, num_cpus=2, num_gpus=4), RayActor(name='mnist', command=['bash', '-c', \"python -m torch.distributed.run --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id 'mnist-jlm13hx5g53mk' --nnodes 2 --nproc_per_node 4 --node_rank '1' --tee 3 --role '' mnist.py\"], env={'LOGLEVEL': 'DEBUG', 'TORCH_DISTRIBUTED_DEBUG': 'DETAIL'}, num_cpus=2, num_gpus=4)]\n",
+ "Waiting for placement group to start.\n",
+ "here and rank is 0 and 10.131.66.16 49782\n",
+ "finally setting actor remote address and port 10.131.66.16 49782\n",
+ "here and rank is 1 and 10.131.66.16 49782\n",
+ "setting actor remote address and port 10.131.66.16 49782\n",
+ "finally setting actor remote address and port 10.131.66.16 49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m get_actor_address_and_port before: 10.131.66.16 42903\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m get_actor_address_and_port: 10.131.66.16 42903\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m set_address_and_port: 10.131.66.16 49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m get_actor_address_and_port before: 10.131.66.16 53621\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m get_actor_address_and_port: 10.131.66.16 53621\n",
+ "running ray.wait on [ObjectRef(32b0eec39cfa87ac523554acce28b667f9bc98bb0200000001000000), ObjectRef(80b655a2d9b04d4074fb8e3cef07ab2b3516f40e0200000001000000)]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m cmd: ['bash', '-c', \"python -m torch.distributed.run --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id 'mnist-jlm13hx5g53mk' --nnodes 2 --nproc_per_node 4 --node_rank '0' --tee 3 --role '' mnist.py\"]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m worker env: {'NV_LIBCUBLAS_DEV_VERSION': '11.3.1.68-1', 'NV_CUDA_COMPAT_PACKAGE': 'cuda-compat-11-2', 'RAY_IP': 'mnisttest-head-svc', 'MNISTTEST_HEAD_SVC_SERVICE_PORT_GCS': '6379', 'NV_CUDNN_PACKAGE_DEV': 'libcudnn8-dev=8.1.1.33-1+cuda11.2', 'LC_ALL': 'C.UTF-8', 'LD_LIBRARY_PATH': '/usr/local/nvidia/lib:/usr/local/nvidia/lib64', 'NV_LIBNCCL_DEV_PACKAGE': 'libnccl-dev=2.8.4-1+cuda11.2', 'REDIS_PASSWORD': '', 'MNISTTEST_HEAD_SVC_PORT_8265_TCP_PORT': '8265', 'RAY_USAGE_STATS_ENABLED': '0', 'LANG': 'C.UTF-8', 'TZ': 'America/Los_Angeles', 'NV_LIBNPP_DEV_PACKAGE': 'libnpp-dev-11-2=11.2.1.68-1', 'HOSTNAME': 'mnisttest-worker-small-group-mnisttest-wzz2l', 'MNISTTEST_HEAD_SVC_PORT_8265_TCP': 'tcp://172.30.163.155:8265', 'OLDPWD': '/home/ray/workspace', 'MNISTTEST_HEAD_SVC_PORT_10001_TCP_ADDR': '172.30.163.155', 'RAY_CLIENT_MODE': '0', 'RAY_JOB_ID': '02000000', 'MNISTTEST_HEAD_SVC_PORT_6379_TCP_ADDR': '172.30.163.155', 'MNISTTEST_HEAD_SVC_PORT_8265_TCP_ADDR': '172.30.163.155', 'NV_LIBNPP_VERSION': '11.2.1.68-1', 'MNISTTEST_HEAD_SVC_PORT_6379_TCP_PROTO': 'tcp', 'NVIDIA_VISIBLE_DEVICES': 'GPU-d3e8af45-f80b-98a8-dcd8-d3b428c4a4c2,GPU-15e57e64-c38b-9923-8f4a-6c098fdbc062,GPU-d14042c5-219c-5419-9511-ac62c72f90d1,GPU-b0d6ba11-ccb2-c4fb-89ad-01c50e6d393c', 'VIRTUAL_ENV': '/tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/pip/3510e0c008a5c3627e4d2408c8b93ed71be6c3e1/virtualenv', 'NV_LIBCUSPARSE_VERSION': '11.3.1.68-1', 'MNISTTEST_HEAD_SVC_SERVICE_PORT_DASHBOARD': '8265', 'MNISTTEST_HEAD_SVC_SERVICE_PORT_CLIENT': '10001', 'KUBERNETES_PORT_443_TCP_PROTO': 'tcp', 'MNISTTEST_HEAD_SVC_PORT_6379_TCP_PORT': '6379', 'KUBERNETES_PORT_443_TCP_ADDR': '172.30.0.1', 'NV_LIBCUBLAS_DEV_PACKAGE': 'libcublas-dev-11-2=11.3.1.68-1', 'NCCL_VERSION': '2.8.4-1', 'KUBERNETES_PORT': 'tcp://172.30.0.1:443', 'PWD': '/tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d', 'NVARCH': 'x86_64', 'NV_LIBCUSPARSE_DEV_VERSION': '11.3.1.68-1', 'HOME': '/home/ray', 'RAY_RAYLET_PID': '19', 'NV_ML_REPO_URL': 'https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64', 'NV_LIBNCCL_PACKAGE_VERSION': '2.8.4-1', 'SPT_NOENV': '1', 'KUBERNETES_SERVICE_PORT_HTTPS': '443', 'NV_LIBNCCL_PACKAGE': 'libnccl2=2.8.4-1+cuda11.2', 'NV_LIBNCCL_DEV_PACKAGE_NAME': 'libnccl-dev', 'KUBERNETES_PORT_443_TCP_PORT': '443', 'NV_CUDA_LIB_VERSION': '11.2.0-1', 'NV_ML_REPO_ENABLED': '1', 'NV_LIBNPP_PACKAGE': 'libnpp-11-2=11.2.1.68-1', 'NV_LIBNCCL_PACKAGE_NAME': 'libnccl2', 'LIBRARY_PATH': '/usr/local/cuda/lib64/stubs', 'NV_NVTX_VERSION': '11.2.67-1', 'MNISTTEST_HEAD_SVC_PORT_10001_TCP': 'tcp://172.30.163.155:10001', 'NV_LIBCUBLAS_VERSION': '11.3.1.68-1', 'RAY_ADDRESS': 'mnisttest-head-svc:6379', 'NV_LIBCUBLAS_PACKAGE': 'libcublas-11-2=11.3.1.68-1', 'KUBERNETES_PORT_443_TCP': 'tcp://172.30.0.1:443', 'NV_CUDNN_VERSION': '8.1.1.33', 'RAY_PORT': '6379', 'NV_CUDA_CUDART_DEV_VERSION': '11.2.72-1', 'MNISTTEST_HEAD_SVC_PORT_6379_TCP': 'tcp://172.30.163.155:6379', 'MNISTTEST_HEAD_SVC_PORT_8265_TCP_PROTO': 'tcp', 'MNISTTEST_HEAD_SVC_PORT_10001_TCP_PORT': '10001', 'TERM': 'xterm', 'MNISTTEST_HEAD_SVC_SERVICE_PORT': '6379', 'NV_NVML_DEV_VERSION': '11.2.67-1', 'CUDA_VERSION': '11.2.0', 'NV_LIBCUBLAS_PACKAGE_NAME': 'libcublas-11-2', 'NSS_SDB_USE_CACHE': 'no', 'NVIDIA_DRIVER_CAPABILITIES': 'compute,utility', 'MY_POD_IP': '10.131.66.16', 'SHLVL': '1', 'PYTHONPATH': 
':/tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d:/home/ray/workspace::/home/ray/workspace:', 'NV_LIBCUBLAS_DEV_PACKAGE_NAME': 'libcublas-dev-11-2', 'NVIDIA_REQUIRE_CUDA': 'cuda>=11.2 brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441 driver>=450', 'NV_LIBNPP_DEV_VERSION': '11.2.1.68-1', 'KUBERNETES_SERVICE_PORT': '443', 'NV_CUDA_CUDART_VERSION': '11.2.72-1', 'NV_CUDNN_PACKAGE_NAME': 'libcudnn8', 'PATH': '/tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/pip/3510e0c008a5c3627e4d2408c8b93ed71be6c3e1/virtualenv/bin:/home/ray/anaconda3/bin:/home/ray/anaconda3/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin', 'NV_LIBNCCL_DEV_PACKAGE_VERSION': '2.8.4-1', 'MNISTTEST_HEAD_SVC_PORT': 'tcp://172.30.163.155:6379', 'PS1': '(virtualenv) ', 'MNISTTEST_HEAD_SVC_PORT_10001_TCP_PROTO': 'tcp', 'MNISTTEST_HEAD_SVC_SERVICE_HOST': '172.30.163.155', 'KUBERNETES_SERVICE_HOST': '172.30.0.1', 'NV_CUDNN_PACKAGE': 'libcudnn8=8.1.1.33-1+cuda11.2', 'OMP_NUM_THREADS': '1', 'PYTHONBREAKPOINT': 'ray.util.rpdb.set_trace', 'CUDA_VISIBLE_DEVICES': '0,1,2,3', 'LOGLEVEL': 'DEBUG', 'TORCH_DISTRIBUTED_DEBUG': 'DETAIL', 'TORCHX_RANK0_HOST': '10.131.66.16'}\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m set_address_and_port: 10.131.66.16 49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m cmd: ['bash', '-c', \"python -m torch.distributed.run --rdzv_backend static --rdzv_endpoint $TORCHX_RANK0_HOST:49782 --rdzv_id 'mnist-jlm13hx5g53mk' --nnodes 2 --nproc_per_node 4 --node_rank '1' --tee 3 --role '' mnist.py\"]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m worker env: {'NV_LIBCUBLAS_DEV_VERSION': '11.3.1.68-1', 'NV_CUDA_COMPAT_PACKAGE': 'cuda-compat-11-2', 'RAY_IP': 'mnisttest-head-svc', 'MNISTTEST_HEAD_SVC_SERVICE_PORT_GCS': '6379', 'NV_CUDNN_PACKAGE_DEV': 'libcudnn8-dev=8.1.1.33-1+cuda11.2', 'LC_ALL': 'C.UTF-8', 'LD_LIBRARY_PATH': '/usr/local/nvidia/lib:/usr/local/nvidia/lib64', 'NV_LIBNCCL_DEV_PACKAGE': 'libnccl-dev=2.8.4-1+cuda11.2', 'REDIS_PASSWORD': '', 'MNISTTEST_HEAD_SVC_PORT_8265_TCP_PORT': '8265', 'RAY_USAGE_STATS_ENABLED': '0', 'LANG': 'C.UTF-8', 'TZ': 'America/Los_Angeles', 'NV_LIBNPP_DEV_PACKAGE': 'libnpp-dev-11-2=11.2.1.68-1', 'HOSTNAME': 'mnisttest-worker-small-group-mnisttest-hfm8l', 'MNISTTEST_HEAD_SVC_PORT_8265_TCP': 'tcp://172.30.163.155:8265', 'OLDPWD': '/home/ray/workspace', 'MNISTTEST_HEAD_SVC_PORT_10001_TCP_ADDR': '172.30.163.155', 'RAY_CLIENT_MODE': '0', 'RAY_JOB_ID': '02000000', 'MNISTTEST_HEAD_SVC_PORT_6379_TCP_ADDR': '172.30.163.155', 'MNISTTEST_HEAD_SVC_PORT_8265_TCP_ADDR': '172.30.163.155', 'NV_LIBNPP_VERSION': '11.2.1.68-1', 'MNISTTEST_HEAD_SVC_PORT_6379_TCP_PROTO': 'tcp', 'NVIDIA_VISIBLE_DEVICES': 'GPU-48fae530-6bda-e366-3423-864fe847ff3b,GPU-5d8d79bb-5c38-4ef7-0ea8-c91297cbc59f,GPU-8c8b3c0b-ccf8-c06c-f253-0bb90285c4cb,GPU-a8a4e808-841d-c212-2686-a2bd227279b3', 'VIRTUAL_ENV': '/tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/pip/3510e0c008a5c3627e4d2408c8b93ed71be6c3e1/virtualenv', 'NV_LIBCUSPARSE_VERSION': '11.3.1.68-1', 'MNISTTEST_HEAD_SVC_SERVICE_PORT_DASHBOARD': '8265', 'MNISTTEST_HEAD_SVC_SERVICE_PORT_CLIENT': '10001', 'KUBERNETES_PORT_443_TCP_PROTO': 'tcp', 'MNISTTEST_HEAD_SVC_PORT_6379_TCP_PORT': '6379', 'KUBERNETES_PORT_443_TCP_ADDR': '172.30.0.1', 'NV_LIBCUBLAS_DEV_PACKAGE': 'libcublas-dev-11-2=11.3.1.68-1', 'NCCL_VERSION': '2.8.4-1', 'KUBERNETES_PORT': 'tcp://172.30.0.1:443', 'PWD': '/tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d', 'NVARCH': 'x86_64', 'NV_LIBCUSPARSE_DEV_VERSION': '11.3.1.68-1', 'HOME': '/home/ray', 'RAY_RAYLET_PID': '19', 'NV_ML_REPO_URL': 'https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64', 'NV_LIBNCCL_PACKAGE_VERSION': '2.8.4-1', 'SPT_NOENV': '1', 'KUBERNETES_SERVICE_PORT_HTTPS': '443', 'NV_LIBNCCL_PACKAGE': 'libnccl2=2.8.4-1+cuda11.2', 'NV_LIBNCCL_DEV_PACKAGE_NAME': 'libnccl-dev', 'KUBERNETES_PORT_443_TCP_PORT': '443', 'NV_CUDA_LIB_VERSION': '11.2.0-1', 'NV_ML_REPO_ENABLED': '1', 'NV_LIBNPP_PACKAGE': 'libnpp-11-2=11.2.1.68-1', 'NV_LIBNCCL_PACKAGE_NAME': 'libnccl2', 'LIBRARY_PATH': '/usr/local/cuda/lib64/stubs', 'NV_NVTX_VERSION': '11.2.67-1', 'MNISTTEST_HEAD_SVC_PORT_10001_TCP': 'tcp://172.30.163.155:10001', 'NV_LIBCUBLAS_VERSION': '11.3.1.68-1', 'RAY_ADDRESS': 'mnisttest-head-svc:6379', 'NV_LIBCUBLAS_PACKAGE': 'libcublas-11-2=11.3.1.68-1', 'KUBERNETES_PORT_443_TCP': 'tcp://172.30.0.1:443', 'NV_CUDNN_VERSION': '8.1.1.33', 'RAY_PORT': '6379', 'NV_CUDA_CUDART_DEV_VERSION': '11.2.72-1', 'MNISTTEST_HEAD_SVC_PORT_6379_TCP': 'tcp://172.30.163.155:6379', 'MNISTTEST_HEAD_SVC_PORT_8265_TCP_PROTO': 'tcp', 'MNISTTEST_HEAD_SVC_PORT_10001_TCP_PORT': '10001', 'TERM': 'xterm', 'MNISTTEST_HEAD_SVC_SERVICE_PORT': '6379', 'NV_NVML_DEV_VERSION': '11.2.67-1', 'CUDA_VERSION': '11.2.0', 'NV_LIBCUBLAS_PACKAGE_NAME': 'libcublas-11-2', 'NSS_SDB_USE_CACHE': 'no', 'NVIDIA_DRIVER_CAPABILITIES': 'compute,utility', 'MY_POD_IP': '10.128.68.15', 'SHLVL': '1', 'PYTHONPATH': 
':/tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d:/home/ray/workspace::/home/ray/workspace:', 'NV_LIBCUBLAS_DEV_PACKAGE_NAME': 'libcublas-dev-11-2', 'NVIDIA_REQUIRE_CUDA': 'cuda>=11.2 brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441 driver>=450', 'NV_LIBNPP_DEV_VERSION': '11.2.1.68-1', 'KUBERNETES_SERVICE_PORT': '443', 'NV_CUDA_CUDART_VERSION': '11.2.72-1', 'NV_CUDNN_PACKAGE_NAME': 'libcudnn8', 'PATH': '/tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/pip/3510e0c008a5c3627e4d2408c8b93ed71be6c3e1/virtualenv/bin:/home/ray/anaconda3/bin:/home/ray/anaconda3/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin', 'NV_LIBNCCL_DEV_PACKAGE_VERSION': '2.8.4-1', 'MNISTTEST_HEAD_SVC_PORT': 'tcp://172.30.163.155:6379', 'PS1': '(virtualenv) ', 'MNISTTEST_HEAD_SVC_PORT_10001_TCP_PROTO': 'tcp', 'MNISTTEST_HEAD_SVC_SERVICE_HOST': '172.30.163.155', 'KUBERNETES_SERVICE_HOST': '172.30.0.1', 'NV_CUDNN_PACKAGE': 'libcudnn8=8.1.1.33-1+cuda11.2', 'OMP_NUM_THREADS': '1', 'PYTHONBREAKPOINT': 'ray.util.rpdb.set_trace', 'CUDA_VISIBLE_DEVICES': '0,1,2,3', 'LOGLEVEL': 'DEBUG', 'TORCH_DISTRIBUTED_DEBUG': 'DETAIL', 'TORCHX_RANK0_HOST': '10.131.66.16'}\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.launcher.api:Starting elastic_operator with launch configs:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m entrypoint : mnist.py\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m min_nodes : 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m max_nodes : 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m nproc_per_node : 4\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m run_id : mnist-jlm13hx5g53mk\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m rdzv_backend : static\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m rdzv_endpoint : 10.131.66.16:49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m rdzv_configs : {'rank': 1, 'timeout': 900}\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m max_restarts : 0\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m monitor_interval : 5\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m log_dir : None\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m metrics_cfg : {}\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.agent.server.local_elastic_agent:log directory set to: /tmp/torchelastic_d2kdqlka/mnist-jlm13hx5g53mk_r9bujvap\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] starting workers for entrypoint: python\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Rendezvous'ing worker group\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.launcher.api:Starting elastic_operator with launch configs:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m entrypoint : mnist.py\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m min_nodes : 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m max_nodes : 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m nproc_per_node : 4\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m run_id : mnist-jlm13hx5g53mk\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m rdzv_backend : static\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m rdzv_endpoint : 10.131.66.16:49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m rdzv_configs : {'rank': 0, 'timeout': 900}\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m max_restarts : 0\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m monitor_interval : 5\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m log_dir : None\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m metrics_cfg : {}\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.agent.server.local_elastic_agent:log directory set to: /tmp/torchelastic_d4z71nty/mnist-jlm13hx5g53mk_nxz_och1\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] starting workers for entrypoint: python\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Rendezvous'ing worker group\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Rendezvous complete for workers. Result:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m restart_count=0\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m master_addr=10.131.66.16\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m master_port=49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m group_rank=1\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m group_world_size=2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m local_ranks=[0, 1, 2, 3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m role_ranks=[4, 5, 6, 7]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m global_ranks=[4, 5, 6, 7]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m role_world_sizes=[8, 8, 8, 8]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m global_world_sizes=[8, 8, 8, 8]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Starting worker group\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.multiprocessing:Setting worker0 reply file to: /tmp/torchelastic_d2kdqlka/mnist-jlm13hx5g53mk_r9bujvap/attempt_0/0/error.json\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.multiprocessing:Setting worker1 reply file to: /tmp/torchelastic_d2kdqlka/mnist-jlm13hx5g53mk_r9bujvap/attempt_0/1/error.json\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.multiprocessing:Setting worker2 reply file to: /tmp/torchelastic_d2kdqlka/mnist-jlm13hx5g53mk_r9bujvap/attempt_0/2/error.json\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.multiprocessing:Setting worker3 reply file to: /tmp/torchelastic_d2kdqlka/mnist-jlm13hx5g53mk_r9bujvap/attempt_0/3/error.json\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Rendezvous complete for workers. Result:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m restart_count=0\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m master_addr=10.131.66.16\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m master_port=49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m group_rank=0\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m group_world_size=2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m local_ranks=[0, 1, 2, 3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m role_ranks=[0, 1, 2, 3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m global_ranks=[0, 1, 2, 3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m role_world_sizes=[8, 8, 8, 8]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m global_world_sizes=[8, 8, 8, 8]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] Starting worker group\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.multiprocessing:Setting worker0 reply file to: /tmp/torchelastic_d4z71nty/mnist-jlm13hx5g53mk_nxz_och1/attempt_0/0/error.json\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.multiprocessing:Setting worker1 reply file to: /tmp/torchelastic_d4z71nty/mnist-jlm13hx5g53mk_nxz_och1/attempt_0/1/error.json\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.multiprocessing:Setting worker2 reply file to: /tmp/torchelastic_d4z71nty/mnist-jlm13hx5g53mk_nxz_och1/attempt_0/2/error.json\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.multiprocessing:Setting worker3 reply file to: /tmp/torchelastic_d4z71nty/mnist-jlm13hx5g53mk_nxz_och1/attempt_0/3/error.json\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [3]:prior to running the trainer\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [3]:MASTER_ADDR: is 10.131.66.16\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [3]:MASTER_PORT: is 49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [3]:GROUP: 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [3]:LOCAL: 4\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:prior to running the trainer\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:MASTER_ADDR: is 10.131.66.16\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:MASTER_PORT: is 49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:GROUP: 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:LOCAL: 4\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Downloading MNIST dataset...\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [2]:prior to running the trainer\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [2]:MASTER_ADDR: is 10.131.66.16\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [2]:MASTER_PORT: is 49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [2]:GROUP: 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [2]:LOCAL: 4\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [1]:prior to running the trainer\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [1]:MASTER_ADDR: is 10.131.66.16\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [1]:MASTER_PORT: is 49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [1]:GROUP: 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [1]:LOCAL: 4\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validation sanity check: 0it [00:00, ?it/s][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validation sanity check: 0%| | 0/2 [00:00, ?it/s][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validation sanity check: 100%|██████████| 2/2 [00:00<00:00, 11.31it/s][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Training: 0it [00:00, ?it/s]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Training: 0%| | 0/30 [00:00, ?it/s]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 0: 0%| | 0/30 [00:00, ?it/s] [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 0: 67%|██████▋ | 20/30 [00:01<00:00, 12.35it/s]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 0: 67%|██████▋ | 20/30 [00:01<00:00, 12.35it/s, loss=2.2, v_num=0][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.32it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 0: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=2.1, v_num=0, val_loss=1.900, val_acc=0.482]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 0: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=2.1, v_num=0, val_loss=1.900, val_acc=0.482][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 0: 0%| | 0/30 [00:00, ?it/s, loss=2.1, v_num=0, val_loss=1.900, val_acc=0.482] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 1: 0%| | 0/30 [00:00, ?it/s, loss=2.1, v_num=0, val_loss=1.900, val_acc=0.482][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 1: 67%|██████▋ | 20/30 [00:01<00:00, 11.57it/s, loss=2.1, v_num=0, val_loss=1.900, val_acc=0.482]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 1: 67%|██████▋ | 20/30 [00:01<00:00, 11.57it/s, loss=1.72, v_num=0, val_loss=1.900, val_acc=0.482][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.50it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 12.27it/s, loss=1.56, v_num=0, val_loss=1.290, val_acc=0.751]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 12.26it/s, loss=1.56, v_num=0, val_loss=1.290, val_acc=0.751]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 1: 0%| | 0/30 [00:00, ?it/s, loss=1.56, v_num=0, val_loss=1.290, val_acc=0.751] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 2: 0%| | 0/30 [00:00, ?it/s, loss=1.56, v_num=0, val_loss=1.290, val_acc=0.751][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 2: 67%|██████▋ | 20/30 [00:01<00:00, 12.22it/s, loss=1.56, v_num=0, val_loss=1.290, val_acc=0.751]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 2: 67%|██████▋ | 20/30 [00:01<00:00, 12.22it/s, loss=1.18, v_num=0, val_loss=1.290, val_acc=0.751][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.89it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 12.84it/s, loss=1.07, v_num=0, val_loss=0.833, val_acc=0.819]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 12.84it/s, loss=1.07, v_num=0, val_loss=0.833, val_acc=0.819]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 2: 0%| | 0/30 [00:00, ?it/s, loss=1.07, v_num=0, val_loss=0.833, val_acc=0.819] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 3: 0%| | 0/30 [00:00, ?it/s, loss=1.07, v_num=0, val_loss=0.833, val_acc=0.819][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 3: 67%|██████▋ | 20/30 [00:01<00:00, 11.72it/s, loss=1.07, v_num=0, val_loss=0.833, val_acc=0.819]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 3: 67%|██████▋ | 20/30 [00:01<00:00, 11.72it/s, loss=0.837, v_num=0, val_loss=0.833, val_acc=0.819][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.48it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 12.53it/s, loss=0.778, v_num=0, val_loss=0.615, val_acc=0.853]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 12.52it/s, loss=0.778, v_num=0, val_loss=0.615, val_acc=0.853]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 3: 0%| | 0/30 [00:00, ?it/s, loss=0.778, v_num=0, val_loss=0.615, val_acc=0.853] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 4: 0%| | 0/30 [00:00, ?it/s, loss=0.778, v_num=0, val_loss=0.615, val_acc=0.853][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 4: 67%|██████▋ | 20/30 [00:01<00:00, 12.62it/s, loss=0.778, v_num=0, val_loss=0.615, val_acc=0.853]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 4: 67%|██████▋ | 20/30 [00:01<00:00, 12.62it/s, loss=0.657, v_num=0, val_loss=0.615, val_acc=0.853][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.41it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 4: 100%|██████████| 30/30 [00:02<00:00, 13.17it/s, loss=0.623, v_num=0, val_loss=0.503, val_acc=0.870]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 4: 100%|██████████| 30/30 [00:02<00:00, 13.16it/s, loss=0.623, v_num=0, val_loss=0.503, val_acc=0.870]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 4: 0%| | 0/30 [00:00, ?it/s, loss=0.623, v_num=0, val_loss=0.503, val_acc=0.870] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 5: 0%| | 0/30 [00:00, ?it/s, loss=0.623, v_num=0, val_loss=0.503, val_acc=0.870][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 5: 67%|██████▋ | 20/30 [00:01<00:00, 12.06it/s, loss=0.623, v_num=0, val_loss=0.503, val_acc=0.870]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 5: 67%|██████▋ | 20/30 [00:01<00:00, 12.06it/s, loss=0.56, v_num=0, val_loss=0.503, val_acc=0.870] [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.54it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 5: 100%|██████████| 30/30 [00:02<00:00, 12.73it/s, loss=0.55, v_num=0, val_loss=0.433, val_acc=0.883]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 5: 100%|██████████| 30/30 [00:02<00:00, 12.72it/s, loss=0.55, v_num=0, val_loss=0.433, val_acc=0.883]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 5: 0%| | 0/30 [00:00, ?it/s, loss=0.55, v_num=0, val_loss=0.433, val_acc=0.883] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 6: 0%| | 0/30 [00:00, ?it/s, loss=0.55, v_num=0, val_loss=0.433, val_acc=0.883][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 6: 67%|██████▋ | 20/30 [00:01<00:00, 12.19it/s, loss=0.55, v_num=0, val_loss=0.433, val_acc=0.883]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 6: 67%|██████▋ | 20/30 [00:01<00:00, 12.19it/s, loss=0.497, v_num=0, val_loss=0.433, val_acc=0.883]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.80it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 6: 100%|██████████| 30/30 [00:02<00:00, 12.78it/s, loss=0.484, v_num=0, val_loss=0.390, val_acc=0.891]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 6: 100%|██████████| 30/30 [00:02<00:00, 12.77it/s, loss=0.484, v_num=0, val_loss=0.390, val_acc=0.891]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 6: 0%| | 0/30 [00:00, ?it/s, loss=0.484, v_num=0, val_loss=0.390, val_acc=0.891] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 7: 0%| | 0/30 [00:00, ?it/s, loss=0.484, v_num=0, val_loss=0.390, val_acc=0.891][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 7: 67%|██████▋ | 20/30 [00:01<00:00, 12.63it/s, loss=0.484, v_num=0, val_loss=0.390, val_acc=0.891]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 7: 67%|██████▋ | 20/30 [00:01<00:00, 12.63it/s, loss=0.455, v_num=0, val_loss=0.390, val_acc=0.891][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.56it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 7: 100%|██████████| 30/30 [00:02<00:00, 13.31it/s, loss=0.451, v_num=0, val_loss=0.359, val_acc=0.898]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 7: 100%|██████████| 30/30 [00:02<00:00, 13.31it/s, loss=0.451, v_num=0, val_loss=0.359, val_acc=0.898]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 7: 0%| | 0/30 [00:00, ?it/s, loss=0.451, v_num=0, val_loss=0.359, val_acc=0.898] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 8: 0%| | 0/30 [00:00, ?it/s, loss=0.451, v_num=0, val_loss=0.359, val_acc=0.898][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 8: 67%|██████▋ | 20/30 [00:01<00:00, 12.29it/s, loss=0.451, v_num=0, val_loss=0.359, val_acc=0.898]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 8: 67%|██████▋ | 20/30 [00:01<00:00, 12.29it/s, loss=0.406, v_num=0, val_loss=0.359, val_acc=0.898][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.47it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 8: 100%|██████████| 30/30 [00:02<00:00, 12.81it/s, loss=0.408, v_num=0, val_loss=0.335, val_acc=0.903]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 8: 100%|██████████| 30/30 [00:02<00:00, 12.81it/s, loss=0.408, v_num=0, val_loss=0.335, val_acc=0.903]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 8: 0%| | 0/30 [00:00, ?it/s, loss=0.408, v_num=0, val_loss=0.335, val_acc=0.903] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 9: 0%| | 0/30 [00:00, ?it/s, loss=0.408, v_num=0, val_loss=0.335, val_acc=0.903][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 9: 67%|██████▋ | 20/30 [00:01<00:00, 11.78it/s, loss=0.408, v_num=0, val_loss=0.335, val_acc=0.903]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 9: 67%|██████▋ | 20/30 [00:01<00:00, 11.78it/s, loss=0.395, v_num=0, val_loss=0.335, val_acc=0.903][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.49it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 9: 100%|██████████| 30/30 [00:02<00:00, 12.44it/s, loss=0.381, v_num=0, val_loss=0.316, val_acc=0.906]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 9: 100%|██████████| 30/30 [00:02<00:00, 12.44it/s, loss=0.381, v_num=0, val_loss=0.316, val_acc=0.906][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 9: 0%| | 0/30 [00:00, ?it/s, loss=0.381, v_num=0, val_loss=0.316, val_acc=0.906] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 10: 0%| | 0/30 [00:00, ?it/s, loss=0.381, v_num=0, val_loss=0.316, val_acc=0.906][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 10: 67%|██████▋ | 20/30 [00:01<00:00, 12.47it/s, loss=0.381, v_num=0, val_loss=0.316, val_acc=0.906]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 10: 67%|██████▋ | 20/30 [00:01<00:00, 12.46it/s, loss=0.374, v_num=0, val_loss=0.316, val_acc=0.906][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.06it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 10: 100%|██████████| 30/30 [00:02<00:00, 12.75it/s, loss=0.368, v_num=0, val_loss=0.303, val_acc=0.909]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 10: 100%|██████████| 30/30 [00:02<00:00, 12.75it/s, loss=0.368, v_num=0, val_loss=0.303, val_acc=0.909]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 10: 0%| | 0/30 [00:00, ?it/s, loss=0.368, v_num=0, val_loss=0.303, val_acc=0.909] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 11: 0%| | 0/30 [00:00, ?it/s, loss=0.368, v_num=0, val_loss=0.303, val_acc=0.909][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 11: 67%|██████▋ | 20/30 [00:01<00:00, 12.33it/s, loss=0.368, v_num=0, val_loss=0.303, val_acc=0.909]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 11: 67%|██████▋ | 20/30 [00:01<00:00, 12.33it/s, loss=0.344, v_num=0, val_loss=0.303, val_acc=0.909][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.43it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 11: 100%|██████████| 30/30 [00:02<00:00, 12.93it/s, loss=0.349, v_num=0, val_loss=0.291, val_acc=0.913]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 11: 100%|██████████| 30/30 [00:02<00:00, 12.92it/s, loss=0.349, v_num=0, val_loss=0.291, val_acc=0.913][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 11: 0%| | 0/30 [00:00, ?it/s, loss=0.349, v_num=0, val_loss=0.291, val_acc=0.913] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 12: 0%| | 0/30 [00:00, ?it/s, loss=0.349, v_num=0, val_loss=0.291, val_acc=0.913][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 12: 67%|██████▋ | 20/30 [00:01<00:00, 12.58it/s, loss=0.349, v_num=0, val_loss=0.291, val_acc=0.913]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 12: 67%|██████▋ | 20/30 [00:01<00:00, 12.58it/s, loss=0.347, v_num=0, val_loss=0.291, val_acc=0.913][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.32it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 12: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=0.344, v_num=0, val_loss=0.279, val_acc=0.914]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 12: 100%|██████████| 30/30 [00:02<00:00, 12.93it/s, loss=0.344, v_num=0, val_loss=0.279, val_acc=0.914]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 12: 0%| | 0/30 [00:00, ?it/s, loss=0.344, v_num=0, val_loss=0.279, val_acc=0.914] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 13: 0%| | 0/30 [00:00, ?it/s, loss=0.344, v_num=0, val_loss=0.279, val_acc=0.914][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 13: 67%|██████▋ | 20/30 [00:01<00:00, 12.30it/s, loss=0.344, v_num=0, val_loss=0.279, val_acc=0.914]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 13: 67%|██████▋ | 20/30 [00:01<00:00, 12.30it/s, loss=0.342, v_num=0, val_loss=0.279, val_acc=0.914][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.52it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 13: 100%|██████████| 30/30 [00:02<00:00, 12.88it/s, loss=0.335, v_num=0, val_loss=0.270, val_acc=0.916]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 13: 100%|██████████| 30/30 [00:02<00:00, 12.87it/s, loss=0.335, v_num=0, val_loss=0.270, val_acc=0.916]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 13: 0%| | 0/30 [00:00, ?it/s, loss=0.335, v_num=0, val_loss=0.270, val_acc=0.916] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 14: 0%| | 0/30 [00:00, ?it/s, loss=0.335, v_num=0, val_loss=0.270, val_acc=0.916][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 14: 67%|██████▋ | 20/30 [00:01<00:00, 12.13it/s, loss=0.335, v_num=0, val_loss=0.270, val_acc=0.916]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 14: 67%|██████▋ | 20/30 [00:01<00:00, 12.13it/s, loss=0.332, v_num=0, val_loss=0.270, val_acc=0.916][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 16.66it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 14: 100%|██████████| 30/30 [00:02<00:00, 12.62it/s, loss=0.322, v_num=0, val_loss=0.260, val_acc=0.920]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 14: 100%|██████████| 30/30 [00:02<00:00, 12.61it/s, loss=0.322, v_num=0, val_loss=0.260, val_acc=0.920]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 14: 0%| | 0/30 [00:00, ?it/s, loss=0.322, v_num=0, val_loss=0.260, val_acc=0.920] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 15: 0%| | 0/30 [00:00, ?it/s, loss=0.322, v_num=0, val_loss=0.260, val_acc=0.920][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 15: 67%|██████▋ | 20/30 [00:01<00:00, 12.13it/s, loss=0.322, v_num=0, val_loss=0.260, val_acc=0.920]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 15: 67%|██████▋ | 20/30 [00:01<00:00, 12.13it/s, loss=0.321, v_num=0, val_loss=0.260, val_acc=0.920][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.45it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 15: 100%|██████████| 30/30 [00:02<00:00, 12.74it/s, loss=0.319, v_num=0, val_loss=0.252, val_acc=0.922]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 15: 100%|██████████| 30/30 [00:02<00:00, 12.73it/s, loss=0.319, v_num=0, val_loss=0.252, val_acc=0.922]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 15: 0%| | 0/30 [00:00, ?it/s, loss=0.319, v_num=0, val_loss=0.252, val_acc=0.922] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 16: 0%| | 0/30 [00:00, ?it/s, loss=0.319, v_num=0, val_loss=0.252, val_acc=0.922][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 16: 67%|██████▋ | 20/30 [00:01<00:00, 12.04it/s, loss=0.319, v_num=0, val_loss=0.252, val_acc=0.922]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 16: 67%|██████▋ | 20/30 [00:01<00:00, 12.04it/s, loss=0.303, v_num=0, val_loss=0.252, val_acc=0.922][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.22it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 16: 100%|██████████| 30/30 [00:02<00:00, 12.68it/s, loss=0.295, v_num=0, val_loss=0.247, val_acc=0.924]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 16: 100%|██████████| 30/30 [00:02<00:00, 12.67it/s, loss=0.295, v_num=0, val_loss=0.247, val_acc=0.924]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 16: 0%| | 0/30 [00:00, ?it/s, loss=0.295, v_num=0, val_loss=0.247, val_acc=0.924] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 17: 0%| | 0/30 [00:00, ?it/s, loss=0.295, v_num=0, val_loss=0.247, val_acc=0.924][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 17: 67%|██████▋ | 20/30 [00:01<00:00, 12.16it/s, loss=0.295, v_num=0, val_loss=0.247, val_acc=0.924]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 17: 67%|██████▋ | 20/30 [00:01<00:00, 12.15it/s, loss=0.289, v_num=0, val_loss=0.247, val_acc=0.924][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.48it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 17: 100%|██████████| 30/30 [00:02<00:00, 12.81it/s, loss=0.286, v_num=0, val_loss=0.237, val_acc=0.928]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 17: 100%|██████████| 30/30 [00:02<00:00, 12.80it/s, loss=0.286, v_num=0, val_loss=0.237, val_acc=0.928]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 17: 0%| | 0/30 [00:00, ?it/s, loss=0.286, v_num=0, val_loss=0.237, val_acc=0.928] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 18: 0%| | 0/30 [00:00, ?it/s, loss=0.286, v_num=0, val_loss=0.237, val_acc=0.928][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 18: 67%|██████▋ | 20/30 [00:01<00:00, 12.45it/s, loss=0.286, v_num=0, val_loss=0.237, val_acc=0.928]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 18: 67%|██████▋ | 20/30 [00:01<00:00, 12.44it/s, loss=0.298, v_num=0, val_loss=0.237, val_acc=0.928][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.73it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 18: 100%|██████████| 30/30 [00:02<00:00, 13.06it/s, loss=0.303, v_num=0, val_loss=0.230, val_acc=0.930]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 18: 100%|██████████| 30/30 [00:02<00:00, 13.06it/s, loss=0.303, v_num=0, val_loss=0.230, val_acc=0.930]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 18: 0%| | 0/30 [00:00, ?it/s, loss=0.303, v_num=0, val_loss=0.230, val_acc=0.930] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 19: 0%| | 0/30 [00:00, ?it/s, loss=0.303, v_num=0, val_loss=0.230, val_acc=0.930][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 19: 67%|██████▋ | 20/30 [00:01<00:00, 12.78it/s, loss=0.303, v_num=0, val_loss=0.230, val_acc=0.930]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 19: 67%|██████▋ | 20/30 [00:01<00:00, 12.78it/s, loss=0.275, v_num=0, val_loss=0.230, val_acc=0.930][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 11.98it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 19: 100%|██████████| 30/30 [00:02<00:00, 12.18it/s, loss=0.268, v_num=0, val_loss=0.225, val_acc=0.931]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 19: 100%|██████████| 30/30 [00:02<00:00, 12.15it/s, loss=0.268, v_num=0, val_loss=0.225, val_acc=0.931][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 19: 0%| | 0/30 [00:00, ?it/s, loss=0.268, v_num=0, val_loss=0.225, val_acc=0.931] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 20: 0%| | 0/30 [00:00, ?it/s, loss=0.268, v_num=0, val_loss=0.225, val_acc=0.931][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 20: 67%|██████▋ | 20/30 [00:04<00:02, 4.58it/s, loss=0.268, v_num=0, val_loss=0.225, val_acc=0.931]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 20: 67%|██████▋ | 20/30 [00:04<00:02, 4.58it/s, loss=0.272, v_num=0, val_loss=0.225, val_acc=0.931][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 6.21it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 20: 100%|██████████| 30/30 [00:06<00:00, 4.80it/s, loss=0.271, v_num=0, val_loss=0.217, val_acc=0.935]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 20: 100%|██████████| 30/30 [00:06<00:00, 4.79it/s, loss=0.271, v_num=0, val_loss=0.217, val_acc=0.935][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 20: 0%| | 0/30 [00:00, ?it/s, loss=0.271, v_num=0, val_loss=0.217, val_acc=0.935] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 21: 0%| | 0/30 [00:00, ?it/s, loss=0.271, v_num=0, val_loss=0.217, val_acc=0.935][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 21: 67%|██████▋ | 20/30 [00:02<00:01, 7.33it/s, loss=0.271, v_num=0, val_loss=0.217, val_acc=0.935]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 21: 67%|██████▋ | 20/30 [00:02<00:01, 7.33it/s, loss=0.266, v_num=0, val_loss=0.217, val_acc=0.935][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.86it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 21: 100%|██████████| 30/30 [00:03<00:00, 8.80it/s, loss=0.249, v_num=0, val_loss=0.213, val_acc=0.936]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 21: 100%|██████████| 30/30 [00:03<00:00, 8.80it/s, loss=0.249, v_num=0, val_loss=0.213, val_acc=0.936]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 21: 0%| | 0/30 [00:00, ?it/s, loss=0.249, v_num=0, val_loss=0.213, val_acc=0.936] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 22: 0%| | 0/30 [00:00, ?it/s, loss=0.249, v_num=0, val_loss=0.213, val_acc=0.936][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 22: 67%|██████▋ | 20/30 [00:01<00:00, 12.47it/s, loss=0.249, v_num=0, val_loss=0.213, val_acc=0.936]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 22: 67%|██████▋ | 20/30 [00:01<00:00, 12.47it/s, loss=0.245, v_num=0, val_loss=0.213, val_acc=0.936][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.65it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 22: 100%|██████████| 30/30 [00:02<00:00, 13.10it/s, loss=0.241, v_num=0, val_loss=0.206, val_acc=0.939]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 22: 100%|██████████| 30/30 [00:02<00:00, 13.09it/s, loss=0.241, v_num=0, val_loss=0.206, val_acc=0.939]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 22: 0%| | 0/30 [00:00, ?it/s, loss=0.241, v_num=0, val_loss=0.206, val_acc=0.939] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 23: 0%| | 0/30 [00:00, ?it/s, loss=0.241, v_num=0, val_loss=0.206, val_acc=0.939][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 23: 67%|██████▋ | 20/30 [00:01<00:00, 12.39it/s, loss=0.241, v_num=0, val_loss=0.206, val_acc=0.939]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 23: 67%|██████▋ | 20/30 [00:01<00:00, 12.38it/s, loss=0.247, v_num=0, val_loss=0.206, val_acc=0.939][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.76it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 23: 100%|██████████| 30/30 [00:02<00:00, 12.99it/s, loss=0.246, v_num=0, val_loss=0.201, val_acc=0.940]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 23: 100%|██████████| 30/30 [00:02<00:00, 12.99it/s, loss=0.246, v_num=0, val_loss=0.201, val_acc=0.940]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 23: 0%| | 0/30 [00:00, ?it/s, loss=0.246, v_num=0, val_loss=0.201, val_acc=0.940] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 24: 0%| | 0/30 [00:00, ?it/s, loss=0.246, v_num=0, val_loss=0.201, val_acc=0.940][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 24: 67%|██████▋ | 20/30 [00:01<00:00, 12.54it/s, loss=0.246, v_num=0, val_loss=0.201, val_acc=0.940]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 24: 67%|██████▋ | 20/30 [00:01<00:00, 12.54it/s, loss=0.234, v_num=0, val_loss=0.201, val_acc=0.940][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.21it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 24: 100%|██████████| 30/30 [00:02<00:00, 13.12it/s, loss=0.225, v_num=0, val_loss=0.195, val_acc=0.940]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 24: 100%|██████████| 30/30 [00:02<00:00, 13.12it/s, loss=0.225, v_num=0, val_loss=0.195, val_acc=0.940]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 24: 0%| | 0/30 [00:00, ?it/s, loss=0.225, v_num=0, val_loss=0.195, val_acc=0.940] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 25: 0%| | 0/30 [00:00, ?it/s, loss=0.225, v_num=0, val_loss=0.195, val_acc=0.940][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 25: 67%|██████▋ | 20/30 [00:01<00:00, 12.48it/s, loss=0.225, v_num=0, val_loss=0.195, val_acc=0.940]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 25: 67%|██████▋ | 20/30 [00:01<00:00, 12.48it/s, loss=0.252, v_num=0, val_loss=0.195, val_acc=0.940][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.87it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 25: 100%|██████████| 30/30 [00:02<00:00, 13.10it/s, loss=0.238, v_num=0, val_loss=0.190, val_acc=0.943]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 25: 100%|██████████| 30/30 [00:02<00:00, 13.10it/s, loss=0.238, v_num=0, val_loss=0.190, val_acc=0.943]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 25: 0%| | 0/30 [00:00, ?it/s, loss=0.238, v_num=0, val_loss=0.190, val_acc=0.943] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 26: 0%| | 0/30 [00:00, ?it/s, loss=0.238, v_num=0, val_loss=0.190, val_acc=0.943][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 26: 67%|██████▋ | 20/30 [00:01<00:00, 12.52it/s, loss=0.238, v_num=0, val_loss=0.190, val_acc=0.943]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 26: 67%|██████▋ | 20/30 [00:01<00:00, 12.52it/s, loss=0.242, v_num=0, val_loss=0.190, val_acc=0.943][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.58it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 26: 100%|██████████| 30/30 [00:02<00:00, 13.14it/s, loss=0.241, v_num=0, val_loss=0.186, val_acc=0.942]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 26: 100%|██████████| 30/30 [00:02<00:00, 13.13it/s, loss=0.241, v_num=0, val_loss=0.186, val_acc=0.942]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 26: 0%| | 0/30 [00:00, ?it/s, loss=0.241, v_num=0, val_loss=0.186, val_acc=0.942] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 27: 0%| | 0/30 [00:00, ?it/s, loss=0.241, v_num=0, val_loss=0.186, val_acc=0.942][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 27: 67%|██████▋ | 20/30 [00:01<00:00, 12.06it/s, loss=0.241, v_num=0, val_loss=0.186, val_acc=0.942]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 27: 67%|██████▋ | 20/30 [00:01<00:00, 12.05it/s, loss=0.207, v_num=0, val_loss=0.186, val_acc=0.942][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.61it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 27: 100%|██████████| 30/30 [00:02<00:00, 12.74it/s, loss=0.212, v_num=0, val_loss=0.182, val_acc=0.944]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 27: 100%|██████████| 30/30 [00:02<00:00, 12.73it/s, loss=0.212, v_num=0, val_loss=0.182, val_acc=0.944]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 27: 0%| | 0/30 [00:00, ?it/s, loss=0.212, v_num=0, val_loss=0.182, val_acc=0.944] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 28: 0%| | 0/30 [00:00, ?it/s, loss=0.212, v_num=0, val_loss=0.182, val_acc=0.944][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 28: 67%|██████▋ | 20/30 [00:01<00:00, 12.48it/s, loss=0.212, v_num=0, val_loss=0.182, val_acc=0.944]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 28: 67%|██████▋ | 20/30 [00:01<00:00, 12.48it/s, loss=0.219, v_num=0, val_loss=0.182, val_acc=0.944][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.57it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 28: 100%|██████████| 30/30 [00:02<00:00, 13.11it/s, loss=0.216, v_num=0, val_loss=0.178, val_acc=0.947]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 28: 100%|██████████| 30/30 [00:02<00:00, 13.11it/s, loss=0.216, v_num=0, val_loss=0.178, val_acc=0.947]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 28: 0%| | 0/30 [00:00, ?it/s, loss=0.216, v_num=0, val_loss=0.178, val_acc=0.947] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 29: 0%| | 0/30 [00:00, ?it/s, loss=0.216, v_num=0, val_loss=0.178, val_acc=0.947][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 29: 67%|██████▋ | 20/30 [00:01<00:00, 12.43it/s, loss=0.216, v_num=0, val_loss=0.178, val_acc=0.947]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 29: 67%|██████▋ | 20/30 [00:01<00:00, 12.43it/s, loss=0.218, v_num=0, val_loss=0.178, val_acc=0.947][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.27it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 29: 100%|██████████| 30/30 [00:02<00:00, 12.99it/s, loss=0.214, v_num=0, val_loss=0.174, val_acc=0.948]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 29: 100%|██████████| 30/30 [00:02<00:00, 12.98it/s, loss=0.214, v_num=0, val_loss=0.174, val_acc=0.948]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 29: 0%| | 0/30 [00:00, ?it/s, loss=0.214, v_num=0, val_loss=0.174, val_acc=0.948] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 30: 0%| | 0/30 [00:00, ?it/s, loss=0.214, v_num=0, val_loss=0.174, val_acc=0.948][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 30: 67%|██████▋ | 20/30 [00:01<00:00, 12.52it/s, loss=0.214, v_num=0, val_loss=0.174, val_acc=0.948]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 30: 67%|██████▋ | 20/30 [00:01<00:00, 12.51it/s, loss=0.205, v_num=0, val_loss=0.174, val_acc=0.948][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.59it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 30: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=0.199, v_num=0, val_loss=0.169, val_acc=0.949]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 30: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=0.199, v_num=0, val_loss=0.169, val_acc=0.949]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 30: 0%| | 0/30 [00:00, ?it/s, loss=0.199, v_num=0, val_loss=0.169, val_acc=0.949] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 31: 0%| | 0/30 [00:00, ?it/s, loss=0.199, v_num=0, val_loss=0.169, val_acc=0.949][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 31: 67%|██████▋ | 20/30 [00:01<00:00, 12.22it/s, loss=0.199, v_num=0, val_loss=0.169, val_acc=0.949]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 31: 67%|██████▋ | 20/30 [00:01<00:00, 12.21it/s, loss=0.198, v_num=0, val_loss=0.169, val_acc=0.949][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.59it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 31: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=0.2, v_num=0, val_loss=0.166, val_acc=0.951] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 31: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=0.2, v_num=0, val_loss=0.166, val_acc=0.951][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 31: 0%| | 0/30 [00:00, ?it/s, loss=0.2, v_num=0, val_loss=0.166, val_acc=0.951] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 32: 0%| | 0/30 [00:00, ?it/s, loss=0.2, v_num=0, val_loss=0.166, val_acc=0.951][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 32: 67%|██████▋ | 20/30 [00:01<00:00, 12.55it/s, loss=0.2, v_num=0, val_loss=0.166, val_acc=0.951]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 32: 67%|██████▋ | 20/30 [00:01<00:00, 12.55it/s, loss=0.193, v_num=0, val_loss=0.166, val_acc=0.951][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.68it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 32: 100%|██████████| 30/30 [00:02<00:00, 13.09it/s, loss=0.203, v_num=0, val_loss=0.163, val_acc=0.950]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 32: 100%|██████████| 30/30 [00:02<00:00, 13.09it/s, loss=0.203, v_num=0, val_loss=0.163, val_acc=0.950]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 32: 0%| | 0/30 [00:00, ?it/s, loss=0.203, v_num=0, val_loss=0.163, val_acc=0.950] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 33: 0%| | 0/30 [00:00, ?it/s, loss=0.203, v_num=0, val_loss=0.163, val_acc=0.950][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 33: 67%|██████▋ | 20/30 [00:01<00:00, 12.44it/s, loss=0.203, v_num=0, val_loss=0.163, val_acc=0.950]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 33: 67%|██████▋ | 20/30 [00:01<00:00, 12.44it/s, loss=0.205, v_num=0, val_loss=0.163, val_acc=0.950][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.73it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 33: 100%|██████████| 30/30 [00:02<00:00, 13.07it/s, loss=0.201, v_num=0, val_loss=0.159, val_acc=0.952]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 33: 100%|██████████| 30/30 [00:02<00:00, 13.07it/s, loss=0.201, v_num=0, val_loss=0.159, val_acc=0.952]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 33: 0%| | 0/30 [00:00, ?it/s, loss=0.201, v_num=0, val_loss=0.159, val_acc=0.952] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 34: 0%| | 0/30 [00:00, ?it/s, loss=0.201, v_num=0, val_loss=0.159, val_acc=0.952][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 34: 67%|██████▋ | 20/30 [00:01<00:00, 12.60it/s, loss=0.201, v_num=0, val_loss=0.159, val_acc=0.952]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 34: 67%|██████▋ | 20/30 [00:01<00:00, 12.60it/s, loss=0.183, v_num=0, val_loss=0.159, val_acc=0.952][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.69it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 34: 100%|██████████| 30/30 [00:02<00:00, 13.16it/s, loss=0.189, v_num=0, val_loss=0.156, val_acc=0.951]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 34: 100%|██████████| 30/30 [00:02<00:00, 13.15it/s, loss=0.189, v_num=0, val_loss=0.156, val_acc=0.951]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 34: 0%| | 0/30 [00:00, ?it/s, loss=0.189, v_num=0, val_loss=0.156, val_acc=0.951] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 35: 0%| | 0/30 [00:00, ?it/s, loss=0.189, v_num=0, val_loss=0.156, val_acc=0.951][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 35: 67%|██████▋ | 20/30 [00:01<00:00, 12.54it/s, loss=0.189, v_num=0, val_loss=0.156, val_acc=0.951]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 35: 67%|██████▋ | 20/30 [00:01<00:00, 12.54it/s, loss=0.182, v_num=0, val_loss=0.156, val_acc=0.951][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.73it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 35: 100%|██████████| 30/30 [00:02<00:00, 13.13it/s, loss=0.197, v_num=0, val_loss=0.153, val_acc=0.953]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 35: 100%|██████████| 30/30 [00:02<00:00, 13.12it/s, loss=0.197, v_num=0, val_loss=0.153, val_acc=0.953]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 35: 0%| | 0/30 [00:00, ?it/s, loss=0.197, v_num=0, val_loss=0.153, val_acc=0.953] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 36: 0%| | 0/30 [00:00, ?it/s, loss=0.197, v_num=0, val_loss=0.153, val_acc=0.953][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 36: 67%|██████▋ | 20/30 [00:01<00:00, 12.61it/s, loss=0.197, v_num=0, val_loss=0.153, val_acc=0.953]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 36: 67%|██████▋ | 20/30 [00:01<00:00, 12.61it/s, loss=0.206, v_num=0, val_loss=0.153, val_acc=0.953][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.73it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 36: 100%|██████████| 30/30 [00:02<00:00, 13.13it/s, loss=0.2, v_num=0, val_loss=0.150, val_acc=0.954] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 36: 100%|██████████| 30/30 [00:02<00:00, 13.13it/s, loss=0.2, v_num=0, val_loss=0.150, val_acc=0.954][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 36: 0%| | 0/30 [00:00, ?it/s, loss=0.2, v_num=0, val_loss=0.150, val_acc=0.954] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 37: 0%| | 0/30 [00:00, ?it/s, loss=0.2, v_num=0, val_loss=0.150, val_acc=0.954][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 37: 67%|██████▋ | 20/30 [00:01<00:00, 12.43it/s, loss=0.2, v_num=0, val_loss=0.150, val_acc=0.954]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 37: 67%|██████▋ | 20/30 [00:01<00:00, 12.43it/s, loss=0.178, v_num=0, val_loss=0.150, val_acc=0.954][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.39it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 37: 100%|██████████| 30/30 [00:02<00:00, 12.80it/s, loss=0.178, v_num=0, val_loss=0.147, val_acc=0.955]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 37: 100%|██████████| 30/30 [00:02<00:00, 12.79it/s, loss=0.178, v_num=0, val_loss=0.147, val_acc=0.955]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 37: 0%| | 0/30 [00:00, ?it/s, loss=0.178, v_num=0, val_loss=0.147, val_acc=0.955] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 38: 0%| | 0/30 [00:00, ?it/s, loss=0.178, v_num=0, val_loss=0.147, val_acc=0.955][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 38: 67%|██████▋ | 20/30 [00:01<00:00, 12.05it/s, loss=0.178, v_num=0, val_loss=0.147, val_acc=0.955]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 38: 67%|██████▋ | 20/30 [00:01<00:00, 12.05it/s, loss=0.173, v_num=0, val_loss=0.147, val_acc=0.955][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.72it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 38: 100%|██████████| 30/30 [00:02<00:00, 12.76it/s, loss=0.174, v_num=0, val_loss=0.144, val_acc=0.955]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 38: 100%|██████████| 30/30 [00:02<00:00, 12.75it/s, loss=0.174, v_num=0, val_loss=0.144, val_acc=0.955]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 38: 0%| | 0/30 [00:00, ?it/s, loss=0.174, v_num=0, val_loss=0.144, val_acc=0.955] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 39: 0%| | 0/30 [00:00, ?it/s, loss=0.174, v_num=0, val_loss=0.144, val_acc=0.955][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 39: 67%|██████▋ | 20/30 [00:01<00:00, 11.81it/s, loss=0.174, v_num=0, val_loss=0.144, val_acc=0.955]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 39: 67%|██████▋ | 20/30 [00:01<00:00, 11.80it/s, loss=0.176, v_num=0, val_loss=0.144, val_acc=0.955][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.69it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 39: 100%|██████████| 30/30 [00:02<00:00, 12.58it/s, loss=0.177, v_num=0, val_loss=0.142, val_acc=0.955]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 39: 100%|██████████| 30/30 [00:02<00:00, 12.57it/s, loss=0.177, v_num=0, val_loss=0.142, val_acc=0.955]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 39: 0%| | 0/30 [00:00, ?it/s, loss=0.177, v_num=0, val_loss=0.142, val_acc=0.955] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 40: 0%| | 0/30 [00:00, ?it/s, loss=0.177, v_num=0, val_loss=0.142, val_acc=0.955][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 40: 67%|██████▋ | 20/30 [00:01<00:00, 11.96it/s, loss=0.177, v_num=0, val_loss=0.142, val_acc=0.955]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 40: 67%|██████▋ | 20/30 [00:01<00:00, 11.95it/s, loss=0.178, v_num=0, val_loss=0.142, val_acc=0.955][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.93it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 40: 100%|██████████| 30/30 [00:02<00:00, 12.58it/s, loss=0.172, v_num=0, val_loss=0.138, val_acc=0.956]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 40: 100%|██████████| 30/30 [00:02<00:00, 12.57it/s, loss=0.172, v_num=0, val_loss=0.138, val_acc=0.956]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 40: 0%| | 0/30 [00:00, ?it/s, loss=0.172, v_num=0, val_loss=0.138, val_acc=0.956] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 41: 0%| | 0/30 [00:00, ?it/s, loss=0.172, v_num=0, val_loss=0.138, val_acc=0.956][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 41: 67%|██████▋ | 20/30 [00:01<00:00, 12.51it/s, loss=0.172, v_num=0, val_loss=0.138, val_acc=0.956]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 41: 67%|██████▋ | 20/30 [00:01<00:00, 12.50it/s, loss=0.169, v_num=0, val_loss=0.138, val_acc=0.956][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.62it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 41: 100%|██████████| 30/30 [00:02<00:00, 13.15it/s, loss=0.167, v_num=0, val_loss=0.136, val_acc=0.957]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 41: 100%|██████████| 30/30 [00:02<00:00, 13.14it/s, loss=0.167, v_num=0, val_loss=0.136, val_acc=0.957]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 41: 0%| | 0/30 [00:00, ?it/s, loss=0.167, v_num=0, val_loss=0.136, val_acc=0.957] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 42: 0%| | 0/30 [00:00, ?it/s, loss=0.167, v_num=0, val_loss=0.136, val_acc=0.957][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 42: 67%|██████▋ | 20/30 [00:01<00:00, 12.83it/s, loss=0.167, v_num=0, val_loss=0.136, val_acc=0.957]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 42: 67%|██████▋ | 20/30 [00:01<00:00, 12.82it/s, loss=0.173, v_num=0, val_loss=0.136, val_acc=0.957][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.61it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 42: 100%|██████████| 30/30 [00:02<00:00, 13.44it/s, loss=0.174, v_num=0, val_loss=0.133, val_acc=0.958]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 42: 100%|██████████| 30/30 [00:02<00:00, 13.43it/s, loss=0.174, v_num=0, val_loss=0.133, val_acc=0.958]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 42: 0%| | 0/30 [00:00, ?it/s, loss=0.174, v_num=0, val_loss=0.133, val_acc=0.958] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 43: 0%| | 0/30 [00:00, ?it/s, loss=0.174, v_num=0, val_loss=0.133, val_acc=0.958][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 43: 67%|██████▋ | 20/30 [00:01<00:00, 12.65it/s, loss=0.174, v_num=0, val_loss=0.133, val_acc=0.958]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 43: 67%|██████▋ | 20/30 [00:01<00:00, 12.64it/s, loss=0.169, v_num=0, val_loss=0.133, val_acc=0.958][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.80it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 43: 100%|██████████| 30/30 [00:02<00:00, 13.32it/s, loss=0.166, v_num=0, val_loss=0.131, val_acc=0.958]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 43: 100%|██████████| 30/30 [00:02<00:00, 13.32it/s, loss=0.166, v_num=0, val_loss=0.131, val_acc=0.958]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 43: 0%| | 0/30 [00:00, ?it/s, loss=0.166, v_num=0, val_loss=0.131, val_acc=0.958] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 44: 0%| | 0/30 [00:00, ?it/s, loss=0.166, v_num=0, val_loss=0.131, val_acc=0.958][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 44: 67%|██████▋ | 20/30 [00:01<00:00, 12.57it/s, loss=0.166, v_num=0, val_loss=0.131, val_acc=0.958]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 44: 67%|██████▋ | 20/30 [00:01<00:00, 12.57it/s, loss=0.159, v_num=0, val_loss=0.131, val_acc=0.958][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.79it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 44: 100%|██████████| 30/30 [00:02<00:00, 12.99it/s, loss=0.148, v_num=0, val_loss=0.129, val_acc=0.959]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 44: 100%|██████████| 30/30 [00:02<00:00, 12.98it/s, loss=0.148, v_num=0, val_loss=0.129, val_acc=0.959]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 44: 0%| | 0/30 [00:00, ?it/s, loss=0.148, v_num=0, val_loss=0.129, val_acc=0.959] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 45: 0%| | 0/30 [00:00, ?it/s, loss=0.148, v_num=0, val_loss=0.129, val_acc=0.959][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 45: 67%|██████▋ | 20/30 [00:01<00:00, 12.51it/s, loss=0.148, v_num=0, val_loss=0.129, val_acc=0.959]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 45: 67%|██████▋ | 20/30 [00:01<00:00, 12.50it/s, loss=0.145, v_num=0, val_loss=0.129, val_acc=0.959][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.81it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 45: 100%|██████████| 30/30 [00:02<00:00, 13.11it/s, loss=0.143, v_num=0, val_loss=0.126, val_acc=0.960]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 45: 100%|██████████| 30/30 [00:02<00:00, 13.10it/s, loss=0.143, v_num=0, val_loss=0.126, val_acc=0.960]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 45: 0%| | 0/30 [00:00, ?it/s, loss=0.143, v_num=0, val_loss=0.126, val_acc=0.960] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 46: 0%| | 0/30 [00:00, ?it/s, loss=0.143, v_num=0, val_loss=0.126, val_acc=0.960][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 46: 67%|██████▋ | 20/30 [00:01<00:00, 12.55it/s, loss=0.143, v_num=0, val_loss=0.126, val_acc=0.960]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 46: 67%|██████▋ | 20/30 [00:01<00:00, 12.54it/s, loss=0.154, v_num=0, val_loss=0.126, val_acc=0.960][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.48it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 46: 100%|██████████| 30/30 [00:02<00:00, 13.12it/s, loss=0.153, v_num=0, val_loss=0.124, val_acc=0.959]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 46: 100%|██████████| 30/30 [00:02<00:00, 13.11it/s, loss=0.153, v_num=0, val_loss=0.124, val_acc=0.959]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 46: 0%| | 0/30 [00:00, ?it/s, loss=0.153, v_num=0, val_loss=0.124, val_acc=0.959] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 47: 0%| | 0/30 [00:00, ?it/s, loss=0.153, v_num=0, val_loss=0.124, val_acc=0.959][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 47: 67%|██████▋ | 20/30 [00:01<00:00, 12.22it/s, loss=0.153, v_num=0, val_loss=0.124, val_acc=0.959]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 47: 67%|██████▋ | 20/30 [00:01<00:00, 12.22it/s, loss=0.152, v_num=0, val_loss=0.124, val_acc=0.959][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.76it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 47: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=0.148, v_num=0, val_loss=0.121, val_acc=0.962]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 47: 100%|██████████| 30/30 [00:02<00:00, 12.93it/s, loss=0.148, v_num=0, val_loss=0.121, val_acc=0.962]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 47: 0%| | 0/30 [00:00, ?it/s, loss=0.148, v_num=0, val_loss=0.121, val_acc=0.962] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 48: 0%| | 0/30 [00:00, ?it/s, loss=0.148, v_num=0, val_loss=0.121, val_acc=0.962][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 48: 67%|██████▋ | 20/30 [00:01<00:00, 12.72it/s, loss=0.148, v_num=0, val_loss=0.121, val_acc=0.962]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 48: 67%|██████▋ | 20/30 [00:01<00:00, 12.72it/s, loss=0.148, v_num=0, val_loss=0.121, val_acc=0.962][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.79it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 48: 100%|██████████| 30/30 [00:02<00:00, 13.32it/s, loss=0.145, v_num=0, val_loss=0.121, val_acc=0.961]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 48: 100%|██████████| 30/30 [00:02<00:00, 13.31it/s, loss=0.145, v_num=0, val_loss=0.121, val_acc=0.961]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 48: 0%| | 0/30 [00:00, ?it/s, loss=0.145, v_num=0, val_loss=0.121, val_acc=0.961] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 49: 0%| | 0/30 [00:00, ?it/s, loss=0.145, v_num=0, val_loss=0.121, val_acc=0.961][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 49: 67%|██████▋ | 20/30 [00:01<00:00, 12.58it/s, loss=0.145, v_num=0, val_loss=0.121, val_acc=0.961]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 49: 67%|██████▋ | 20/30 [00:01<00:00, 12.58it/s, loss=0.151, v_num=0, val_loss=0.121, val_acc=0.961][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.74it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 49: 100%|██████████| 30/30 [00:02<00:00, 13.16it/s, loss=0.154, v_num=0, val_loss=0.118, val_acc=0.963]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 49: 100%|██████████| 30/30 [00:02<00:00, 13.15it/s, loss=0.154, v_num=0, val_loss=0.118, val_acc=0.963]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 49: 0%| | 0/30 [00:00, ?it/s, loss=0.154, v_num=0, val_loss=0.118, val_acc=0.963] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 50: 0%| | 0/30 [00:00, ?it/s, loss=0.154, v_num=0, val_loss=0.118, val_acc=0.963][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 50: 67%|██████▋ | 20/30 [00:01<00:00, 12.53it/s, loss=0.154, v_num=0, val_loss=0.118, val_acc=0.963]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 50: 67%|██████▋ | 20/30 [00:01<00:00, 12.53it/s, loss=0.144, v_num=0, val_loss=0.118, val_acc=0.963][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.74it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 50: 100%|██████████| 30/30 [00:02<00:00, 13.10it/s, loss=0.138, v_num=0, val_loss=0.116, val_acc=0.964]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 50: 100%|██████████| 30/30 [00:02<00:00, 13.09it/s, loss=0.138, v_num=0, val_loss=0.116, val_acc=0.964]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 50: 0%| | 0/30 [00:00, ?it/s, loss=0.138, v_num=0, val_loss=0.116, val_acc=0.964] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 51: 0%| | 0/30 [00:00, ?it/s, loss=0.138, v_num=0, val_loss=0.116, val_acc=0.964][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 51: 67%|██████▋ | 20/30 [00:01<00:00, 12.57it/s, loss=0.138, v_num=0, val_loss=0.116, val_acc=0.964]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 51: 67%|██████▋ | 20/30 [00:01<00:00, 12.57it/s, loss=0.146, v_num=0, val_loss=0.116, val_acc=0.964][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.82it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 51: 100%|██████████| 30/30 [00:02<00:00, 13.21it/s, loss=0.141, v_num=0, val_loss=0.114, val_acc=0.963]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 51: 100%|██████████| 30/30 [00:02<00:00, 13.20it/s, loss=0.141, v_num=0, val_loss=0.114, val_acc=0.963]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 51: 0%| | 0/30 [00:00, ?it/s, loss=0.141, v_num=0, val_loss=0.114, val_acc=0.963] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 52: 0%| | 0/30 [00:00, ?it/s, loss=0.141, v_num=0, val_loss=0.114, val_acc=0.963][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 52: 67%|██████▋ | 20/30 [00:01<00:00, 12.47it/s, loss=0.141, v_num=0, val_loss=0.114, val_acc=0.963]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 52: 67%|██████▋ | 20/30 [00:01<00:00, 12.46it/s, loss=0.141, v_num=0, val_loss=0.114, val_acc=0.963][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.78it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 52: 100%|██████████| 30/30 [00:02<00:00, 13.05it/s, loss=0.151, v_num=0, val_loss=0.112, val_acc=0.964]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 52: 100%|██████████| 30/30 [00:02<00:00, 13.04it/s, loss=0.151, v_num=0, val_loss=0.112, val_acc=0.964]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 52: 0%| | 0/30 [00:00, ?it/s, loss=0.151, v_num=0, val_loss=0.112, val_acc=0.964] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 53: 0%| | 0/30 [00:00, ?it/s, loss=0.151, v_num=0, val_loss=0.112, val_acc=0.964][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 53: 67%|██████▋ | 20/30 [00:01<00:00, 12.27it/s, loss=0.151, v_num=0, val_loss=0.112, val_acc=0.964]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 53: 67%|██████▋ | 20/30 [00:01<00:00, 12.26it/s, loss=0.155, v_num=0, val_loss=0.112, val_acc=0.964][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.70it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 53: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=0.153, v_num=0, val_loss=0.110, val_acc=0.964]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 53: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=0.153, v_num=0, val_loss=0.110, val_acc=0.964]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 53: 0%| | 0/30 [00:00, ?it/s, loss=0.153, v_num=0, val_loss=0.110, val_acc=0.964] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 54: 0%| | 0/30 [00:00, ?it/s, loss=0.153, v_num=0, val_loss=0.110, val_acc=0.964][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 54: 67%|██████▋ | 20/30 [00:01<00:00, 12.44it/s, loss=0.153, v_num=0, val_loss=0.110, val_acc=0.964]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 54: 67%|██████▋ | 20/30 [00:01<00:00, 12.44it/s, loss=0.152, v_num=0, val_loss=0.110, val_acc=0.964][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.86it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 54: 100%|██████████| 30/30 [00:02<00:00, 12.97it/s, loss=0.151, v_num=0, val_loss=0.109, val_acc=0.965]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 54: 100%|██████████| 30/30 [00:02<00:00, 12.96it/s, loss=0.151, v_num=0, val_loss=0.109, val_acc=0.965][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 54: 0%| | 0/30 [00:00, ?it/s, loss=0.151, v_num=0, val_loss=0.109, val_acc=0.965] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 55: 0%| | 0/30 [00:00, ?it/s, loss=0.151, v_num=0, val_loss=0.109, val_acc=0.965][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 55: 67%|██████▋ | 20/30 [00:01<00:00, 12.70it/s, loss=0.151, v_num=0, val_loss=0.109, val_acc=0.965]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 55: 67%|██████▋ | 20/30 [00:01<00:00, 12.69it/s, loss=0.129, v_num=0, val_loss=0.109, val_acc=0.965][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.64it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 55: 100%|██████████| 30/30 [00:02<00:00, 13.30it/s, loss=0.128, v_num=0, val_loss=0.107, val_acc=0.966]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 55: 100%|██████████| 30/30 [00:02<00:00, 13.30it/s, loss=0.128, v_num=0, val_loss=0.107, val_acc=0.966]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 55: 0%| | 0/30 [00:00, ?it/s, loss=0.128, v_num=0, val_loss=0.107, val_acc=0.966] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 56: 0%| | 0/30 [00:00, ?it/s, loss=0.128, v_num=0, val_loss=0.107, val_acc=0.966][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 56: 67%|██████▋ | 20/30 [00:01<00:00, 12.38it/s, loss=0.128, v_num=0, val_loss=0.107, val_acc=0.966]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 56: 67%|██████▋ | 20/30 [00:01<00:00, 12.38it/s, loss=0.136, v_num=0, val_loss=0.107, val_acc=0.966][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.83it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 56: 100%|██████████| 30/30 [00:02<00:00, 13.05it/s, loss=0.125, v_num=0, val_loss=0.105, val_acc=0.967]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 56: 100%|██████████| 30/30 [00:02<00:00, 13.04it/s, loss=0.125, v_num=0, val_loss=0.105, val_acc=0.967]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 56: 0%| | 0/30 [00:00, ?it/s, loss=0.125, v_num=0, val_loss=0.105, val_acc=0.967] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 57: 0%| | 0/30 [00:00, ?it/s, loss=0.125, v_num=0, val_loss=0.105, val_acc=0.967][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 57: 67%|██████▋ | 20/30 [00:01<00:00, 12.41it/s, loss=0.125, v_num=0, val_loss=0.105, val_acc=0.967]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 57: 67%|██████▋ | 20/30 [00:01<00:00, 12.40it/s, loss=0.128, v_num=0, val_loss=0.105, val_acc=0.967][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.55it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 57: 100%|██████████| 30/30 [00:02<00:00, 13.05it/s, loss=0.134, v_num=0, val_loss=0.104, val_acc=0.968]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 57: 100%|██████████| 30/30 [00:02<00:00, 13.04it/s, loss=0.134, v_num=0, val_loss=0.104, val_acc=0.968][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 57: 0%| | 0/30 [00:00, ?it/s, loss=0.134, v_num=0, val_loss=0.104, val_acc=0.968] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 58: 0%| | 0/30 [00:00, ?it/s, loss=0.134, v_num=0, val_loss=0.104, val_acc=0.968][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 58: 67%|██████▋ | 20/30 [00:01<00:00, 12.58it/s, loss=0.134, v_num=0, val_loss=0.104, val_acc=0.968]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 58: 67%|██████▋ | 20/30 [00:01<00:00, 12.58it/s, loss=0.143, v_num=0, val_loss=0.104, val_acc=0.968][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.78it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 58: 100%|██████████| 30/30 [00:02<00:00, 13.16it/s, loss=0.142, v_num=0, val_loss=0.102, val_acc=0.968]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 58: 100%|██████████| 30/30 [00:02<00:00, 13.16it/s, loss=0.142, v_num=0, val_loss=0.102, val_acc=0.968]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 58: 0%| | 0/30 [00:00, ?it/s, loss=0.142, v_num=0, val_loss=0.102, val_acc=0.968] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 59: 0%| | 0/30 [00:00, ?it/s, loss=0.142, v_num=0, val_loss=0.102, val_acc=0.968][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 59: 67%|██████▋ | 20/30 [00:01<00:00, 12.61it/s, loss=0.142, v_num=0, val_loss=0.102, val_acc=0.968]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 59: 67%|██████▋ | 20/30 [00:01<00:00, 12.61it/s, loss=0.121, v_num=0, val_loss=0.102, val_acc=0.968][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.65it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 59: 100%|██████████| 30/30 [00:02<00:00, 13.18it/s, loss=0.121, v_num=0, val_loss=0.100, val_acc=0.968]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 59: 100%|██████████| 30/30 [00:02<00:00, 13.17it/s, loss=0.121, v_num=0, val_loss=0.100, val_acc=0.968]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 59: 0%| | 0/30 [00:00, ?it/s, loss=0.121, v_num=0, val_loss=0.100, val_acc=0.968] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 60: 0%| | 0/30 [00:00, ?it/s, loss=0.121, v_num=0, val_loss=0.100, val_acc=0.968][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 60: 67%|██████▋ | 20/30 [00:01<00:00, 12.32it/s, loss=0.121, v_num=0, val_loss=0.100, val_acc=0.968]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 60: 67%|██████▋ | 20/30 [00:01<00:00, 12.32it/s, loss=0.133, v_num=0, val_loss=0.100, val_acc=0.968][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.83it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 60: 100%|██████████| 30/30 [00:02<00:00, 12.99it/s, loss=0.139, v_num=0, val_loss=0.0989, val_acc=0.969]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 60: 100%|██████████| 30/30 [00:02<00:00, 12.98it/s, loss=0.139, v_num=0, val_loss=0.0989, val_acc=0.969][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 60: 0%| | 0/30 [00:00, ?it/s, loss=0.139, v_num=0, val_loss=0.0989, val_acc=0.969] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 61: 0%| | 0/30 [00:00, ?it/s, loss=0.139, v_num=0, val_loss=0.0989, val_acc=0.969][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 61: 67%|██████▋ | 20/30 [00:01<00:00, 12.66it/s, loss=0.139, v_num=0, val_loss=0.0989, val_acc=0.969]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 61: 67%|██████▋ | 20/30 [00:01<00:00, 12.65it/s, loss=0.137, v_num=0, val_loss=0.0989, val_acc=0.969][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.85it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 61: 100%|██████████| 30/30 [00:02<00:00, 13.09it/s, loss=0.143, v_num=0, val_loss=0.0964, val_acc=0.969]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 61: 100%|██████████| 30/30 [00:02<00:00, 13.09it/s, loss=0.143, v_num=0, val_loss=0.0964, val_acc=0.969][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 61: 0%| | 0/30 [00:00, ?it/s, loss=0.143, v_num=0, val_loss=0.0964, val_acc=0.969] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 62: 0%| | 0/30 [00:00, ?it/s, loss=0.143, v_num=0, val_loss=0.0964, val_acc=0.969][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 62: 67%|██████▋ | 20/30 [00:01<00:00, 12.63it/s, loss=0.143, v_num=0, val_loss=0.0964, val_acc=0.969]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 62: 67%|██████▋ | 20/30 [00:01<00:00, 12.62it/s, loss=0.136, v_num=0, val_loss=0.0964, val_acc=0.969][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.77it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 62: 100%|██████████| 30/30 [00:02<00:00, 13.16it/s, loss=0.132, v_num=0, val_loss=0.0958, val_acc=0.970]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 62: 100%|██████████| 30/30 [00:02<00:00, 13.15it/s, loss=0.132, v_num=0, val_loss=0.0958, val_acc=0.970][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 62: 0%| | 0/30 [00:00, ?it/s, loss=0.132, v_num=0, val_loss=0.0958, val_acc=0.970] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 63: 0%| | 0/30 [00:00, ?it/s, loss=0.132, v_num=0, val_loss=0.0958, val_acc=0.970][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 63: 67%|██████▋ | 20/30 [00:01<00:00, 12.32it/s, loss=0.132, v_num=0, val_loss=0.0958, val_acc=0.970]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 63: 67%|██████▋ | 20/30 [00:01<00:00, 12.32it/s, loss=0.115, v_num=0, val_loss=0.0958, val_acc=0.970][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.87it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 63: 100%|██████████| 30/30 [00:02<00:00, 12.72it/s, loss=0.12, v_num=0, val_loss=0.0946, val_acc=0.971] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 63: 100%|██████████| 30/30 [00:02<00:00, 12.71it/s, loss=0.12, v_num=0, val_loss=0.0946, val_acc=0.971]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 63: 0%| | 0/30 [00:00, ?it/s, loss=0.12, v_num=0, val_loss=0.0946, val_acc=0.971] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 64: 0%| | 0/30 [00:00, ?it/s, loss=0.12, v_num=0, val_loss=0.0946, val_acc=0.971][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 64: 67%|██████▋ | 20/30 [00:01<00:00, 12.55it/s, loss=0.12, v_num=0, val_loss=0.0946, val_acc=0.971]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 64: 67%|██████▋ | 20/30 [00:01<00:00, 12.55it/s, loss=0.114, v_num=0, val_loss=0.0946, val_acc=0.971][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.74it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 64: 100%|██████████| 30/30 [00:02<00:00, 13.12it/s, loss=0.12, v_num=0, val_loss=0.0933, val_acc=0.971] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 64: 100%|██████████| 30/30 [00:02<00:00, 13.11it/s, loss=0.12, v_num=0, val_loss=0.0933, val_acc=0.971]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 64: 0%| | 0/30 [00:00, ?it/s, loss=0.12, v_num=0, val_loss=0.0933, val_acc=0.971] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 65: 0%| | 0/30 [00:00, ?it/s, loss=0.12, v_num=0, val_loss=0.0933, val_acc=0.971][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 65: 67%|██████▋ | 20/30 [00:01<00:00, 12.59it/s, loss=0.12, v_num=0, val_loss=0.0933, val_acc=0.971]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 65: 67%|██████▋ | 20/30 [00:01<00:00, 12.58it/s, loss=0.12, v_num=0, val_loss=0.0933, val_acc=0.971][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.84it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 65: 100%|██████████| 30/30 [00:02<00:00, 13.17it/s, loss=0.119, v_num=0, val_loss=0.0909, val_acc=0.972]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 65: 100%|██████████| 30/30 [00:02<00:00, 13.16it/s, loss=0.119, v_num=0, val_loss=0.0909, val_acc=0.972]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 65: 0%| | 0/30 [00:00, ?it/s, loss=0.119, v_num=0, val_loss=0.0909, val_acc=0.972] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 66: 0%| | 0/30 [00:00, ?it/s, loss=0.119, v_num=0, val_loss=0.0909, val_acc=0.972][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 66: 67%|██████▋ | 20/30 [00:01<00:00, 12.02it/s, loss=0.119, v_num=0, val_loss=0.0909, val_acc=0.972]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 66: 67%|██████▋ | 20/30 [00:01<00:00, 12.02it/s, loss=0.118, v_num=0, val_loss=0.0909, val_acc=0.972][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.59it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 66: 100%|██████████| 30/30 [00:02<00:00, 12.76it/s, loss=0.111, v_num=0, val_loss=0.0897, val_acc=0.972]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 66: 100%|██████████| 30/30 [00:02<00:00, 12.76it/s, loss=0.111, v_num=0, val_loss=0.0897, val_acc=0.972]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 66: 0%| | 0/30 [00:00, ?it/s, loss=0.111, v_num=0, val_loss=0.0897, val_acc=0.972] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 67: 0%| | 0/30 [00:00, ?it/s, loss=0.111, v_num=0, val_loss=0.0897, val_acc=0.972][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 67: 67%|██████▋ | 20/30 [00:01<00:00, 12.56it/s, loss=0.111, v_num=0, val_loss=0.0897, val_acc=0.972]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 67: 67%|██████▋ | 20/30 [00:01<00:00, 12.56it/s, loss=0.114, v_num=0, val_loss=0.0897, val_acc=0.972][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.66it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 67: 100%|██████████| 30/30 [00:02<00:00, 12.95it/s, loss=0.114, v_num=0, val_loss=0.0885, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 67: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=0.114, v_num=0, val_loss=0.0885, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 67: 0%| | 0/30 [00:00, ?it/s, loss=0.114, v_num=0, val_loss=0.0885, val_acc=0.973] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 68: 0%| | 0/30 [00:00, ?it/s, loss=0.114, v_num=0, val_loss=0.0885, val_acc=0.973][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 68: 67%|██████▋ | 20/30 [00:01<00:00, 12.63it/s, loss=0.114, v_num=0, val_loss=0.0885, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 68: 67%|██████▋ | 20/30 [00:01<00:00, 12.63it/s, loss=0.122, v_num=0, val_loss=0.0885, val_acc=0.973][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.67it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 68: 100%|██████████| 30/30 [00:02<00:00, 12.97it/s, loss=0.122, v_num=0, val_loss=0.0876, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 68: 100%|██████████| 30/30 [00:02<00:00, 12.97it/s, loss=0.122, v_num=0, val_loss=0.0876, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 68: 0%| | 0/30 [00:00, ?it/s, loss=0.122, v_num=0, val_loss=0.0876, val_acc=0.973] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 69: 0%| | 0/30 [00:00, ?it/s, loss=0.122, v_num=0, val_loss=0.0876, val_acc=0.973][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 69: 67%|██████▋ | 20/30 [00:01<00:00, 12.58it/s, loss=0.122, v_num=0, val_loss=0.0876, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 69: 67%|██████▋ | 20/30 [00:01<00:00, 12.58it/s, loss=0.129, v_num=0, val_loss=0.0876, val_acc=0.973][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.71it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 69: 100%|██████████| 30/30 [00:02<00:00, 13.15it/s, loss=0.125, v_num=0, val_loss=0.0872, val_acc=0.972]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 69: 100%|██████████| 30/30 [00:02<00:00, 13.14it/s, loss=0.125, v_num=0, val_loss=0.0872, val_acc=0.972]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 69: 0%| | 0/30 [00:00, ?it/s, loss=0.125, v_num=0, val_loss=0.0872, val_acc=0.972] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 70: 0%| | 0/30 [00:00, ?it/s, loss=0.125, v_num=0, val_loss=0.0872, val_acc=0.972][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 70: 67%|██████▋ | 20/30 [00:01<00:00, 12.61it/s, loss=0.125, v_num=0, val_loss=0.0872, val_acc=0.972]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 70: 67%|██████▋ | 20/30 [00:01<00:00, 12.61it/s, loss=0.111, v_num=0, val_loss=0.0872, val_acc=0.972][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.75it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 70: 100%|██████████| 30/30 [00:02<00:00, 13.21it/s, loss=0.115, v_num=0, val_loss=0.0853, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 70: 100%|██████████| 30/30 [00:02<00:00, 13.20it/s, loss=0.115, v_num=0, val_loss=0.0853, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 70: 0%| | 0/30 [00:00, ?it/s, loss=0.115, v_num=0, val_loss=0.0853, val_acc=0.973] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 71: 0%| | 0/30 [00:00, ?it/s, loss=0.115, v_num=0, val_loss=0.0853, val_acc=0.973][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 71: 67%|██████▋ | 20/30 [00:01<00:00, 12.09it/s, loss=0.115, v_num=0, val_loss=0.0853, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 71: 67%|██████▋ | 20/30 [00:01<00:00, 12.09it/s, loss=0.113, v_num=0, val_loss=0.0853, val_acc=0.973][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.81it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 71: 100%|██████████| 30/30 [00:02<00:00, 12.84it/s, loss=0.115, v_num=0, val_loss=0.0841, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 71: 100%|██████████| 30/30 [00:02<00:00, 12.84it/s, loss=0.115, v_num=0, val_loss=0.0841, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 71: 0%| | 0/30 [00:00, ?it/s, loss=0.115, v_num=0, val_loss=0.0841, val_acc=0.973] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 72: 0%| | 0/30 [00:00, ?it/s, loss=0.115, v_num=0, val_loss=0.0841, val_acc=0.973][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 72: 67%|██████▋ | 20/30 [00:01<00:00, 12.41it/s, loss=0.115, v_num=0, val_loss=0.0841, val_acc=0.973]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 72: 67%|██████▋ | 20/30 [00:01<00:00, 12.41it/s, loss=0.12, v_num=0, val_loss=0.0841, val_acc=0.973] [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.71it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 72: 100%|██████████| 30/30 [00:02<00:00, 13.03it/s, loss=0.118, v_num=0, val_loss=0.0833, val_acc=0.974]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 72: 100%|██████████| 30/30 [00:02<00:00, 13.02it/s, loss=0.118, v_num=0, val_loss=0.0833, val_acc=0.974]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 72: 0%| | 0/30 [00:00, ?it/s, loss=0.118, v_num=0, val_loss=0.0833, val_acc=0.974] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 73: 0%| | 0/30 [00:00, ?it/s, loss=0.118, v_num=0, val_loss=0.0833, val_acc=0.974][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 73: 67%|██████▋ | 20/30 [00:01<00:00, 12.31it/s, loss=0.118, v_num=0, val_loss=0.0833, val_acc=0.974]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 73: 67%|██████▋ | 20/30 [00:01<00:00, 12.31it/s, loss=0.117, v_num=0, val_loss=0.0833, val_acc=0.974][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.69it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 73: 100%|██████████| 30/30 [00:02<00:00, 12.94it/s, loss=0.116, v_num=0, val_loss=0.0817, val_acc=0.975]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 73: 100%|██████████| 30/30 [00:02<00:00, 12.93it/s, loss=0.116, v_num=0, val_loss=0.0817, val_acc=0.975]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 73: 0%| | 0/30 [00:00, ?it/s, loss=0.116, v_num=0, val_loss=0.0817, val_acc=0.975] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 74: 0%| | 0/30 [00:00, ?it/s, loss=0.116, v_num=0, val_loss=0.0817, val_acc=0.975][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 74: 67%|██████▋ | 20/30 [00:01<00:00, 12.59it/s, loss=0.116, v_num=0, val_loss=0.0817, val_acc=0.975]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 74: 67%|██████▋ | 20/30 [00:01<00:00, 12.59it/s, loss=0.114, v_num=0, val_loss=0.0817, val_acc=0.975][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.44it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 74: 100%|██████████| 30/30 [00:02<00:00, 13.08it/s, loss=0.11, v_num=0, val_loss=0.0803, val_acc=0.976] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 74: 100%|██████████| 30/30 [00:02<00:00, 13.07it/s, loss=0.11, v_num=0, val_loss=0.0803, val_acc=0.976]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 74: 0%| | 0/30 [00:00, ?it/s, loss=0.11, v_num=0, val_loss=0.0803, val_acc=0.976] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 75: 0%| | 0/30 [00:00, ?it/s, loss=0.11, v_num=0, val_loss=0.0803, val_acc=0.976][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 75: 67%|██████▋ | 20/30 [00:01<00:00, 12.34it/s, loss=0.11, v_num=0, val_loss=0.0803, val_acc=0.976]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 75: 67%|██████▋ | 20/30 [00:01<00:00, 12.34it/s, loss=0.102, v_num=0, val_loss=0.0803, val_acc=0.976][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 16.46it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 75: 100%|██████████| 30/30 [00:02<00:00, 12.77it/s, loss=0.0973, v_num=0, val_loss=0.0803, val_acc=0.974]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 75: 100%|██████████| 30/30 [00:02<00:00, 12.77it/s, loss=0.0973, v_num=0, val_loss=0.0803, val_acc=0.974]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 75: 0%| | 0/30 [00:00, ?it/s, loss=0.0973, v_num=0, val_loss=0.0803, val_acc=0.974] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 76: 0%| | 0/30 [00:00, ?it/s, loss=0.0973, v_num=0, val_loss=0.0803, val_acc=0.974][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 76: 67%|██████▋ | 20/30 [00:01<00:00, 12.73it/s, loss=0.0973, v_num=0, val_loss=0.0803, val_acc=0.974]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 76: 67%|██████▋ | 20/30 [00:01<00:00, 12.72it/s, loss=0.106, v_num=0, val_loss=0.0803, val_acc=0.974] [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.82it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 76: 100%|██████████| 30/30 [00:02<00:00, 13.38it/s, loss=0.106, v_num=0, val_loss=0.0792, val_acc=0.975]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 76: 100%|██████████| 30/30 [00:02<00:00, 13.37it/s, loss=0.106, v_num=0, val_loss=0.0792, val_acc=0.975]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 76: 0%| | 0/30 [00:00, ?it/s, loss=0.106, v_num=0, val_loss=0.0792, val_acc=0.975] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 77: 0%| | 0/30 [00:00, ?it/s, loss=0.106, v_num=0, val_loss=0.0792, val_acc=0.975][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 77: 67%|██████▋ | 20/30 [00:01<00:00, 12.57it/s, loss=0.106, v_num=0, val_loss=0.0792, val_acc=0.975]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 77: 67%|██████▋ | 20/30 [00:01<00:00, 12.57it/s, loss=0.107, v_num=0, val_loss=0.0792, val_acc=0.975][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.31it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 77: 100%|██████████| 30/30 [00:02<00:00, 13.03it/s, loss=0.107, v_num=0, val_loss=0.0781, val_acc=0.976]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 77: 100%|██████████| 30/30 [00:02<00:00, 13.03it/s, loss=0.107, v_num=0, val_loss=0.0781, val_acc=0.976][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 77: 0%| | 0/30 [00:00, ?it/s, loss=0.107, v_num=0, val_loss=0.0781, val_acc=0.976] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 78: 0%| | 0/30 [00:00, ?it/s, loss=0.107, v_num=0, val_loss=0.0781, val_acc=0.976][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 78: 67%|██████▋ | 20/30 [00:01<00:00, 12.42it/s, loss=0.107, v_num=0, val_loss=0.0781, val_acc=0.976]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 78: 67%|██████▋ | 20/30 [00:01<00:00, 12.42it/s, loss=0.1, v_num=0, val_loss=0.0781, val_acc=0.976] [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.66it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 78: 100%|██████████| 30/30 [00:02<00:00, 13.08it/s, loss=0.102, v_num=0, val_loss=0.0763, val_acc=0.976]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 78: 100%|██████████| 30/30 [00:02<00:00, 13.08it/s, loss=0.102, v_num=0, val_loss=0.0763, val_acc=0.976][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 78: 0%| | 0/30 [00:00, ?it/s, loss=0.102, v_num=0, val_loss=0.0763, val_acc=0.976] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 79: 0%| | 0/30 [00:00, ?it/s, loss=0.102, v_num=0, val_loss=0.0763, val_acc=0.976][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 79: 67%|██████▋ | 20/30 [00:01<00:00, 12.51it/s, loss=0.102, v_num=0, val_loss=0.0763, val_acc=0.976]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 79: 67%|██████▋ | 20/30 [00:01<00:00, 12.50it/s, loss=0.118, v_num=0, val_loss=0.0763, val_acc=0.976][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.51it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 79: 100%|██████████| 30/30 [00:02<00:00, 13.15it/s, loss=0.106, v_num=0, val_loss=0.0762, val_acc=0.977]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 79: 100%|██████████| 30/30 [00:02<00:00, 13.14it/s, loss=0.106, v_num=0, val_loss=0.0762, val_acc=0.977][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 79: 0%| | 0/30 [00:00, ?it/s, loss=0.106, v_num=0, val_loss=0.0762, val_acc=0.977] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 80: 0%| | 0/30 [00:00, ?it/s, loss=0.106, v_num=0, val_loss=0.0762, val_acc=0.977][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 80: 67%|██████▋ | 20/30 [00:01<00:00, 12.58it/s, loss=0.106, v_num=0, val_loss=0.0762, val_acc=0.977]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 80: 67%|██████▋ | 20/30 [00:01<00:00, 12.57it/s, loss=0.106, v_num=0, val_loss=0.0762, val_acc=0.977][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.76it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 80: 100%|██████████| 30/30 [00:02<00:00, 13.06it/s, loss=0.106, v_num=0, val_loss=0.075, val_acc=0.977] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 80: 100%|██████████| 30/30 [00:02<00:00, 13.06it/s, loss=0.106, v_num=0, val_loss=0.075, val_acc=0.977]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 80: 0%| | 0/30 [00:00, ?it/s, loss=0.106, v_num=0, val_loss=0.075, val_acc=0.977] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 81: 0%| | 0/30 [00:00, ?it/s, loss=0.106, v_num=0, val_loss=0.075, val_acc=0.977][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 81: 67%|██████▋ | 20/30 [00:01<00:00, 12.39it/s, loss=0.106, v_num=0, val_loss=0.075, val_acc=0.977]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 81: 67%|██████▋ | 20/30 [00:01<00:00, 12.38it/s, loss=0.106, v_num=0, val_loss=0.075, val_acc=0.977][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.48it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 81: 100%|██████████| 30/30 [00:02<00:00, 13.05it/s, loss=0.105, v_num=0, val_loss=0.0738, val_acc=0.976]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 81: 100%|██████████| 30/30 [00:02<00:00, 13.04it/s, loss=0.105, v_num=0, val_loss=0.0738, val_acc=0.976]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 81: 0%| | 0/30 [00:00, ?it/s, loss=0.105, v_num=0, val_loss=0.0738, val_acc=0.976] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 82: 0%| | 0/30 [00:00, ?it/s, loss=0.105, v_num=0, val_loss=0.0738, val_acc=0.976][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 82: 67%|██████▋ | 20/30 [00:01<00:00, 12.35it/s, loss=0.105, v_num=0, val_loss=0.0738, val_acc=0.976]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 82: 67%|██████▋ | 20/30 [00:01<00:00, 12.35it/s, loss=0.105, v_num=0, val_loss=0.0738, val_acc=0.976][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.57it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 82: 100%|██████████| 30/30 [00:02<00:00, 12.98it/s, loss=0.102, v_num=0, val_loss=0.0729, val_acc=0.977]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 82: 100%|██████████| 30/30 [00:02<00:00, 12.97it/s, loss=0.102, v_num=0, val_loss=0.0729, val_acc=0.977]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 82: 0%| | 0/30 [00:00, ?it/s, loss=0.102, v_num=0, val_loss=0.0729, val_acc=0.977] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 83: 0%| | 0/30 [00:00, ?it/s, loss=0.102, v_num=0, val_loss=0.0729, val_acc=0.977][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 83: 67%|██████▋ | 20/30 [00:01<00:00, 12.74it/s, loss=0.102, v_num=0, val_loss=0.0729, val_acc=0.977]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 83: 67%|██████▋ | 20/30 [00:01<00:00, 12.74it/s, loss=0.105, v_num=0, val_loss=0.0729, val_acc=0.977][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.72it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 83: 100%|██████████| 30/30 [00:02<00:00, 13.17it/s, loss=0.103, v_num=0, val_loss=0.0722, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 83: 100%|██████████| 30/30 [00:02<00:00, 13.16it/s, loss=0.103, v_num=0, val_loss=0.0722, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 83: 0%| | 0/30 [00:00, ?it/s, loss=0.103, v_num=0, val_loss=0.0722, val_acc=0.979] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 84: 0%| | 0/30 [00:00, ?it/s, loss=0.103, v_num=0, val_loss=0.0722, val_acc=0.979][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 84: 67%|██████▋ | 20/30 [00:01<00:00, 12.58it/s, loss=0.103, v_num=0, val_loss=0.0722, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 84: 67%|██████▋ | 20/30 [00:01<00:00, 12.57it/s, loss=0.0952, v_num=0, val_loss=0.0722, val_acc=0.979][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.87it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 84: 100%|██████████| 30/30 [00:02<00:00, 13.11it/s, loss=0.0987, v_num=0, val_loss=0.0703, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 84: 100%|██████████| 30/30 [00:02<00:00, 13.11it/s, loss=0.0987, v_num=0, val_loss=0.0703, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 84: 0%| | 0/30 [00:00, ?it/s, loss=0.0987, v_num=0, val_loss=0.0703, val_acc=0.979] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 85: 0%| | 0/30 [00:00, ?it/s, loss=0.0987, v_num=0, val_loss=0.0703, val_acc=0.979][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 85: 67%|██████▋ | 20/30 [00:01<00:00, 12.56it/s, loss=0.0987, v_num=0, val_loss=0.0703, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 85: 67%|██████▋ | 20/30 [00:01<00:00, 12.56it/s, loss=0.0953, v_num=0, val_loss=0.0703, val_acc=0.979][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.79it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 85: 100%|██████████| 30/30 [00:02<00:00, 13.11it/s, loss=0.0925, v_num=0, val_loss=0.070, val_acc=0.980] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 85: 100%|██████████| 30/30 [00:02<00:00, 13.11it/s, loss=0.0925, v_num=0, val_loss=0.070, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 85: 0%| | 0/30 [00:00, ?it/s, loss=0.0925, v_num=0, val_loss=0.070, val_acc=0.980] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 86: 0%| | 0/30 [00:00, ?it/s, loss=0.0925, v_num=0, val_loss=0.070, val_acc=0.980][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 86: 67%|██████▋ | 20/30 [00:01<00:00, 12.09it/s, loss=0.0925, v_num=0, val_loss=0.070, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 86: 67%|██████▋ | 20/30 [00:01<00:00, 12.09it/s, loss=0.0999, v_num=0, val_loss=0.070, val_acc=0.980][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.42it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 86: 100%|██████████| 30/30 [00:02<00:00, 12.70it/s, loss=0.107, v_num=0, val_loss=0.0691, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 86: 100%|██████████| 30/30 [00:02<00:00, 12.69it/s, loss=0.107, v_num=0, val_loss=0.0691, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 86: 0%| | 0/30 [00:00, ?it/s, loss=0.107, v_num=0, val_loss=0.0691, val_acc=0.979] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 87: 0%| | 0/30 [00:00, ?it/s, loss=0.107, v_num=0, val_loss=0.0691, val_acc=0.979][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 87: 67%|██████▋ | 20/30 [00:01<00:00, 12.43it/s, loss=0.107, v_num=0, val_loss=0.0691, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 87: 67%|██████▋ | 20/30 [00:01<00:00, 12.43it/s, loss=0.0981, v_num=0, val_loss=0.0691, val_acc=0.979][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.62it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 87: 100%|██████████| 30/30 [00:02<00:00, 13.00it/s, loss=0.101, v_num=0, val_loss=0.0682, val_acc=0.980] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 87: 100%|██████████| 30/30 [00:02<00:00, 13.00it/s, loss=0.101, v_num=0, val_loss=0.0682, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 87: 0%| | 0/30 [00:00, ?it/s, loss=0.101, v_num=0, val_loss=0.0682, val_acc=0.980] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 88: 0%| | 0/30 [00:00, ?it/s, loss=0.101, v_num=0, val_loss=0.0682, val_acc=0.980][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 88: 67%|██████▋ | 20/30 [00:01<00:00, 12.35it/s, loss=0.101, v_num=0, val_loss=0.0682, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 88: 67%|██████▋ | 20/30 [00:01<00:00, 12.35it/s, loss=0.0962, v_num=0, val_loss=0.0682, val_acc=0.980][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 17.11it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 88: 100%|██████████| 30/30 [00:02<00:00, 12.90it/s, loss=0.0998, v_num=0, val_loss=0.0678, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 88: 100%|██████████| 30/30 [00:02<00:00, 12.90it/s, loss=0.0998, v_num=0, val_loss=0.0678, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 88: 0%| | 0/30 [00:00, ?it/s, loss=0.0998, v_num=0, val_loss=0.0678, val_acc=0.980] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 89: 0%| | 0/30 [00:00, ?it/s, loss=0.0998, v_num=0, val_loss=0.0678, val_acc=0.980][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 89: 67%|██████▋ | 20/30 [00:01<00:00, 12.26it/s, loss=0.0998, v_num=0, val_loss=0.0678, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 89: 67%|██████▋ | 20/30 [00:01<00:00, 12.25it/s, loss=0.0937, v_num=0, val_loss=0.0678, val_acc=0.980][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.53it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 89: 100%|██████████| 30/30 [00:02<00:00, 12.90it/s, loss=0.0936, v_num=0, val_loss=0.0669, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 89: 100%|██████████| 30/30 [00:02<00:00, 12.89it/s, loss=0.0936, v_num=0, val_loss=0.0669, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 89: 0%| | 0/30 [00:00, ?it/s, loss=0.0936, v_num=0, val_loss=0.0669, val_acc=0.980] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 90: 0%| | 0/30 [00:00, ?it/s, loss=0.0936, v_num=0, val_loss=0.0669, val_acc=0.980][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 90: 67%|██████▋ | 20/30 [00:01<00:00, 12.52it/s, loss=0.0936, v_num=0, val_loss=0.0669, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 90: 67%|██████▋ | 20/30 [00:01<00:00, 12.52it/s, loss=0.0899, v_num=0, val_loss=0.0669, val_acc=0.980][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.40it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 90: 100%|██████████| 30/30 [00:02<00:00, 13.04it/s, loss=0.0926, v_num=0, val_loss=0.0663, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 90: 100%|██████████| 30/30 [00:02<00:00, 13.03it/s, loss=0.0926, v_num=0, val_loss=0.0663, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 90: 0%| | 0/30 [00:00, ?it/s, loss=0.0926, v_num=0, val_loss=0.0663, val_acc=0.980] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 91: 0%| | 0/30 [00:00, ?it/s, loss=0.0926, v_num=0, val_loss=0.0663, val_acc=0.980][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 91: 67%|██████▋ | 20/30 [00:01<00:00, 12.59it/s, loss=0.0926, v_num=0, val_loss=0.0663, val_acc=0.980]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 91: 67%|██████▋ | 20/30 [00:01<00:00, 12.59it/s, loss=0.0976, v_num=0, val_loss=0.0663, val_acc=0.980][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.62it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 91: 100%|██████████| 30/30 [00:02<00:00, 13.14it/s, loss=0.092, v_num=0, val_loss=0.0647, val_acc=0.982] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 91: 100%|██████████| 30/30 [00:02<00:00, 13.14it/s, loss=0.092, v_num=0, val_loss=0.0647, val_acc=0.982]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 91: 0%| | 0/30 [00:00, ?it/s, loss=0.092, v_num=0, val_loss=0.0647, val_acc=0.982] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 92: 0%| | 0/30 [00:00, ?it/s, loss=0.092, v_num=0, val_loss=0.0647, val_acc=0.982][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 92: 67%|██████▋ | 20/30 [00:01<00:00, 12.32it/s, loss=0.092, v_num=0, val_loss=0.0647, val_acc=0.982]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 92: 67%|██████▋ | 20/30 [00:01<00:00, 12.31it/s, loss=0.0883, v_num=0, val_loss=0.0647, val_acc=0.982][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.72it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 92: 100%|██████████| 30/30 [00:02<00:00, 12.97it/s, loss=0.0853, v_num=0, val_loss=0.0646, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 92: 100%|██████████| 30/30 [00:02<00:00, 12.96it/s, loss=0.0853, v_num=0, val_loss=0.0646, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 92: 0%| | 0/30 [00:00, ?it/s, loss=0.0853, v_num=0, val_loss=0.0646, val_acc=0.979] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 93: 0%| | 0/30 [00:00, ?it/s, loss=0.0853, v_num=0, val_loss=0.0646, val_acc=0.979][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 93: 67%|██████▋ | 20/30 [00:01<00:00, 12.43it/s, loss=0.0853, v_num=0, val_loss=0.0646, val_acc=0.979]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 93: 67%|██████▋ | 20/30 [00:01<00:00, 12.42it/s, loss=0.0891, v_num=0, val_loss=0.0646, val_acc=0.979][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.78it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 93: 100%|██████████| 30/30 [00:02<00:00, 12.98it/s, loss=0.092, v_num=0, val_loss=0.0633, val_acc=0.981] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 93: 100%|██████████| 30/30 [00:02<00:00, 12.98it/s, loss=0.092, v_num=0, val_loss=0.0633, val_acc=0.981]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 93: 0%| | 0/30 [00:00, ?it/s, loss=0.092, v_num=0, val_loss=0.0633, val_acc=0.981] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 94: 0%| | 0/30 [00:00, ?it/s, loss=0.092, v_num=0, val_loss=0.0633, val_acc=0.981][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 94: 67%|██████▋ | 20/30 [00:01<00:00, 11.85it/s, loss=0.092, v_num=0, val_loss=0.0633, val_acc=0.981]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 94: 67%|██████▋ | 20/30 [00:01<00:00, 11.85it/s, loss=0.0881, v_num=0, val_loss=0.0633, val_acc=0.981][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.49it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 94: 100%|██████████| 30/30 [00:02<00:00, 12.52it/s, loss=0.092, v_num=0, val_loss=0.0627, val_acc=0.982] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:prior to running the trainer\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:MASTER_ADDR: is 10.131.66.16\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:MASTER_PORT: is 49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:GROUP: 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:LOCAL: 4\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Downloading MNIST dataset...\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [1]:prior to running the trainer\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [1]:MASTER_ADDR: is 10.131.66.16\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [1]:MASTER_PORT: is 49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [1]:GROUP: 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [1]:LOCAL: 4\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [2]:prior to running the trainer\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [2]:MASTER_ADDR: is 10.131.66.16\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [2]:MASTER_PORT: is 49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [2]:GROUP: 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [2]:LOCAL: 4\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [3]:prior to running the trainer\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [3]:MASTER_ADDR: is 10.131.66.16\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [3]:MASTER_PORT: is 49782\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [3]:GROUP: 2\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [3]:LOCAL: 4\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [1]:initializing distributed: GLOBAL_RANK: 5, MEMBER: 6/8\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [2]:initializing distributed: GLOBAL_RANK: 6, MEMBER: 7/8\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [3]:initializing distributed: GLOBAL_RANK: 7, MEMBER: 8/8\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]: 0%| | 0/9912422 [00:00, ?it/s][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]: 32%|███▏ | 3206144/9912422 [00:00<00:00, 32060476.88it/s][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:9913344it [00:00, 54643536.53it/s] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]: 0%| | 0/28881 [00:00, ?it/s]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:29696it [00:00, 3404883.73it/s] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]: 0%| | 0/1648877 [00:00, ?it/s][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]: 81%|████████▏ | 1342464/1648877 [00:00<00:00, 12934776.25it/s]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:1649664it [00:00, 14026877.81it/s] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]: 0%| | 0/4542 [00:00, ?it/s]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:5120it [00:00, 33979171.65it/s] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:initializing distributed: GLOBAL_RANK: 4, MEMBER: 5/8\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [1]:LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [1]:Missing logger folder: /tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d/lightning_logs\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [2]:LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [2]:Missing logger folder: /tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d/lightning_logs\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:Missing logger folder: /tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d/lightning_logs\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [3]:LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [3]:Missing logger folder: /tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d/lightning_logs\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [1]:[W reducer.cpp:1289] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [2]:[W reducer.cpp:1289] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [0]:[W reducer.cpp:1289] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m [3]:[W reducer.cpp:1289] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] worker group successfully finished. Waiting 300 seconds for other agents to finish.\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:Local worker group finished (SUCCEEDED). Waiting 300 seconds for other agents to finish\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.128.68.15)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:Done waiting for other agents. Elapsed: 0.0041942596435546875 seconds\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 94: 100%|██████████| 30/30 [00:02<00:00, 12.51it/s, loss=0.092, v_num=0, val_loss=0.0627, val_acc=0.982]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 94: 0%| | 0/30 [00:00, ?it/s, loss=0.092, v_num=0, val_loss=0.0627, val_acc=0.982] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 95: 0%| | 0/30 [00:00, ?it/s, loss=0.092, v_num=0, val_loss=0.0627, val_acc=0.982][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 95: 67%|██████▋ | 20/30 [00:01<00:00, 12.38it/s, loss=0.092, v_num=0, val_loss=0.0627, val_acc=0.982]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 95: 67%|██████▋ | 20/30 [00:01<00:00, 12.37it/s, loss=0.0935, v_num=0, val_loss=0.0627, val_acc=0.982][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.53it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 95: 100%|██████████| 30/30 [00:02<00:00, 13.08it/s, loss=0.0922, v_num=0, val_loss=0.0621, val_acc=0.982]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 95: 100%|██████████| 30/30 [00:02<00:00, 13.08it/s, loss=0.0922, v_num=0, val_loss=0.0621, val_acc=0.982]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 95: 0%| | 0/30 [00:00, ?it/s, loss=0.0922, v_num=0, val_loss=0.0621, val_acc=0.982] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 96: 0%| | 0/30 [00:00, ?it/s, loss=0.0922, v_num=0, val_loss=0.0621, val_acc=0.982][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 96: 67%|██████▋ | 20/30 [00:01<00:00, 12.59it/s, loss=0.0922, v_num=0, val_loss=0.0621, val_acc=0.982]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 96: 67%|██████▋ | 20/30 [00:01<00:00, 12.59it/s, loss=0.0862, v_num=0, val_loss=0.0621, val_acc=0.982][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.73it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 96: 100%|██████████| 30/30 [00:02<00:00, 13.16it/s, loss=0.0828, v_num=0, val_loss=0.0609, val_acc=0.983]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 96: 100%|██████████| 30/30 [00:02<00:00, 13.15it/s, loss=0.0828, v_num=0, val_loss=0.0609, val_acc=0.983]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 96: 0%| | 0/30 [00:00, ?it/s, loss=0.0828, v_num=0, val_loss=0.0609, val_acc=0.983] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 97: 0%| | 0/30 [00:00, ?it/s, loss=0.0828, v_num=0, val_loss=0.0609, val_acc=0.983][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 97: 67%|██████▋ | 20/30 [00:01<00:00, 12.52it/s, loss=0.0828, v_num=0, val_loss=0.0609, val_acc=0.983]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 97: 67%|██████▋ | 20/30 [00:01<00:00, 12.52it/s, loss=0.0949, v_num=0, val_loss=0.0609, val_acc=0.983][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.77it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 97: 100%|██████████| 30/30 [00:02<00:00, 13.18it/s, loss=0.0924, v_num=0, val_loss=0.0595, val_acc=0.982]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 97: 100%|██████████| 30/30 [00:02<00:00, 13.18it/s, loss=0.0924, v_num=0, val_loss=0.0595, val_acc=0.982]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 97: 0%| | 0/30 [00:00, ?it/s, loss=0.0924, v_num=0, val_loss=0.0595, val_acc=0.982] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 98: 0%| | 0/30 [00:00, ?it/s, loss=0.0924, v_num=0, val_loss=0.0595, val_acc=0.982][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 98: 67%|██████▋ | 20/30 [00:01<00:00, 12.33it/s, loss=0.0924, v_num=0, val_loss=0.0595, val_acc=0.982]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 98: 67%|██████▋ | 20/30 [00:01<00:00, 12.33it/s, loss=0.0817, v_num=0, val_loss=0.0595, val_acc=0.982][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 18.97it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 98: 100%|██████████| 30/30 [00:02<00:00, 12.97it/s, loss=0.0881, v_num=0, val_loss=0.0605, val_acc=0.981]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 98: 100%|██████████| 30/30 [00:02<00:00, 12.96it/s, loss=0.0881, v_num=0, val_loss=0.0605, val_acc=0.981][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 98: 0%| | 0/30 [00:00, ?it/s, loss=0.0881, v_num=0, val_loss=0.0605, val_acc=0.981] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 99: 0%| | 0/30 [00:00, ?it/s, loss=0.0881, v_num=0, val_loss=0.0605, val_acc=0.981][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 99: 67%|██████▋ | 20/30 [00:01<00:00, 12.74it/s, loss=0.0881, v_num=0, val_loss=0.0605, val_acc=0.981]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 99: 67%|██████▋ | 20/30 [00:01<00:00, 12.74it/s, loss=0.0761, v_num=0, val_loss=0.0605, val_acc=0.981][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0it [00:00, ?it/s]\u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 0%| | 0/3 [00:00, ?it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Validating: 100%|██████████| 3/3 [00:00<00:00, 19.83it/s]\u001b[A[0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 99: 100%|██████████| 30/30 [00:02<00:00, 13.37it/s, loss=0.0796, v_num=0, val_loss=0.0593, val_acc=0.983]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: \u001b[A\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 99: 100%|██████████| 30/30 [00:02<00:00, 13.36it/s, loss=0.0796, v_num=0, val_loss=0.0593, val_acc=0.983]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Epoch 99: 100%|██████████| 30/30 [00:02<00:00, 13.27it/s, loss=0.0796, v_num=0, val_loss=0.0593, val_acc=0.983]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:GPU available: True, used: True\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:TPU available: False, using: 0 TPU cores\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:IPU available: False, using: 0 IPUs\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [2]:initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/8\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [1]:initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/8\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [3]:initializing distributed: GLOBAL_RANK: 3, MEMBER: 4/8\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: 0%| | 0/9912422 [00:00, ?it/s][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: 61%|██████ | 6011904/9912422 [00:00<00:00, 59181211.37it/s][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:9913344it [00:00, 65856745.70it/s] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: 0%| | 0/28881 [00:00, ?it/s]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:29696it [00:00, 2800163.03it/s] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: 0%| | 0/1648877 [00:00, ?it/s][0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:1649664it [00:00, 27676879.96it/s] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: 0%| | 0/4542 [00:00, ?it/s]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:5120it [00:00, 35971250.39it/s] \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/8\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:----------------------------------------------------------------------------------------------------\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:distributed_backend=nccl\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:All distributed processes registered. Starting with 8 processes\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:----------------------------------------------------------------------------------------------------\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [2]:LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [2]:Missing logger folder: /tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d/lightning_logs\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [1]:LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [1]:Missing logger folder: /tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d/lightning_logs\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [3]:LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [3]:Missing logger folder: /tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d/lightning_logs\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:Missing logger folder: /tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/working_dir_files/_ray_pkg_ce2c3e935774455d/lightning_logs\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: | Name | Type | Params\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:---------------------------------------------\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:0 | model | Sequential | 55.1 K\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:1 | val_accuracy | Accuracy | 0 \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:2 | test_accuracy | Accuracy | 0 \n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:---------------------------------------------\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:55.1 K Trainable params\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:0 Non-trainable params\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:55.1 K Total params\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:0.220 Total estimated model params size (MB)\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:/tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/pip/3510e0c008a5c3627e4d2408c8b93ed71be6c3e1/virtualenv/lib/python3.8/site-packages/pytorch_lightning/trainer/data_loading.py:132: UserWarning: The dataloader, val_dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 32 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: rank_zero_warn(\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:/tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/pip/3510e0c008a5c3627e4d2408c8b93ed71be6c3e1/virtualenv/lib/python3.8/site-packages/pytorch_lightning/trainer/data_loading.py:132: UserWarning: The dataloader, train_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 32 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: rank_zero_warn(\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:/tmp/ray/session_2022-11-04_08-02-48_207951_7/runtime_resources/pip/3510e0c008a5c3627e4d2408c8b93ed71be6c3e1/virtualenv/lib/python3.8/site-packages/pytorch_lightning/trainer/data_loading.py:432: UserWarning: The number of training samples (27) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]: rank_zero_warn(\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [2]:[W reducer.cpp:1289] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [1]:[W reducer.cpp:1289] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [3]:[W reducer.cpp:1289] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m [0]:[W reducer.cpp:1289] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:[] worker group successfully finished. Waiting 300 seconds for other agents to finish.\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:Local worker group finished (SUCCEEDED). Waiting 300 seconds for other agents to finish\n",
+ "\u001b[2m\u001b[36m(CommandActor pid=123, ip=10.131.66.16)\u001b[0m INFO:torch.distributed.elastic.agent.server.api:Done waiting for other agents. Elapsed: 0.0004851818084716797 seconds\n",
+ "running ray.wait on [ObjectRef(32b0eec39cfa87ac523554acce28b667f9bc98bb0200000001000000)]\n",
+ "\u001b[0m"
+ ]
+ }
+ ],
+ "source": [
+ "cluster.job_logs(\"mnist-jlm13hx5g53mk\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5af8cd32",
+ "metadata": {},
+ "source": [
+ "Finally, we bring our resource cluster down and release/terminate the associated resources, bringing everything back to the way it was before our cluster was brought up."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cluster.down()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.9.7 64-bit",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/demo-notebooks/batch-job/mnist.py b/demo-notebooks/batch-job/mnist.py
new file mode 100644
index 000000000..98984d5f1
--- /dev/null
+++ b/demo-notebooks/batch-job/mnist.py
@@ -0,0 +1,163 @@
+# Copyright 2022 IBM, Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# In[]
+import os
+
+import torch
+from pytorch_lightning import LightningModule, Trainer
+from pytorch_lightning.callbacks.progress import TQDMProgressBar
+from pytorch_lightning.loggers import CSVLogger
+from torch import nn
+from torch.nn import functional as F
+from torch.utils.data import DataLoader, random_split
+from torchmetrics import Accuracy
+from torchvision import transforms
+from torchvision.datasets import MNIST
+
+PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
+BATCH_SIZE = 256 if torch.cuda.is_available() else 64
+# %%
+
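+# The environment variables read below are provided by the distributed launcher
+# (torchx / torch.distributed.elastic when this script is submitted from the
+# batch-job notebook): MASTER_ADDR and MASTER_PORT give the rendezvous endpoint
+# for torch.distributed, while GROUP_WORLD_SIZE and LOCAL_WORLD_SIZE (used further
+# down) give the number of nodes and the number of workers per node.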
+print("prior to running the trainer")
+print("MASTER_ADDR: is ", os.getenv("MASTER_ADDR"))
+print("MASTER_PORT: is ", os.getenv("MASTER_PORT"))
+
+
+class LitMNIST(LightningModule):
+ def __init__(self, data_dir=PATH_DATASETS, hidden_size=64, learning_rate=2e-4):
+
+ super().__init__()
+
+ # Set our init args as class attributes
+ self.data_dir = data_dir
+ self.hidden_size = hidden_size
+ self.learning_rate = learning_rate
+
+ # Hardcode some dataset specific attributes
+ self.num_classes = 10
+ self.dims = (1, 28, 28)
+ channels, width, height = self.dims
+ self.transform = transforms.Compose(
+ [
+ transforms.ToTensor(),
+ transforms.Normalize((0.1307,), (0.3081,)),
+ ]
+ )
+
+ # Define PyTorch model
+ self.model = nn.Sequential(
+ nn.Flatten(),
+ nn.Linear(channels * width * height, hidden_size),
+ nn.ReLU(),
+ nn.Dropout(0.1),
+ nn.Linear(hidden_size, hidden_size),
+ nn.ReLU(),
+ nn.Dropout(0.1),
+ nn.Linear(hidden_size, self.num_classes),
+ )
+
+ self.val_accuracy = Accuracy()
+ self.test_accuracy = Accuracy()
+
+ def forward(self, x):
+ x = self.model(x)
+ return F.log_softmax(x, dim=1)
+
+ def training_step(self, batch, batch_idx):
+ x, y = batch
+ logits = self(x)
+ loss = F.nll_loss(logits, y)
+ return loss
+
+ def validation_step(self, batch, batch_idx):
+ x, y = batch
+ logits = self(x)
+ loss = F.nll_loss(logits, y)
+ preds = torch.argmax(logits, dim=1)
+ self.val_accuracy.update(preds, y)
+
+ # Calling self.log will surface up scalars for you in TensorBoard
+ self.log("val_loss", loss, prog_bar=True)
+ self.log("val_acc", self.val_accuracy, prog_bar=True)
+
+ def test_step(self, batch, batch_idx):
+ x, y = batch
+ logits = self(x)
+ loss = F.nll_loss(logits, y)
+ preds = torch.argmax(logits, dim=1)
+ self.test_accuracy.update(preds, y)
+
+ # Calling self.log will surface up scalars for you in TensorBoard
+ self.log("test_loss", loss, prog_bar=True)
+ self.log("test_acc", self.test_accuracy, prog_bar=True)
+
+ def configure_optimizers(self):
+ optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
+ return optimizer
+
+ ####################
+ # DATA RELATED HOOKS
+ ####################
+
+ def prepare_data(self):
+ # download
+ print("Downloading MNIST dataset...")
+ MNIST(self.data_dir, train=True, download=True)
+ MNIST(self.data_dir, train=False, download=True)
+
+ def setup(self, stage=None):
+
+ # Assign train/val datasets for use in dataloaders
+ if stage == "fit" or stage is None:
+ mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
+ self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])
+
+ # Assign test dataset for use in dataloader(s)
+ if stage == "test" or stage is None:
+ self.mnist_test = MNIST(
+ self.data_dir, train=False, transform=self.transform
+ )
+
+ def train_dataloader(self):
+ return DataLoader(self.mnist_train, batch_size=BATCH_SIZE)
+
+ def val_dataloader(self):
+ return DataLoader(self.mnist_val, batch_size=BATCH_SIZE)
+
+ def test_dataloader(self):
+ return DataLoader(self.mnist_test, batch_size=BATCH_SIZE)
+
+
+# Init DataLoader from MNIST Dataset
+
+model = LitMNIST()
+
+print("GROUP: ", int(os.environ.get("GROUP_WORLD_SIZE", 1)))
+print("LOCAL: ", int(os.environ.get("LOCAL_WORLD_SIZE", 1)))
+
+# Initialize a trainer
+trainer = Trainer(
+ accelerator="auto",
+    # devices=1 if torch.cuda.is_available() else None,  # limiting for iPython runs
+ max_epochs=100,
+ callbacks=[TQDMProgressBar(refresh_rate=20)],
+ num_nodes=int(os.environ.get("GROUP_WORLD_SIZE", 1)),
+ devices=int(os.environ.get("LOCAL_WORLD_SIZE", 1)),
+ strategy="ddp",
+)
+
+# Train the model ⚡
+trainer.fit(model)
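+
+# Note: num_nodes and devices above come from GROUP_WORLD_SIZE / LOCAL_WORLD_SIZE,
+# so the same script runs unchanged as a single local process (both default to 1
+# when no launcher sets them) or across multiple nodes/GPUs, e.g. the 2 nodes x 4
+# GPUs per node seen in the batch-job notebook logs.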
diff --git a/demo-notebooks/batch-job/requirements.txt b/demo-notebooks/batch-job/requirements.txt
new file mode 100644
index 000000000..073e4247a
--- /dev/null
+++ b/demo-notebooks/batch-job/requirements.txt
@@ -0,0 +1,4 @@
+pytorch_lightning==1.5.10
+ray_lightning
+torchmetrics==0.9.1
+torchvision==0.12.0
\ No newline at end of file
diff --git a/demo-notebooks/interactive/hf_interactive.ipynb b/demo-notebooks/interactive/hf_interactive.ipynb
new file mode 100644
index 000000000..b107d1ac3
--- /dev/null
+++ b/demo-notebooks/interactive/hf_interactive.ipynb
@@ -0,0 +1,1429 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d3260669-c7ec-4d06-a655-590c5e7ab152",
+ "metadata": {},
+ "source": [
+ "# Transfer learning with Huggingface using CodeFlare"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d4acfb10-1aa1-445d-947e-396ea5ebed1a",
+ "metadata": {},
+ "source": [
+ "In this notebook you will learn how to leverage the **[huggingface](https://huggingface.co/)** support in ray ecosystem to carry out a text classification task using transfer learning. We will be referencing the example **[here](https://huggingface.co/docs/transformers/tasks/sequence_classification)**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "70b77929-e96c-434e-ada3-8b14795bfbb1",
+ "metadata": {},
+ "source": [
+ "The example carries out a text classification task on **[imdb dataset](https://huggingface.co/datasets/imdb)** and tries to classify the movie reviews as positive or negative. Huggingface library provides an easy way to build a model and the dataset to carry out this classification task. In this case we will be using **distilbert-base-uncased** model which is a **BERT** based model.\n",
+ "\n",
+ "Huggingface has a **[built in support for ray ecosystem](https://docs.ray.io/en/releases-1.13.0/_modules/ray/ml/train/integrations/huggingface/huggingface_trainer.html)** which allows the huggingface trainer to scale on CodeFlare and can scale the training as we add additional gpus and can run distributed training across multiple GPUs that will help scale out the training.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "02593d04-40b9-4a07-a32e-40b649444ab5",
+ "metadata": {},
+ "source": [
+ "### Getting all the requirements in place"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "c737a768-6e31-4767-a301-60ae932b4ed9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import pieces from codeflare-sdk\n",
+ "from codeflare_sdk.cluster.cluster import Cluster, ClusterConfiguration\n",
+ "from codeflare_sdk.cluster.auth import TokenAuthentication"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0734734b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create authentication object for oc user permissions\n",
+ "auth = TokenAuthentication(\n",
+ " token = \"XXXX\",\n",
+ " server = \"XXXX\",\n",
+ " skip_tls = True\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bc27f84c",
+ "metadata": {},
+ "source": [
+ "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding AppWrapper)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "220b9d85-3a3c-4c0c-aaf2-0d866823dcd8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Written to: hfgputest.yaml\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Create our cluster and submit appwrapper\n",
+ "cluster = Cluster(ClusterConfiguration(name='hfgputest', min_worker=1, max_worker=1, min_cpus=8, max_cpus=8, min_memory=16, max_memory=16, gpu=4, instascale=True, machine_types=[\"m5.xlarge\", \"p3.8xlarge\"], auth=auth))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "12eef53c",
+ "metadata": {},
+ "source": [
+ "Next, we want to bring our cluster up, so we call the `up()` function below to submit our cluster AppWrapper yaml onto the MCAD queue, and begin the process of obtaining our resource cluster."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2ae1d861-b743-4c05-903b-5799072b942b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cluster.up()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "657ebdfb",
+ "metadata": {},
+ "source": [
+ "Now, we want to check on the status of our resource cluster, until it is finally ready for use."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "4d0db5f5-22f1-4806-ae7e-a0ee865625c1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "╭─────────────────────────╮\n",
+ "│ 🚀 List of CodeFlare │\n",
+ "│ clusters in queue🚀 │\n",
+ "│ +-----------+---------+ │\n",
+ "│ | Name | Status | │\n",
+ "│ +===========+=========+ │\n",
+ "│ | hfgputest | pending | │\n",
+ "│ | | | │\n",
+ "│ +-----------+---------+ │\n",
+ "╰─────────────────────────╯\n",
+       "\n"
+ ],
+ "text/plain": [
+ "╭─────────────────────────╮\n",
+ "│ \u001b[3m \u001b[0m\u001b[1;3m 🚀 List of CodeFlare\u001b[0m\u001b[3m \u001b[0m │\n",
+ "│ \u001b[3m \u001b[0m\u001b[1;3mclusters in queue🚀\u001b[0m\u001b[3m \u001b[0m │\n",
+ "│ +-----------+---------+ │\n",
+ "│ |\u001b[1m \u001b[0m\u001b[1mName \u001b[0m\u001b[1m \u001b[0m|\u001b[1m \u001b[0m\u001b[1mStatus \u001b[0m\u001b[1m \u001b[0m| │\n",
+ "│ +===========+=========+ │\n",
+ "│ |\u001b[36m \u001b[0m\u001b[36mhfgputest\u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35mpending\u001b[0m\u001b[35m \u001b[0m| │\n",
+ "│ |\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m|\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m| │\n",
+ "│ +-----------+---------+ │\n",
+ "╰─────────────────────────╯\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(False, )"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cluster.is_ready()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "06a54428-f186-4c27-948e-4eaf9c0e34b5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " 🚀 List of CodeFlare clusters 🚀 \n",
+ " \n",
+ " ╭────────────────────────────────────────────────────────────────╮ \n",
+ " │ Owner │ \n",
+ " │ hfgputest Active ✅ │ \n",
+ " │ │ \n",
+ " │ URI: ray://hfgputest-head-svc.default.svc:10001 │ \n",
+ " │ │ \n",
+ " │ Dashboard🔗 │ \n",
+ " │ │ \n",
+ " │ Cluster Resources │ \n",
+ " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n",
+ " │ │ Min Max │ │ Memory CPU GPU │ │ \n",
+ " │ │ │ │ │ │ \n",
+ " │ │ 1 1 │ │ 16G~16G 8 4 │ │ \n",
+ " │ │ │ │ │ │ \n",
+ " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n",
+ " ╰────────────────────────────────────────────────────────────────╯ \n",
+       "\n"
+ ],
+ "text/plain": [
+ "\u001b[3m \u001b[0m\u001b[1;3m 🚀 List of CodeFlare clusters 🚀\u001b[0m\u001b[3m \u001b[0m\n",
+ "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n",
+ " ╭────────────────────────────────────────────────────────────────╮ \n",
+ " │ \u001b[1;37;42mOwner\u001b[0m │ \n",
+ " │ \u001b[1;4mhfgputest\u001b[0m Active ✅ │ \n",
+ " │ │ \n",
+ " │ \u001b[1mURI:\u001b[0m ray://hfgputest-head-svc.default.svc:10001 │ \n",
+ " │ │ \n",
+ " │ \u001b]8;id=552692;ray-dashboard-hfgputest-default.apps.prepfullinstall.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n",
+ " │ │ \n",
+ " │ \u001b[3m Cluster Resources \u001b[0m │ \n",
+ " │ ╭─ Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n",
+ " │ │ \u001b[1m \u001b[0m\u001b[1mMin\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mMax\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n",
+ " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n",
+ " │ │ \u001b[36m \u001b[0m\u001b[36m1 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m16G~16G \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m8 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m4 \u001b[0m\u001b[35m \u001b[0m │ │ \n",
+ " │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n",
+ " │ ╰────────────╯ ╰──────────────────────────────────────╯ │ \n",
+ " ╰────────────────────────────────────────────────────────────────╯ \n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cluster.status()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "8ac46c87-70f1-4c70-9648-881151665355",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ray_cluster_uri = cluster.cluster_uri()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "44dba6a0-8275-4726-8911-6b6ec467b6a3",
+ "metadata": {},
+ "source": [
+ "**NOTE**: Now we have our resource cluster with the desired GPUs, so we can interact with it to train the HuggingFace model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "4c458589-5a17-47c6-a8db-625427ae4fe7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Ray cluster is up and running: True\n"
+ ]
+ }
+ ],
+ "source": [
+ "#before proceeding make sure the cluster exists and the uri is not empty\n",
+ "assert ray_cluster_uri, \"Ray cluster needs to be started and set before proceeding\"\n",
+ "\n",
+ "import ray\n",
+ "from ray.air.config import ScalingConfig\n",
+ "\n",
+ "# reset the ray context in case there's already one. \n",
+ "ray.shutdown()\n",
+ "# establish connection to ray cluster\n",
+ "\n",
+ "#install additionall libraries that will be required for this training\n",
+ "runtime_env = {\"pip\": [\"transformers\", \"datasets\", \"evaluate\"]}\n",
+ "\n",
+ "ray.init(address=f'{ray_cluster_uri}', runtime_env=runtime_env)\n",
+ "\n",
+ "print(\"Ray cluster is up and running: \", ray.is_initialized())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "94a38146-1321-4b7b-9152-9ebca4eb9444",
+ "metadata": {},
+ "source": [
+ "**NOTE** : in this case since we are running a task for which we need additional pip packages. we can install those by passing them in the `runtime_env` variable"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "76a1945b-d6c8-49b8-9a4c-b82724cffba9",
+ "metadata": {},
+ "source": [
+ "### Transfer learning code from huggingface"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8bdbe888-4f38-4e9a-ae43-67ce89ff9d42",
+ "metadata": {},
+ "source": [
+ "We are using the code based on the example **[here](https://huggingface.co/docs/transformers/tasks/sequence_classification)** . "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "e69994b4-1a13-43fe-b698-2a5374cb941b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@ray.remote\n",
+ "def train_fn():\n",
+ " from datasets import load_dataset\n",
+ " import transformers\n",
+ " from transformers import AutoTokenizer, TrainingArguments\n",
+ " from transformers import AutoModelForSequenceClassification\n",
+ " import numpy as np\n",
+ " from datasets import load_metric\n",
+ " import ray\n",
+ " from ray import tune\n",
+ " from ray.train.huggingface import HuggingFaceTrainer\n",
+ "\n",
+ " dataset = load_dataset(\"imdb\")\n",
+ " tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased\")\n",
+ "\n",
+ " def tokenize_function(examples):\n",
+ " return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True)\n",
+ "\n",
+ " tokenized_datasets = dataset.map(tokenize_function, batched=True)\n",
+ "\n",
+ " #using a fraction of dataset but you can run with the full dataset\n",
+ " small_train_dataset = tokenized_datasets[\"train\"].shuffle(seed=42).select(range(100))\n",
+ " small_eval_dataset = tokenized_datasets[\"test\"].shuffle(seed=42).select(range(100))\n",
+ "\n",
+ " print(f\"len of train {small_train_dataset} and test {small_eval_dataset}\")\n",
+ "\n",
+ " ray_train_ds = ray.data.from_huggingface(small_train_dataset)\n",
+ " ray_evaluation_ds = ray.data.from_huggingface(small_eval_dataset)\n",
+ "\n",
+ " def compute_metrics(eval_pred):\n",
+ " metric = load_metric(\"accuracy\")\n",
+ " logits, labels = eval_pred\n",
+ " predictions = np.argmax(logits, axis=-1)\n",
+ " return metric.compute(predictions=predictions, references=labels)\n",
+ "\n",
+ " def trainer_init_per_worker(train_dataset, eval_dataset, **config):\n",
+ " model = AutoModelForSequenceClassification.from_pretrained(\"distilbert-base-uncased\", num_labels=2)\n",
+ "\n",
+ " training_args = TrainingArguments(\"/tmp/hf_imdb/test\", eval_steps=1, disable_tqdm=True, \n",
+ " num_train_epochs=1, skip_memory_metrics=True,\n",
+ " learning_rate=2e-5,\n",
+ " per_device_train_batch_size=16,\n",
+ " per_device_eval_batch_size=16, \n",
+ " weight_decay=0.01,)\n",
+ " return transformers.Trainer(\n",
+ " model=model,\n",
+ " args=training_args,\n",
+ " train_dataset=train_dataset,\n",
+ " eval_dataset=eval_dataset,\n",
+ " compute_metrics=compute_metrics\n",
+ " )\n",
+ "\n",
+ " scaling_config = ScalingConfig(num_workers=4, use_gpu=True) #num workers is the number of gpus\n",
+ "\n",
+ " # we are using the ray native HuggingFaceTrainer, but you can swap out to use non ray Huggingface Trainer. Both have the same method signature. \n",
+ " # the ray native HFTrainer has built in support for scaling to multiple GPUs\n",
+ " trainer = HuggingFaceTrainer(\n",
+ " trainer_init_per_worker=trainer_init_per_worker,\n",
+ " scaling_config=scaling_config,\n",
+ " datasets={\"train\": ray_train_ds, \"evaluation\": ray_evaluation_ds},\n",
+ " )\n",
+ " result = trainer.fit()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f9593fee-2b2b-415f-8902-bceec014385f",
+ "metadata": {},
+ "source": [
+ "**NOTE:** This code will produce a lot of output and will run for **approximately 2 minutes.** As a part of execution it will download the `imdb` dataset, `distilbert-base-uncased` model and then will start transfer learning task for training the model with this dataset. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "7f0985e9-5e88-4d36-ab38-c3001c13f97c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Downloading builder script: 100%|██████████| 4.31k/4.31k [00:00<00:00, 5.60MB/s]\n",
+ "Downloading metadata: 100%|██████████| 2.17k/2.17k [00:00<00:00, 3.13MB/s]\n",
+ "Downloading readme: 100%|██████████| 7.59k/7.59k [00:00<00:00, 9.75MB/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Downloading and preparing dataset imdb/plain_text to /home/ray/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1...\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Downloading data: 0%| | 0.00/84.1M [00:00, ?B/s]\n",
+ "Downloading data: 0%| | 30.7k/84.1M [00:00<05:22, 261kB/s]\n",
+ "Downloading data: 0%| | 89.1k/84.1M [00:00<03:31, 397kB/s]\n",
+ "Downloading data: 0%| | 184k/84.1M [00:00<02:24, 582kB/s] \n",
+ "Downloading data: 0%| | 373k/84.1M [00:00<01:25, 981kB/s]\n",
+ "Downloading data: 1%| | 778k/84.1M [00:00<00:44, 1.86MB/s]\n",
+ "Downloading data: 2%|▏ | 1.34M/84.1M [00:00<00:29, 2.83MB/s]\n",
+ "Downloading data: 2%|▏ | 2.02M/84.1M [00:00<00:21, 3.79MB/s]\n",
+ "Downloading data: 3%|▎ | 2.86M/84.1M [00:00<00:16, 4.85MB/s]\n",
+ "Downloading data: 5%|▍ | 3.98M/84.1M [00:01<00:12, 6.27MB/s]\n",
+ "Downloading data: 6%|▋ | 5.39M/84.1M [00:01<00:09, 8.02MB/s]\n",
+ "Downloading data: 9%|▉ | 7.69M/84.1M [00:01<00:06, 11.8MB/s]\n",
+ "Downloading data: 13%|█▎ | 11.2M/84.1M [00:01<00:04, 17.4MB/s]\n",
+ "Downloading data: 18%|█▊ | 15.3M/84.1M [00:01<00:03, 22.5MB/s]\n",
+ "Downloading data: 23%|██▎ | 19.7M/84.1M [00:01<00:02, 28.5MB/s]\n",
+ "Downloading data: 27%|██▋ | 23.1M/84.1M [00:01<00:02, 29.9MB/s]\n",
+ "Downloading data: 31%|███▏ | 26.4M/84.1M [00:01<00:01, 30.7MB/s]\n",
+ "Downloading data: 37%|███▋ | 30.7M/84.1M [00:01<00:01, 34.5MB/s]\n",
+ "Downloading data: 42%|████▏ | 35.5M/84.1M [00:02<00:01, 38.4MB/s]\n",
+ "Downloading data: 47%|████▋ | 39.4M/84.1M [00:02<00:01, 38.6MB/s]\n",
+ "Downloading data: 52%|█████▏ | 43.6M/84.1M [00:02<00:01, 39.6MB/s]\n",
+ "Downloading data: 58%|█████▊ | 48.7M/84.1M [00:02<00:00, 42.8MB/s]\n",
+ "Downloading data: 63%|██████▎ | 53.0M/84.1M [00:02<00:00, 42.7MB/s]\n",
+ "Downloading data: 68%|██████▊ | 57.3M/84.1M [00:02<00:00, 42.9MB/s]\n",
+ "Downloading data: 74%|███████▎ | 62.0M/84.1M [00:02<00:00, 43.2MB/s]\n",
+ "Downloading data: 80%|███████▉ | 67.3M/84.1M [00:02<00:00, 46.0MB/s]\n",
+ "Downloading data: 85%|████████▌ | 71.9M/84.1M [00:02<00:00, 45.5MB/s]\n",
+ "Downloading data: 91%|█████████ | 76.5M/84.1M [00:02<00:00, 45.3MB/s]\n",
+ "Downloading data: 100%|██████████| 84.1M/84.1M [00:03<00:00, 27.2MB/s]\n",
+ "Generating train split: 0%| | 0/25000 [00:00, ? examples/s]\n",
+ "Generating train split: 0%| | 1/25000 [00:02<15:40:26, 2.26s/ examples]\n",
+ "Generating train split: 3%|▎ | 749/25000 [00:02<00:54, 445.61 examples/s]\n",
+ "Generating train split: 6%|▌ | 1514/25000 [00:02<00:23, 1010.13 examples/s]\n",
+ "Generating train split: 9%|▉ | 2285/25000 [00:02<00:13, 1684.51 examples/s]\n",
+ "Generating train split: 12%|█▏ | 3061/25000 [00:02<00:08, 2447.86 examples/s]\n",
+ "Generating train split: 15%|█▌ | 3836/25000 [00:02<00:06, 3254.59 examples/s]\n",
+ "Generating train split: 18%|█▊ | 4596/25000 [00:02<00:05, 4030.18 examples/s]\n",
+ "Generating train split: 21%|██▏ | 5368/25000 [00:02<00:04, 4780.15 examples/s]\n",
+ "Generating train split: 25%|██▍ | 6132/25000 [00:03<00:03, 5423.09 examples/s]\n",
+ "Generating train split: 28%|██▊ | 6913/25000 [00:03<00:03, 6002.61 examples/s]\n",
+ "Generating train split: 31%|███ | 7683/25000 [00:03<00:02, 6439.96 examples/s]\n",
+ "Generating train split: 34%|███▍ | 8448/25000 [00:03<00:02, 6757.88 examples/s]\n",
+ "Generating train split: 37%|███▋ | 9219/25000 [00:03<00:02, 7020.62 examples/s]\n",
+ "Generating train split: 40%|████ | 10000/25000 [00:03<00:02, 6475.62 examples/s]\n",
+ "Generating train split: 43%|████▎ | 10774/25000 [00:03<00:02, 6811.79 examples/s]\n",
+ "Generating train split: 46%|████▌ | 11561/25000 [00:03<00:01, 7102.16 examples/s]\n",
+ "Generating train split: 49%|████▉ | 12348/25000 [00:03<00:01, 7317.96 examples/s]\n",
+ "Generating train split: 53%|█████▎ | 13143/25000 [00:04<00:01, 7498.06 examples/s]\n",
+ "Generating train split: 56%|█████▌ | 13919/25000 [00:04<00:01, 7573.16 examples/s]\n",
+ "Generating train split: 59%|█████▉ | 14707/25000 [00:04<00:01, 7659.25 examples/s]\n",
+ "Generating train split: 62%|██████▏ | 15494/25000 [00:04<00:01, 7721.19 examples/s]\n",
+ "Generating train split: 65%|██████▌ | 16273/25000 [00:04<00:01, 7739.71 examples/s]\n",
+ "Generating train split: 68%|██████▊ | 17056/25000 [00:04<00:01, 7765.06 examples/s]\n",
+ "Generating train split: 71%|███████▏ | 17839/25000 [00:04<00:00, 7783.64 examples/s]\n",
+ "Generating train split: 75%|███████▍ | 18628/25000 [00:04<00:00, 7813.80 examples/s]\n",
+ "Generating train split: 78%|███████▊ | 19411/25000 [00:04<00:00, 7804.00 examples/s]\n",
+ "Generating train split: 81%|████████ | 20193/25000 [00:04<00:00, 7072.48 examples/s]\n",
+ "Generating train split: 84%|████████▍ | 20967/25000 [00:05<00:00, 7257.29 examples/s]\n",
+ "Generating train split: 87%|████████▋ | 21749/25000 [00:05<00:00, 7416.22 examples/s]\n",
+ "Generating train split: 90%|█████████ | 22540/25000 [00:05<00:00, 7556.79 examples/s]\n",
+ "Generating train split: 93%|█████████▎| 23327/25000 [00:05<00:00, 7646.65 examples/s]\n",
+ "Generating train split: 96%|█████████▋| 24108/25000 [00:05<00:00, 7694.25 examples/s]\n",
+ "Generating train split: 100%|█████████▉| 24881/25000 [00:05<00:00, 7674.07 examples/s]\n",
+ " \n",
+ "Generating test split: 0%| | 0/25000 [00:00, ? examples/s]\n",
+ "Generating test split: 0%| | 1/25000 [00:00<3:10:01, 2.19 examples/s]\n",
+ "Generating test split: 3%|▎ | 766/25000 [00:00<00:13, 1826.18 examples/s]\n",
+ "Generating test split: 6%|▌ | 1544/25000 [00:00<00:07, 3338.57 examples/s]\n",
+ "Generating test split: 9%|▉ | 2332/25000 [00:00<00:04, 4546.72 examples/s]\n",
+ "Generating test split: 12%|█▏ | 3116/25000 [00:00<00:04, 5453.13 examples/s]\n",
+ "Generating test split: 16%|█▌ | 3905/25000 [00:00<00:03, 6140.31 examples/s]\n",
+ "Generating test split: 19%|█▉ | 4688/25000 [00:01<00:03, 6624.84 examples/s]\n",
+ "Generating test split: 22%|██▏ | 5474/25000 [00:01<00:02, 6982.92 examples/s]\n",
+ "Generating test split: 25%|██▌ | 6257/25000 [00:01<00:02, 7230.53 examples/s]\n",
+ "Generating test split: 28%|██▊ | 7046/25000 [00:01<00:02, 7423.93 examples/s]\n",
+ "Generating test split: 31%|███▏ | 7829/25000 [00:01<00:02, 7543.45 examples/s]\n",
+ "Generating test split: 34%|███▍ | 8614/25000 [00:01<00:02, 7633.64 examples/s]\n",
+ "Generating test split: 38%|███▊ | 9394/25000 [00:01<00:02, 7680.82 examples/s]\n",
+ "Generating test split: 41%|████ | 10174/25000 [00:01<00:02, 7005.77 examples/s]\n",
+ "Generating test split: 44%|████▍ | 10949/25000 [00:01<00:01, 7213.25 examples/s]\n",
+ "Generating test split: 47%|████▋ | 11730/25000 [00:01<00:01, 7381.64 examples/s]\n",
+ "Generating test split: 50%|█████ | 12515/25000 [00:02<00:01, 7516.21 examples/s]\n",
+ "Generating test split: 53%|█████▎ | 13301/25000 [00:02<00:01, 7615.13 examples/s]\n",
+ "Generating test split: 56%|█████▋ | 14087/25000 [00:02<00:01, 7684.18 examples/s]\n",
+ "Generating test split: 60%|█████▉ | 14876/25000 [00:02<00:01, 7744.99 examples/s]\n",
+ "Generating test split: 63%|██████▎ | 15664/25000 [00:02<00:01, 7779.58 examples/s]\n",
+ "Generating test split: 66%|██████▌ | 16456/25000 [00:02<00:01, 7817.82 examples/s]\n",
+ "Generating test split: 69%|██████▉ | 17240/25000 [00:02<00:00, 7819.40 examples/s]\n",
+ "Generating test split: 72%|███████▏ | 18024/25000 [00:02<00:00, 7816.77 examples/s]\n",
+ "Generating test split: 75%|███████▌ | 18808/25000 [00:02<00:00, 7823.24 examples/s]\n",
+ "Generating test split: 78%|███████▊ | 19593/25000 [00:02<00:00, 7829.38 examples/s]\n",
+ "Generating test split: 82%|████████▏ | 20377/25000 [00:03<00:00, 7091.67 examples/s]\n",
+ "Generating test split: 85%|████████▍ | 21155/25000 [00:03<00:00, 7283.38 examples/s]\n",
+ "Generating test split: 88%|████████▊ | 21937/25000 [00:03<00:00, 7434.19 examples/s]\n",
+ "Generating test split: 91%|█████████ | 22724/25000 [00:03<00:00, 7560.18 examples/s]\n",
+ "Generating test split: 94%|█████████▍| 23514/25000 [00:03<00:00, 7658.45 examples/s]\n",
+ "Generating test split: 97%|█████████▋| 24285/25000 [00:03<00:00, 7624.86 examples/s]\n",
+ "Generating unsupervised split: 0%| | 0/50000 [00:00, ? examples/s] \n",
+ "Generating unsupervised split: 0%| | 1/50000 [00:04<56:42:02, 4.08s/ examples]\n",
+ "Generating unsupervised split: 2%|▏ | 771/50000 [00:04<03:08, 260.56 examples/s]\n",
+ "Generating unsupervised split: 3%|▎ | 1525/50000 [00:04<01:21, 595.91 examples/s]\n",
+ "Generating unsupervised split: 5%|▍ | 2297/50000 [00:04<00:46, 1035.82 examples/s]\n",
+ "Generating unsupervised split: 6%|▌ | 3065/50000 [00:04<00:29, 1574.39 examples/s]\n",
+ "Generating unsupervised split: 8%|▊ | 3795/50000 [00:04<00:21, 2168.20 examples/s]\n",
+ "Generating unsupervised split: 9%|▉ | 4564/50000 [00:04<00:15, 2880.92 examples/s]\n",
+ "Generating unsupervised split: 11%|█ | 5351/50000 [00:04<00:12, 3657.19 examples/s]\n",
+ "Generating unsupervised split: 12%|█▏ | 6099/50000 [00:04<00:11, 3947.54 examples/s]\n",
+ "Generating unsupervised split: 14%|█▎ | 6874/50000 [00:05<00:09, 4672.28 examples/s]\n",
+ "Generating unsupervised split: 15%|█▌ | 7653/50000 [00:05<00:07, 5336.59 examples/s]\n",
+ "Generating unsupervised split: 17%|█▋ | 8430/50000 [00:05<00:07, 5905.86 examples/s]\n",
+ "Generating unsupervised split: 18%|█▊ | 9206/50000 [00:05<00:06, 6366.96 examples/s]\n",
+ "Generating unsupervised split: 20%|█▉ | 9971/50000 [00:05<00:05, 6703.01 examples/s]\n",
+ "Generating unsupervised split: 21%|██▏ | 10725/50000 [00:05<00:06, 6205.86 examples/s]\n",
+ "Generating unsupervised split: 23%|██▎ | 11505/50000 [00:05<00:05, 6617.62 examples/s]\n",
+ "Generating unsupervised split: 25%|██▍ | 12291/50000 [00:05<00:05, 6952.09 examples/s]\n",
+ "Generating unsupervised split: 26%|██▌ | 13080/50000 [00:05<00:05, 7212.23 examples/s]\n",
+ "Generating unsupervised split: 28%|██▊ | 13852/50000 [00:05<00:04, 7356.03 examples/s]\n",
+ "Generating unsupervised split: 29%|██▉ | 14628/50000 [00:06<00:04, 7471.67 examples/s]\n",
+ "Generating unsupervised split: 31%|███ | 15399/50000 [00:06<00:04, 7539.92 examples/s]\n",
+ "Generating unsupervised split: 32%|███▏ | 16181/50000 [00:06<00:04, 7619.82 examples/s]\n",
+ "Generating unsupervised split: 34%|███▍ | 16967/50000 [00:06<00:04, 7690.56 examples/s]\n",
+ "Generating unsupervised split: 36%|███▌ | 17753/50000 [00:06<00:04, 7738.38 examples/s]\n",
+ "Generating unsupervised split: 37%|███▋ | 18531/50000 [00:06<00:04, 7742.28 examples/s]\n",
+ "Generating unsupervised split: 39%|███▊ | 19311/50000 [00:06<00:03, 7754.75 examples/s]\n",
+ "Generating unsupervised split: 40%|████ | 20089/50000 [00:06<00:04, 7009.65 examples/s]\n",
+ "Generating unsupervised split: 42%|████▏ | 20862/50000 [00:06<00:04, 7208.36 examples/s]\n",
+ "Generating unsupervised split: 43%|████▎ | 21633/50000 [00:07<00:03, 7348.02 examples/s]\n",
+ "Generating unsupervised split: 45%|████▍ | 22408/50000 [00:07<00:03, 7463.78 examples/s]\n",
+ "Generating unsupervised split: 46%|████▋ | 23192/50000 [00:07<00:03, 7573.20 examples/s]\n",
+ "Generating unsupervised split: 48%|████▊ | 23971/50000 [00:07<00:03, 7636.86 examples/s]\n",
+ "Generating unsupervised split: 49%|████▉ | 24739/50000 [00:07<00:03, 7621.64 examples/s]\n",
+ "Generating unsupervised split: 51%|█████ | 25517/50000 [00:07<00:03, 7667.30 examples/s]\n",
+ "Generating unsupervised split: 53%|█████▎ | 26286/50000 [00:07<00:03, 7661.28 examples/s]\n",
+ "Generating unsupervised split: 54%|█████▍ | 27058/50000 [00:07<00:02, 7675.99 examples/s]\n",
+ "Generating unsupervised split: 56%|█████▌ | 27848/50000 [00:07<00:02, 7740.61 examples/s]\n",
+ "Generating unsupervised split: 57%|█████▋ | 28629/50000 [00:07<00:02, 7756.47 examples/s]\n",
+ "Generating unsupervised split: 59%|█████▉ | 29411/50000 [00:08<00:02, 7774.99 examples/s]\n",
+ "Generating unsupervised split: 60%|██████ | 30189/50000 [00:08<00:02, 6960.63 examples/s]\n",
+ "Generating unsupervised split: 62%|██████▏ | 30922/50000 [00:08<00:02, 7061.87 examples/s]\n",
+ "Generating unsupervised split: 63%|██████▎ | 31660/50000 [00:08<00:02, 7149.84 examples/s]\n",
+ "Generating unsupervised split: 65%|██████▍ | 32440/50000 [00:08<00:02, 7335.25 examples/s]\n",
+ "Generating unsupervised split: 66%|██████▋ | 33224/50000 [00:08<00:02, 7481.89 examples/s]\n",
+ "Generating unsupervised split: 68%|██████▊ | 34007/50000 [00:08<00:02, 7582.15 examples/s]\n",
+ "Generating unsupervised split: 70%|██████▉ | 34795/50000 [00:08<00:01, 7669.23 examples/s]\n",
+ "Generating unsupervised split: 71%|███████ | 35582/50000 [00:08<00:01, 7728.71 examples/s]\n",
+ "Generating unsupervised split: 73%|███████▎ | 36366/50000 [00:08<00:01, 7759.63 examples/s]\n",
+ "Generating unsupervised split: 74%|███████▍ | 37151/50000 [00:09<00:01, 7784.00 examples/s]\n",
+ "Generating unsupervised split: 76%|███████▌ | 37935/50000 [00:09<00:01, 7798.60 examples/s]\n",
+ "Generating unsupervised split: 77%|███████▋ | 38719/50000 [00:09<00:01, 7808.77 examples/s]\n",
+ "Generating unsupervised split: 79%|███████▉ | 39501/50000 [00:09<00:01, 7745.30 examples/s]\n",
+ "Generating unsupervised split: 81%|████████ | 40277/50000 [00:09<00:01, 6887.20 examples/s]\n",
+ "Generating unsupervised split: 82%|████████▏ | 41043/50000 [00:09<00:01, 7098.58 examples/s]\n",
+ "Generating unsupervised split: 84%|████████▎ | 41820/50000 [00:09<00:01, 7285.74 examples/s]\n",
+ "Generating unsupervised split: 85%|████████▌ | 42600/50000 [00:09<00:00, 7433.09 examples/s]\n",
+ "Generating unsupervised split: 87%|████████▋ | 43379/50000 [00:09<00:00, 7533.84 examples/s]\n",
+ "Generating unsupervised split: 88%|████████▊ | 44161/50000 [00:10<00:00, 7616.56 examples/s]\n",
+ "Generating unsupervised split: 90%|████████▉ | 44939/50000 [00:10<00:00, 7663.82 examples/s]\n",
+ "Generating unsupervised split: 91%|█████████▏| 45719/50000 [00:10<00:00, 7703.42 examples/s]\n",
+ "Generating unsupervised split: 93%|█████████▎| 46507/50000 [00:10<00:00, 7753.52 examples/s]\n",
+ "Generating unsupervised split: 95%|█████████▍| 47285/50000 [00:10<00:00, 7758.62 examples/s]\n",
+ "Generating unsupervised split: 96%|█████████▌| 48063/50000 [00:10<00:00, 7751.50 examples/s]\n",
+ "Generating unsupervised split: 98%|█████████▊| 48854/50000 [00:10<00:00, 7796.96 examples/s]\n",
+ "Generating unsupervised split: 99%|█████████▉| 49641/50000 [00:10<00:00, 7818.59 examples/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Dataset imdb downloaded and prepared to /home/ray/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1. Subsequent calls will reuse this data.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 3/3 [00:00<00:00, 696.30it/s] \n",
+ "Downloading: 100%|██████████| 28.0/28.0 [00:00<00:00, 32.1kB/s]\n",
+ "Downloading: 100%|██████████| 483/483 [00:00<00:00, 600kB/s]\n",
+ "Downloading: 0%| | 0.00/232k [00:00, ?B/s]\n",
+ "Downloading: 100%|██████████| 232k/232k [00:00<00:00, 4.80MB/s]\n",
+ "Downloading: 0%| | 0.00/466k [00:00, ?B/s]\n",
+ "Downloading: 100%|██████████| 466k/466k [00:00<00:00, 7.88MB/s]\n",
+ " 0%| | 0/25 [00:00, ?ba/s]\n",
+ " 4%|▍ | 1/25 [00:00<00:15, 1.52ba/s]\n",
+ " 8%|▊ | 2/25 [00:01<00:14, 1.57ba/s]\n",
+ " 12%|█▏ | 3/25 [00:01<00:13, 1.59ba/s]\n",
+ " 16%|█▌ | 4/25 [00:02<00:13, 1.59ba/s]\n",
+ " 20%|██ | 5/25 [00:03<00:13, 1.52ba/s]\n",
+ " 24%|██▍ | 6/25 [00:03<00:12, 1.54ba/s]\n",
+ " 28%|██▊ | 7/25 [00:04<00:11, 1.55ba/s]\n",
+ " 32%|███▏ | 8/25 [00:05<00:11, 1.53ba/s]\n",
+ " 36%|███▌ | 9/25 [00:05<00:10, 1.54ba/s]\n",
+ " 40%|████ | 10/25 [00:06<00:09, 1.54ba/s]\n",
+ " 44%|████▍ | 11/25 [00:07<00:08, 1.58ba/s]\n",
+ " 48%|████▊ | 12/25 [00:07<00:08, 1.58ba/s]\n",
+ " 52%|█████▏ | 13/25 [00:08<00:07, 1.60ba/s]\n",
+ " 56%|█████▌ | 14/25 [00:08<00:07, 1.57ba/s]\n",
+ " 60%|██████ | 15/25 [00:09<00:06, 1.57ba/s]\n",
+ " 64%|██████▍ | 16/25 [00:10<00:05, 1.55ba/s]\n",
+ " 68%|██████▊ | 17/25 [00:10<00:05, 1.55ba/s]\n",
+ " 72%|███████▏ | 18/25 [00:11<00:04, 1.56ba/s]\n",
+ " 76%|███████▌ | 19/25 [00:12<00:03, 1.56ba/s]\n",
+ " 80%|████████ | 20/25 [00:12<00:03, 1.55ba/s]\n",
+ " 84%|████████▍ | 21/25 [00:13<00:02, 1.50ba/s]\n",
+ " 88%|████████▊ | 22/25 [00:14<00:01, 1.51ba/s]\n",
+ " 92%|█████████▏| 23/25 [00:14<00:01, 1.55ba/s]\n",
+ " 96%|█████████▌| 24/25 [00:15<00:00, 1.55ba/s]\n",
+ " 96%|█████████▌| 24/25 [00:16<00:00, 1.49ba/s]\n",
+ " 0%| | 0/25 [00:00, ?ba/s]\n",
+ " 4%|▍ | 1/25 [00:00<00:14, 1.67ba/s]\n",
+ " 8%|▊ | 2/25 [00:01<00:13, 1.67ba/s]\n",
+ " 12%|█▏ | 3/25 [00:01<00:13, 1.64ba/s]\n",
+ " 16%|█▌ | 4/25 [00:02<00:12, 1.64ba/s]\n",
+ " 20%|██ | 5/25 [00:03<00:12, 1.63ba/s]\n",
+ " 24%|██▍ | 6/25 [00:03<00:11, 1.61ba/s]\n",
+ " 28%|██▊ | 7/25 [00:04<00:11, 1.62ba/s]\n",
+ " 32%|███▏ | 8/25 [00:04<00:10, 1.61ba/s]\n",
+ " 36%|███▌ | 9/25 [00:05<00:10, 1.58ba/s]\n",
+ " 40%|████ | 10/25 [00:06<00:09, 1.58ba/s]\n",
+ " 44%|████▍ | 11/25 [00:06<00:08, 1.60ba/s]\n",
+ " 48%|████▊ | 12/25 [00:07<00:08, 1.59ba/s]\n",
+ " 52%|█████▏ | 13/25 [00:08<00:07, 1.55ba/s]\n",
+ " 56%|█████▌ | 14/25 [00:08<00:06, 1.58ba/s]\n",
+ " 60%|██████ | 15/25 [00:09<00:06, 1.58ba/s]\n",
+ " 64%|██████▍ | 16/25 [00:10<00:05, 1.57ba/s]\n",
+ " 68%|██████▊ | 17/25 [00:10<00:05, 1.56ba/s]\n",
+ " 72%|███████▏ | 18/25 [00:11<00:04, 1.56ba/s]\n",
+ " 76%|███████▌ | 19/25 [00:11<00:03, 1.57ba/s]\n",
+ " 80%|████████ | 20/25 [00:12<00:03, 1.60ba/s]\n",
+ " 84%|████████▍ | 21/25 [00:13<00:02, 1.61ba/s]\n",
+ " 88%|████████▊ | 22/25 [00:13<00:01, 1.58ba/s]\n",
+ " 92%|█████████▏| 23/25 [00:14<00:01, 1.58ba/s]\n",
+ " 96%|█████████▌| 24/25 [00:15<00:00, 1.58ba/s]\n",
+ " 96%|█████████▌| 24/25 [00:15<00:00, 1.53ba/s]\n",
+ " 0%| | 0/50 [00:00, ?ba/s]\n",
+ " 2%|▏ | 1/50 [00:00<00:29, 1.68ba/s]\n",
+ " 4%|▍ | 2/50 [00:01<00:29, 1.63ba/s]\n",
+ " 6%|▌ | 3/50 [00:01<00:29, 1.58ba/s]\n",
+ " 8%|▊ | 4/50 [00:02<00:30, 1.51ba/s]\n",
+ " 10%|█ | 5/50 [00:03<00:29, 1.51ba/s]\n",
+ " 12%|█▏ | 6/50 [00:03<00:28, 1.56ba/s]\n",
+ " 14%|█▍ | 7/50 [00:04<00:27, 1.54ba/s]\n",
+ " 16%|█▌ | 8/50 [00:05<00:27, 1.53ba/s]\n",
+ " 18%|█▊ | 9/50 [00:05<00:26, 1.56ba/s]\n",
+ " 20%|██ | 10/50 [00:06<00:25, 1.55ba/s]\n",
+ " 22%|██▏ | 11/50 [00:07<00:25, 1.55ba/s]\n",
+ " 24%|██▍ | 12/50 [00:07<00:24, 1.56ba/s]\n",
+ " 26%|██▌ | 13/50 [00:08<00:23, 1.57ba/s]\n",
+ " 28%|██▊ | 14/50 [00:08<00:22, 1.57ba/s]\n",
+ " 30%|███ | 15/50 [00:09<00:22, 1.55ba/s]\n",
+ " 32%|███▏ | 16/50 [00:10<00:21, 1.55ba/s]\n",
+ " 34%|███▍ | 17/50 [00:10<00:21, 1.56ba/s]\n",
+ " 36%|███▌ | 18/50 [00:11<00:20, 1.56ba/s]\n",
+ " 38%|███▊ | 19/50 [00:12<00:19, 1.56ba/s]\n",
+ " 40%|████ | 20/50 [00:12<00:19, 1.56ba/s]\n",
+ " 42%|████▏ | 21/50 [00:13<00:18, 1.53ba/s]\n",
+ " 44%|████▍ | 22/50 [00:14<00:18, 1.55ba/s]\n",
+ " 46%|████▌ | 23/50 [00:14<00:17, 1.54ba/s]\n",
+ " 48%|████▊ | 24/50 [00:15<00:16, 1.56ba/s]\n",
+ " 50%|█████ | 25/50 [00:16<00:15, 1.56ba/s]\n",
+ " 52%|█████▏ | 26/50 [00:16<00:15, 1.57ba/s]\n",
+ " 54%|█████▍ | 27/50 [00:17<00:14, 1.55ba/s]\n",
+ " 56%|█████▌ | 28/50 [00:17<00:13, 1.60ba/s]\n",
+ " 58%|█████▊ | 29/50 [00:18<00:13, 1.57ba/s]\n",
+ " 60%|██████ | 30/50 [00:19<00:12, 1.59ba/s]\n",
+ " 62%|██████▏ | 31/50 [00:19<00:12, 1.55ba/s]\n",
+ " 64%|██████▍ | 32/50 [00:20<00:11, 1.55ba/s]\n",
+ " 66%|██████▌ | 33/50 [00:21<00:10, 1.56ba/s]\n",
+ " 68%|██████▊ | 34/50 [00:21<00:10, 1.58ba/s]\n",
+ " 70%|███████ | 35/50 [00:22<00:09, 1.61ba/s]\n",
+ " 72%|███████▏ | 36/50 [00:23<00:08, 1.60ba/s]\n",
+ " 74%|███████▍ | 37/50 [00:23<00:08, 1.54ba/s]\n",
+ " 76%|███████▌ | 38/50 [00:24<00:07, 1.56ba/s]\n",
+ " 78%|███████▊ | 39/50 [00:24<00:07, 1.56ba/s]\n",
+ " 80%|████████ | 40/50 [00:25<00:06, 1.57ba/s]\n",
+ " 82%|████████▏ | 41/50 [00:26<00:05, 1.54ba/s]\n",
+ " 84%|████████▍ | 42/50 [00:26<00:05, 1.55ba/s]\n",
+ " 86%|████████▌ | 43/50 [00:27<00:04, 1.55ba/s]\n",
+ " 88%|████████▊ | 44/50 [00:28<00:03, 1.54ba/s]\n",
+ " 90%|█████████ | 45/50 [00:28<00:03, 1.56ba/s]\n",
+ " 92%|█████████▏| 46/50 [00:29<00:02, 1.56ba/s]\n",
+ " 94%|█████████▍| 47/50 [00:30<00:01, 1.54ba/s]\n",
+ " 96%|█████████▌| 48/50 [00:30<00:01, 1.54ba/s]\n",
+ " 98%|█████████▊| 49/50 [00:31<00:00, 1.54ba/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m len of train Dataset({\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m features: ['text', 'label', 'input_ids', 'attention_mask'],\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m num_rows: 100\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m }) and test Dataset({\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m features: ['text', 'label', 'input_ids', 'attention_mask'],\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m num_rows: 100\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m })\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 98%|█████████▊| 49/50 [00:32<00:00, 1.53ba/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m To disable this warning, you can either:\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \t- Avoid using `tokenizers` before the fork if possible\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:55:58 (running for 00:00:05.07)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 6.4/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m 2022-11-04 07:56:02,047\tINFO torch.py:346 -- Setting up process group for: env:// [rank=0, world_size=4]\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m 2022-11-04 07:56:02,045\tINFO torch.py:346 -- Setting up process group for: env:// [rank=2, world_size=4]\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m 2022-11-04 07:56:02,047\tINFO torch.py:346 -- Setting up process group for: env:// [rank=1, world_size=4]\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m 2022-11-04 07:56:02,048\tINFO torch.py:346 -- Setting up process group for: env:// [rank=3, world_size=4]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:03 (running for 00:00:10.07)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 7.2/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Downloading: 100%|██████████| 483/483 [00:00<00:00, 588kB/s]\n",
+ "Downloading: 0%| | 0.00/268M [00:00, ?B/s] \n",
+ "Downloading: 0%| | 893k/268M [00:00<00:29, 8.93MB/s]\n",
+ "Downloading: 3%|▎ | 6.70M/268M [00:00<00:06, 37.8MB/s]\n",
+ "Downloading: 5%|▍ | 12.9M/268M [00:00<00:05, 48.7MB/s]\n",
+ "Downloading: 7%|▋ | 19.2M/268M [00:00<00:04, 54.4MB/s]\n",
+ "Downloading: 10%|▉ | 25.7M/268M [00:00<00:04, 58.3MB/s]\n",
+ "Downloading: 12%|█▏ | 32.3M/268M [00:00<00:03, 60.8MB/s]\n",
+ "Downloading: 14%|█▍ | 38.8M/268M [00:00<00:03, 62.1MB/s]\n",
+ "Downloading: 17%|█▋ | 45.3M/268M [00:00<00:03, 63.3MB/s]\n",
+ "Downloading: 19%|█▉ | 51.8M/268M [00:00<00:03, 63.7MB/s]\n",
+ "Downloading: 22%|██▏ | 58.4M/268M [00:01<00:03, 64.3MB/s]\n",
+ "Downloading: 24%|██▍ | 64.9M/268M [00:01<00:03, 64.7MB/s]\n",
+ "Downloading: 27%|██▋ | 71.5M/268M [00:01<00:03, 65.2MB/s]\n",
+ "Downloading: 29%|██▉ | 78.1M/268M [00:01<00:02, 65.1MB/s]\n",
+ "Downloading: 32%|███▏ | 84.6M/268M [00:01<00:02, 65.1MB/s]\n",
+ "Downloading: 34%|███▍ | 91.2M/268M [00:01<00:02, 65.3MB/s]\n",
+ "Downloading: 36%|███▋ | 97.7M/268M [00:01<00:02, 65.3MB/s]\n",
+ "Downloading: 39%|███▉ | 104M/268M [00:01<00:02, 65.4MB/s] \n",
+ "Downloading: 41%|████▏ | 111M/268M [00:01<00:02, 65.5MB/s]\n",
+ "Downloading: 44%|████▍ | 117M/268M [00:01<00:02, 65.5MB/s]\n",
+ "Downloading: 46%|████▋ | 124M/268M [00:02<00:02, 65.4MB/s]\n",
+ "Downloading: 49%|████▊ | 130M/268M [00:02<00:02, 65.4MB/s]\n",
+ "Downloading: 51%|█████ | 137M/268M [00:02<00:01, 65.5MB/s]\n",
+ "Downloading: 54%|█████▎ | 144M/268M [00:02<00:01, 65.5MB/s]\n",
+ "Downloading: 56%|█████▌ | 150M/268M [00:02<00:01, 65.4MB/s]\n",
+ "Downloading: 58%|█████▊ | 157M/268M [00:02<00:01, 65.4MB/s]\n",
+ "Downloading: 61%|██████ | 163M/268M [00:02<00:01, 65.6MB/s]\n",
+ "Downloading: 63%|██████▎ | 170M/268M [00:02<00:01, 65.4MB/s]\n",
+ "Downloading: 66%|██████▌ | 176M/268M [00:02<00:01, 65.4MB/s]\n",
+ "Downloading: 68%|██████▊ | 183M/268M [00:02<00:01, 65.4MB/s]\n",
+ "Downloading: 71%|███████ | 190M/268M [00:03<00:01, 65.6MB/s]\n",
+ "Downloading: 73%|███████▎ | 196M/268M [00:03<00:01, 65.6MB/s]\n",
+ "Downloading: 76%|███████▌ | 203M/268M [00:03<00:00, 65.7MB/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:08 (running for 00:00:15.07)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 7.5/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Downloading: 78%|███████▊ | 209M/268M [00:03<00:00, 65.7MB/s]\n",
+ "Downloading: 81%|████████ | 216M/268M [00:03<00:00, 65.7MB/s]\n",
+ "Downloading: 83%|████████▎ | 223M/268M [00:03<00:00, 66.0MB/s]\n",
+ "Downloading: 86%|████████▌ | 229M/268M [00:03<00:00, 66.0MB/s]\n",
+ "Downloading: 88%|████████▊ | 236M/268M [00:03<00:00, 65.8MB/s]\n",
+ "Downloading: 90%|█████████ | 242M/268M [00:03<00:00, 65.8MB/s]\n",
+ "Downloading: 93%|█████████▎| 249M/268M [00:03<00:00, 65.7MB/s]\n",
+ "Downloading: 95%|█████████▌| 255M/268M [00:04<00:00, 65.7MB/s]\n",
+ "Downloading: 98%|█████████▊| 262M/268M [00:04<00:00, 65.8MB/s]\n",
+ "Downloading: 100%|██████████| 268M/268M [00:04<00:00, 63.9MB/s]\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.weight']\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.weight', 'pre_classifier.weight']\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight']\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight']\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.weight', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight']\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight']\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.bias']\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m - This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m - This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m /tmp/ray/session_2022-11-04_07-51-23_507232_7/runtime_resources/pip/4a36d7bd0bbff8fccea52f9c0d942dd63707933f/virtualenv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m warnings.warn(\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m /tmp/ray/session_2022-11-04_07-51-23_507232_7/runtime_resources/pip/4a36d7bd0bbff8fccea52f9c0d942dd63707933f/virtualenv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m warnings.warn(\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m /tmp/ray/session_2022-11-04_07-51-23_507232_7/runtime_resources/pip/4a36d7bd0bbff8fccea52f9c0d942dd63707933f/virtualenv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m warnings.warn(\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m /tmp/ray/session_2022-11-04_07-51-23_507232_7/runtime_resources/pip/4a36d7bd0bbff8fccea52f9c0d942dd63707933f/virtualenv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m warnings.warn(\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m ***** Running training *****\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Num examples = 6250\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Num Epochs = 1\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Instantaneous batch size per device = 16\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Total train batch size (w. parallel, distributed & accumulation) = 64\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Gradient Accumulation steps = 1\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Total optimization steps = 391\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Number of trainable parameters = 66955010\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:13 (running for 00:00:20.08)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 12.3/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m [W reducer.cpp:1251] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=184, ip=10.129.66.16)\u001b[0m [W reducer.cpp:1251] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=183, ip=10.129.66.16)\u001b[0m [W reducer.cpp:1251] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=185, ip=10.129.66.16)\u001b[0m [W reducer.cpp:1251] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:18 (running for 00:00:25.08)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:23 (running for 00:00:30.08)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:28 (running for 00:00:35.09)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:33 (running for 00:00:40.09)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:38 (running for 00:00:45.10)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:43 (running for 00:00:50.10)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:48 (running for 00:00:55.10)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:53 (running for 00:01:00.10)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:56:59 (running for 00:01:05.11)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:04 (running for 00:01:10.11)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:09 (running for 00:01:15.11)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:14 (running for 00:01:20.12)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:19 (running for 00:01:25.12)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:24 (running for 00:01:30.12)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:29 (running for 00:01:35.13)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:34 (running for 00:01:40.13)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:39 (running for 00:01:45.13)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:44 (running for 00:01:50.13)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:49 (running for 00:01:55.14)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:54 (running for 00:02:00.14)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:57:59 (running for 00:02:05.15)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 13.7/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Saving model checkpoint to /tmp/hf_imdb/test/checkpoint-391\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Configuration saved in /tmp/hf_imdb/test/checkpoint-391/config.json\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Model weights saved in /tmp/hf_imdb/test/checkpoint-391/pytorch_model.bin\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result for HuggingFaceTrainer_c7d60_00000:\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _time_this_iter_s: 118.07144260406494\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _timestamp: 1667573883\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _training_iteration: 1\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m date: 2022-11-04_07-58-03\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m done: false\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m epoch: 1.0\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m experiment_id: 7bc6ab25d0414fcbb589bcb5d0f29b99\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m hostname: hfgputest-worker-small-group-hfgputest-q4758\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m iterations_since_restore: 1\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m node_ip: 10.129.66.16\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m pid: 146\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m should_checkpoint: true\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m step: 391\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_since_restore: 124.55581378936768\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_this_iter_s: 124.55581378936768\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_total_s: 124.55581378936768\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m timestamp: 1667573883\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m timesteps_since_restore: 0\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_loss: 0.2760564701636429\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_runtime: 109.7668\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_samples_per_second: 56.939\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_steps_per_second: 3.562\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m training_iteration: 1\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m trial_id: c7d60_00000\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m warmup_time: 0.003995656967163086\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m {'train_runtime': 109.7668, 'train_samples_per_second': 56.939, 'train_steps_per_second': 3.562, 'train_loss': 0.2760564701636429, 'epoch': 1.0}\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(BaseWorkerMixin pid=182, ip=10.129.66.16)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:58:13 (running for 00:02:19.36)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 16.0/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 5.0/10 CPUs, 4.0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+--------+------------------+-----------------+----------------------------+--------------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc | iter | total time (s) | train_runtime | train_samples_per_second | train_steps_per_second |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+----------+------------------+--------+------------------+-----------------+----------------------------+--------------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | RUNNING | 10.129.66.16:146 | 1 | 124.556 | 109.767 | 56.939 | 3.562 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+----------+------------------+--------+------------------+-----------------+----------------------------+--------------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m 2022-11-04 07:58:13,248\tWARNING util.py:214 -- The `process_trial_save` operation took 9.709 s, which may be a performance bottleneck.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m 2022-11-04 07:58:13,248\tWARNING trial_runner.py:856 -- Consider turning off forced head-worker trial checkpoint syncs by setting sync_on_checkpoint=False. Note that this may result in faulty trial restoration if a failure occurs while the checkpoint is being synced from the worker to the head node.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result for HuggingFaceTrainer_c7d60_00000:\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _time_this_iter_s: 118.07144260406494\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _timestamp: 1667573883\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m _training_iteration: 1\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m date: 2022-11-04_07-58-03\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m done: true\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m epoch: 1.0\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m experiment_id: 7bc6ab25d0414fcbb589bcb5d0f29b99\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m experiment_tag: '0'\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m hostname: hfgputest-worker-small-group-hfgputest-q4758\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m iterations_since_restore: 1\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m node_ip: 10.129.66.16\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m pid: 146\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m should_checkpoint: true\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m step: 391\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_since_restore: 124.55581378936768\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_this_iter_s: 124.55581378936768\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m time_total_s: 124.55581378936768\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m timestamp: 1667573883\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m timesteps_since_restore: 0\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_loss: 0.2760564701636429\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_runtime: 109.7668\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_samples_per_second: 56.939\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m train_steps_per_second: 3.562\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m training_iteration: 1\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m trial_id: c7d60_00000\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m warmup_time: 0.003995656967163086\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m == Status ==\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Current time: 2022-11-04 07:58:16 (running for 00:02:22.40)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Memory usage on this node: 9.1/240.1 GiB\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Using FIFO scheduling algorithm.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Resources requested: 0/10 CPUs, 0/4 GPUs, 0.0/22.35 GiB heap, 0.0/6.59 GiB objects\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Result logdir: /home/ray/ray_results/HuggingFaceTrainer_2022-11-04_07-55-53\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m Number of trials: 1/1 (1 TERMINATED)\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+------------+------------------+--------+------------------+-----------------+----------------------------+--------------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | Trial name | status | loc | iter | total time (s) | train_runtime | train_samples_per_second | train_steps_per_second |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m |--------------------------------+------------+------------------+--------+------------------+-----------------+----------------------------+--------------------------|\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m | HuggingFaceTrainer_c7d60_00000 | TERMINATED | 10.129.66.16:146 | 1 | 124.556 | 109.767 | 56.939 | 3.562 |\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m +--------------------------------+------------+------------------+--------+------------------+-----------------+----------------------------+--------------------------+\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m 2022-11-04 07:58:16,286\tWARNING util.py:214 -- The `process_trial_save` operation took 2.161 s, which may be a performance bottleneck.\n",
+ "\u001b[2m\u001b[36m(train_fn pid=250)\u001b[0m 2022-11-04 07:58:16,398\tINFO tune.py:747 -- Total run time: 142.70 seconds (142.40 seconds for the tuning loop).\n"
+ ]
+ }
+ ],
+ "source": [
+ "#call the above cell as a remote ray function\n",
+ "ray.get(train_fn.remote())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5af8cd32",
+ "metadata": {},
+ "source": [
+ "Finally, we bring our resource cluster down and release/terminate the associated resources, bringing everything back to the way it was before our cluster was brought up."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ec421113-0e49-4043-a3b5-66efa5021cdd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cluster.down()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2b7a183b-5e8e-4adb-b9a6-a349e13512a0",
+ "metadata": {},
+ "source": [
+ "## Conclusion\n",
+ "As shown in the above example, you can easily run your Huggingface transfer learning tasks easily and natively on CodeFlare. You can scale them from 1 to n GPUs without requiring you to make any significant code changes and leveraging the native Huggingface trainer. \n",
+ "\n",
+ "Also refer to additional notebooks that showcase other use cases\n",
+ "In our next notebook [./02_codeflare_workflows_encoding.ipynb ] shows an sklearn example and how you can leverage workflows to run experiment pipelines and explore multiple pipelines in parallel on CodeFlare cluster. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2677c868-a052-4893-9493-6f1dacd8fa27",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.9.7 64-bit",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}