From 6485723d5fc60a322c46915f77db9d910b77b35c Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Wed, 14 Jul 2021 22:37:30 +0300
Subject: [PATCH 01/40] Limit istiod to 5 replicas and the default namespace

---
 manager/install.sh              |  1 +
 manager/manifests/istio.yaml.j2 | 22 +++++++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/manager/install.sh b/manager/install.sh
index 7d63049871..7b873eaba9 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -361,6 +361,7 @@ function remove_nodegroups() {
 
 function setup_istio() {
   envsubst < manifests/istio-namespace.yaml | kubectl apply -f - >/dev/null
+  kubectl label namespaces default istio-discovery=enabled
 
   if ! grep -q "istio-customgateway-certs" <<< $(kubectl get secret -n istio-system); then
     WEBSITE=localhost
diff --git a/manager/manifests/istio.yaml.j2 b/manager/manifests/istio.yaml.j2
index ecccf38695..53714590ba 100644
--- a/manager/manifests/istio.yaml.j2
+++ b/manager/manifests/istio.yaml.j2
@@ -18,6 +18,10 @@ spec:
   profile: minimal
   hub: {{ env['CORTEX_IMAGE_ISTIO_PROXY_HUB'] }}  # this is only used by proxy, since pilot overrides it (proxy doesn't have dedicated hub config)
   tag: {{ env['CORTEX_IMAGE_ISTIO_PROXY_TAG'] }}  # this is only used by proxy, since pilot overrides it (proxy doesn't have dedicated tag config)
+  meshConfig:
+    discoverySelectors:
+      - matchLabels:
+          istio-discovery: enabled
   components:
     pilot:  # "pilot" refers to the istiod container
       hub: {{ env['CORTEX_IMAGE_ISTIO_PILOT_HUB'] }}
@@ -26,7 +30,23 @@ spec:
         resources:
           requests:
             cpu: 100m  # default is 500m
-            memory: 200Mi  # default is 2048Mi == 2Gi
+            memory: 700Mi  # default is 2048Mi == 2Gi
+        hpaSpec:
+          minReplicas: 1
+          maxReplicas: 5  # edit autoscaleEnabled in values if increasing this
+          metrics:
+            - type: Resource
+              resource:
+                name: cpu
+                targetAverageUtilization: 95
+            - type: Resource
+              resource:
+                name: memory
+                targetAverageUtilization: 80
+          scaleTargetRef:
+            apiVersion: apps/v1
+            kind: Deployment
+            name: istiod
     cni:
       enabled: false
     ingressGateways:

From 9e5dfafb602bf206decc5a0f9e0bc6055b4f7583 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Wed, 14 Jul 2021 22:48:00 +0300
Subject: [PATCH 02/40] Move prometheus exporters and fluent bit to their
 dedicated namespace

---
 manager/manifests/fluent-bit.yaml.j2                 | 10 +++++-----
 manager/manifests/prometheus-dcgm-exporter.yaml      |  6 +++---
 manager/manifests/prometheus-kube-state-metrics.yaml |  8 ++++----
 manager/manifests/prometheus-kubelet-exporter.yaml   |  2 +-
 manager/manifests/prometheus-monitoring.yaml         |  2 +-
 manager/manifests/prometheus-node-exporter.yaml      | 12 ++++++------
 manager/manifests/prometheus-operator.yaml           |  8 ++++----
 manager/manifests/prometheus-statsd-exporter.yaml    |  6 +++---
 8 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/manager/manifests/fluent-bit.yaml.j2 b/manager/manifests/fluent-bit.yaml.j2
index a2e1140f2c..4e75212e35 100644
--- a/manager/manifests/fluent-bit.yaml.j2
+++ b/manager/manifests/fluent-bit.yaml.j2
@@ -16,7 +16,7 @@ apiVersion: v1
 kind: ServiceAccount
 metadata:
   name: fluent-bit
-  namespace: default
+  namespace: logging
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -40,13 +40,13 @@ roleRef:
 subjects:
   - kind: ServiceAccount
     name: fluent-bit
-    namespace: default
+    namespace: logging
 ---
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: fluent-bit-config
-  namespace: default
+  namespace: logging
   labels:
     k8s-app: fluent-bit
 data:
@@ -83,7 +83,7 @@ data:
     [FILTER]
         Name                kubernetes
         Match               kube.var.log.containers.*
-        Kube_URL            https://kubernetes.default.svc:443
+        Kube_URL            https://kubernetes.logging.svc:443
         Kube_Tag_Prefix     kube.var.log.containers.
         Merge_Log           On
 
@@ -186,7 +186,7 @@ apiVersion: apps/v1
 kind: DaemonSet
 metadata:
   name: fluent-bit
-  namespace: default
+  namespace: logging
 spec:
   selector:
     matchLabels:
diff --git a/manager/manifests/prometheus-dcgm-exporter.yaml b/manager/manifests/prometheus-dcgm-exporter.yaml
index 8b37d969dd..e93ce14e9e 100644
--- a/manager/manifests/prometheus-dcgm-exporter.yaml
+++ b/manager/manifests/prometheus-dcgm-exporter.yaml
@@ -21,7 +21,7 @@ apiVersion: v1
 kind: ServiceAccount
 metadata:
   name: dcgm-exporter
-  namespace: default
+  namespace: prometheus
   labels:
     app.kubernetes.io/name: dcgm-exporter
     app.kubernetes.io/instance: dcgm-exporter
@@ -31,7 +31,7 @@ apiVersion: apps/v1
 kind: DaemonSet
 metadata:
   name: dcgm-exporter
-  namespace: default
+  namespace: prometheus
   labels:
     app.kubernetes.io/name: dcgm-exporter
     app.kubernetes.io/instance: dcgm-exporter
@@ -106,7 +106,7 @@ apiVersion: monitoring.coreos.com/v1
 kind: PodMonitor
 metadata:
   name: dcgm-exporter
-  namespace: default
+  namespace: prometheus
   labels:
     monitoring.cortex.dev: dcgm-exporter
     app.kubernetes.io/name: dcgm-exporter
diff --git a/manager/manifests/prometheus-kube-state-metrics.yaml b/manager/manifests/prometheus-kube-state-metrics.yaml
index edf69cd7ba..89da6c4842 100644
--- a/manager/manifests/prometheus-kube-state-metrics.yaml
+++ b/manager/manifests/prometheus-kube-state-metrics.yaml
@@ -17,7 +17,7 @@ metadata:
   labels:
     app.kubernetes.io/name: kube-state-metrics
   name: kube-state-metrics
-  namespace: default
+  namespace: prometheus
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -180,13 +180,13 @@ roleRef:
 subjects:
 - kind: ServiceAccount
   name: kube-state-metrics
-  namespace: default
+  namespace: prometheus
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: kube-state-metrics
-  namespace: default
+  namespace: prometheus
   labels:
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/version: "2.1.0"
@@ -245,7 +245,7 @@ apiVersion: monitoring.coreos.com/v1
 kind: PodMonitor
 metadata:
   name: kube-state-metrics
-  namespace: default
+  namespace: prometheus
   labels:
     name: kube-state-metrics
     monitoring.cortex.dev: kube-state-metrics
diff --git a/manager/manifests/prometheus-kubelet-exporter.yaml b/manager/manifests/prometheus-kubelet-exporter.yaml
index 8982706c42..87855746a0 100644
--- a/manager/manifests/prometheus-kubelet-exporter.yaml
+++ b/manager/manifests/prometheus-kubelet-exporter.yaml
@@ -19,7 +19,7 @@ metadata:
     k8s-app: kubelet
     monitoring.cortex.dev: kubelet-exporter
   name: kubelet
-  namespace: default
+  namespace: prometheus
 spec:
   endpoints:
   - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
diff --git a/manager/manifests/prometheus-monitoring.yaml b/manager/manifests/prometheus-monitoring.yaml
index 0982504aff..fbfc6309da 100644
--- a/manager/manifests/prometheus-monitoring.yaml
+++ b/manager/manifests/prometheus-monitoring.yaml
@@ -114,7 +114,7 @@ roleRef:
 subjects:
   - kind: ServiceAccount
     name: prometheus
-    namespace: default
+    namespace: prometheus
 
 ---
 
diff --git a/manager/manifests/prometheus-node-exporter.yaml b/manager/manifests/prometheus-node-exporter.yaml
index 3bb631e15d..c381f22902 100644
--- a/manager/manifests/prometheus-node-exporter.yaml
+++ b/manager/manifests/prometheus-node-exporter.yaml
@@ -18,7 +18,7 @@ metadata:
   labels:
     app.kubernetes.io/version: v1.1.2
   name: node-exporter
-  namespace: default
+  namespace: prometheus
 
 ---
 
@@ -57,7 +57,7 @@ roleRef:
 subjects:
   - kind: ServiceAccount
     name: node-exporter
-    namespace: default
+    namespace: prometheus
 
 ---
 
@@ -68,7 +68,7 @@ metadata:
     app.kubernetes.io/name: node-exporter
     app.kubernetes.io/version: v1.1.2
   name: node-exporter
-  namespace: default
+  namespace: prometheus
 spec:
   clusterIP: None
   ports:
@@ -87,7 +87,7 @@ metadata:
     app.kubernetes.io/name: node-exporter
     app.kubernetes.io/version: v1.1.2
   name: node-exporter
-  namespace: default
+  namespace: prometheus
 spec:
   selector:
     matchLabels:
@@ -181,7 +181,7 @@ metadata:
     app.kubernetes.io/version: v1.1.2
     monitoring.cortex.dev: node-exporter
   name: node-exporter
-  namespace: default
+  namespace: prometheus
 spec:
   endpoints:
     - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
@@ -240,7 +240,7 @@ metadata:
     app.kubernetes.io/version: 1.1.2
     prometheus: k8s
   name: node-exporter-rules
-  namespace: default
+  namespace: prometheus
 spec:
   groups:
     - name: node-exporter.rules
diff --git a/manager/manifests/prometheus-operator.yaml b/manager/manifests/prometheus-operator.yaml
index 3b7b558318..fc610a31a6 100644
--- a/manager/manifests/prometheus-operator.yaml
+++ b/manager/manifests/prometheus-operator.yaml
@@ -14073,7 +14073,7 @@ roleRef:
 subjects:
 - kind: ServiceAccount
   name: prometheus-operator
-  namespace: default
+  namespace: prometheus
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -14163,7 +14163,7 @@ metadata:
     app.kubernetes.io/name: prometheus-operator
     app.kubernetes.io/version: 0.48.1
   name: prometheus-operator
-  namespace: default
+  namespace: prometheus
 spec:
   replicas: 1
   selector:
@@ -14217,7 +14217,7 @@ metadata:
     app.kubernetes.io/name: prometheus-operator
     app.kubernetes.io/version: 0.48.1
   name: prometheus-operator
-  namespace: default
+  namespace: prometheus
 ---
 apiVersion: v1
 kind: Service
@@ -14227,7 +14227,7 @@ metadata:
     app.kubernetes.io/name: prometheus-operator
     app.kubernetes.io/version: 0.48.1
   name: prometheus-operator
-  namespace: default
+  namespace: prometheus
 spec:
   clusterIP: None
   ports:
diff --git a/manager/manifests/prometheus-statsd-exporter.yaml b/manager/manifests/prometheus-statsd-exporter.yaml
index ea58db52d8..1a1fe6dd33 100644
--- a/manager/manifests/prometheus-statsd-exporter.yaml
+++ b/manager/manifests/prometheus-statsd-exporter.yaml
@@ -16,7 +16,7 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
   name: prometheus-statsd-exporter-config
-  namespace: default
+  namespace: prometheus
 data:
   statsd-mapping.yaml: |
     defaults:
@@ -27,7 +27,7 @@ apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: prometheus-statsd-exporter
-  namespace: default
+  namespace: prometheus
 spec:
   replicas: 1
   selector:
@@ -93,7 +93,7 @@ spec:
 apiVersion: v1
 kind: Service
 metadata:
-  namespace: default
+  namespace: prometheus
   name: prometheus-statsd-exporter
   labels:
     cortex.dev/name: prometheus-statsd-exporter

From 060ebb933e0aa63d3d9f4eb39847b66a8d51542c Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Wed, 14 Jul 2021 23:07:50 +0300
Subject: [PATCH 03/40] Change the HPA's targets a bit

---
 manager/manifests/istio.yaml.j2 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/manager/manifests/istio.yaml.j2 b/manager/manifests/istio.yaml.j2
index 53714590ba..b537e08c7b 100644
--- a/manager/manifests/istio.yaml.j2
+++ b/manager/manifests/istio.yaml.j2
@@ -38,11 +38,11 @@ spec:
             - type: Resource
               resource:
                 name: cpu
-                targetAverageUtilization: 95
+                targetAverageUtilization: 90
             - type: Resource
               resource:
                 name: memory
-                targetAverageUtilization: 80
+                targetAverageUtilization: 90
           scaleTargetRef:
             apiVersion: apps/v1
             kind: Deployment

From d65fafcdf40ab4153ceebf1391b3122adfec30ae Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 00:33:55 +0300
Subject: [PATCH 04/40] Redirect kubectl's output to /dev/null

---
 manager/install.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/manager/install.sh b/manager/install.sh
index 7b873eaba9..367ee76e5a 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -361,7 +361,7 @@ function remove_nodegroups() {
 
 function setup_istio() {
   envsubst < manifests/istio-namespace.yaml | kubectl apply -f - >/dev/null
-  kubectl label namespaces default istio-discovery=enabled
+  kubectl label namespaces default istio-discovery=enabled >/dev/null
 
   if ! grep -q "istio-customgateway-certs" <<< $(kubectl get secret -n istio-system); then
     WEBSITE=localhost

From 4b96c20e0c9728c5b3e488167e92c487c698f6be Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 01:09:14 +0300
Subject: [PATCH 05/40] Add logging namespace

---
 manager/manifests/fluent-bit.yaml.j2 | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/manager/manifests/fluent-bit.yaml.j2 b/manager/manifests/fluent-bit.yaml.j2
index 4e75212e35..b4f3ac68df 100644
--- a/manager/manifests/fluent-bit.yaml.j2
+++ b/manager/manifests/fluent-bit.yaml.j2
@@ -12,6 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: logging
+---
 apiVersion: v1
 kind: ServiceAccount
 metadata:

From 067720840ec5e040b13f84262c09ae52ebb2dcb5 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 01:10:35 +0300
Subject: [PATCH 06/40] Remove unnecessary namespace

---
 manager/manifests/prometheus-dcgm-exporter.yaml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/manager/manifests/prometheus-dcgm-exporter.yaml b/manager/manifests/prometheus-dcgm-exporter.yaml
index e93ce14e9e..96d82a5644 100644
--- a/manager/manifests/prometheus-dcgm-exporter.yaml
+++ b/manager/manifests/prometheus-dcgm-exporter.yaml
@@ -12,11 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: monitoring
----
 apiVersion: v1
 kind: ServiceAccount
 metadata:

From bbcf4e39abc88ea54c36183eed6c6623f1c85d6c Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 01:25:42 +0300
Subject: [PATCH 07/40] Fixes

---
 manager/install.sh                          |  5 +++--
 manager/manifests/default-namespace.yaml    | 20 ++++++++++++++++++++
 manager/manifests/prometheus-namespace.yaml | 18 ++++++++++++++++++
 3 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 manager/manifests/default-namespace.yaml
 create mode 100644 manager/manifests/prometheus-namespace.yaml

diff --git a/manager/install.sh b/manager/install.sh
index 367ee76e5a..2450863425 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -220,6 +220,7 @@ function setup_configmap() {
 }
 
 function setup_prometheus() {
+  kubectl apply -f manifests/prometheus-namespace.yaml >/dev/null
   envsubst < manifests/prometheus-operator.yaml | kubectl apply -f - >/dev/null
   envsubst < manifests/prometheus-statsd-exporter.yaml | kubectl apply -f - >/dev/null
   envsubst < manifests/prometheus-kubelet-exporter.yaml | kubectl apply -f - >/dev/null
@@ -360,8 +361,8 @@ function remove_nodegroups() {
 }
 
 function setup_istio() {
-  envsubst < manifests/istio-namespace.yaml | kubectl apply -f - >/dev/null
-  kubectl label namespaces default istio-discovery=enabled >/dev/null
+  kubectl apply -f manifests/istio-namespace.yaml >/dev/null
+  kubectl apply -f manifests/default-namespace.yaml >/dev/null
 
   if ! grep -q "istio-customgateway-certs" <<< $(kubectl get secret -n istio-system); then
     WEBSITE=localhost
diff --git a/manager/manifests/default-namespace.yaml b/manager/manifests/default-namespace.yaml
new file mode 100644
index 0000000000..af30d267ec
--- /dev/null
+++ b/manager/manifests/default-namespace.yaml
@@ -0,0 +1,20 @@
+# Copyright 2021 Cortex Labs, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: default
+  labels:
+    istio-discovery: enabled
diff --git a/manager/manifests/prometheus-namespace.yaml b/manager/manifests/prometheus-namespace.yaml
new file mode 100644
index 0000000000..995138ef09
--- /dev/null
+++ b/manager/manifests/prometheus-namespace.yaml
@@ -0,0 +1,18 @@
+# Copyright 2021 Cortex Labs, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: prometheus

From da66f7f05817cae1ea071580339773b0c9b572a7 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 01:56:14 +0300
Subject: [PATCH 08/40] Refactoring a bit

---
 manager/install.sh                                         | 7 ++++---
 .../{default-namespace.yaml => namespaces/default.yaml}    | 0
 .../{istio-namespace.yaml => namespaces/istio.yaml}        | 0
 .../prometheus.yaml}                                       | 0
 4 files changed, 4 insertions(+), 3 deletions(-)
 rename manager/manifests/{default-namespace.yaml => namespaces/default.yaml} (100%)
 rename manager/manifests/{istio-namespace.yaml => namespaces/istio.yaml} (100%)
 rename manager/manifests/{prometheus-namespace.yaml => namespaces/prometheus.yaml} (100%)

diff --git a/manager/install.sh b/manager/install.sh
index 2450863425..770d0bfec0 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -220,7 +220,7 @@ function setup_configmap() {
 }
 
 function setup_prometheus() {
-  kubectl apply -f manifests/prometheus-namespace.yaml >/dev/null
+  kubectl apply -f manifests/namespaces/prometheus.yaml >/dev/null
   envsubst < manifests/prometheus-operator.yaml | kubectl apply -f - >/dev/null
   envsubst < manifests/prometheus-statsd-exporter.yaml | kubectl apply -f - >/dev/null
   envsubst < manifests/prometheus-kubelet-exporter.yaml | kubectl apply -f - >/dev/null
@@ -361,8 +361,9 @@ function remove_nodegroups() {
 }
 
 function setup_istio() {
-  kubectl apply -f manifests/istio-namespace.yaml >/dev/null
-  kubectl apply -f manifests/default-namespace.yaml >/dev/null
+  kubectl apply -f manifests/namespaces/istio.yaml >/dev/null
+  # to apply the istio-discovery label
+  kubectl apply -f manifests/namespaces/default.yaml >/dev/null
 
   if ! grep -q "istio-customgateway-certs" <<< $(kubectl get secret -n istio-system); then
     WEBSITE=localhost
diff --git a/manager/manifests/default-namespace.yaml b/manager/manifests/namespaces/default.yaml
similarity index 100%
rename from manager/manifests/default-namespace.yaml
rename to manager/manifests/namespaces/default.yaml
diff --git a/manager/manifests/istio-namespace.yaml b/manager/manifests/namespaces/istio.yaml
similarity index 100%
rename from manager/manifests/istio-namespace.yaml
rename to manager/manifests/namespaces/istio.yaml
diff --git a/manager/manifests/prometheus-namespace.yaml b/manager/manifests/namespaces/prometheus.yaml
similarity index 100%
rename from manager/manifests/prometheus-namespace.yaml
rename to manager/manifests/namespaces/prometheus.yaml

From 56554fbd187f58f4787425e42d294767eb1356aa Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 01:59:23 +0300
Subject: [PATCH 09/40] Fixes

---
 manager/manifests/event-exporter.yaml | 8 ++++----
 manager/manifests/fluent-bit.yaml.j2  | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/manager/manifests/event-exporter.yaml b/manager/manifests/event-exporter.yaml
index ab4847c4a7..3bc701f2a1 100644
--- a/manager/manifests/event-exporter.yaml
+++ b/manager/manifests/event-exporter.yaml
@@ -15,7 +15,7 @@
 apiVersion: v1
 kind: ServiceAccount
 metadata:
-  namespace: default
+  namespace: prometheus
   name: event-exporter
 
 ---
@@ -30,7 +30,7 @@ roleRef:
   name: view
 subjects:
   - kind: ServiceAccount
-    namespace: default
+    namespace: prometheus
     name: event-exporter
 
 ---
@@ -39,7 +39,7 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
   name: event-exporter-config
-  namespace: default
+  namespace: prometheus
 data:
   config.yaml: |
     logLevel: error
@@ -61,7 +61,7 @@ apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: event-exporter
-  namespace: default
+  namespace: prometheus
 spec:
   replicas: 1
   selector:
diff --git a/manager/manifests/fluent-bit.yaml.j2 b/manager/manifests/fluent-bit.yaml.j2
index b4f3ac68df..2bff58b1fb 100644
--- a/manager/manifests/fluent-bit.yaml.j2
+++ b/manager/manifests/fluent-bit.yaml.j2
@@ -88,7 +88,7 @@ data:
     [FILTER]
         Name                kubernetes
         Match               kube.var.log.containers.*
-        Kube_URL            https://kubernetes.logging.svc:443
+        Kube_URL            https://kubernetes.default.svc:443
         Kube_Tag_Prefix     kube.var.log.containers.
         Merge_Log           On
 

From 77dc2e91bf55b00c4f5c7096403515605ae13fab Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 02:12:50 +0300
Subject: [PATCH 10/40] More refactoring

---
 manager/install.sh                        | 19 +++++++++++++------
 manager/manifests/fluent-bit.yaml.j2      |  5 -----
 manager/manifests/namespaces/logging.yaml | 18 ++++++++++++++++++
 pkg/workloads/k8s.go                      |  2 +-
 4 files changed, 32 insertions(+), 12 deletions(-)
 create mode 100644 manager/manifests/namespaces/logging.yaml

diff --git a/manager/install.sh b/manager/install.sh
index 770d0bfec0..04bc320dfc 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -33,7 +33,11 @@ function main() {
 }
 
 function cluster_up() {
-  create_eks
+  check_eks
+
+  echo -n "￮ creating namespaces "
+  setup_namespaces
+  echo "✓"
 
   echo -n "￮ updating cluster configuration "
   setup_configmap
@@ -195,6 +199,14 @@ function write_kubeconfig() {
   out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/"; exit 1; fi
 }
 
+function setup_namespaces() {
+  # default.yaml only adds the istio-discovery label to the default namespace
+  kubectl apply -f manifests/namespaces/default.yaml >/dev/null
+  kubectl apply -f manifests/namespaces/istio.yaml >/dev/null
+  kubectl apply -f manifests/namespaces/prometheus.yaml >/dev/null
+  kubectl apply -f manifests/namespaces/logging.yaml >/dev/null
+}
+
 function setup_configmap() {
   envsubst < manifests/default_cortex_cli_config.yaml > tmp_cli_config.yaml
   kubectl -n=default create configmap 'client-config' \
@@ -220,7 +232,6 @@ function setup_configmap() {
 }
 
 function setup_prometheus() {
-  kubectl apply -f manifests/namespaces/prometheus.yaml >/dev/null
   envsubst < manifests/prometheus-operator.yaml | kubectl apply -f - >/dev/null
   envsubst < manifests/prometheus-statsd-exporter.yaml | kubectl apply -f - >/dev/null
   envsubst < manifests/prometheus-kubelet-exporter.yaml | kubectl apply -f - >/dev/null
@@ -361,10 +372,6 @@ function remove_nodegroups() {
 }
 
 function setup_istio() {
-  kubectl apply -f manifests/namespaces/istio.yaml >/dev/null
-  # to apply the istio-discovery label
-  kubectl apply -f manifests/namespaces/default.yaml >/dev/null
-
   if ! grep -q "istio-customgateway-certs" <<< $(kubectl get secret -n istio-system); then
     WEBSITE=localhost
     openssl req -subj "/C=US/CN=$WEBSITE" -newkey rsa:2048 -nodes -keyout $WEBSITE.key -x509 -days 3650 -out $WEBSITE.crt >/dev/null 2>&1
diff --git a/manager/manifests/fluent-bit.yaml.j2 b/manager/manifests/fluent-bit.yaml.j2
index 2bff58b1fb..03fef37062 100644
--- a/manager/manifests/fluent-bit.yaml.j2
+++ b/manager/manifests/fluent-bit.yaml.j2
@@ -12,11 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: logging
----
 apiVersion: v1
 kind: ServiceAccount
 metadata:
diff --git a/manager/manifests/namespaces/logging.yaml b/manager/manifests/namespaces/logging.yaml
new file mode 100644
index 0000000000..02fe6ce079
--- /dev/null
+++ b/manager/manifests/namespaces/logging.yaml
@@ -0,0 +1,18 @@
+# Copyright 2021 Cortex Labs, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: logging
diff --git a/pkg/workloads/k8s.go b/pkg/workloads/k8s.go
index 06e0526472..ecf25b3aef 100644
--- a/pkg/workloads/k8s.go
+++ b/pkg/workloads/k8s.go
@@ -61,7 +61,7 @@ const (
 	_clusterConfigConfigMap = "cluster-config"
 	_clusterConfigDir       = "/configs/cluster"
 
-	_statsdAddress = "prometheus-statsd-exporter.default:9125"
+	_statsdAddress = "prometheus-statsd-exporter.prometheus:9125"
 )
 
 var (

From 50249d826c621529d8dfe688ae0bc46b87466dcd Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 02:15:11 +0300
Subject: [PATCH 11/40] Event exporter must be in logging namespace

---
 manager/manifests/event-exporter.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/manager/manifests/event-exporter.yaml b/manager/manifests/event-exporter.yaml
index 3bc701f2a1..8ff19efb09 100644
--- a/manager/manifests/event-exporter.yaml
+++ b/manager/manifests/event-exporter.yaml
@@ -15,7 +15,7 @@
 apiVersion: v1
 kind: ServiceAccount
 metadata:
-  namespace: prometheus
+  namespace: logging
   name: event-exporter
 
 ---
@@ -30,7 +30,7 @@ roleRef:
   name: view
 subjects:
   - kind: ServiceAccount
-    namespace: prometheus
+    namespace: logging
     name: event-exporter
 
 ---
@@ -39,7 +39,7 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
   name: event-exporter-config
-  namespace: prometheus
+  namespace: logging
 data:
   config.yaml: |
     logLevel: error
@@ -61,7 +61,7 @@ apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: event-exporter
-  namespace: prometheus
+  namespace: logging
 spec:
   replicas: 1
   selector:

From 23918dfeacc8a267eae62312522fd755fc334b5f Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 02:27:15 +0300
Subject: [PATCH 12/40] Use consts where possible

---
 manager/manifests/prometheus-monitoring.yaml |  2 ++
 pkg/config/config.go                         |  4 +--
 pkg/consts/consts.go                         |  7 +++--
 pkg/health/health.go                         | 31 ++++++++++----------
 pkg/workloads/k8s.go                         |  5 ++--
 5 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/manager/manifests/prometheus-monitoring.yaml b/manager/manifests/prometheus-monitoring.yaml
index fbfc6309da..f42e0d078f 100644
--- a/manager/manifests/prometheus-monitoring.yaml
+++ b/manager/manifests/prometheus-monitoring.yaml
@@ -27,6 +27,7 @@ apiVersion: monitoring.coreos.com/v1
 kind: Prometheus
 metadata:
   name: prometheus
+  namespace: prometheus
 spec:
   image: $CORTEX_IMAGE_PROMETHEUS
   serviceAccountName: prometheus
@@ -73,6 +74,7 @@ apiVersion: v1
 kind: ServiceAccount
 metadata:
   name: prometheus
+  namespace: prometheus
 
 ---
 
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 304ee1dcf3..eb7bd5e269 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -152,7 +152,7 @@ func Init() error {
 
 	prometheusURL := os.Getenv("CORTEX_PROMETHEUS_URL")
 	if len(prometheusURL) == 0 {
-		prometheusURL = fmt.Sprintf("http://prometheus.%s:9090", consts.DefaultNamespace)
+		prometheusURL = fmt.Sprintf("http://prometheus.%s:9090", consts.PrometheusNamespace)
 	}
 
 	promClient, err := promapi.NewClient(promapi.Config{
@@ -168,7 +168,7 @@ func Init() error {
 	}
 
 	if OperatorMetadata.IsOperatorInCluster {
-		MetricsClient, err = statsd.New(fmt.Sprintf("prometheus-statsd-exporter.%s:9125", consts.DefaultNamespace))
+		MetricsClient, err = statsd.New(fmt.Sprintf("prometheus-statsd-exporter.%s:9125", consts.PrometheusNamespace))
 		if err != nil {
 			return errors.Wrap(errors.WithStack(err), "unable to initialize metrics client")
 		}
diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go
index 67a9c7e4d0..4b06b6b589 100644
--- a/pkg/consts/consts.go
+++ b/pkg/consts/consts.go
@@ -27,8 +27,11 @@ var (
 	CortexVersion      = "master" // CORTEX_VERSION
 	CortexVersionMinor = "master" // CORTEX_VERSION_MINOR
 
-	DefaultNamespace = "default"
-	IstioNamespace   = "istio-system"
+	KubeSystemNamespace = "kube-system"
+	DefaultNamespace    = "default"
+	PrometheusNamespace = "prometheus"
+	LoggingNamespace    = "logging"
+	IstioNamespace      = "istio-system"
 
 	DefaultMaxQueueLength = int64(100)
 	DefaultMaxConcurrency = int64(1)
diff --git a/pkg/health/health.go b/pkg/health/health.go
index 45543ec220..cd556dd80b 100644
--- a/pkg/health/health.go
+++ b/pkg/health/health.go
@@ -22,6 +22,7 @@ import (
 	"reflect"
 
 	"github.com/aws/aws-sdk-go/service/elbv2"
+	"github.com/cortexlabs/cortex/pkg/consts"
 	awslib "github.com/cortexlabs/cortex/pkg/lib/aws"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/json"
@@ -106,47 +107,47 @@ func Check(awsClient *awslib.Client, k8sClient *k8s.Client, clusterName string)
 	if err := parallel.RunFirstErr(
 		func() error {
 			var err error
-			operatorHealth, err = getDeploymentReadiness(k8sClient, "operator", "default")
+			operatorHealth, err = getDeploymentReadiness(k8sClient, "operator", consts.DefaultNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			controllerManagerHealth, err = getDeploymentReadiness(k8sClient, "operator-controller-manager", "default")
+			controllerManagerHealth, err = getDeploymentReadiness(k8sClient, "operator-controller-manager", consts.DefaultNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			prometheusHealth, err = getStatefulSetReadiness(k8sClient, "prometheus-prometheus", "default")
+			prometheusHealth, err = getStatefulSetReadiness(k8sClient, "prometheus-prometheus", consts.PrometheusNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			autoscalerHealth, err = getDeploymentReadiness(k8sClient, "autoscaler", "default")
+			autoscalerHealth, err = getDeploymentReadiness(k8sClient, "autoscaler", consts.DefaultNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			activatorHealth, err = getDeploymentReadiness(k8sClient, "activator", "default")
+			activatorHealth, err = getDeploymentReadiness(k8sClient, "activator", consts.DefaultNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			grafanaHealth, err = getStatefulSetReadiness(k8sClient, "grafana", "default")
+			grafanaHealth, err = getStatefulSetReadiness(k8sClient, "grafana", consts.DefaultNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			operatorGatewayHealth, err = getDeploymentReadiness(k8sClient, "ingressgateway-operator", "istio-system")
+			operatorGatewayHealth, err = getDeploymentReadiness(k8sClient, "ingressgateway-operator", consts.IstioNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			apisGatewayHealth, err = getDeploymentReadiness(k8sClient, "ingressgateway-apis", "istio-system")
+			apisGatewayHealth, err = getDeploymentReadiness(k8sClient, "ingressgateway-apis", consts.IstioNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			clusterAutoscalerHealth, err = getDeploymentReadiness(k8sClient, "cluster-autoscaler", "kube-system")
+			clusterAutoscalerHealth, err = getDeploymentReadiness(k8sClient, "cluster-autoscaler", consts.KubeSystemNamespace)
 			return err
 		},
 		func() error {
@@ -161,32 +162,32 @@ func Check(awsClient *awslib.Client, k8sClient *k8s.Client, clusterName string)
 		},
 		func() error {
 			var err error
-			fluentBitHealth, err = getDaemonSetReadiness(k8sClient, "fluent-bit", "default")
+			fluentBitHealth, err = getDaemonSetReadiness(k8sClient, "fluent-bit", consts.LoggingNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			dcgmExporterHealth, err = getDaemonSetReadiness(k8sClient, "dcgm-exporter", "default")
+			dcgmExporterHealth, err = getDaemonSetReadiness(k8sClient, "dcgm-exporter", consts.PrometheusNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			nodeExporterHealth, err = getDaemonSetReadiness(k8sClient, "node-exporter", "default")
+			nodeExporterHealth, err = getDaemonSetReadiness(k8sClient, "node-exporter", consts.PrometheusNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			statsdExporterHealth, err = getDeploymentReadiness(k8sClient, "prometheus-statsd-exporter", "default")
+			statsdExporterHealth, err = getDeploymentReadiness(k8sClient, "prometheus-statsd-exporter", consts.PrometheusNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			eventExporterHealth, err = getDeploymentReadiness(k8sClient, "event-exporter", "default")
+			eventExporterHealth, err = getDeploymentReadiness(k8sClient, "event-exporter", consts.LoggingNamespace)
 			return err
 		},
 		func() error {
 			var err error
-			kubeStateMetricsHealth, err = getDeploymentReadiness(k8sClient, "kube-state-metrics", "default")
+			kubeStateMetricsHealth, err = getDeploymentReadiness(k8sClient, "kube-state-metrics", consts.PrometheusNamespace)
 			return err
 		},
 	); err != nil {
diff --git a/pkg/workloads/k8s.go b/pkg/workloads/k8s.go
index ecf25b3aef..0523c8cbfe 100644
--- a/pkg/workloads/k8s.go
+++ b/pkg/workloads/k8s.go
@@ -17,6 +17,7 @@ limitations under the License.
 package workloads
 
 import (
+	"fmt"
 	"path"
 	"strings"
 
@@ -60,14 +61,14 @@ const (
 	_clusterConfigDirVolume = "cluster-config"
 	_clusterConfigConfigMap = "cluster-config"
 	_clusterConfigDir       = "/configs/cluster"
-
-	_statsdAddress = "prometheus-statsd-exporter.prometheus:9125"
 )
 
 var (
 	_asyncGatewayCPURequest = kresource.MustParse("100m")
 	_asyncGatewayMemRequest = kresource.MustParse("100Mi")
 
+	_statsdAddress = fmt.Sprintf("prometheus-statsd-exporter.%s:9125", consts.PrometheusNamespace)
+
 	// each Inferentia chip requires 128 HugePages with each HugePage having a size of 2Mi
 	_hugePagesMemPerInf = int64(128 * 2 * 1024 * 1024) // bytes
 )

From a2874571a6901865ed5e26e0d13fb21ace315663 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 02:42:10 +0300
Subject: [PATCH 13/40] Have all namespaces in a single yaml & revert temporary
 change

---
 manager/install.sh                            | 16 +++++-----------
 .../default.yaml => namespaces.yaml}          | 19 +++++++++++++++++++
 manager/manifests/namespaces/istio.yaml       | 18 ------------------
 manager/manifests/namespaces/logging.yaml     | 18 ------------------
 manager/manifests/namespaces/prometheus.yaml  | 18 ------------------
 5 files changed, 24 insertions(+), 65 deletions(-)
 rename manager/manifests/{namespaces/default.yaml => namespaces.yaml} (77%)
 delete mode 100644 manager/manifests/namespaces/istio.yaml
 delete mode 100644 manager/manifests/namespaces/logging.yaml
 delete mode 100644 manager/manifests/namespaces/prometheus.yaml

diff --git a/manager/install.sh b/manager/install.sh
index 04bc320dfc..89d87b3906 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -33,10 +33,10 @@ function main() {
 }
 
 function cluster_up() {
-  check_eks
+  create_eks
 
   echo -n "￮ creating namespaces "
-  setup_namespaces
+  kubectl apply -f manifests/namespaces.yaml >/dev/null
   echo "✓"
 
   echo -n "￮ updating cluster configuration "
@@ -199,14 +199,6 @@ function write_kubeconfig() {
   out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/"; exit 1; fi
 }
 
-function setup_namespaces() {
-  # to apply the istio-discovery label
-  kubectl apply -f manifests/namespaces/default.yaml >/dev/null
-  kubectl apply -f manifests/namespaces/istio.yaml >/dev/null
-  kubectl apply -f manifests/namespaces/prometheus.yaml >/dev/null
-  kubectl apply -f manifests/namespaces/logging.yaml >/dev/null
-}
-
 function setup_configmap() {
   envsubst < manifests/default_cortex_cli_config.yaml > tmp_cli_config.yaml
   kubectl -n=default create configmap 'client-config' \
@@ -239,7 +231,9 @@ function setup_prometheus() {
   envsubst < manifests/prometheus-node-exporter.yaml | kubectl apply -f - >/dev/null
   envsubst < manifests/prometheus-monitoring.yaml | kubectl apply -f - >/dev/null
   python render_template.py $CORTEX_CLUSTER_CONFIG_FILE manifests/prometheus-additional-scrape-configs.yaml.j2 > prometheus-additional-scrape-configs.yaml
-  kubectl create secret generic additional-scrape-configs --from-file=prometheus-additional-scrape-configs.yaml
+  if ! kubectl get secret additional-scrape-configs >/dev/null 2>&1; then
+    kubectl create secret generic additional-scrape-configs --from-file=prometheus-additional-scrape-configs.yaml
+  fi
 }
 
 function setup_grafana() {
diff --git a/manager/manifests/namespaces/default.yaml b/manager/manifests/namespaces.yaml
similarity index 77%
rename from manager/manifests/namespaces/default.yaml
rename to manager/manifests/namespaces.yaml
index af30d267ec..e6612f5304 100644
--- a/manager/manifests/namespaces/default.yaml
+++ b/manager/manifests/namespaces.yaml
@@ -18,3 +18,22 @@ metadata:
   name: default
   labels:
     istio-discovery: enabled
+---
+
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: istio-system
+---
+
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: logging
+---
+
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: prometheus
+---
diff --git a/manager/manifests/namespaces/istio.yaml b/manager/manifests/namespaces/istio.yaml
deleted file mode 100644
index 3f5ce71534..0000000000
--- a/manager/manifests/namespaces/istio.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright 2021 Cortex Labs, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: istio-system
diff --git a/manager/manifests/namespaces/logging.yaml b/manager/manifests/namespaces/logging.yaml
deleted file mode 100644
index 02fe6ce079..0000000000
--- a/manager/manifests/namespaces/logging.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright 2021 Cortex Labs, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: logging
diff --git a/manager/manifests/namespaces/prometheus.yaml b/manager/manifests/namespaces/prometheus.yaml
deleted file mode 100644
index 995138ef09..0000000000
--- a/manager/manifests/namespaces/prometheus.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright 2021 Cortex Labs, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: prometheus

From 34045d44e0cb48fb304b0e1620be6b23e70a72b3 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 03:24:08 +0300
Subject: [PATCH 14/40] Patch instead of applying the default namespace

---
 manager/install.sh                | 10 ++++++++--
 manager/manifests/namespaces.yaml |  8 --------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/manager/install.sh b/manager/install.sh
index 89d87b3906..18e5a0936b 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -36,7 +36,7 @@ function cluster_up() {
   create_eks
 
   echo -n "￮ creating namespaces "
-  kubectl apply -f manifests/namespaces.yaml >/dev/null
+  setup_namespaces
   echo "✓"
 
   echo -n "￮ updating cluster configuration "
@@ -199,6 +199,12 @@ function write_kubeconfig() {
   out=$(kubectl get pods 2>&1 || true); if [[ "$out" == *"must be logged in to the server"* ]]; then echo "error: your aws iam user does not have access to this cluster; to grant access, see https://docs.cortex.dev/v/${CORTEX_VERSION_MINOR}/"; exit 1; fi
 }
 
+function setup_namespaces() {
+  # patch (rather than apply) the pre-existing default namespace: kubectl apply would warn that it is missing the kubectl.kubernetes.io/last-applied-configuration annotation
+  kubectl patch namespace default -p '{"metadata": {"labels": {"istio-discovery": "enabled"}}}' >/dev/null
+  kubectl apply -f manifests/namespaces.yaml >/dev/null
+}
+
 function setup_configmap() {
   envsubst < manifests/default_cortex_cli_config.yaml > tmp_cli_config.yaml
   kubectl -n=default create configmap 'client-config' \
@@ -232,7 +238,7 @@ function setup_prometheus() {
   envsubst < manifests/prometheus-monitoring.yaml | kubectl apply -f - >/dev/null
   python render_template.py $CORTEX_CLUSTER_CONFIG_FILE manifests/prometheus-additional-scrape-configs.yaml.j2 > prometheus-additional-scrape-configs.yaml
   if ! kubectl get secret additional-scrape-configs >/dev/null 2>&1; then
-    kubectl create secret generic additional-scrape-configs --from-file=prometheus-additional-scrape-configs.yaml
+    kubectl create secret generic additional-scrape-configs --from-file=prometheus-additional-scrape-configs.yaml > /dev/null
   fi
 }
 
diff --git a/manager/manifests/namespaces.yaml b/manager/manifests/namespaces.yaml
index e6612f5304..ce1959e923 100644
--- a/manager/manifests/namespaces.yaml
+++ b/manager/manifests/namespaces.yaml
@@ -12,14 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: default
-  labels:
-    istio-discovery: enabled
----
-
 apiVersion: v1
 kind: Namespace
 metadata:

From b24dff9e750b3643a26773f005f516459067bec0 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 03:29:50 +0300
Subject: [PATCH 15/40] Fix waiting on the load balancer stage

---
 manager/install.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/manager/install.sh b/manager/install.sh
index 18e5a0936b..8ce233fb0d 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -540,8 +540,8 @@ function validate_cortex() {
     fi
 
     if [ "$prometheus_ready" == "" ]; then
-      readyReplicas=$(kubectl get statefulset -n default prometheus-prometheus -o jsonpath='{.status.readyReplicas}' 2> /dev/null)
-      desiredReplicas=$(kubectl get statefulset -n default prometheus-prometheus -o jsonpath='{.status.replicas}' 2> /dev/null)
+      readyReplicas=$(kubectl get statefulset -n prometheus prometheus-prometheus -o jsonpath='{.status.readyReplicas}' 2> /dev/null)
+      desiredReplicas=$(kubectl get statefulset -n prometheus prometheus-prometheus -o jsonpath='{.status.replicas}' 2> /dev/null)
 
       if [ "$readyReplicas" != "" ] && [ "$desiredReplicas" != "" ]; then
         if [ "$readyReplicas" == "$desiredReplicas" ]; then

From 6c9421d0374f31e0bd14a1a13f9de59af0486fbb Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 16:00:13 +0300
Subject: [PATCH 16/40] More namespace fixes

---
 cli/cmd/cluster.go                        | 4 ++--
 manager/manifests/autoscaler.yaml.j2      | 2 +-
 manager/manifests/grafana/grafana.yaml.j2 | 2 +-
 pkg/consts/consts.go                      | 4 ++--
 pkg/crds/hack/run_manager.sh              | 2 +-
 pkg/crds/main.go                          | 2 +-
 pkg/health/health.go                      | 2 +-
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
index 3311c60a01..f89ba386f3 100644
--- a/cli/cmd/cluster.go
+++ b/cli/cmd/cluster.go
@@ -358,7 +358,7 @@ var _clusterConfigureCmd = &cobra.Command{
 			exit.Error(err)
 		}
 
-		k8sClient, err := k8s.New("default", false, restConfig, scheme)
+		k8sClient, err := k8s.New(consts.DefaultNamespace, false, restConfig, scheme)
 		if err != nil {
 			exit.Error(err)
 		}
@@ -804,7 +804,7 @@ var _clusterHealthCmd = &cobra.Command{
 			exit.Error(err)
 		}
 
-		k8sClient, err := k8s.New("default", false, restConfig, scheme)
+		k8sClient, err := k8s.New(consts.DefaultNamespace, false, restConfig, scheme)
 		if err != nil {
 			exit.Error(err)
 		}
diff --git a/manager/manifests/autoscaler.yaml.j2 b/manager/manifests/autoscaler.yaml.j2
index df42a90965..842552f31a 100644
--- a/manager/manifests/autoscaler.yaml.j2
+++ b/manager/manifests/autoscaler.yaml.j2
@@ -82,7 +82,7 @@ spec:
           args:
             - "--in-cluster"
             - "--port=8000"
-            - "--prometheus-url=http://prometheus.default:9090"
+            - "--prometheus-url=http://prometheus.prometheus:9090"
             - "--namespace=default"
             - "--cluster-config=/configs/cluster/cluster.yaml"
           ports:
diff --git a/manager/manifests/grafana/grafana.yaml.j2 b/manager/manifests/grafana/grafana.yaml.j2
index ec863d3fb5..1cd9052393 100644
--- a/manager/manifests/grafana/grafana.yaml.j2
+++ b/manager/manifests/grafana/grafana.yaml.j2
@@ -28,7 +28,7 @@ data:
                 "name": "prometheus",
                 "orgId": 1,
                 "type": "prometheus",
-                "url": "http://prometheus.default:9090",
+                "url": "http://prometheus.prometheus:9090",
                 "version": 1,
                 "isDefault": true
             }
diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go
index 4b06b6b589..7ea590fc45 100644
--- a/pkg/consts/consts.go
+++ b/pkg/consts/consts.go
@@ -27,11 +27,11 @@ var (
 	CortexVersion      = "master" // CORTEX_VERSION
 	CortexVersionMinor = "master" // CORTEX_VERSION_MINOR
 
-	KubeSystemNamespace = "kube-system"
 	DefaultNamespace    = "default"
+	KubeSystemNamespace = "kube-system"
+	IstioNamespace      = "istio-system"
 	PrometheusNamespace = "prometheus"
 	LoggingNamespace    = "logging"
-	IstioNamespace      = "istio-system"
 
 	DefaultMaxQueueLength = int64(100)
 	DefaultMaxConcurrency = int64(1)
diff --git a/pkg/crds/hack/run_manager.sh b/pkg/crds/hack/run_manager.sh
index 1b7d1a79fc..acd0ac9b37 100755
--- a/pkg/crds/hack/run_manager.sh
+++ b/pkg/crds/hack/run_manager.sh
@@ -18,7 +18,7 @@
 
 CLUSTER_CONFIG=$1
 
-port_forward_cmd="kubectl port-forward -n default prometheus-prometheus-0 9090"
+port_forward_cmd="kubectl port-forward -n prometheus prometheus-prometheus-0 9090"
 kill $(pgrep -f "${port_forward_cmd}") >/dev/null 2>&1 || true
 
 echo "Port-forwarding Prometheus to localhost:9090"
diff --git a/pkg/crds/main.go b/pkg/crds/main.go
index 62b552a426..ee8c0c476b 100644
--- a/pkg/crds/main.go
+++ b/pkg/crds/main.go
@@ -102,7 +102,7 @@ func main() {
 	}
 
 	if prometheusURL == "" {
-		prometheusURL = fmt.Sprintf("http://prometheus.%s:9090", consts.DefaultNamespace)
+		prometheusURL = fmt.Sprintf("http://prometheus.%s:9090", consts.PrometheusNamespace)
 	}
 
 	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
diff --git a/pkg/health/health.go b/pkg/health/health.go
index cd556dd80b..7e7ea9e9d5 100644
--- a/pkg/health/health.go
+++ b/pkg/health/health.go
@@ -218,7 +218,7 @@ func Check(awsClient *awslib.Client, k8sClient *k8s.Client, clusterName string)
 func GetWarnings(k8sClient *k8s.Client) (ClusterWarnings, error) {
 	var prometheusMemorySaturationWarn string
 
-	saturation, err := getPodMemorySaturation(k8sClient, "prometheus-prometheus-0", "default")
+	saturation, err := getPodMemorySaturation(k8sClient, "prometheus-prometheus-0", consts.PrometheusNamespace)
 	if err != nil {
 		return ClusterWarnings{}, err
 	}

From 4f4daefa4a1ae13e9c7a9379c798c44227189a82 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 16:26:20 +0300
Subject: [PATCH 17/40] Create additional-scrape-configs in prometheus ns

---
 manager/install.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/manager/install.sh b/manager/install.sh
index 8ce233fb0d..c5c0881c46 100755
--- a/manager/install.sh
+++ b/manager/install.sh
@@ -237,8 +237,8 @@ function setup_prometheus() {
   envsubst < manifests/prometheus-node-exporter.yaml | kubectl apply -f - >/dev/null
   envsubst < manifests/prometheus-monitoring.yaml | kubectl apply -f - >/dev/null
   python render_template.py $CORTEX_CLUSTER_CONFIG_FILE manifests/prometheus-additional-scrape-configs.yaml.j2 > prometheus-additional-scrape-configs.yaml
-  if ! kubectl get secret additional-scrape-configs >/dev/null 2>&1; then
-    kubectl create secret generic additional-scrape-configs --from-file=prometheus-additional-scrape-configs.yaml > /dev/null
+  if ! kubectl get secret -n prometheus additional-scrape-configs >/dev/null 2>&1; then
+    kubectl create secret generic -n prometheus additional-scrape-configs --from-file=prometheus-additional-scrape-configs.yaml > /dev/null
   fi
 }
 

From 2d572aa3f057934daec649b73d168ac96bf2f596 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 16:26:34 +0300
Subject: [PATCH 18/40] Prometheus' service must be in prometheus ns

---
 manager/manifests/prometheus-monitoring.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/manager/manifests/prometheus-monitoring.yaml b/manager/manifests/prometheus-monitoring.yaml
index f42e0d078f..bda334e4d0 100644
--- a/manager/manifests/prometheus-monitoring.yaml
+++ b/manager/manifests/prometheus-monitoring.yaml
@@ -124,6 +124,7 @@ apiVersion: v1
 kind: Service
 metadata:
   name: prometheus
+  namespace: prometheus
 spec:
   type: ClusterIP
   ports:

From fde6ab05351fd1a487f8a9b4e7a418569dcc579e Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 17:53:03 +0300
Subject: [PATCH 19/40] Fix cortex cluster health cmd panicking

---
 cli/cmd/cluster.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
index f89ba386f3..bb5d7882fb 100644
--- a/cli/cmd/cluster.go
+++ b/cli/cmd/cluster.go
@@ -814,6 +814,11 @@ var _clusterHealthCmd = &cobra.Command{
 			exit.Error(err)
 		}
 
+		k8sClient, err = k8s.New(consts.PrometheusNamespace, false, restConfig, scheme)
+		if err != nil {
+			exit.Error(err)
+		}
+
 		clusterWarnings, err := health.GetWarnings(k8sClient)
 		if err != nil {
 			exit.Error(err)

From 4c6b670d816c128f12b3bc94f029f47ac28e8dd4 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 17:57:58 +0300
Subject: [PATCH 20/40] Fix getPodMemorySaturation function instead

---
 cli/cmd/cluster.go   | 5 -----
 pkg/health/health.go | 8 ++++++--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
index bb5d7882fb..f89ba386f3 100644
--- a/cli/cmd/cluster.go
+++ b/cli/cmd/cluster.go
@@ -814,11 +814,6 @@ var _clusterHealthCmd = &cobra.Command{
 			exit.Error(err)
 		}
 
-		k8sClient, err = k8s.New(consts.PrometheusNamespace, false, restConfig, scheme)
-		if err != nil {
-			exit.Error(err)
-		}
-
 		clusterWarnings, err := health.GetWarnings(k8sClient)
 		if err != nil {
 			exit.Error(err)
diff --git a/pkg/health/health.go b/pkg/health/health.go
index 7e7ea9e9d5..014e4b9f48 100644
--- a/pkg/health/health.go
+++ b/pkg/health/health.go
@@ -30,6 +30,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/lib/parallel"
 	"github.com/cortexlabs/cortex/pkg/types/clusterconfig"
 	kapps "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
 	kresource "k8s.io/apimachinery/pkg/api/resource"
 	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -296,8 +297,11 @@ func getLoadBalancerHealth(awsClient *awslib.Client, clusterName string, loadBal
 
 func getPodMemorySaturation(k8sClient *k8s.Client, podName, namespace string) (float64, error) {
 	ctx := context.Background()
-	pod, err := k8sClient.GetPod(podName)
-	if err != nil {
+	var pod v1.Pod
+	if err := k8sClient.Get(ctx, ctrlclient.ObjectKey{
+		Namespace: namespace,
+		Name:      podName,
+	}, &pod); err != nil {
 		return 0, err
 	}
 

From 042b2b3a45332de4149b1770a5beb9b76cb3a844 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 20:16:07 +0300
Subject: [PATCH 21/40] Report live (x/y) and up-to-date replicas

---
 cli/cluster/delete.go                        |  2 +-
 cli/cmd/get.go                               |  8 ++-
 cli/cmd/lib_async_apis.go                    | 25 ++--------
 cli/cmd/lib_batch_apis.go                    |  2 +-
 cli/cmd/lib_realtime_apis.go                 | 21 ++------
 cli/cmd/lib_task_apis.go                     |  2 +-
 cli/cmd/lib_traffic_splitters.go             | 13 ++---
 pkg/operator/resources/asyncapi/status.go    | 52 ++++----------------
 pkg/operator/resources/realtimeapi/api.go    |  4 +-
 pkg/operator/resources/realtimeapi/status.go | 48 +++++-------------
 pkg/operator/resources/resources.go          |  5 +-
 pkg/types/status/status.go                   | 13 ++---
 12 files changed, 52 insertions(+), 143 deletions(-)

diff --git a/cli/cluster/delete.go b/cli/cluster/delete.go
index e81624f98d..7b1d96d86d 100644
--- a/cli/cluster/delete.go
+++ b/cli/cluster/delete.go
@@ -70,7 +70,7 @@ func getReadyRealtimeAPIReplicasOrNil(operatorConfig OperatorConfig, apiName str
 		return nil
 	}
 
-	totalReady := apiRes.Status.Updated.Ready + apiRes.Status.Stale.Ready
+	totalReady := apiRes.Status.Ready
 	return &totalReady
 }
 
diff --git a/cli/cmd/get.go b/cli/cmd/get.go
index 1b11b984a0..e366b432e4 100644
--- a/cli/cmd/get.go
+++ b/cli/cmd/get.go
@@ -41,12 +41,10 @@ import (
 const (
 	_titleEnvironment = "env"
 	_titleRealtimeAPI = "realtime api"
-	_titleStatus      = "status"
+	_titleAsyncAPI    = "async api"
+	_titleLive        = "live"
 	_titleUpToDate    = "up-to-date"
-	_titleStale       = "stale"
-	_titleRequested   = "requested"
-	_titleFailed      = "failed"
-	_titleLastupdated = "last update"
+	_titleLastUpdated = "last update"
 )
 
 var (
diff --git a/cli/cmd/lib_async_apis.go b/cli/cmd/lib_async_apis.go
index 114c88bca8..3b0347e51c 100644
--- a/cli/cmd/lib_async_apis.go
+++ b/cli/cmd/lib_async_apis.go
@@ -17,6 +17,7 @@ limitations under the License.
 package cmd
 
 import (
+	"fmt"
 	"strings"
 	"time"
 
@@ -27,10 +28,6 @@ import (
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 )
 
-const (
-	_titleAsyncAPI = "async api"
-)
-
 func asyncAPITable(asyncAPI schema.APIResponse, env cliconfig.Environment) (string, error) {
 	var out string
 
@@ -60,36 +57,24 @@ func asyncAPITable(asyncAPI schema.APIResponse, env cliconfig.Environment) (stri
 func asyncAPIsTable(asyncAPIs []schema.APIResponse, envNames []string) table.Table {
 	rows := make([][]interface{}, 0, len(asyncAPIs))
 
-	var totalFailed int32
-	var totalStale int32
-
 	for i, asyncAPI := range asyncAPIs {
 		lastUpdated := time.Unix(asyncAPI.Spec.LastUpdated, 0)
 		rows = append(rows, []interface{}{
 			envNames[i],
 			asyncAPI.Spec.Name,
-			asyncAPI.Status.Message(),
-			asyncAPI.Status.Updated.Ready,
-			asyncAPI.Status.Stale.Ready,
-			asyncAPI.Status.Requested,
-			asyncAPI.Status.Updated.TotalFailed(),
+			fmt.Sprintf("%d/%d", asyncAPI.Status.Ready, asyncAPI.Status.Requested),
+			asyncAPI.Status.UpToDate,
 			libtime.SinceStr(&lastUpdated),
 		})
-
-		totalFailed += asyncAPI.Status.Updated.TotalFailed()
-		totalStale += asyncAPI.Status.Stale.Ready
 	}
 
 	return table.Table{
 		Headers: []table.Header{
 			{Title: _titleEnvironment},
 			{Title: _titleAsyncAPI},
-			{Title: _titleStatus},
+			{Title: _titleLive},
 			{Title: _titleUpToDate},
-			{Title: _titleStale, Hidden: totalStale == 0},
-			{Title: _titleRequested},
-			{Title: _titleFailed, Hidden: totalFailed == 0},
-			{Title: _titleLastupdated},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}
diff --git a/cli/cmd/lib_batch_apis.go b/cli/cmd/lib_batch_apis.go
index 272dbfa0fa..ac43150a53 100644
--- a/cli/cmd/lib_batch_apis.go
+++ b/cli/cmd/lib_batch_apis.go
@@ -74,7 +74,7 @@ func batchAPIsTable(batchAPIs []schema.APIResponse, envNames []string) table.Tab
 			{Title: _titleBatchAPI},
 			{Title: _titleJobCount},
 			{Title: _titleLatestJobID},
-			{Title: _titleLastupdated},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}
diff --git a/cli/cmd/lib_realtime_apis.go b/cli/cmd/lib_realtime_apis.go
index be4316e0a8..48fdab941c 100644
--- a/cli/cmd/lib_realtime_apis.go
+++ b/cli/cmd/lib_realtime_apis.go
@@ -17,6 +17,7 @@ limitations under the License.
 package cmd
 
 import (
+	"fmt"
 	"strings"
 	"time"
 
@@ -56,36 +57,24 @@ func realtimeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment)
 func realtimeAPIsTable(realtimeAPIs []schema.APIResponse, envNames []string) table.Table {
 	rows := make([][]interface{}, 0, len(realtimeAPIs))
 
-	var totalFailed int32
-	var totalStale int32
-
 	for i, realtimeAPI := range realtimeAPIs {
 		lastUpdated := time.Unix(realtimeAPI.Spec.LastUpdated, 0)
 		rows = append(rows, []interface{}{
 			envNames[i],
 			realtimeAPI.Spec.Name,
-			realtimeAPI.Status.Message(),
-			realtimeAPI.Status.Updated.Ready,
-			realtimeAPI.Status.Stale.Ready,
-			realtimeAPI.Status.Requested,
-			realtimeAPI.Status.Updated.TotalFailed(),
+			fmt.Sprintf("%d/%d", realtimeAPI.Status.Ready, realtimeAPI.Status.Requested),
+			realtimeAPI.Status.UpToDate,
 			libtime.SinceStr(&lastUpdated),
 		})
-
-		totalFailed += realtimeAPI.Status.Updated.TotalFailed()
-		totalStale += realtimeAPI.Status.Stale.Ready
 	}
 
 	return table.Table{
 		Headers: []table.Header{
 			{Title: _titleEnvironment},
 			{Title: _titleRealtimeAPI},
-			{Title: _titleStatus},
+			{Title: _titleLive},
 			{Title: _titleUpToDate},
-			{Title: _titleStale, Hidden: totalStale == 0},
-			{Title: _titleRequested},
-			{Title: _titleFailed, Hidden: totalFailed == 0},
-			{Title: _titleLastupdated},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}
diff --git a/cli/cmd/lib_task_apis.go b/cli/cmd/lib_task_apis.go
index 50575b8516..3bd0275caf 100644
--- a/cli/cmd/lib_task_apis.go
+++ b/cli/cmd/lib_task_apis.go
@@ -72,7 +72,7 @@ func taskAPIsTable(taskAPIs []schema.APIResponse, envNames []string) table.Table
 			{Title: _titleTaskAPI},
 			{Title: _titleTaskJobCount},
 			{Title: _titleLatestTaskJobID},
-			{Title: _titleLastupdated},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}
diff --git a/cli/cmd/lib_traffic_splitters.go b/cli/cmd/lib_traffic_splitters.go
index 39c344038a..f89d350ded 100644
--- a/cli/cmd/lib_traffic_splitters.go
+++ b/cli/cmd/lib_traffic_splitters.go
@@ -17,6 +17,7 @@ limitations under the License.
 package cmd
 
 import (
+	"fmt"
 	"strings"
 	"time"
 
@@ -82,8 +83,8 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
 			env.Name,
 			apiName,
 			api.Weight,
-			apiRes.Status.Message(),
-			apiRes.Status.Requested,
+			fmt.Sprintf("%d/%d", apiRes.Status.Ready, apiRes.Status.Requested),
+			apiRes.Status.UpToDate,
 			libtime.SinceStr(&lastUpdated),
 		})
 	}
@@ -93,9 +94,9 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
 			{Title: _titleEnvironment},
 			{Title: _titleAPIs},
 			{Title: _trafficSplitterWeights},
-			{Title: _titleStatus},
-			{Title: _titleRequested},
-			{Title: _titleLastupdated},
+			{Title: _titleLive},
+			{Title: _titleUpToDate},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}, nil
@@ -127,7 +128,7 @@ func trafficSplitterListTable(trafficSplitter []schema.APIResponse, envNames []s
 			{Title: _titleEnvironment},
 			{Title: _titleTrafficSplitter},
 			{Title: _titleAPIs},
-			{Title: _titleLastupdated},
+			{Title: _titleLastUpdated},
 		},
 		Rows: rows,
 	}
diff --git a/pkg/operator/resources/asyncapi/status.go b/pkg/operator/resources/asyncapi/status.go
index 38e02329d0..ff7ff67530 100644
--- a/pkg/operator/resources/asyncapi/status.go
+++ b/pkg/operator/resources/asyncapi/status.go
@@ -42,8 +42,6 @@ type asyncResourceGroup struct {
 func GetStatus(apiName string) (*status.Status, error) {
 	var apiDeployment *kapps.Deployment
 	var gatewayDeployment *kapps.Deployment
-	var gatewayPods []kcore.Pod
-	var apiPods []kcore.Pod
 
 	err := parallel.RunFirstErr(
 		func() error {
@@ -56,26 +54,6 @@ func GetStatus(apiName string) (*status.Status, error) {
 			gatewayDeployment, err = config.K8s.GetDeployment(getGatewayK8sName(apiName))
 			return err
 		},
-		func() error {
-			var err error
-			gatewayPods, err = config.K8s.ListPodsByLabels(
-				map[string]string{
-					"apiName":          apiName,
-					"cortex.dev/async": "gateway",
-				},
-			)
-			return err
-		},
-		func() error {
-			var err error
-			apiPods, err = config.K8s.ListPodsByLabels(
-				map[string]string{
-					"apiName":          apiName,
-					"cortex.dev/async": "api",
-				},
-			)
-			return err
-		},
 	)
 	if err != nil {
 		return nil, err
@@ -89,7 +67,7 @@ func GetStatus(apiName string) (*status.Status, error) {
 		return nil, errors.ErrorUnexpected("unable to find gateway deployment", apiName)
 	}
 
-	return apiStatus(apiDeployment, apiPods, gatewayDeployment, gatewayPods)
+	return apiStatus(apiDeployment), nil
 }
 
 func GetAllStatuses(deployments []kapps.Deployment, pods []kcore.Pod) ([]status.Status, error) {
@@ -106,11 +84,7 @@ func GetAllStatuses(deployments []kapps.Deployment, pods []kcore.Pod) ([]status.
 			return nil, errors.ErrorUnexpected("unable to find gateway deployment", apiName)
 		}
 
-		st, err := apiStatus(k8sResources.APIDeployment, k8sResources.APIPods, k8sResources.GatewayDeployment, k8sResources.GatewayPods)
-		if err != nil {
-			return nil, err
-		}
-		statuses[i] = *st
+		statuses[i] = *apiStatus(k8sResources.APIDeployment)
 		i++
 	}
 
@@ -174,22 +148,14 @@ func groupResourcesByAPI(deployments []kapps.Deployment, pods []kcore.Pod) map[s
 	return resourcesByAPI
 }
 
-func apiStatus(apiDeployment *kapps.Deployment, apiPods []kcore.Pod, gatewayDeployment *kapps.Deployment, gatewayPods []kcore.Pod) (*status.Status, error) {
-	autoscalingSpec, err := userconfig.AutoscalingFromAnnotations(apiDeployment)
-	if err != nil {
-		return nil, err
+func apiStatus(deployment *kapps.Deployment) *status.Status {
+	return &status.Status{
+		APIName:   deployment.Labels["apiName"],
+		APIID:     deployment.Labels["apiID"],
+		Ready:     deployment.Status.ReadyReplicas,
+		Requested: deployment.Status.Replicas,
+		UpToDate:  deployment.Status.UpdatedReplicas,
 	}
-
-	apiReplicaCounts := getReplicaCounts(apiDeployment, apiPods)
-	gatewayReplicaCounts := getReplicaCounts(gatewayDeployment, gatewayPods)
-
-	st := &status.Status{}
-	st.APIName = apiDeployment.Labels["apiName"]
-	st.APIID = apiDeployment.Labels["apiID"]
-	st.ReplicaCounts = apiReplicaCounts
-	st.Code = getStatusCode(apiReplicaCounts, gatewayReplicaCounts, autoscalingSpec.MinReplicas)
-
-	return st, nil
 }
 
 func getStatusCode(apiCounts status.ReplicaCounts, gatewayCounts status.ReplicaCounts, apiMinReplicas int32) status.Code {
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 256b253f8e..c9ddaf5957 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -175,8 +175,8 @@ func DeleteAPI(apiName string, keepCache bool) error {
 	return nil
 }
 
-func GetAllAPIs(pods []kcore.Pod, deployments []kapps.Deployment) ([]schema.APIResponse, error) {
-	statuses, err := GetAllStatuses(deployments, pods)
+func GetAllAPIs(deployments []kapps.Deployment) ([]schema.APIResponse, error) {
+	statuses, err := GetAllStatuses(deployments)
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/operator/resources/realtimeapi/status.go b/pkg/operator/resources/realtimeapi/status.go
index a65716f35c..4c8eb5ac46 100644
--- a/pkg/operator/resources/realtimeapi/status.go
+++ b/pkg/operator/resources/realtimeapi/status.go
@@ -24,30 +24,15 @@ import (
 	"github.com/cortexlabs/cortex/pkg/consts"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
-	"github.com/cortexlabs/cortex/pkg/lib/parallel"
 	"github.com/cortexlabs/cortex/pkg/types/status"
-	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"github.com/cortexlabs/cortex/pkg/workloads"
 	kapps "k8s.io/api/apps/v1"
 	kcore "k8s.io/api/core/v1"
 )
 
 func GetStatus(apiName string) (*status.Status, error) {
-	var deployment *kapps.Deployment
-	var pods []kcore.Pod
-
-	err := parallel.RunFirstErr(
-		func() error {
-			var err error
-			deployment, err = config.K8s.GetDeployment(workloads.K8sName(apiName))
-			return err
-		},
-		func() error {
-			var err error
-			pods, err = config.K8s.ListPodsByLabel("apiName", apiName)
-			return err
-		},
-	)
+	var err error
+	deployment, err := config.K8s.GetDeployment(workloads.K8sName(apiName))
 	if err != nil {
 		return nil, err
 	}
@@ -56,17 +41,13 @@ func GetStatus(apiName string) (*status.Status, error) {
 		return nil, errors.ErrorUnexpected("unable to find deployment", apiName)
 	}
 
-	return apiStatus(deployment, pods)
+	return apiStatus(*deployment), nil
 }
 
-func GetAllStatuses(deployments []kapps.Deployment, pods []kcore.Pod) ([]status.Status, error) {
+func GetAllStatuses(deployments []kapps.Deployment) ([]status.Status, error) {
 	statuses := make([]status.Status, len(deployments))
 	for i := range deployments {
-		st, err := apiStatus(&deployments[i], pods)
-		if err != nil {
-			return nil, err
-		}
-		statuses[i] = *st
+		statuses[i] = *apiStatus(deployments[i])
 	}
 
 	sort.Slice(statuses, func(i, j int) bool {
@@ -76,19 +57,14 @@ func GetAllStatuses(deployments []kapps.Deployment, pods []kcore.Pod) ([]status.
 	return statuses, nil
 }
 
-func apiStatus(deployment *kapps.Deployment, allPods []kcore.Pod) (*status.Status, error) {
-	autoscalingSpec, err := userconfig.AutoscalingFromAnnotations(deployment)
-	if err != nil {
-		return nil, err
+func apiStatus(deployment kapps.Deployment) *status.Status {
+	return &status.Status{
+		APIName:   deployment.Labels["apiName"],
+		APIID:     deployment.Labels["apiID"],
+		Ready:     deployment.Status.ReadyReplicas,
+		Requested: deployment.Status.Replicas,
+		UpToDate:  deployment.Status.UpdatedReplicas,
 	}
-
-	status := &status.Status{}
-	status.APIName = deployment.Labels["apiName"]
-	status.APIID = deployment.Labels["apiID"]
-	status.ReplicaCounts = getReplicaCounts(deployment, allPods)
-	status.Code = getStatusCode(&status.ReplicaCounts, autoscalingSpec.MinReplicas)
-
-	return status, nil
 }
 
 func getReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) status.ReplicaCounts {
diff --git a/pkg/operator/resources/resources.go b/pkg/operator/resources/resources.go
index 87069c2136..b9853c1993 100644
--- a/pkg/operator/resources/resources.go
+++ b/pkg/operator/resources/resources.go
@@ -308,14 +308,11 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		}
 	}
 
-	var realtimeAPIPods []kcore.Pod
 	var batchAPIPods []kcore.Pod
 	var taskAPIPods []kcore.Pod
 	var asyncAPIPods []kcore.Pod
 	for _, pod := range pods {
 		switch pod.Labels["apiKind"] {
-		case userconfig.RealtimeAPIKind.String():
-			realtimeAPIPods = append(realtimeAPIPods, pod)
 		case userconfig.BatchAPIKind.String():
 			batchAPIPods = append(batchAPIPods, pod)
 		case userconfig.TaskAPIKind.String():
@@ -340,7 +337,7 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		}
 	}
 
-	realtimeAPIList, err := realtimeapi.GetAllAPIs(realtimeAPIPods, realtimeAPIDeployments)
+	realtimeAPIList, err := realtimeapi.GetAllAPIs(realtimeAPIDeployments)
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/types/status/status.go b/pkg/types/status/status.go
index 6dad4e1992..9a95c16463 100644
--- a/pkg/types/status/status.go
+++ b/pkg/types/status/status.go
@@ -17,10 +17,11 @@ limitations under the License.
 package status
 
 type Status struct {
-	APIName       string `json:"api_name"`
-	APIID         string `json:"api_id"`
-	Code          Code   `json:"status_code"`
-	ReplicaCounts `json:"replica_counts"`
+	APIName   string `json:"api_name"`
+	APIID     string `json:"api_id"`
+	Ready     int32  `json:"ready"`
+	Requested int32  `json:"requested"`
+	UpToDate  int32  `json:"up_to_date"`
 }
 
 type ReplicaCounts struct {
@@ -53,10 +54,6 @@ type WorkerCounts struct {
 	Unknown      int32 `json:"unknown,omitempty"`
 }
 
-func (status *Status) Message() string {
-	return status.Code.Message()
-}
-
 func (src *SubReplicaCounts) TotalFailed() int32 {
 	return src.Failed + src.ErrImagePull + src.Killed + src.KilledOOM + src.Stalled
 }

From dd50f2b509df8d5f9f112bb843e61eab04ecc61d Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 15 Jul 2021 21:43:48 +0300
Subject: [PATCH 22/40] WIP on upgrading the statuses

---
 cli/cmd/get.go                                |  6 +-
 cli/cmd/lib_realtime_apis.go                  |  2 +-
 go.mod                                        |  2 +-
 pkg/operator/endpoints/logs.go                | 11 +++-
 pkg/operator/operator/k8s.go                  | 13 ++++
 pkg/operator/resources/asyncapi/api.go        | 30 ++++-----
 pkg/operator/resources/asyncapi/status.go     | 66 ++++---------------
 pkg/operator/resources/job/batchapi/api.go    |  4 +-
 pkg/operator/resources/job/taskapi/api.go     |  4 +-
 pkg/operator/resources/realtimeapi/api.go     | 28 ++++----
 pkg/operator/resources/realtimeapi/status.go  | 14 +---
 pkg/operator/resources/resources.go           |  9 +--
 pkg/operator/resources/trafficsplitter/api.go |  4 +-
 pkg/operator/schema/schema.go                 |  2 +-
 pkg/types/status/status.go                    | 27 ++++++--
 15 files changed, 103 insertions(+), 119 deletions(-)

diff --git a/cli/cmd/get.go b/cli/cmd/get.go
index e366b432e4..0a9b6f9be8 100644
--- a/cli/cmd/get.go
+++ b/cli/cmd/get.go
@@ -219,7 +219,11 @@ func getAPIsInAllEnvironments() (string, error) {
 
 		if err == nil {
 			for _, api := range apisRes {
-				switch api.Spec.Kind {
+				if api.Status == nil {
+					// TODO remove this once the status is present for all
+					continue
+				}
+				switch api.Status.APIKind {
 				case userconfig.BatchAPIKind:
 					allBatchAPIEnvs = append(allBatchAPIEnvs, env.Name)
 					allBatchAPIs = append(allBatchAPIs, api)
diff --git a/cli/cmd/lib_realtime_apis.go b/cli/cmd/lib_realtime_apis.go
index 48fdab941c..01be8891c2 100644
--- a/cli/cmd/lib_realtime_apis.go
+++ b/cli/cmd/lib_realtime_apis.go
@@ -61,7 +61,7 @@ func realtimeAPIsTable(realtimeAPIs []schema.APIResponse, envNames []string) tab
 		lastUpdated := time.Unix(realtimeAPI.Spec.LastUpdated, 0)
 		rows = append(rows, []interface{}{
 			envNames[i],
-			realtimeAPI.Spec.Name,
+			realtimeAPI.Status.APIName,
 			fmt.Sprintf("%d/%d", realtimeAPI.Status.Ready, realtimeAPI.Status.Requested),
 			realtimeAPI.Status.UpToDate,
 			libtime.SinceStr(&lastUpdated),
diff --git a/go.mod b/go.mod
index 6acb918587..a33a4c4903 100644
--- a/go.mod
+++ b/go.mod
@@ -67,7 +67,7 @@ require (
 	golang.org/x/time v0.0.0-20210611083556-38a9dc6acbc6 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.2.0 // indirect
 	google.golang.org/genproto v0.0.0-20210701133433-6b8dcf568a95 // indirect
-	google.golang.org/grpc v1.39.0 // indirect
+	google.golang.org/grpc v1.39.0
 	gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
 	gopkg.in/karalabe/cookiejar.v2 v2.0.0-20150724131613-8dcd6a7f4951
 	gopkg.in/segmentio/analytics-go.v3 v3.1.0
diff --git a/pkg/operator/endpoints/logs.go b/pkg/operator/endpoints/logs.go
index 2d335e27da..d56add3806 100644
--- a/pkg/operator/endpoints/logs.go
+++ b/pkg/operator/endpoints/logs.go
@@ -19,6 +19,7 @@ package endpoints
 import (
 	"net/http"
 
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/operator/operator"
 	"github.com/cortexlabs/cortex/pkg/operator/resources"
 	"github.com/cortexlabs/cortex/pkg/operator/resources/asyncapi"
@@ -98,7 +99,12 @@ func GetLogURL(w http.ResponseWriter, r *http.Request) {
 			respondError(w, r, err)
 			return
 		}
-		logURL, err := operator.APILogURL(apiResponse[0].Spec)
+		if apiResponse[0].Spec == nil {
+			respondError(w, r, errors.ErrorUnexpected("unable to get api spec", apiName))
+			// bail out: the error response is already written and Spec is dereferenced below
+			return
+		}
+		logURL, err := operator.APILogURL(*apiResponse[0].Spec)
 		if err != nil {
 			respondError(w, r, err)
 			return
@@ -112,7 +118,12 @@ func GetLogURL(w http.ResponseWriter, r *http.Request) {
 			respondError(w, r, err)
 			return
 		}
-		logURL, err := operator.APILogURL(apiResponse[0].Spec)
+		if apiResponse[0].Spec == nil {
+			respondError(w, r, errors.ErrorUnexpected("unable to get api spec", apiName))
+			// bail out: the error response is already written and Spec is dereferenced below
+			return
+		}
+		logURL, err := operator.APILogURL(*apiResponse[0].Spec)
 		if err != nil {
 			respondError(w, r, err)
 			return
diff --git a/pkg/operator/operator/k8s.go b/pkg/operator/operator/k8s.go
index f9536596ce..b85cb81b9e 100644
--- a/pkg/operator/operator/k8s.go
+++ b/pkg/operator/operator/k8s.go
@@ -63,3 +63,16 @@ func APIEndpoint(api *spec.API) (string, error) {
 
 	return urls.Join(baseAPIEndpoint, *api.Networking.Endpoint), nil
 }
+
+func APIEndpointFromPath(apiNetworkingPath string) (string, error) {
+	var err error
+	baseAPIEndpoint := ""
+
+	baseAPIEndpoint, err = APILoadBalancerURL()
+	if err != nil {
+		return "", err
+	}
+	baseAPIEndpoint = strings.Replace(baseAPIEndpoint, "https://", "http://", 1)
+
+	return urls.Join(baseAPIEndpoint, apiNetworkingPath), nil
+}
diff --git a/pkg/operator/resources/asyncapi/api.go b/pkg/operator/resources/asyncapi/api.go
index 39cce27446..4666792ee4 100644
--- a/pkg/operator/resources/asyncapi/api.go
+++ b/pkg/operator/resources/asyncapi/api.go
@@ -269,7 +269,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 
 	return []schema.APIResponse{
 		{
-			Spec:         *api,
+			Spec:         api,
 			Status:       status,
 			Endpoint:     apiEndpoint,
 			DashboardURL: dashboardURL,
@@ -277,29 +277,27 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 	}, nil
 }
 
-func GetAllAPIs(pods []kcore.Pod, deployments []kapps.Deployment) ([]schema.APIResponse, error) {
-	statuses, err := GetAllStatuses(deployments, pods)
+func GetAllAPIs(deployments []kapps.Deployment) ([]schema.APIResponse, error) {
+	statuses, err := GetAllStatuses(deployments)
 	if err != nil {
 		return nil, err
 	}
 
-	apiNames, apiIDs := namesAndIDsFromStatuses(statuses)
-	apis, err := operator.DownloadAPISpecs(apiNames, apiIDs)
-	if err != nil {
-		return nil, err
-	}
-
-	asyncAPIs := make([]schema.APIResponse, len(apis))
+	asyncAPIs := make([]schema.APIResponse, len(statuses))
 
-	for i := range apis {
-		api := apis[i]
-		endpoint, err := operator.APIEndpoint(&api)
-		if err != nil {
-			return nil, err
+	for i := range statuses {
+		var endpoint string
+		for _, deployment := range deployments {
+			if deployment.Labels["apiName"] == statuses[i].APIName {
+				endpoint, err = operator.APIEndpointFromPath(deployment.Annotations[userconfig.EndpointAnnotationKey])
+				if err != nil {
+					return nil, err
+				}
+				break
+			}
 		}
 
 		asyncAPIs[i] = schema.APIResponse{
-			Spec:     api,
 			Status:   &statuses[i],
 			Endpoint: endpoint,
 		}
diff --git a/pkg/operator/resources/asyncapi/status.go b/pkg/operator/resources/asyncapi/status.go
index ff7ff67530..37f29b36da 100644
--- a/pkg/operator/resources/asyncapi/status.go
+++ b/pkg/operator/resources/asyncapi/status.go
@@ -32,11 +32,9 @@ import (
 	kcore "k8s.io/api/core/v1"
 )
 
-type asyncResourceGroup struct {
+type asyncDeployments struct {
 	APIDeployment     *kapps.Deployment
-	APIPods           []kcore.Pod
 	GatewayDeployment *kapps.Deployment
-	GatewayPods       []kcore.Pod
 }
 
 func GetStatus(apiName string) (*status.Status, error) {
@@ -67,15 +65,15 @@ func GetStatus(apiName string) (*status.Status, error) {
 		return nil, errors.ErrorUnexpected("unable to find gateway deployment", apiName)
 	}
 
-	return apiStatus(apiDeployment), nil
+	return status.StatusFromDeployment(apiDeployment), nil
 }
 
-func GetAllStatuses(deployments []kapps.Deployment, pods []kcore.Pod) ([]status.Status, error) {
-	resourcesByAPI := groupResourcesByAPI(deployments, pods)
-	statuses := make([]status.Status, len(resourcesByAPI))
+func GetAllStatuses(deployments []kapps.Deployment) ([]status.Status, error) {
+	deploymentsByAPI := groupDeploymentsByAPI(deployments)
+	statuses := make([]status.Status, len(deploymentsByAPI))
 
 	var i int
-	for apiName, k8sResources := range resourcesByAPI {
+	for apiName, k8sResources := range deploymentsByAPI {
 		if k8sResources.APIDeployment == nil {
 			return nil, errors.ErrorUnexpected("unable to find api deployment", apiName)
 		}
@@ -84,7 +82,7 @@ func GetAllStatuses(deployments []kapps.Deployment, pods []kcore.Pod) ([]status.
 			return nil, errors.ErrorUnexpected("unable to find gateway deployment", apiName)
 		}
 
-		statuses[i] = *apiStatus(k8sResources.APIDeployment)
+		statuses[i] = *status.StatusFromDeployment(k8sResources.APIDeployment)
 		i++
 	}
 
@@ -95,26 +93,14 @@ func GetAllStatuses(deployments []kapps.Deployment, pods []kcore.Pod) ([]status.
 	return statuses, nil
 }
 
-func namesAndIDsFromStatuses(statuses []status.Status) ([]string, []string) {
-	apiNames := make([]string, len(statuses))
-	apiIDs := make([]string, len(statuses))
-
-	for i, st := range statuses {
-		apiNames[i] = st.APIName
-		apiIDs[i] = st.APIID
-	}
-
-	return apiNames, apiIDs
-}
-
 // let's do CRDs instead, to avoid this
-func groupResourcesByAPI(deployments []kapps.Deployment, pods []kcore.Pod) map[string]*asyncResourceGroup {
-	resourcesByAPI := map[string]*asyncResourceGroup{}
+func groupDeploymentsByAPI(deployments []kapps.Deployment) map[string]*asyncDeployments {
+	deploymentsByAPI := map[string]*asyncDeployments{}
 	for i := range deployments {
 		deployment := deployments[i]
 		apiName := deployment.Labels["apiName"]
 		asyncType := deployment.Labels["cortex.dev/async"]
-		apiResources, exists := resourcesByAPI[apiName]
+		apiResources, exists := deploymentsByAPI[apiName]
 		if exists {
 			if asyncType == "api" {
 				apiResources.APIDeployment = &deployment
@@ -123,39 +109,13 @@ func groupResourcesByAPI(deployments []kapps.Deployment, pods []kcore.Pod) map[s
 			}
 		} else {
 			if asyncType == "api" {
-				resourcesByAPI[apiName] = &asyncResourceGroup{APIDeployment: &deployment}
+				deploymentsByAPI[apiName] = &asyncDeployments{APIDeployment: &deployment}
 			} else {
-				resourcesByAPI[apiName] = &asyncResourceGroup{GatewayDeployment: &deployment}
+				deploymentsByAPI[apiName] = &asyncDeployments{GatewayDeployment: &deployment}
 			}
 		}
 	}
-
-	for _, pod := range pods {
-		apiName := pod.Labels["apiName"]
-		asyncType := pod.Labels["cortex.dev/async"]
-		apiResources, exists := resourcesByAPI[apiName]
-		if !exists {
-			// ignore pods that might still be waiting to be deleted while the deployment has already been deleted
-			continue
-		}
-
-		if asyncType == "api" {
-			apiResources.APIPods = append(resourcesByAPI[apiName].APIPods, pod)
-		} else {
-			apiResources.GatewayPods = append(resourcesByAPI[apiName].GatewayPods, pod)
-		}
-	}
-	return resourcesByAPI
-}
-
-func apiStatus(deployment *kapps.Deployment) *status.Status {
-	return &status.Status{
-		APIName:   deployment.Labels["apiName"],
-		APIID:     deployment.Labels["apiID"],
-		Ready:     deployment.Status.ReadyReplicas,
-		Requested: deployment.Status.Replicas,
-		UpToDate:  deployment.Status.UpdatedReplicas,
-	}
+	return deploymentsByAPI
 }
 
 func getStatusCode(apiCounts status.ReplicaCounts, gatewayCounts status.ReplicaCounts, apiMinReplicas int32) status.Code {
diff --git a/pkg/operator/resources/job/batchapi/api.go b/pkg/operator/resources/job/batchapi/api.go
index b85726a531..0570ae8124 100644
--- a/pkg/operator/resources/job/batchapi/api.go
+++ b/pkg/operator/resources/job/batchapi/api.go
@@ -184,7 +184,7 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, batchJob
 		}
 
 		batchAPIsMap[apiName] = &schema.APIResponse{
-			Spec:             *api,
+			Spec:             api,
 			Endpoint:         endpoint,
 			BatchJobStatuses: jobStatuses,
 		}
@@ -263,7 +263,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 
 	return []schema.APIResponse{
 		{
-			Spec:             *api,
+			Spec:             api,
 			BatchJobStatuses: jobStatuses,
 			Endpoint:         endpoint,
 			DashboardURL:     dashboardURL,
diff --git a/pkg/operator/resources/job/taskapi/api.go b/pkg/operator/resources/job/taskapi/api.go
index 9261cc16a9..535259c1b3 100644
--- a/pkg/operator/resources/job/taskapi/api.go
+++ b/pkg/operator/resources/job/taskapi/api.go
@@ -173,7 +173,7 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 		}
 
 		taskAPIsMap[apiName] = &schema.APIResponse{
-			Spec:            *api,
+			Spec:            api,
 			Endpoint:        endpoint,
 			TaskJobStatuses: jobStatuses,
 		}
@@ -295,7 +295,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 
 	return []schema.APIResponse{
 		{
-			Spec:            *api,
+			Spec:            api,
 			TaskJobStatuses: jobStatuses,
 			Endpoint:        endpoint,
 			DashboardURL:    dashboardURL,
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index c9ddaf5957..9f675063fd 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -181,23 +181,21 @@ func GetAllAPIs(deployments []kapps.Deployment) ([]schema.APIResponse, error) {
 		return nil, err
 	}
 
-	apiNames, apiIDs := namesAndIDsFromStatuses(statuses)
-	apis, err := operator.DownloadAPISpecs(apiNames, apiIDs)
-	if err != nil {
-		return nil, err
-	}
-
-	realtimeAPIs := make([]schema.APIResponse, len(apis))
-
-	for i := range apis {
-		api := apis[i]
-		endpoint, err := operator.APIEndpoint(&api)
-		if err != nil {
-			return nil, err
+	realtimeAPIs := make([]schema.APIResponse, len(statuses))
+
+	for i := range statuses {
+		var endpoint string
+		for _, deployment := range deployments {
+			if deployment.Labels["apiName"] == statuses[i].APIName {
+				endpoint, err = operator.APIEndpointFromPath(deployment.Annotations[userconfig.EndpointAnnotationKey])
+				if err != nil {
+					return nil, err
+				}
+				break
+			}
 		}
 
 		realtimeAPIs[i] = schema.APIResponse{
-			Spec:     api,
 			Status:   &statuses[i],
 			Endpoint: endpoint,
 		}
@@ -238,7 +236,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 
 	return []schema.APIResponse{
 		{
-			Spec:         *api,
+			Spec:         api,
 			Status:       st,
 			Endpoint:     apiEndpoint,
 			DashboardURL: dashboardURL,
diff --git a/pkg/operator/resources/realtimeapi/status.go b/pkg/operator/resources/realtimeapi/status.go
index 4c8eb5ac46..9b815c66bd 100644
--- a/pkg/operator/resources/realtimeapi/status.go
+++ b/pkg/operator/resources/realtimeapi/status.go
@@ -41,13 +41,13 @@ func GetStatus(apiName string) (*status.Status, error) {
 		return nil, errors.ErrorUnexpected("unable to find deployment", apiName)
 	}
 
-	return apiStatus(*deployment), nil
+	return status.StatusFromDeployment(deployment), nil
 }
 
 func GetAllStatuses(deployments []kapps.Deployment) ([]status.Status, error) {
 	statuses := make([]status.Status, len(deployments))
 	for i := range deployments {
-		statuses[i] = *apiStatus(deployments[i])
+		statuses[i] = *status.StatusFromDeployment(&deployments[i])
 	}
 
 	sort.Slice(statuses, func(i, j int) bool {
@@ -57,16 +57,6 @@ func GetAllStatuses(deployments []kapps.Deployment) ([]status.Status, error) {
 	return statuses, nil
 }
 
-func apiStatus(deployment kapps.Deployment) *status.Status {
-	return &status.Status{
-		APIName:   deployment.Labels["apiName"],
-		APIID:     deployment.Labels["apiID"],
-		Ready:     deployment.Status.ReadyReplicas,
-		Requested: deployment.Status.Replicas,
-		UpToDate:  deployment.Status.UpdatedReplicas,
-	}
-}
-
 func getReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) status.ReplicaCounts {
 	counts := status.ReplicaCounts{}
 	counts.Requested = *deployment.Spec.Replicas
diff --git a/pkg/operator/resources/resources.go b/pkg/operator/resources/resources.go
index b9853c1993..768122d69d 100644
--- a/pkg/operator/resources/resources.go
+++ b/pkg/operator/resources/resources.go
@@ -158,7 +158,7 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*schema.APIResponse, stri
 		apiEndpoint, _ := operator.APIEndpoint(api)
 
 		return &schema.APIResponse{
-			Spec:     *api,
+			Spec:     api,
 			Endpoint: apiEndpoint,
 		}, msg, nil
 	}
@@ -310,15 +310,12 @@ func GetAPIs() ([]schema.APIResponse, error) {
 
 	var batchAPIPods []kcore.Pod
 	var taskAPIPods []kcore.Pod
-	var asyncAPIPods []kcore.Pod
 	for _, pod := range pods {
 		switch pod.Labels["apiKind"] {
 		case userconfig.BatchAPIKind.String():
 			batchAPIPods = append(batchAPIPods, pod)
 		case userconfig.TaskAPIKind.String():
 			taskAPIPods = append(taskAPIPods, pod)
-		case userconfig.AsyncAPIKind.String():
-			asyncAPIPods = append(asyncAPIPods, pod)
 		}
 	}
 
@@ -353,7 +350,7 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		return nil, err
 	}
 
-	asyncAPIList, err := asyncapi.GetAllAPIs(asyncAPIPods, asyncAPIDeployments)
+	asyncAPIList, err := asyncapi.GetAllAPIs(asyncAPIDeployments)
 	if err != nil {
 		return nil, err
 	}
@@ -446,7 +443,7 @@ func GetAPIByID(apiName string, apiID string) ([]schema.APIResponse, error) {
 
 	return []schema.APIResponse{
 		{
-			Spec: *apiSpec,
+			Spec: apiSpec,
 		},
 	}, nil
 }
diff --git a/pkg/operator/resources/trafficsplitter/api.go b/pkg/operator/resources/trafficsplitter/api.go
index 9d81a17faa..a1fba2b504 100644
--- a/pkg/operator/resources/trafficsplitter/api.go
+++ b/pkg/operator/resources/trafficsplitter/api.go
@@ -158,7 +158,7 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schem
 		}
 
 		trafficSplitters = append(trafficSplitters, schema.APIResponse{
-			Spec:     trafficSplitter,
+			Spec:     &trafficSplitter,
 			Endpoint: endpoint,
 		})
 	}
@@ -180,7 +180,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 
 	return []schema.APIResponse{
 		{
-			Spec:     *api,
+			Spec:     api,
 			Endpoint: endpoint,
 		},
 	}, nil
diff --git a/pkg/operator/schema/schema.go b/pkg/operator/schema/schema.go
index eff68701ee..401b59a30c 100644
--- a/pkg/operator/schema/schema.go
+++ b/pkg/operator/schema/schema.go
@@ -56,7 +56,7 @@ type DeployResult struct {
 }
 
 type APIResponse struct {
-	Spec             spec.API                `json:"spec"`
+	Spec             *spec.API               `json:"spec,omitempty"`
 	Status           *status.Status          `json:"status,omitempty"`
 	Endpoint         string                  `json:"endpoint"`
 	DashboardURL     *string                 `json:"dashboard_url,omitempty"`
diff --git a/pkg/types/status/status.go b/pkg/types/status/status.go
index 9a95c16463..b2299bce5c 100644
--- a/pkg/types/status/status.go
+++ b/pkg/types/status/status.go
@@ -16,12 +16,18 @@ limitations under the License.
 
 package status
 
+import (
+	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+	kapps "k8s.io/api/apps/v1"
+)
+
 type Status struct {
-	APIName   string `json:"api_name"`
-	APIID     string `json:"api_id"`
-	Ready     int32  `json:"ready"`
-	Requested int32  `json:"requested"`
-	UpToDate  int32  `json:"up_to_date"`
+	APIName   string          `json:"api_name"`
+	APIKind   userconfig.Kind `json:"api_kind"`
+	APIID     string          `json:"api_id"`
+	Ready     int32           `json:"ready"`
+	Requested int32           `json:"requested"`
+	UpToDate  int32           `json:"up_to_date"`
 }
 
 type ReplicaCounts struct {
@@ -54,6 +60,17 @@ type WorkerCounts struct {
 	Unknown      int32 `json:"unknown,omitempty"`
 }
 
+func StatusFromDeployment(deployment *kapps.Deployment) *Status {
+	return &Status{
+		APIName:   deployment.Labels["apiName"],
+		APIKind:   userconfig.KindFromString(deployment.Labels["apiKind"]),
+		APIID:     deployment.Labels["apiID"],
+		Ready:     deployment.Status.ReadyReplicas,
+		Requested: deployment.Status.Replicas,
+		UpToDate:  deployment.Status.UpdatedReplicas,
+	}
+}
+
 func (src *SubReplicaCounts) TotalFailed() int32 {
 	return src.Failed + src.ErrImagePull + src.Killed + src.KilledOOM + src.Stalled
 }

From 4f9dc7c47020bf3a92fe0f6e1327d1d66f73ebe8 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Wed, 21 Jul 2021 14:59:58 +0300
Subject: [PATCH 23/40] WIP on API statuses

---
 cli/cmd/get.go                                |   4 +-
 cli/cmd/lib_async_apis.go                     |   4 +-
 cli/cmd/lib_batch_apis.go                     |   4 +-
 cli/cmd/lib_realtime_apis.go                  |  16 ++-
 cli/cmd/lib_task_apis.go                      |   4 +-
 cli/cmd/lib_traffic_splitters.go              |   5 +-
 pkg/operator/resources/asyncapi/api.go        | 104 ++++++++++++------
 pkg/operator/resources/asyncapi/status.go     |  60 ----------
 pkg/operator/resources/job/batchapi/api.go    |   4 +-
 pkg/operator/resources/job/taskapi/api.go     |   4 +-
 pkg/operator/resources/realtimeapi/api.go     |  70 ++++++------
 pkg/operator/resources/realtimeapi/status.go  |  31 ------
 pkg/operator/resources/resources.go           |  27 ++---
 pkg/operator/resources/trafficsplitter/api.go |   4 +-
 pkg/operator/schema/schema.go                 |   3 +-
 pkg/types/spec/api.go                         |  24 ++++
 pkg/types/status/status.go                    |  13 +--
 17 files changed, 177 insertions(+), 204 deletions(-)

diff --git a/cli/cmd/get.go b/cli/cmd/get.go
index 0a9b6f9be8..f2356fb69a 100644
--- a/cli/cmd/get.go
+++ b/cli/cmd/get.go
@@ -219,11 +219,11 @@ func getAPIsInAllEnvironments() (string, error) {
 
 		if err == nil {
 			for _, api := range apisRes {
-				if api.Status == nil {
+				if api.Metadata == nil {
 					// TODO remove this once the status is present for all
 					continue
 				}
-				switch api.Status.APIKind {
+				switch api.Metadata.Kind {
 				case userconfig.BatchAPIKind:
 					allBatchAPIEnvs = append(allBatchAPIEnvs, env.Name)
 					allBatchAPIs = append(allBatchAPIs, api)
diff --git a/cli/cmd/lib_async_apis.go b/cli/cmd/lib_async_apis.go
index 3b0347e51c..d810acad62 100644
--- a/cli/cmd/lib_async_apis.go
+++ b/cli/cmd/lib_async_apis.go
@@ -41,7 +41,9 @@ func asyncAPITable(asyncAPI schema.APIResponse, env cliconfig.Environment) (stri
 		out += "\n" + console.Bold("metrics dashboard: ") + *asyncAPI.DashboardURL + "\n"
 	}
 
-	out += "\n" + console.Bold("endpoint: ") + asyncAPI.Endpoint + "\n"
+	if asyncAPI.Endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *asyncAPI.Endpoint + "\n"
+	}
 
 	out += "\n" + apiHistoryTable(asyncAPI.APIVersions)
 
diff --git a/cli/cmd/lib_batch_apis.go b/cli/cmd/lib_batch_apis.go
index ac43150a53..1499ac8869 100644
--- a/cli/cmd/lib_batch_apis.go
+++ b/cli/cmd/lib_batch_apis.go
@@ -123,7 +123,9 @@ func batchAPITable(batchAPI schema.APIResponse) string {
 		out += "\n" + console.Bold("metrics dashboard: ") + *batchAPI.DashboardURL + "\n"
 	}
 
-	out += "\n" + console.Bold("endpoint: ") + batchAPI.Endpoint + "\n"
+	if batchAPI.Endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *batchAPI.Endpoint + "\n"
+	}
 
 	out += "\n" + apiHistoryTable(batchAPI.APIVersions)
 
diff --git a/cli/cmd/lib_realtime_apis.go b/cli/cmd/lib_realtime_apis.go
index 01be8891c2..36abdaff87 100644
--- a/cli/cmd/lib_realtime_apis.go
+++ b/cli/cmd/lib_realtime_apis.go
@@ -32,8 +32,9 @@ func realtimeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment)
 	var out string
 
 	t := realtimeAPIsTable([]schema.APIResponse{realtimeAPI}, []string{env.Name})
-	t.FindHeaderByTitle(_titleEnvironment).Hidden = true
-	t.FindHeaderByTitle(_titleRealtimeAPI).Hidden = true
+	// TODO decide on whether we want to keep this consistent with `cortex get` command
+	// t.FindHeaderByTitle(_titleEnvironment).Hidden = true
+	// t.FindHeaderByTitle(_titleRealtimeAPI).Hidden = true
 
 	out += t.MustFormat()
 
@@ -41,7 +42,9 @@ func realtimeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment)
 		out += "\n" + console.Bold("metrics dashboard: ") + *realtimeAPI.DashboardURL + "\n"
 	}
 
-	out += "\n" + console.Bold("endpoint: ") + realtimeAPI.Endpoint + "\n"
+	if realtimeAPI.Endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *realtimeAPI.Endpoint + "\n"
+	}
 
 	out += "\n" + apiHistoryTable(realtimeAPI.APIVersions)
 
@@ -58,10 +61,13 @@ func realtimeAPIsTable(realtimeAPIs []schema.APIResponse, envNames []string) tab
 	rows := make([][]interface{}, 0, len(realtimeAPIs))
 
 	for i, realtimeAPI := range realtimeAPIs {
-		lastUpdated := time.Unix(realtimeAPI.Spec.LastUpdated, 0)
+		if realtimeAPI.Metadata == nil || realtimeAPI.Status == nil {
+			continue
+		}
+		lastUpdated := time.Unix(realtimeAPI.Metadata.LastUpdated, 0)
 		rows = append(rows, []interface{}{
 			envNames[i],
-			realtimeAPI.Status.APIName,
+			realtimeAPI.Metadata.Name,
 			fmt.Sprintf("%d/%d", realtimeAPI.Status.Ready, realtimeAPI.Status.Requested),
 			realtimeAPI.Status.UpToDate,
 			libtime.SinceStr(&lastUpdated),
diff --git a/cli/cmd/lib_task_apis.go b/cli/cmd/lib_task_apis.go
index 3bd0275caf..a639cee5f0 100644
--- a/cli/cmd/lib_task_apis.go
+++ b/cli/cmd/lib_task_apis.go
@@ -118,7 +118,9 @@ func taskAPITable(taskAPI schema.APIResponse) string {
 		out += "\n" + console.Bold("metrics dashboard: ") + *taskAPI.DashboardURL + "\n"
 	}
 
-	out += "\n" + console.Bold("endpoint: ") + taskAPI.Endpoint + "\n"
+	if taskAPI.Endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *taskAPI.Endpoint + "\n"
+	}
 
 	out += "\n" + apiHistoryTable(taskAPI.APIVersions)
 
diff --git a/cli/cmd/lib_traffic_splitters.go b/cli/cmd/lib_traffic_splitters.go
index f89d350ded..cc23e00a87 100644
--- a/cli/cmd/lib_traffic_splitters.go
+++ b/cli/cmd/lib_traffic_splitters.go
@@ -50,7 +50,10 @@ func trafficSplitterTable(trafficSplitter schema.APIResponse, env cliconfig.Envi
 	out += t.MustFormat()
 
 	out += "\n" + console.Bold("last updated: ") + libtime.SinceStr(&lastUpdated)
-	out += "\n" + console.Bold("endpoint: ") + trafficSplitter.Endpoint + "\n"
+
+	if trafficSplitter.Endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *trafficSplitter.Endpoint + "\n"
+	}
 
 	out += "\n" + apiHistoryTable(trafficSplitter.APIVersions)
 
diff --git a/pkg/operator/resources/asyncapi/api.go b/pkg/operator/resources/asyncapi/api.go
index 4666792ee4..4c93657a3c 100644
--- a/pkg/operator/resources/asyncapi/api.go
+++ b/pkg/operator/resources/asyncapi/api.go
@@ -19,6 +19,7 @@ package asyncapi
 import (
 	"fmt"
 	"path/filepath"
+	"sort"
 	"time"
 
 	"github.com/cortexlabs/cortex/pkg/config"
@@ -31,6 +32,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/operator/operator"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 	"github.com/cortexlabs/cortex/pkg/types/spec"
+	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"github.com/cortexlabs/cortex/pkg/workloads"
 	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
@@ -249,13 +251,77 @@ func DeleteAPI(apiName string, keepCache bool) error {
 	return nil
 }
 
+func GetAllAPIs(deployments []kapps.Deployment, virtualServices []istioclientnetworking.VirtualService) ([]schema.APIResponse, error) {
+	asyncAPIs := make([]schema.APIResponse, len(deployments))
+	mappedAsyncAPIs := make(map[string]schema.APIResponse, len(deployments))
+	keys := make([]string, len(deployments))
+
+	for i := range deployments {
+		apiName := deployments[i].Labels["apiName"]
+		keys = append(keys, apiName)
+
+		metadata, err := spec.MetadataFromDeployment(&deployments[i])
+		if err != nil {
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
+		}
+		mappedAsyncAPIs[apiName] = schema.APIResponse{
+			Status:   status.StatusFromDeployment(&deployments[i]),
+			Metadata: metadata,
+		}
+	}
+
+	sort.Strings(keys)
+	for _, apiName := range keys {
+		asyncAPIs = append(asyncAPIs, mappedAsyncAPIs[apiName])
+	}
+
+	return asyncAPIs, nil
+}
+
 func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
-	status, err := GetStatus(deployedResource.Name)
+	var apiDeployment *kapps.Deployment
+	var gatewayDeployment *kapps.Deployment
+
+	err := parallel.RunFirstErr(
+		func() error {
+			var err error
+			apiDeployment, err = config.K8s.GetDeployment(workloads.K8sName(deployedResource.Name))
+			return err
+		},
+		func() error {
+			var err error
+			gatewayDeployment, err = config.K8s.GetDeployment(getGatewayK8sName(deployedResource.Name))
+			return err
+		},
+	)
 	if err != nil {
 		return nil, err
 	}
 
-	api, err := operator.DownloadAPISpec(status.APIName, status.APIID)
+	if apiDeployment == nil {
+		return nil, errors.ErrorUnexpected("unable to find api deployment", deployedResource.Name)
+	}
+
+	if gatewayDeployment == nil {
+		return nil, errors.ErrorUnexpected("unable to find gateway deployment", deployedResource.Name)
+	}
+
+	deployment, err := config.K8s.GetDeployment(workloads.K8sName(deployedResource.Name))
+	if err != nil {
+		return nil, err
+	}
+
+	if deployment == nil {
+		return nil, errors.ErrorUnexpected("unable to find deployment", deployedResource.Name)
+	}
+
+	apiStatus := status.StatusFromDeployment(deployment)
+	apiMetadata, err := spec.MetadataFromDeployment(deployment)
+	if err != nil {
+		return nil, errors.ErrorUnexpected("unable to obtain metadata", deployedResource.Name)
+	}
+
+	api, err := operator.DownloadAPISpec(apiMetadata.Name, apiMetadata.APIID)
 	if err != nil {
 		return nil, err
 	}
@@ -270,42 +336,14 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 	return []schema.APIResponse{
 		{
 			Spec:         api,
-			Status:       status,
-			Endpoint:     apiEndpoint,
+			Metadata:     apiMetadata,
+			Status:       apiStatus,
+			Endpoint:     &apiEndpoint,
 			DashboardURL: dashboardURL,
 		},
 	}, nil
 }
 
-func GetAllAPIs(deployments []kapps.Deployment) ([]schema.APIResponse, error) {
-	statuses, err := GetAllStatuses(deployments)
-	if err != nil {
-		return nil, err
-	}
-
-	asyncAPIs := make([]schema.APIResponse, len(statuses))
-
-	for i := range statuses {
-		var endpoint string
-		for _, deployment := range deployments {
-			if deployment.Labels["apiName"] == statuses[i].APIName {
-				endpoint, err = operator.APIEndpointFromPath(deployment.Annotations[userconfig.EndpointAnnotationKey])
-				if err != nil {
-					return nil, err
-				}
-				break
-			}
-		}
-
-		asyncAPIs[i] = schema.APIResponse{
-			Status:   &statuses[i],
-			Endpoint: endpoint,
-		}
-	}
-
-	return asyncAPIs, nil
-}
-
 func UpdateAPIMetricsCron(apiDeployment *kapps.Deployment) error {
 	apiName := apiDeployment.Labels["apiName"]
 
diff --git a/pkg/operator/resources/asyncapi/status.go b/pkg/operator/resources/asyncapi/status.go
index 37f29b36da..48189a89cb 100644
--- a/pkg/operator/resources/asyncapi/status.go
+++ b/pkg/operator/resources/asyncapi/status.go
@@ -17,17 +17,13 @@ limitations under the License.
 package asyncapi
 
 import (
-	"sort"
 	"time"
 
 	"github.com/cortexlabs/cortex/pkg/config"
 	"github.com/cortexlabs/cortex/pkg/consts"
-	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
-	"github.com/cortexlabs/cortex/pkg/lib/parallel"
 	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
-	"github.com/cortexlabs/cortex/pkg/workloads"
 	kapps "k8s.io/api/apps/v1"
 	kcore "k8s.io/api/core/v1"
 )
@@ -37,62 +33,6 @@ type asyncDeployments struct {
 	GatewayDeployment *kapps.Deployment
 }
 
-func GetStatus(apiName string) (*status.Status, error) {
-	var apiDeployment *kapps.Deployment
-	var gatewayDeployment *kapps.Deployment
-
-	err := parallel.RunFirstErr(
-		func() error {
-			var err error
-			apiDeployment, err = config.K8s.GetDeployment(workloads.K8sName(apiName))
-			return err
-		},
-		func() error {
-			var err error
-			gatewayDeployment, err = config.K8s.GetDeployment(getGatewayK8sName(apiName))
-			return err
-		},
-	)
-	if err != nil {
-		return nil, err
-	}
-
-	if apiDeployment == nil {
-		return nil, errors.ErrorUnexpected("unable to find api deployment", apiName)
-	}
-
-	if gatewayDeployment == nil {
-		return nil, errors.ErrorUnexpected("unable to find gateway deployment", apiName)
-	}
-
-	return status.StatusFromDeployment(apiDeployment), nil
-}
-
-func GetAllStatuses(deployments []kapps.Deployment) ([]status.Status, error) {
-	deploymentsByAPI := groupDeploymentsByAPI(deployments)
-	statuses := make([]status.Status, len(deploymentsByAPI))
-
-	var i int
-	for apiName, k8sResources := range deploymentsByAPI {
-		if k8sResources.APIDeployment == nil {
-			return nil, errors.ErrorUnexpected("unable to find api deployment", apiName)
-		}
-
-		if k8sResources.GatewayDeployment == nil {
-			return nil, errors.ErrorUnexpected("unable to find gateway deployment", apiName)
-		}
-
-		statuses[i] = *status.StatusFromDeployment(k8sResources.APIDeployment)
-		i++
-	}
-
-	sort.Slice(statuses, func(i, j int) bool {
-		return statuses[i].APIName < statuses[j].APIName
-	})
-
-	return statuses, nil
-}
-
 // let's do CRDs instead, to avoid this
 func groupDeploymentsByAPI(deployments []kapps.Deployment) map[string]*asyncDeployments {
 	deploymentsByAPI := map[string]*asyncDeployments{}
diff --git a/pkg/operator/resources/job/batchapi/api.go b/pkg/operator/resources/job/batchapi/api.go
index 0570ae8124..6b230847c6 100644
--- a/pkg/operator/resources/job/batchapi/api.go
+++ b/pkg/operator/resources/job/batchapi/api.go
@@ -185,7 +185,7 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, batchJob
 
 		batchAPIsMap[apiName] = &schema.APIResponse{
 			Spec:             api,
-			Endpoint:         endpoint,
+			Endpoint:         &endpoint,
 			BatchJobStatuses: jobStatuses,
 		}
 	}
@@ -265,7 +265,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 		{
 			Spec:             api,
 			BatchJobStatuses: jobStatuses,
-			Endpoint:         endpoint,
+			Endpoint:         &endpoint,
 			DashboardURL:     dashboardURL,
 		},
 	}, nil
diff --git a/pkg/operator/resources/job/taskapi/api.go b/pkg/operator/resources/job/taskapi/api.go
index 535259c1b3..32aa249636 100644
--- a/pkg/operator/resources/job/taskapi/api.go
+++ b/pkg/operator/resources/job/taskapi/api.go
@@ -174,7 +174,7 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 
 		taskAPIsMap[apiName] = &schema.APIResponse{
 			Spec:            api,
-			Endpoint:        endpoint,
+			Endpoint:        &endpoint,
 			TaskJobStatuses: jobStatuses,
 		}
 	}
@@ -297,7 +297,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 		{
 			Spec:            api,
 			TaskJobStatuses: jobStatuses,
-			Endpoint:        endpoint,
+			Endpoint:        &endpoint,
 			DashboardURL:    dashboardURL,
 		},
 	}, nil
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 9f675063fd..14d24b3e04 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -19,6 +19,7 @@ package realtimeapi
 import (
 	"fmt"
 	"path/filepath"
+	"sort"
 	"time"
 
 	"github.com/cortexlabs/cortex/pkg/config"
@@ -175,54 +176,50 @@ func DeleteAPI(apiName string, keepCache bool) error {
 	return nil
 }
 
-func GetAllAPIs(deployments []kapps.Deployment) ([]schema.APIResponse, error) {
-	statuses, err := GetAllStatuses(deployments)
-	if err != nil {
-		return nil, err
-	}
+func GetAllAPIs(deployments []kapps.Deployment, virtualServices []istioclientnetworking.VirtualService) ([]schema.APIResponse, error) {
+	realtimeAPIs := make([]schema.APIResponse, len(deployments))
+	mappedRealtimeAPIs := make(map[string]schema.APIResponse, len(deployments))
+	keys := make([]string, len(deployments))
 
-	realtimeAPIs := make([]schema.APIResponse, len(statuses))
-
-	for i := range statuses {
-		var endpoint string
-		for _, deployment := range deployments {
-			if deployment.Labels["apiName"] == statuses[i].APIName {
-				endpoint, err = operator.APIEndpointFromPath(deployment.Annotations[userconfig.EndpointAnnotationKey])
-				if err != nil {
-					return nil, err
-				}
-				break
-			}
-		}
+	for i := range deployments {
+		apiName := deployments[i].Labels["apiName"]
+		keys = append(keys, apiName)
 
-		realtimeAPIs[i] = schema.APIResponse{
-			Status:   &statuses[i],
-			Endpoint: endpoint,
+		metadata, err := spec.MetadataFromDeployment(&deployments[i])
+		if err != nil {
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
+		}
+		mappedRealtimeAPIs[apiName] = schema.APIResponse{
+			Status:   status.StatusFromDeployment(&deployments[i]),
+			Metadata: metadata,
 		}
 	}
 
-	return realtimeAPIs, nil
-}
-
-func namesAndIDsFromStatuses(statuses []status.Status) ([]string, []string) {
-	apiNames := make([]string, len(statuses))
-	apiIDs := make([]string, len(statuses))
-
-	for i, st := range statuses {
-		apiNames[i] = st.APIName
-		apiIDs[i] = st.APIID
+	sort.Strings(keys)
+	for _, apiName := range keys {
+		realtimeAPIs = append(realtimeAPIs, mappedRealtimeAPIs[apiName])
 	}
 
-	return apiNames, apiIDs
+	return realtimeAPIs, nil
 }
 
 func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
-	st, err := GetStatus(deployedResource.Name)
+	deployment, err := config.K8s.GetDeployment(workloads.K8sName(deployedResource.Name))
 	if err != nil {
 		return nil, err
 	}
 
-	api, err := operator.DownloadAPISpec(st.APIName, st.APIID)
+	if deployment == nil {
+		return nil, errors.ErrorUnexpected("unable to find deployment", deployedResource.Name)
+	}
+
+	apiStatus := status.StatusFromDeployment(deployment)
+	apiMetadata, err := spec.MetadataFromDeployment(deployment)
+	if err != nil {
+		return nil, errors.ErrorUnexpected("unable to obtain metadata", deployedResource.Name)
+	}
+
+	api, err := operator.DownloadAPISpec(apiMetadata.Name, apiMetadata.APIID)
 	if err != nil {
 		return nil, err
 	}
@@ -237,8 +234,9 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 	return []schema.APIResponse{
 		{
 			Spec:         api,
-			Status:       st,
-			Endpoint:     apiEndpoint,
+			Metadata:     apiMetadata,
+			Status:       apiStatus,
+			Endpoint:     &apiEndpoint,
 			DashboardURL: dashboardURL,
 		},
 	}, nil
diff --git a/pkg/operator/resources/realtimeapi/status.go b/pkg/operator/resources/realtimeapi/status.go
index 9b815c66bd..9952ccd9ca 100644
--- a/pkg/operator/resources/realtimeapi/status.go
+++ b/pkg/operator/resources/realtimeapi/status.go
@@ -17,46 +17,15 @@ limitations under the License.
 package realtimeapi
 
 import (
-	"sort"
 	"time"
 
-	"github.com/cortexlabs/cortex/pkg/config"
 	"github.com/cortexlabs/cortex/pkg/consts"
-	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/types/status"
-	"github.com/cortexlabs/cortex/pkg/workloads"
 	kapps "k8s.io/api/apps/v1"
 	kcore "k8s.io/api/core/v1"
 )
 
-func GetStatus(apiName string) (*status.Status, error) {
-	var err error
-	deployment, err := config.K8s.GetDeployment(workloads.K8sName(apiName))
-	if err != nil {
-		return nil, err
-	}
-
-	if deployment == nil {
-		return nil, errors.ErrorUnexpected("unable to find deployment", apiName)
-	}
-
-	return status.StatusFromDeployment(deployment), nil
-}
-
-func GetAllStatuses(deployments []kapps.Deployment) ([]status.Status, error) {
-	statuses := make([]status.Status, len(deployments))
-	for i := range deployments {
-		statuses[i] = *status.StatusFromDeployment(&deployments[i])
-	}
-
-	sort.Slice(statuses, func(i, j int) bool {
-		return statuses[i].APIName < statuses[j].APIName
-	})
-
-	return statuses, nil
-}
-
 func getReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) status.ReplicaCounts {
 	counts := status.ReplicaCounts{}
 	counts.Requested = *deployment.Spec.Replicas
diff --git a/pkg/operator/resources/resources.go b/pkg/operator/resources/resources.go
index 768122d69d..690616e923 100644
--- a/pkg/operator/resources/resources.go
+++ b/pkg/operator/resources/resources.go
@@ -159,7 +159,7 @@ func UpdateAPI(apiConfig *userconfig.API, force bool) (*schema.APIResponse, stri
 
 		return &schema.APIResponse{
 			Spec:     api,
-			Endpoint: apiEndpoint,
+			Endpoint: &apiEndpoint,
 		}, msg, nil
 	}
 
@@ -256,7 +256,7 @@ func DeleteAPI(apiName string, keepCache bool) (*schema.DeleteResponse, error) {
 func GetAPIs() ([]schema.APIResponse, error) {
 	var deployments []kapps.Deployment
 	var k8sTaskJobs []kbatch.Job
-	var pods []kcore.Pod
+	var taskAPIPods []kcore.Pod
 	var virtualServices []istioclientnetworking.VirtualService
 	var batchJobList batch.BatchJobList
 
@@ -268,7 +268,7 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		},
 		func() error {
 			var err error
-			pods, err = config.K8s.ListPodsWithLabelKeys("apiName")
+			taskAPIPods, err = config.K8s.ListPodsByLabel("apiKind", userconfig.TaskAPIKind.String())
 			return err
 		},
 		func() error {
@@ -308,23 +308,18 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		}
 	}
 
-	var batchAPIPods []kcore.Pod
-	var taskAPIPods []kcore.Pod
-	for _, pod := range pods {
-		switch pod.Labels["apiKind"] {
-		case userconfig.BatchAPIKind.String():
-			batchAPIPods = append(batchAPIPods, pod)
-		case userconfig.TaskAPIKind.String():
-			taskAPIPods = append(taskAPIPods, pod)
-		}
-	}
-
+	var realtimeAPIVirtualServices []istioclientnetworking.VirtualService
+	var asyncAPIVirtualServices []istioclientnetworking.VirtualService
 	var batchAPIVirtualServices []istioclientnetworking.VirtualService
 	var taskAPIVirtualServices []istioclientnetworking.VirtualService
 	var trafficSplitterVirtualServices []istioclientnetworking.VirtualService
 
 	for _, vs := range virtualServices {
 		switch vs.Labels["apiKind"] {
+		case userconfig.RealtimeAPIKind.String():
+			realtimeAPIVirtualServices = append(realtimeAPIVirtualServices, vs)
+		case userconfig.AsyncAPIKind.String():
+			asyncAPIVirtualServices = append(asyncAPIVirtualServices, vs)
 		case userconfig.BatchAPIKind.String():
 			batchAPIVirtualServices = append(batchAPIVirtualServices, vs)
 		case userconfig.TrafficSplitterKind.String():
@@ -334,7 +329,7 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		}
 	}
 
-	realtimeAPIList, err := realtimeapi.GetAllAPIs(realtimeAPIDeployments)
+	realtimeAPIList, err := realtimeapi.GetAllAPIs(realtimeAPIDeployments, realtimeAPIVirtualServices)
 	if err != nil {
 		return nil, err
 	}
@@ -350,7 +345,7 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		return nil, err
 	}
 
-	asyncAPIList, err := asyncapi.GetAllAPIs(asyncAPIDeployments)
+	asyncAPIList, err := asyncapi.GetAllAPIs(asyncAPIDeployments, asyncAPIVirtualServices)
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/operator/resources/trafficsplitter/api.go b/pkg/operator/resources/trafficsplitter/api.go
index a1fba2b504..3492544d82 100644
--- a/pkg/operator/resources/trafficsplitter/api.go
+++ b/pkg/operator/resources/trafficsplitter/api.go
@@ -159,7 +159,7 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schem
 
 		trafficSplitters = append(trafficSplitters, schema.APIResponse{
 			Spec:     &trafficSplitter,
-			Endpoint: endpoint,
+			Endpoint: &endpoint,
 		})
 	}
 
@@ -181,7 +181,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 	return []schema.APIResponse{
 		{
 			Spec:     api,
-			Endpoint: endpoint,
+			Endpoint: &endpoint,
 		},
 	}, nil
 }
diff --git a/pkg/operator/schema/schema.go b/pkg/operator/schema/schema.go
index 401b59a30c..522a927a2f 100644
--- a/pkg/operator/schema/schema.go
+++ b/pkg/operator/schema/schema.go
@@ -57,8 +57,9 @@ type DeployResult struct {
 
 type APIResponse struct {
 	Spec             *spec.API               `json:"spec,omitempty"`
+	Metadata         *spec.Metadata          `json:"metadata,omitempty"`
 	Status           *status.Status          `json:"status,omitempty"`
-	Endpoint         string                  `json:"endpoint"`
+	Endpoint         *string                 `json:"endpoint,omitempty"`
 	DashboardURL     *string                 `json:"dashboard_url,omitempty"`
 	BatchJobStatuses []status.BatchJobStatus `json:"batch_job_statuses,omitempty"`
 	TaskJobStatuses  []status.TaskJobStatus  `json:"task_job_statuses,omitempty"`
diff --git a/pkg/types/spec/api.go b/pkg/types/spec/api.go
index e181a0ffab..929f3fd51b 100644
--- a/pkg/types/spec/api.go
+++ b/pkg/types/spec/api.go
@@ -30,6 +30,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/lib/hash"
 	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+	kapps "k8s.io/api/apps/v1"
 )
 
 type API struct {
@@ -46,6 +47,29 @@ type API struct {
 	MetadataRoot          string `json:"metadata_root"`
 }
 
+type Metadata struct {
+	*userconfig.Resource
+	APIID        string `json:"id"`
+	DeploymentID string `json:"deployment_id"`
+	LastUpdated  int64  `json:"last_updated"`
+}
+
+func MetadataFromDeployment(deployment *kapps.Deployment) (*Metadata, error) {
+	lastUpdated, err := TimeFromAPIID(deployment.Labels["apiID"])
+	if err != nil {
+		return nil, err
+	}
+	return &Metadata{
+		Resource: &userconfig.Resource{
+			Name: deployment.Labels["apiName"],
+			Kind: userconfig.KindFromString(deployment.Labels["apiKind"]),
+		},
+		APIID:        deployment.Labels["apiID"],
+		DeploymentID: deployment.Labels["deploymentID"],
+		LastUpdated:  lastUpdated.Unix(),
+	}, nil
+}
+
 /*
 * ID (uniquely identifies an api configuration for a given deployment)
 	* DeploymentID (used for refreshing a deployment)
diff --git a/pkg/types/status/status.go b/pkg/types/status/status.go
index b2299bce5c..8a6a4dd160 100644
--- a/pkg/types/status/status.go
+++ b/pkg/types/status/status.go
@@ -17,17 +17,13 @@ limitations under the License.
 package status
 
 import (
-	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	kapps "k8s.io/api/apps/v1"
 )
 
 type Status struct {
-	APIName   string          `json:"api_name"`
-	APIKind   userconfig.Kind `json:"api_kind"`
-	APIID     string          `json:"api_id"`
-	Ready     int32           `json:"ready"`
-	Requested int32           `json:"requested"`
-	UpToDate  int32           `json:"up_to_date"`
+	Ready     int32 `json:"ready"`
+	Requested int32 `json:"requested"`
+	UpToDate  int32 `json:"up_to_date"`
 }
 
 type ReplicaCounts struct {
@@ -62,9 +58,6 @@ type WorkerCounts struct {
 
 func StatusFromDeployment(deployment *kapps.Deployment) *Status {
 	return &Status{
-		APIName:   deployment.Labels["apiName"],
-		APIKind:   userconfig.KindFromString(deployment.Labels["apiKind"]),
-		APIID:     deployment.Labels["apiID"],
 		Ready:     deployment.Status.ReadyReplicas,
 		Requested: deployment.Status.Replicas,
 		UpToDate:  deployment.Status.UpdatedReplicas,

From 4cde8f425bf0b0ddc3dc4d9bef23a0b901635ef2 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Wed, 21 Jul 2021 20:02:01 +0300
Subject: [PATCH 24/40] WIP on API statuses

---
 cli/cmd/lib_async_apis.go                 |  7 +++++--
 cli/cmd/lib_task_apis.go                  |  7 +++++--
 pkg/operator/resources/asyncapi/api.go    |  3 +++
 pkg/operator/resources/job/taskapi/api.go | 23 +++++++----------------
 pkg/types/spec/api.go                     | 17 +++++++++++++++++
 5 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/cli/cmd/lib_async_apis.go b/cli/cmd/lib_async_apis.go
index d810acad62..c0605014aa 100644
--- a/cli/cmd/lib_async_apis.go
+++ b/cli/cmd/lib_async_apis.go
@@ -60,10 +60,13 @@ func asyncAPIsTable(asyncAPIs []schema.APIResponse, envNames []string) table.Tab
 	rows := make([][]interface{}, 0, len(asyncAPIs))
 
 	for i, asyncAPI := range asyncAPIs {
-		lastUpdated := time.Unix(asyncAPI.Spec.LastUpdated, 0)
+		if asyncAPI.Metadata == nil || asyncAPI.Status == nil {
+			continue
+		}
+		lastUpdated := time.Unix(asyncAPI.Metadata.LastUpdated, 0)
 		rows = append(rows, []interface{}{
 			envNames[i],
-			asyncAPI.Spec.Name,
+			asyncAPI.Metadata.Name,
 			fmt.Sprintf("%d/%d", asyncAPI.Status.Ready, asyncAPI.Status.Requested),
 			asyncAPI.Status.UpToDate,
 			libtime.SinceStr(&lastUpdated),
diff --git a/cli/cmd/lib_task_apis.go b/cli/cmd/lib_task_apis.go
index a639cee5f0..cda53e18b8 100644
--- a/cli/cmd/lib_task_apis.go
+++ b/cli/cmd/lib_task_apis.go
@@ -41,7 +41,10 @@ func taskAPIsTable(taskAPIs []schema.APIResponse, envNames []string) table.Table
 	rows := make([][]interface{}, 0, len(taskAPIs))
 
 	for i, taskAPI := range taskAPIs {
-		lastAPIUpdated := time.Unix(taskAPI.Spec.LastUpdated, 0)
+		if taskAPI.Metadata == nil {
+			continue
+		}
+		lastAPIUpdated := time.Unix(taskAPI.Metadata.LastUpdated, 0)
 		latestStartTime := time.Time{}
 		latestJobID := "-"
 		runningJobs := 0
@@ -59,7 +62,7 @@ func taskAPIsTable(taskAPIs []schema.APIResponse, envNames []string) table.Table
 
 		rows = append(rows, []interface{}{
 			envNames[i],
-			taskAPI.Spec.Name,
+			taskAPI.Metadata.Name,
 			runningJobs,
 			latestJobID,
 			libtime.SinceStr(&lastAPIUpdated),
diff --git a/pkg/operator/resources/asyncapi/api.go b/pkg/operator/resources/asyncapi/api.go
index 4c93657a3c..6cb180de69 100644
--- a/pkg/operator/resources/asyncapi/api.go
+++ b/pkg/operator/resources/asyncapi/api.go
@@ -257,6 +257,9 @@ func GetAllAPIs(deployments []kapps.Deployment, virtualServices []istioclientnet
 	keys := make([]string, len(deployments))
 
 	for i := range deployments {
+		if deployments[i].Labels["cortex.dev/async"] != "api" {
+			continue
+		}
 		apiName := deployments[i].Labels["apiName"]
 		keys = append(keys, apiName)
 
diff --git a/pkg/operator/resources/job/taskapi/api.go b/pkg/operator/resources/job/taskapi/api.go
index 32aa249636..1c407a42a0 100644
--- a/pkg/operator/resources/job/taskapi/api.go
+++ b/pkg/operator/resources/job/taskapi/api.go
@@ -147,20 +147,12 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 	}
 
 	for _, virtualService := range virtualServices {
-		apiName := virtualService.Labels["apiName"]
-		apiID := virtualService.Labels["apiID"]
-
-		api, err := operator.DownloadAPISpec(apiName, apiID)
-		if err != nil {
-			return nil, err
-		}
-
-		endpoint, err := operator.APIEndpoint(api)
+		metadata, err := spec.MetadataFromVirtualService(&virtualService)
 		if err != nil {
-			return nil, err
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", metadata.Name))
 		}
 
-		jobStates, err := job.GetMostRecentlySubmittedJobStates(apiName, 1, userconfig.TaskAPIKind)
+		jobStates, err := job.GetMostRecentlySubmittedJobStates(metadata.Name, 1, userconfig.TaskAPIKind)
 
 		jobStatuses := []status.TaskJobStatus{}
 		if len(jobStates) > 0 {
@@ -172,9 +164,8 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 			jobStatuses = append(jobStatuses, *jobStatus)
 		}
 
-		taskAPIsMap[apiName] = &schema.APIResponse{
-			Spec:            api,
-			Endpoint:        &endpoint,
+		taskAPIsMap[metadata.Name] = &schema.APIResponse{
+			Metadata:        metadata,
 			TaskJobStatuses: jobStatuses,
 		}
 	}
@@ -209,8 +200,8 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 
 	taskAPIList := make([]schema.APIResponse, 0, len(taskAPIsMap))
 
-	for _, batchAPI := range taskAPIsMap {
-		taskAPIList = append(taskAPIList, *batchAPI)
+	for _, taskAPI := range taskAPIsMap {
+		taskAPIList = append(taskAPIList, *taskAPI)
 	}
 
 	return taskAPIList, nil
diff --git a/pkg/types/spec/api.go b/pkg/types/spec/api.go
index 929f3fd51b..f2aaf22465 100644
--- a/pkg/types/spec/api.go
+++ b/pkg/types/spec/api.go
@@ -30,6 +30,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/lib/hash"
 	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
 	kapps "k8s.io/api/apps/v1"
 )
 
@@ -70,6 +71,22 @@ func MetadataFromDeployment(deployment *kapps.Deployment) (*Metadata, error) {
 	}, nil
 }
 
+func MetadataFromVirtualService(vs *istioclientnetworking.VirtualService) (*Metadata, error) {
+	lastUpdated, err := TimeFromAPIID(vs.Labels["apiID"])
+	if err != nil {
+		return nil, err
+	}
+	return &Metadata{
+		Resource: &userconfig.Resource{
+			Name: vs.Labels["apiName"],
+			Kind: userconfig.KindFromString(vs.Labels["apiKind"]),
+		},
+		APIID:        vs.Labels["apiID"],
+		DeploymentID: vs.Labels["deploymentID"],
+		LastUpdated:  lastUpdated.Unix(),
+	}, nil
+}
+
 /*
 * ID (uniquely identifies an api configuration for a given deployment)
 	* DeploymentID (used for refreshing a deployment)

From d7ca13fb80060e83960be0b6fcedd1515feebe2a Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Wed, 21 Jul 2021 21:35:42 +0300
Subject: [PATCH 25/40] WIP on API statuses

---
 cli/cmd/get.go                                |  4 --
 cli/cmd/lib_async_apis.go                     |  2 -
 cli/cmd/lib_batch_apis.go                     |  7 +++-
 cli/cmd/lib_realtime_apis.go                  |  4 --
 cli/cmd/lib_traffic_splitters.go              | 12 ++++--
 pkg/operator/resources/asyncapi/api.go        | 14 +++----
 pkg/operator/resources/job/batchapi/api.go    | 29 ++++++-------
 pkg/operator/resources/job/taskapi/api.go     | 13 ++++--
 pkg/operator/resources/realtimeapi/api.go     | 12 +++---
 pkg/operator/resources/resources.go           | 13 +++---
 pkg/operator/resources/trafficsplitter/api.go | 41 ++++++++-----------
 pkg/types/spec/api.go                         |  2 +-
 12 files changed, 70 insertions(+), 83 deletions(-)

diff --git a/cli/cmd/get.go b/cli/cmd/get.go
index f2356fb69a..05d6a2e980 100644
--- a/cli/cmd/get.go
+++ b/cli/cmd/get.go
@@ -219,10 +219,6 @@ func getAPIsInAllEnvironments() (string, error) {
 
 		if err == nil {
 			for _, api := range apisRes {
-				if api.Metadata == nil {
-					// TODO remove this once the status is present for all
-					continue
-				}
 				switch api.Metadata.Kind {
 				case userconfig.BatchAPIKind:
 					allBatchAPIEnvs = append(allBatchAPIEnvs, env.Name)
diff --git a/cli/cmd/lib_async_apis.go b/cli/cmd/lib_async_apis.go
index c0605014aa..ea42397017 100644
--- a/cli/cmd/lib_async_apis.go
+++ b/cli/cmd/lib_async_apis.go
@@ -32,8 +32,6 @@ func asyncAPITable(asyncAPI schema.APIResponse, env cliconfig.Environment) (stri
 	var out string
 
 	t := asyncAPIsTable([]schema.APIResponse{asyncAPI}, []string{env.Name})
-	t.FindHeaderByTitle(_titleEnvironment).Hidden = true
-	t.FindHeaderByTitle(_titleAsyncAPI).Hidden = true
 
 	out += t.MustFormat()
 
diff --git a/cli/cmd/lib_batch_apis.go b/cli/cmd/lib_batch_apis.go
index 1499ac8869..3bc2992acb 100644
--- a/cli/cmd/lib_batch_apis.go
+++ b/cli/cmd/lib_batch_apis.go
@@ -43,7 +43,10 @@ func batchAPIsTable(batchAPIs []schema.APIResponse, envNames []string) table.Tab
 	rows := make([][]interface{}, 0, len(batchAPIs))
 
 	for i, batchAPI := range batchAPIs {
-		lastAPIUpdated := time.Unix(batchAPI.Spec.LastUpdated, 0)
+		if batchAPI.Metadata == nil {
+			continue
+		}
+		lastAPIUpdated := time.Unix(batchAPI.Metadata.LastUpdated, 0)
 		latestStartTime := time.Time{}
 		latestJobID := "-"
 		runningJobs := 0
@@ -61,7 +64,7 @@ func batchAPIsTable(batchAPIs []schema.APIResponse, envNames []string) table.Tab
 
 		rows = append(rows, []interface{}{
 			envNames[i],
-			batchAPI.Spec.Name,
+			batchAPI.Metadata.Name,
 			runningJobs,
 			latestJobID,
 			libtime.SinceStr(&lastAPIUpdated),
diff --git a/cli/cmd/lib_realtime_apis.go b/cli/cmd/lib_realtime_apis.go
index 36abdaff87..128e5df06d 100644
--- a/cli/cmd/lib_realtime_apis.go
+++ b/cli/cmd/lib_realtime_apis.go
@@ -32,10 +32,6 @@ func realtimeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment)
 	var out string
 
 	t := realtimeAPIsTable([]schema.APIResponse{realtimeAPI}, []string{env.Name})
-	// TODO decide on whether we want to keep this consistent with `cortex get` command
-	// t.FindHeaderByTitle(_titleEnvironment).Hidden = true
-	// t.FindHeaderByTitle(_titleRealtimeAPI).Hidden = true
-
 	out += t.MustFormat()
 
 	if realtimeAPI.DashboardURL != nil && *realtimeAPI.DashboardURL != "" {
diff --git a/cli/cmd/lib_traffic_splitters.go b/cli/cmd/lib_traffic_splitters.go
index cc23e00a87..6b1b1a8837 100644
--- a/cli/cmd/lib_traffic_splitters.go
+++ b/cli/cmd/lib_traffic_splitters.go
@@ -76,7 +76,10 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
 		}
 
 		apiRes := apisRes[0]
-		lastUpdated := time.Unix(apiRes.Spec.LastUpdated, 0)
+		if apiRes.Metadata == nil || apiRes.Status == nil {
+			continue
+		}
+		lastUpdated := time.Unix(apiRes.Metadata.LastUpdated, 0)
 
 		apiName := apiRes.Spec.Name
 		if api.Shadow {
@@ -108,7 +111,10 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
 func trafficSplitterListTable(trafficSplitter []schema.APIResponse, envNames []string) table.Table {
 	rows := make([][]interface{}, 0, len(trafficSplitter))
 	for i, splitAPI := range trafficSplitter {
-		lastUpdated := time.Unix(splitAPI.Spec.LastUpdated, 0)
+		if splitAPI.Metadata == nil || splitAPI.Spec == nil {
+			continue
+		}
+		lastUpdated := time.Unix(splitAPI.Metadata.LastUpdated, 0)
 		var apis []string
 		for _, api := range splitAPI.Spec.APIs {
 			apiName := api.Name
@@ -120,7 +126,7 @@ func trafficSplitterListTable(trafficSplitter []schema.APIResponse, envNames []s
 		apisStr := s.TruncateEllipses(strings.Join(apis, " "), 50)
 		rows = append(rows, []interface{}{
 			envNames[i],
-			splitAPI.Spec.Name,
+			splitAPI.Metadata.Name,
 			apisStr,
 			libtime.SinceStr(&lastUpdated),
 		})
diff --git a/pkg/operator/resources/asyncapi/api.go b/pkg/operator/resources/asyncapi/api.go
index 6cb180de69..26553ecab8 100644
--- a/pkg/operator/resources/asyncapi/api.go
+++ b/pkg/operator/resources/asyncapi/api.go
@@ -251,17 +251,17 @@ func DeleteAPI(apiName string, keepCache bool) error {
 	return nil
 }
 
-func GetAllAPIs(deployments []kapps.Deployment, virtualServices []istioclientnetworking.VirtualService) ([]schema.APIResponse, error) {
-	asyncAPIs := make([]schema.APIResponse, len(deployments))
-	mappedAsyncAPIs := make(map[string]schema.APIResponse, len(deployments))
-	keys := make([]string, len(deployments))
+func GetAllAPIs(deployments []kapps.Deployment) ([]schema.APIResponse, error) {
+	asyncAPIs := make([]schema.APIResponse, 0)
+	mappedAsyncAPIs := make(map[string]schema.APIResponse, 0)
+	apiNames := make([]string, 0)
 
 	for i := range deployments {
 		if deployments[i].Labels["cortex.dev/async"] != "api" {
 			continue
 		}
 		apiName := deployments[i].Labels["apiName"]
-		keys = append(keys, apiName)
+		apiNames = append(apiNames, apiName)
 
 		metadata, err := spec.MetadataFromDeployment(&deployments[i])
 		if err != nil {
@@ -273,8 +273,8 @@ func GetAllAPIs(deployments []kapps.Deployment, virtualServices []istioclientnet
 		}
 	}
 
-	sort.Strings(keys)
-	for _, apiName := range keys {
+	sort.Strings(apiNames)
+	for _, apiName := range apiNames {
 		asyncAPIs = append(asyncAPIs, mappedAsyncAPIs[apiName])
 	}
 
diff --git a/pkg/operator/resources/job/batchapi/api.go b/pkg/operator/resources/job/batchapi/api.go
index 6b230847c6..8d87040eef 100644
--- a/pkg/operator/resources/job/batchapi/api.go
+++ b/pkg/operator/resources/job/batchapi/api.go
@@ -142,23 +142,16 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, batchJob
 
 	for _, virtualService := range virtualServices {
 		apiName := virtualService.Labels["apiName"]
-		apiID := virtualService.Labels["apiID"]
-
-		api, err := operator.DownloadAPISpec(apiName, apiID)
-		if err != nil {
-			return nil, err
-		}
-
-		endpoint, err := operator.APIEndpoint(api)
+		metadata, err := spec.MetadataFromVirtualService(&virtualService)
 		if err != nil {
-			return nil, err
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 
 		var jobStatuses []status.BatchJobStatus
-		batchJobs := apiNameToBatchJobsMap[apiName]
+		batchJobs := apiNameToBatchJobsMap[metadata.Name]
 
 		if len(batchJobs) == 0 {
-			jobStates, err := job.GetMostRecentlySubmittedJobStates(apiName, 1, userconfig.BatchAPIKind)
+			jobStates, err := job.GetMostRecentlySubmittedJobStates(metadata.Name, 1, userconfig.BatchAPIKind)
 			if err != nil {
 				return nil, err
 			}
@@ -183,9 +176,8 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, batchJob
 			}
 		}
 
-		batchAPIsMap[apiName] = &schema.APIResponse{
-			Spec:             api,
-			Endpoint:         &endpoint,
+		batchAPIsMap[metadata.Name] = &schema.APIResponse{
+			Metadata:         metadata,
 			BatchJobStatuses: jobStatuses,
 		}
 	}
@@ -200,10 +192,12 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, batchJob
 }
 
 func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
-	virtualService := deployedResource.VirtualService
+	metadata, err := spec.MetadataFromVirtualService(deployedResource.VirtualService)
+	if err != nil {
+		return nil, err
+	}
 
-	apiID := virtualService.Labels["apiID"]
-	api, err := operator.DownloadAPISpec(deployedResource.Name, apiID)
+	api, err := operator.DownloadAPISpec(deployedResource.Name, metadata.APIID)
 	if err != nil {
 		return nil, err
 	}
@@ -264,6 +258,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 	return []schema.APIResponse{
 		{
 			Spec:             api,
+			Metadata:         metadata,
 			BatchJobStatuses: jobStatuses,
 			Endpoint:         &endpoint,
 			DashboardURL:     dashboardURL,
diff --git a/pkg/operator/resources/job/taskapi/api.go b/pkg/operator/resources/job/taskapi/api.go
index 1c407a42a0..6c6afdf425 100644
--- a/pkg/operator/resources/job/taskapi/api.go
+++ b/pkg/operator/resources/job/taskapi/api.go
@@ -147,9 +147,11 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 	}
 
 	for _, virtualService := range virtualServices {
+		apiName := virtualService.Labels["apiName"]
+
 		metadata, err := spec.MetadataFromVirtualService(&virtualService)
 		if err != nil {
-			return nil, errors.Wrap(err, fmt.Sprintf("api %s", metadata.Name))
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 
 		jobStates, err := job.GetMostRecentlySubmittedJobStates(metadata.Name, 1, userconfig.TaskAPIKind)
@@ -209,10 +211,12 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 
 // GetAPIByName returns a single task API and its most recently submitted job along with all running task jobs
 func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
-	virtualService := deployedResource.VirtualService
+	metadata, err := spec.MetadataFromVirtualService(deployedResource.VirtualService)
+	if err != nil {
+		return nil, err
+	}
 
-	apiID := virtualService.Labels["apiID"]
-	api, err := operator.DownloadAPISpec(deployedResource.Name, apiID)
+	api, err := operator.DownloadAPISpec(deployedResource.Name, metadata.APIID)
 	if err != nil {
 		return nil, err
 	}
@@ -287,6 +291,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 	return []schema.APIResponse{
 		{
 			Spec:            api,
+			Metadata:        metadata,
 			TaskJobStatuses: jobStatuses,
 			Endpoint:        &endpoint,
 			DashboardURL:    dashboardURL,
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index 14d24b3e04..d787f5c99d 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -176,14 +176,14 @@ func DeleteAPI(apiName string, keepCache bool) error {
 	return nil
 }
 
-func GetAllAPIs(deployments []kapps.Deployment, virtualServices []istioclientnetworking.VirtualService) ([]schema.APIResponse, error) {
+func GetAllAPIs(deployments []kapps.Deployment) ([]schema.APIResponse, error) {
 	realtimeAPIs := make([]schema.APIResponse, len(deployments))
 	mappedRealtimeAPIs := make(map[string]schema.APIResponse, len(deployments))
-	keys := make([]string, len(deployments))
+	apiNames := make([]string, len(deployments))
 
 	for i := range deployments {
 		apiName := deployments[i].Labels["apiName"]
-		keys = append(keys, apiName)
+		apiNames[i] = apiName
 
 		metadata, err := spec.MetadataFromDeployment(&deployments[i])
 		if err != nil {
@@ -195,9 +195,9 @@ func GetAllAPIs(deployments []kapps.Deployment, virtualServices []istioclientnet
 		}
 	}
 
-	sort.Strings(keys)
-	for _, apiName := range keys {
-		realtimeAPIs = append(realtimeAPIs, mappedRealtimeAPIs[apiName])
+	sort.Strings(apiNames)
+	for i := range apiNames {
+		realtimeAPIs[i] = mappedRealtimeAPIs[apiNames[i]]
 	}
 
 	return realtimeAPIs, nil
diff --git a/pkg/operator/resources/resources.go b/pkg/operator/resources/resources.go
index 690616e923..5350f99e47 100644
--- a/pkg/operator/resources/resources.go
+++ b/pkg/operator/resources/resources.go
@@ -308,18 +308,15 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		}
 	}
 
-	var realtimeAPIVirtualServices []istioclientnetworking.VirtualService
-	var asyncAPIVirtualServices []istioclientnetworking.VirtualService
+	fmt.Println("realtimeAPIDeployments", len(realtimeAPIDeployments))
+	fmt.Println("asyncAPIDeployments", len(asyncAPIDeployments))
+
 	var batchAPIVirtualServices []istioclientnetworking.VirtualService
 	var taskAPIVirtualServices []istioclientnetworking.VirtualService
 	var trafficSplitterVirtualServices []istioclientnetworking.VirtualService
 
 	for _, vs := range virtualServices {
 		switch vs.Labels["apiKind"] {
-		case userconfig.RealtimeAPIKind.String():
-			realtimeAPIVirtualServices = append(realtimeAPIVirtualServices, vs)
-		case userconfig.AsyncAPIKind.String():
-			asyncAPIVirtualServices = append(asyncAPIVirtualServices, vs)
 		case userconfig.BatchAPIKind.String():
 			batchAPIVirtualServices = append(batchAPIVirtualServices, vs)
 		case userconfig.TrafficSplitterKind.String():
@@ -329,7 +326,7 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		}
 	}
 
-	realtimeAPIList, err := realtimeapi.GetAllAPIs(realtimeAPIDeployments, realtimeAPIVirtualServices)
+	realtimeAPIList, err := realtimeapi.GetAllAPIs(realtimeAPIDeployments)
 	if err != nil {
 		return nil, err
 	}
@@ -345,7 +342,7 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		return nil, err
 	}
 
-	asyncAPIList, err := asyncapi.GetAllAPIs(asyncAPIDeployments, asyncAPIVirtualServices)
+	asyncAPIList, err := asyncapi.GetAllAPIs(asyncAPIDeployments)
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/operator/resources/trafficsplitter/api.go b/pkg/operator/resources/trafficsplitter/api.go
index 3492544d82..a30a7579fd 100644
--- a/pkg/operator/resources/trafficsplitter/api.go
+++ b/pkg/operator/resources/trafficsplitter/api.go
@@ -132,35 +132,20 @@ func getTrafficSplitterDestinations(trafficSplitter *spec.API) []k8s.Destination
 
 // GetAllAPIs returns a list of metadata, in the form of schema.APIResponse, about all the created traffic splitter APIs
 func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schema.APIResponse, error) {
-	var (
-		apiNames         []string
-		apiIDs           []string
-		trafficSplitters []schema.APIResponse
-	)
-
+	var trafficSplitters []schema.APIResponse
 	for _, virtualService := range virtualServices {
-		if virtualService.Labels["apiKind"] == userconfig.TrafficSplitterKind.String() {
-			apiNames = append(apiNames, virtualService.Labels["apiName"])
-			apiIDs = append(apiIDs, virtualService.Labels["apiID"])
-		}
-	}
-
-	apis, err := operator.DownloadAPISpecs(apiNames, apiIDs)
-	if err != nil {
-		return nil, err
-	}
+		apiName := virtualService.Labels["apiName"]
 
-	for i := range apis {
-		trafficSplitter := apis[i]
-		endpoint, err := operator.APIEndpoint(&trafficSplitter)
+		metadata, err := spec.MetadataFromVirtualService(&virtualService)
 		if err != nil {
-			return nil, err
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 
-		trafficSplitters = append(trafficSplitters, schema.APIResponse{
-			Spec:     &trafficSplitter,
-			Endpoint: &endpoint,
-		})
+		if metadata.Kind == userconfig.TrafficSplitterKind {
+			trafficSplitters = append(trafficSplitters, schema.APIResponse{
+				Metadata: metadata,
+			})
+		}
 	}
 
 	return trafficSplitters, nil
@@ -168,7 +153,12 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schem
 
 // GetAPIByName retrieves the metadata, in the form of schema.APIResponse, of a single traffic splitter API
 func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
-	api, err := operator.DownloadAPISpec(deployedResource.Name, deployedResource.VirtualService.Labels["apiID"])
+	metadata, err := spec.MetadataFromVirtualService(deployedResource.VirtualService)
+	if err != nil {
+		return nil, err
+	}
+
+	api, err := operator.DownloadAPISpec(deployedResource.Name, metadata.APIID)
 	if err != nil {
 		return nil, err
 	}
@@ -181,6 +171,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 	return []schema.APIResponse{
 		{
 			Spec:     api,
+			Metadata: metadata,
 			Endpoint: &endpoint,
 		},
 	}, nil
diff --git a/pkg/types/spec/api.go b/pkg/types/spec/api.go
index f2aaf22465..b229962aee 100644
--- a/pkg/types/spec/api.go
+++ b/pkg/types/spec/api.go
@@ -51,7 +51,7 @@ type API struct {
 type Metadata struct {
 	*userconfig.Resource
 	APIID        string `json:"id"`
-	DeploymentID string `json:"deployment_id"`
+	DeploymentID string `json:"deployment_id,omitempty"`
 	LastUpdated  int64  `json:"last_updated"`
 }
 

From 34dc5a3feaab9a64b4ad75f3bce320e26dc44608 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Thu, 22 Jul 2021 17:16:55 +0300
Subject: [PATCH 26/40] Changes to the TrafficSplitter

---
 cli/cmd/lib_traffic_splitters.go              | 14 ++------------
 pkg/activator/helpers.go                      |  1 +
 pkg/operator/resources/trafficsplitter/api.go | 11 +++++++++++
 pkg/types/userconfig/api.go                   | 12 ++++++++++++
 pkg/types/userconfig/config_key.go            |  1 +
 5 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/cli/cmd/lib_traffic_splitters.go b/cli/cmd/lib_traffic_splitters.go
index 6b1b1a8837..8eaf6b048b 100644
--- a/cli/cmd/lib_traffic_splitters.go
+++ b/cli/cmd/lib_traffic_splitters.go
@@ -45,7 +45,6 @@ func trafficSplitterTable(trafficSplitter schema.APIResponse, env cliconfig.Envi
 	if err != nil {
 		return "", err
 	}
-	t.FindHeaderByTitle(_titleEnvironment).Hidden = true
 
 	out += t.MustFormat()
 
@@ -111,23 +110,14 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
 func trafficSplitterListTable(trafficSplitter []schema.APIResponse, envNames []string) table.Table {
 	rows := make([][]interface{}, 0, len(trafficSplitter))
 	for i, splitAPI := range trafficSplitter {
-		if splitAPI.Metadata == nil || splitAPI.Spec == nil {
+		if splitAPI.Metadata == nil || splitAPI.Status == nil {
 			continue
 		}
 		lastUpdated := time.Unix(splitAPI.Metadata.LastUpdated, 0)
-		var apis []string
-		for _, api := range splitAPI.Spec.APIs {
-			apiName := api.Name
-			if api.Shadow {
-				apiName += " (shadow)"
-			}
-			apis = append(apis, apiName+":"+s.Int32(api.Weight))
-		}
-		apisStr := s.TruncateEllipses(strings.Join(apis, " "), 50)
 		rows = append(rows, []interface{}{
 			envNames[i],
 			splitAPI.Metadata.Name,
-			apisStr,
+			s.Int32(splitAPI.Status.Ready),
 			libtime.SinceStr(&lastUpdated),
 		})
 	}
diff --git a/pkg/activator/helpers.go b/pkg/activator/helpers.go
index f32c7e54f2..48790b5ac0 100644
--- a/pkg/activator/helpers.go
+++ b/pkg/activator/helpers.go
@@ -66,6 +66,7 @@ func getAPIMeta(obj interface{}) (apiMeta, error) {
 	}, nil
 }
 
+// TODO move this out of here
 func concurrencyFromAnnotations(annotations map[string]string) (int, int, error) {
 	maxQueueLength, err := strconv.Atoi(annotations[userconfig.MaxQueueLengthAnnotationKey])
 	if err != nil {
diff --git a/pkg/operator/resources/trafficsplitter/api.go b/pkg/operator/resources/trafficsplitter/api.go
index a30a7579fd..fa02f8ec35 100644
--- a/pkg/operator/resources/trafficsplitter/api.go
+++ b/pkg/operator/resources/trafficsplitter/api.go
@@ -30,6 +30,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/operator/operator"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 	"github.com/cortexlabs/cortex/pkg/types/spec"
+	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"github.com/cortexlabs/cortex/pkg/workloads"
 	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
@@ -141,9 +142,19 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schem
 			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 
+		targets, err := userconfig.TrafficSplitterTargetsFromAnnotations(&virtualService)
+		if err != nil {
+			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
+		}
+
 		if metadata.Kind == userconfig.TrafficSplitterKind {
 			trafficSplitters = append(trafficSplitters, schema.APIResponse{
 				Metadata: metadata,
+				Status: &status.Status{
+					Ready:     targets,
+					Requested: targets,
+					UpToDate:  targets,
+				},
 			})
 		}
 	}
diff --git a/pkg/types/userconfig/api.go b/pkg/types/userconfig/api.go
index c2f8585941..1872187f8c 100644
--- a/pkg/types/userconfig/api.go
+++ b/pkg/types/userconfig/api.go
@@ -155,6 +155,10 @@ func IdentifyAPI(filePath string, name string, kind Kind, index int) string {
 func (api *API) ToK8sAnnotations() map[string]string {
 	annotations := map[string]string{}
 
+	if len(api.APIs) > 0 {
+		annotations[NumberOfTrafficSplitterTargets] = s.Int32(int32(len(api.APIs)))
+	}
+
 	if api.Pod != nil && api.Kind == RealtimeAPIKind {
 		annotations[MaxConcurrencyAnnotationKey] = s.Int64(api.Pod.MaxConcurrency)
 		annotations[MaxQueueLengthAnnotationKey] = s.Int64(api.Pod.MaxQueueLength)
@@ -245,6 +249,14 @@ func AutoscalingFromAnnotations(k8sObj kmeta.Object) (*Autoscaling, error) {
 	return &a, nil
 }
 
+func TrafficSplitterTargetsFromAnnotations(k8sObj kmeta.Object) (int32, error) {
+	targets, err := k8s.ParseInt32Annotation(k8sObj, NumberOfTrafficSplitterTargets)
+	if err != nil {
+		return 0, err
+	}
+	return targets, nil
+}
+
 func (api *API) UserStr() string {
 	var sb strings.Builder
 	sb.WriteString(fmt.Sprintf("%s: %s\n", NameKey, api.Name))
diff --git a/pkg/types/userconfig/config_key.go b/pkg/types/userconfig/config_key.go
index 826e144b05..263f764bd6 100644
--- a/pkg/types/userconfig/config_key.go
+++ b/pkg/types/userconfig/config_key.go
@@ -91,6 +91,7 @@ const (
 	EndpointAnnotationKey                     = "networking.cortex.dev/endpoint"
 	MaxConcurrencyAnnotationKey               = "pod.cortex.dev/max-concurrency"
 	MaxQueueLengthAnnotationKey               = "pod.cortex.dev/max-queue-length"
+	NumberOfTrafficSplitterTargets            = "apis.cortex.dev/traffic-splitter-targets"
 	MinReplicasAnnotationKey                  = "autoscaling.cortex.dev/min-replicas"
 	MaxReplicasAnnotationKey                  = "autoscaling.cortex.dev/max-replicas"
 	TargetInFlightAnnotationKey               = "autoscaling.cortex.dev/target-in-flight"

From 51606a799a6f1756344a245d5dd951d1fa7da1e0 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Fri, 23 Jul 2021 23:19:11 +0300
Subject: [PATCH 27/40] WIP on API statuses

---
 cli/cluster/get.go                            |  14 ++
 cli/cmd/describe.go                           | 113 ++++++++++++++
 cli/cmd/get.go                                |   8 +-
 cli/cmd/lib_apis.go                           |  59 ++++++++
 cli/cmd/lib_async_apis.go                     |  27 ++++
 cli/cmd/lib_batch_apis.go                     |  32 ++--
 cli/cmd/lib_realtime_apis.go                  |  27 ++++
 cli/cmd/lib_task_apis.go                      |  32 ++--
 cli/cmd/lib_watch.go                          |   4 +-
 cli/cmd/root.go                               |   2 +
 cmd/operator/main.go                          |   1 +
 pkg/consts/consts.go                          |   3 +-
 .../crd/bases/batch.cortex.dev_batchjobs.yaml |  19 ++-
 .../batch/batchjob_controller_helpers.go      |  86 +++++++----
 pkg/lib/k8s/pod.go                            | 140 +++++++++++++-----
 pkg/operator/endpoints/describe.go            |  36 +++++
 pkg/operator/operator/k8s.go                  |  11 +-
 pkg/operator/resources/asyncapi/api.go        | 127 ++++++++++++++--
 pkg/operator/resources/asyncapi/status.go     | 137 +++--------------
 pkg/operator/resources/job/worker_stats.go    |  31 ++--
 pkg/operator/resources/realtimeapi/api.go     |  43 +++++-
 pkg/operator/resources/realtimeapi/status.go  |  77 +++-------
 pkg/operator/resources/resources.go           |  33 ++++-
 pkg/types/status/code.go                      |  97 ------------
 pkg/types/status/status.go                    | 103 ++++++++++---
 pkg/types/userconfig/api.go                   |   8 +
 26 files changed, 847 insertions(+), 423 deletions(-)
 create mode 100644 cli/cmd/describe.go
 create mode 100644 cli/cmd/lib_apis.go
 create mode 100644 pkg/operator/endpoints/describe.go
 delete mode 100644 pkg/types/status/code.go

diff --git a/cli/cluster/get.go b/cli/cluster/get.go
index 47a24aa0a3..6d88e707b8 100644
--- a/cli/cluster/get.go
+++ b/cli/cluster/get.go
@@ -51,6 +51,20 @@ func GetAPI(operatorConfig OperatorConfig, apiName string) ([]schema.APIResponse
 	return apiRes, nil
 }
 
+func DescribeAPI(operatorConfig OperatorConfig, apiName string) ([]schema.APIResponse, error) {
+	httpRes, err := HTTPGet(operatorConfig, "/describe/"+apiName)
+	if err != nil {
+		return nil, err
+	}
+
+	var apiRes []schema.APIResponse
+	if err = json.Unmarshal(httpRes, &apiRes); err != nil {
+		return nil, errors.Wrap(err, "/describe/"+apiName, string(httpRes))
+	}
+
+	return apiRes, nil
+}
+
 func GetAPIByID(operatorConfig OperatorConfig, apiName string, apiID string) ([]schema.APIResponse, error) {
 	httpRes, err := HTTPGet(operatorConfig, "/get/"+apiName+"/"+apiID)
 	if err != nil {
diff --git a/cli/cmd/describe.go b/cli/cmd/describe.go
new file mode 100644
index 0000000000..be23ef6532
--- /dev/null
+++ b/cli/cmd/describe.go
@@ -0,0 +1,113 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cmd
+
+import (
+	"fmt"
+
+	"github.com/cortexlabs/cortex/cli/cluster"
+	"github.com/cortexlabs/cortex/cli/types/cliconfig"
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
+	"github.com/cortexlabs/cortex/pkg/lib/exit"
+	"github.com/cortexlabs/cortex/pkg/lib/telemetry"
+	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+	"github.com/spf13/cobra"
+)
+
+const (
+	_titleReplicaStatus = "replica status"
+	_titleReplicaCount  = "replica count"
+)
+
+var (
+	_flagDescribeEnv   string
+	_flagDescribeWatch bool
+)
+
+func describeInit() {
+	_describeCmd.Flags().SortFlags = false
+	_describeCmd.Flags().StringVarP(&_flagDescribeEnv, "env", "e", "", "environment to use")
+	_describeCmd.Flags().BoolVarP(&_flagDescribeWatch, "watch", "w", false, "re-run the command every 2 seconds")
+}
+
+var _describeCmd = &cobra.Command{
+	Use:   "describe [API_NAME]",
+	Short: "describe an api",
+	Args:  cobra.ExactArgs(1),
+	Run: func(cmd *cobra.Command, args []string) {
+		apiName := args[0]
+
+		var envName string
+		if wasFlagProvided(cmd, "env") {
+			envName = _flagDescribeEnv
+		} else {
+			var err error
+			envName, err = getEnvFromFlag("")
+			if err != nil {
+				telemetry.Event("cli.describe")
+				exit.Error(err)
+			}
+		}
+
+		env, err := ReadOrConfigureEnv(envName)
+		if err != nil {
+			telemetry.Event("cli.describe")
+			exit.Error(err)
+		}
+		telemetry.Event("cli.describe", map[string]interface{}{"env_name": env.Name})
+
+		rerun(_flagDescribeWatch, func() (string, error) {
+			env, err := ReadOrConfigureEnv(envName)
+			if err != nil {
+				exit.Error(err)
+			}
+
+			out, err := envStringIfNotSpecified(envName, cmd)
+			if err != nil {
+				return "", err
+			}
+			apiTable, err := describeAPI(env, apiName)
+			if err != nil {
+				return "", err
+			}
+
+			return out + apiTable, nil
+		})
+	},
+}
+
+func describeAPI(env cliconfig.Environment, apiName string) (string, error) {
+	apisRes, err := cluster.DescribeAPI(MustGetOperatorConfig(env.Name), apiName)
+	if err != nil {
+		return "", err
+	}
+
+	if len(apisRes) == 0 {
+		exit.Error(errors.ErrorUnexpected(fmt.Sprintf("unable to find API %s", apiName)))
+	}
+
+	apiRes := apisRes[0]
+
+	switch apiRes.Metadata.Kind {
+	case userconfig.RealtimeAPIKind:
+		return realtimeDescribeAPITable(apiRes, env)
+	case userconfig.AsyncAPIKind:
+		return asyncDescribeAPITable(apiRes, env)
+	default:
+		return "", errors.ErrorUnexpected(fmt.Sprintf("encountered unexpected kind %s for api %s", apiRes.Spec.Kind, apiRes.Spec.Name))
+	}
+}
diff --git a/cli/cmd/get.go b/cli/cmd/get.go
index 05d6a2e980..e0083dfa99 100644
--- a/cli/cmd/get.go
+++ b/cli/cmd/get.go
@@ -48,14 +48,14 @@ const (
 )
 
 var (
-	_flagGetEnv string
-	_flagWatch  bool
+	_flagGetEnv   string
+	_flagGetWatch bool
 )
 
 func getInit() {
 	_getCmd.Flags().SortFlags = false
 	_getCmd.Flags().StringVarP(&_flagGetEnv, "env", "e", "", "environment to use")
-	_getCmd.Flags().BoolVarP(&_flagWatch, "watch", "w", false, "re-run the command every 2 seconds")
+	_getCmd.Flags().BoolVarP(&_flagGetWatch, "watch", "w", false, "re-run the command every 2 seconds")
 	_getCmd.Flags().VarP(&_flagOutput, "output", "o", fmt.Sprintf("output format: one of %s", strings.Join(flags.OutputTypeStringsExcluding(flags.YAMLOutputType), "|")))
 	addVerboseFlag(_getCmd)
 }
@@ -88,7 +88,7 @@ var _getCmd = &cobra.Command{
 			telemetry.Event("cli.get")
 		}
 
-		rerun(func() (string, error) {
+		rerun(_flagGetWatch, func() (string, error) {
 			if len(args) == 1 {
 				env, err := ReadOrConfigureEnv(envName)
 				if err != nil {
diff --git a/cli/cmd/lib_apis.go b/cli/cmd/lib_apis.go
new file mode 100644
index 0000000000..23514342dc
--- /dev/null
+++ b/cli/cmd/lib_apis.go
@@ -0,0 +1,59 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cmd
+
+import (
+	"github.com/cortexlabs/cortex/pkg/lib/table"
+	"github.com/cortexlabs/cortex/pkg/types/status"
+)
+
+func replicaCountTable(counts *status.ReplicaCounts) table.Table {
+	var rows [][]interface{}
+	for _, replicaCountType := range status.ReplicaCountTypes {
+		count := counts.GetCountBy(replicaCountType)
+		canBeHiddenIfZero := false
+		switch replicaCountType {
+		case status.ReplicaCountFailed:
+			canBeHiddenIfZero = true
+		case status.ReplicaCountKilled:
+			canBeHiddenIfZero = true
+		case status.ReplicaCountKilledOOM:
+			canBeHiddenIfZero = true
+		case status.ReplicaCountErrImagePull:
+			canBeHiddenIfZero = true
+		case status.ReplicaCountUnknown:
+			canBeHiddenIfZero = true
+		case status.ReplicaCountStalled:
+			canBeHiddenIfZero = true
+		}
+		if count == 0 && canBeHiddenIfZero {
+			continue
+		}
+		rows = append(rows, []interface{}{
+			replicaCountType,
+			count,
+		})
+	}
+
+	return table.Table{
+		Headers: []table.Header{
+			{Title: _titleReplicaStatus, MinWidth: 32, MaxWidth: 32},
+			{Title: _titleReplicaCount},
+		},
+		Rows: rows,
+	}
+}
diff --git a/cli/cmd/lib_async_apis.go b/cli/cmd/lib_async_apis.go
index ea42397017..e2e4441003 100644
--- a/cli/cmd/lib_async_apis.go
+++ b/cli/cmd/lib_async_apis.go
@@ -23,6 +23,7 @@ import (
 
 	"github.com/cortexlabs/cortex/cli/types/cliconfig"
 	"github.com/cortexlabs/cortex/pkg/lib/console"
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/table"
 	libtime "github.com/cortexlabs/cortex/pkg/lib/time"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
@@ -54,6 +55,32 @@ func asyncAPITable(asyncAPI schema.APIResponse, env cliconfig.Environment) (stri
 	return out, nil
 }
 
+func asyncDescribeAPITable(asyncAPI schema.APIResponse, env cliconfig.Environment) (string, error) {
+	if asyncAPI.Metadata == nil {
+		return "", errors.ErrorUnexpected("missing metadata from operator response")
+	}
+
+	if asyncAPI.Status == nil {
+		return "", errors.ErrorUnexpected(fmt.Sprintf("missing status for %s api", asyncAPI.Metadata.Name))
+	}
+
+	t := asyncAPIsTable([]schema.APIResponse{asyncAPI}, []string{env.Name})
+	out := t.MustFormat()
+
+	if asyncAPI.DashboardURL != nil && *asyncAPI.DashboardURL != "" {
+		out += "\n" + console.Bold("metrics dashboard: ") + *asyncAPI.DashboardURL + "\n"
+	}
+
+	if asyncAPI.Endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *asyncAPI.Endpoint + "\n"
+	}
+
+	t = replicaCountTable(asyncAPI.Status.ReplicaCounts)
+	out += "\n" + t.MustFormat()
+
+	return out, nil
+}
+
 func asyncAPIsTable(asyncAPIs []schema.APIResponse, envNames []string) table.Table {
 	rows := make([][]interface{}, 0, len(asyncAPIs))
 
diff --git a/cli/cmd/lib_batch_apis.go b/cli/cmd/lib_batch_apis.go
index 3bc2992acb..5cebcdd2ba 100644
--- a/cli/cmd/lib_batch_apis.go
+++ b/cli/cmd/lib_batch_apis.go
@@ -221,22 +221,34 @@ func getBatchJob(env cliconfig.Environment, apiName string, jobID string) (strin
 		if job.WorkerCounts != nil {
 			t := table.Table{
 				Headers: []table.Header{
-					{Title: "requested"},
-					{Title: "pending", Hidden: job.WorkerCounts.Pending == 0},
-					{Title: "initializing", Hidden: job.WorkerCounts.Initializing == 0},
-					{Title: "stalled", Hidden: job.WorkerCounts.Stalled == 0},
-					{Title: "running"},
-					{Title: "failed", Hidden: job.WorkerCounts.Failed == 0},
-					{Title: "succeeded"},
+					{Title: "Requested"},
+					{Title: "Pending", Hidden: job.WorkerCounts.Pending == 0},
+					{Title: "Creating", Hidden: job.WorkerCounts.Creating == 0},
+					{Title: "Ready"},
+					{Title: "NotReady"},
+					{Title: "ErrImagePull", Hidden: job.WorkerCounts.ErrImagePull == 0},
+					{Title: "Terminating", Hidden: job.WorkerCounts.Terminating == 0},
+					{Title: "Failed", Hidden: job.WorkerCounts.Failed == 0},
+					{Title: "Killed", Hidden: job.WorkerCounts.Killed == 0},
+					{Title: "KilledOOM", Hidden: job.WorkerCounts.KilledOOM == 0},
+					{Title: "Stalled", Hidden: job.WorkerCounts.Stalled == 0},
+					{Title: "Unknown", Hidden: job.WorkerCounts.Unknown == 0},
+					{Title: "Succeeded"},
 				},
 				Rows: [][]interface{}{
 					{
 						job.Workers,
 						job.WorkerCounts.Pending,
-						job.WorkerCounts.Initializing,
-						job.WorkerCounts.Stalled,
-						job.WorkerCounts.Running,
+						job.WorkerCounts.Creating,
+						job.WorkerCounts.Ready,
+						job.WorkerCounts.NotReady,
+						job.WorkerCounts.ErrImagePull,
+						job.WorkerCounts.Terminating,
 						job.WorkerCounts.Failed,
+						job.WorkerCounts.Killed,
+						job.WorkerCounts.KilledOOM,
+						job.WorkerCounts.Stalled,
+						job.WorkerCounts.Unknown,
 						job.WorkerCounts.Succeeded,
 					},
 				},
diff --git a/cli/cmd/lib_realtime_apis.go b/cli/cmd/lib_realtime_apis.go
index 128e5df06d..dd73db1282 100644
--- a/cli/cmd/lib_realtime_apis.go
+++ b/cli/cmd/lib_realtime_apis.go
@@ -23,6 +23,7 @@ import (
 
 	"github.com/cortexlabs/cortex/cli/types/cliconfig"
 	"github.com/cortexlabs/cortex/pkg/lib/console"
+	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/table"
 	libtime "github.com/cortexlabs/cortex/pkg/lib/time"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
@@ -53,6 +54,32 @@ func realtimeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment)
 	return out, nil
 }
 
+// realtimeDescribeAPITable renders the describe output for one realtime API: the table from
+// realtimeAPIsTable, the dashboard URL and endpoint when set, and the replica counts table.
+func realtimeDescribeAPITable(realtimeAPI schema.APIResponse, env cliconfig.Environment) (string, error) {
+	if realtimeAPI.Metadata == nil {
+		return "", errors.ErrorUnexpected("missing metadata from operator response")
+	}
+	if realtimeAPI.Status == nil {
+		return "", errors.ErrorUnexpected(fmt.Sprintf("missing status for %s api", realtimeAPI.Metadata.Name))
+	}
+
+	summary := realtimeAPIsTable([]schema.APIResponse{realtimeAPI}, []string{env.Name})
+	out := summary.MustFormat()
+
+	if url := realtimeAPI.DashboardURL; url != nil && *url != "" {
+		out += "\n" + console.Bold("metrics dashboard: ") + *url + "\n"
+	}
+	if endpoint := realtimeAPI.Endpoint; endpoint != nil {
+		out += "\n" + console.Bold("endpoint: ") + *endpoint + "\n"
+	}
+
+	replicas := replicaCountTable(realtimeAPI.Status.ReplicaCounts)
+	out += "\n" + replicas.MustFormat()
+
+	return out, nil
+}
+
 func realtimeAPIsTable(realtimeAPIs []schema.APIResponse, envNames []string) table.Table {
 	rows := make([][]interface{}, 0, len(realtimeAPIs))
 
diff --git a/cli/cmd/lib_task_apis.go b/cli/cmd/lib_task_apis.go
index cda53e18b8..49541aad77 100644
--- a/cli/cmd/lib_task_apis.go
+++ b/cli/cmd/lib_task_apis.go
@@ -181,22 +181,34 @@ func getTaskJob(env cliconfig.Environment, apiName string, jobID string) (string
 		if job.WorkerCounts != nil {
 			t := table.Table{
 				Headers: []table.Header{
-					{Title: "requested"},
-					{Title: "pending", Hidden: job.WorkerCounts.Pending == 0},
-					{Title: "initializing", Hidden: job.WorkerCounts.Initializing == 0},
-					{Title: "stalled", Hidden: job.WorkerCounts.Stalled == 0},
-					{Title: "running"},
-					{Title: "failed", Hidden: job.WorkerCounts.Failed == 0},
-					{Title: "succeeded"},
+					{Title: "Requested"},
+					{Title: "Pending", Hidden: job.WorkerCounts.Pending == 0},
+					{Title: "Creating", Hidden: job.WorkerCounts.Creating == 0},
+					{Title: "Ready"},
+					{Title: "NotReady"},
+					{Title: "ErrImagePull", Hidden: job.WorkerCounts.ErrImagePull == 0},
+					{Title: "Terminating", Hidden: job.WorkerCounts.Terminating == 0},
+					{Title: "Failed", Hidden: job.WorkerCounts.Failed == 0},
+					{Title: "Killed", Hidden: job.WorkerCounts.Killed == 0},
+					{Title: "KilledOOM", Hidden: job.WorkerCounts.KilledOOM == 0},
+					{Title: "Stalled", Hidden: job.WorkerCounts.Stalled == 0},
+					{Title: "Unknown", Hidden: job.WorkerCounts.Unknown == 0},
+					{Title: "Succeeded"},
 				},
 				Rows: [][]interface{}{
 					{
 						job.Workers,
 						job.WorkerCounts.Pending,
-						job.WorkerCounts.Initializing,
-						job.WorkerCounts.Stalled,
-						job.WorkerCounts.Running,
+						job.WorkerCounts.Creating,
+						job.WorkerCounts.Ready,
+						job.WorkerCounts.NotReady,
+						job.WorkerCounts.ErrImagePull,
+						job.WorkerCounts.Terminating,
 						job.WorkerCounts.Failed,
+						job.WorkerCounts.Killed,
+						job.WorkerCounts.KilledOOM,
+						job.WorkerCounts.Stalled,
+						job.WorkerCounts.Unknown,
 						job.WorkerCounts.Succeeded,
 					},
 				},
diff --git a/cli/cmd/lib_watch.go b/cli/cmd/lib_watch.go
index 06aebb26c2..a0f9043492 100644
--- a/cli/cmd/lib_watch.go
+++ b/cli/cmd/lib_watch.go
@@ -56,8 +56,8 @@ func watchHeader() string {
 	return fmt.Sprintf("$ %s  %s%s", _cmdStr, padding, libtime.LocalHourNow())
 }
 
-func rerun(f func() (string, error)) {
-	if _flagWatch {
+func rerun(watchFlag bool, f func() (string, error)) {
+	if watchFlag {
 		print("\033[H\033[2J") // clear the screen
 
 		var prevStrSlice []string
diff --git a/cli/cmd/root.go b/cli/cmd/root.go
index 68649c0cc1..8aa7d1e0e0 100644
--- a/cli/cmd/root.go
+++ b/cli/cmd/root.go
@@ -112,6 +112,7 @@ func init() {
 	clusterInit()
 	completionInit()
 	deleteInit()
+	describeInit()
 	deployInit()
 	envInit()
 	getInit()
@@ -154,6 +155,7 @@ func Execute() {
 
 	_rootCmd.AddCommand(_deployCmd)
 	_rootCmd.AddCommand(_getCmd)
+	_rootCmd.AddCommand(_describeCmd)
 	_rootCmd.AddCommand(_logsCmd)
 	_rootCmd.AddCommand(_refreshCmd)
 	_rootCmd.AddCommand(_deleteCmd)
diff --git a/cmd/operator/main.go b/cmd/operator/main.go
index bf5a50d33b..ac38ee7130 100644
--- a/cmd/operator/main.go
+++ b/cmd/operator/main.go
@@ -105,6 +105,7 @@ func main() {
 	routerWithAuth.HandleFunc("/get", endpoints.GetAPIs).Methods("GET")
 	routerWithAuth.HandleFunc("/get/{apiName}", endpoints.GetAPI).Methods("GET")
 	routerWithAuth.HandleFunc("/get/{apiName}/{apiID}", endpoints.GetAPIByID).Methods("GET")
+	routerWithAuth.HandleFunc("/describe/{apiName}", endpoints.DescribeAPI).Methods("GET")
 	routerWithAuth.HandleFunc("/streamlogs/{apiName}", endpoints.ReadLogs)
 	routerWithAuth.HandleFunc("/logs/{apiName}", endpoints.GetLogURL).Methods("GET")
 
diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go
index 7ea590fc45..3fe860d776 100644
--- a/pkg/consts/consts.go
+++ b/pkg/consts/consts.go
@@ -76,8 +76,7 @@ var (
 	CortexProbeHeader         = "X-Cortex-Probe"
 	CortexOriginHeader        = "X-Cortex-Origin"
 
-	WaitForInitializingReplicasTimeout = 15 * time.Minute
-	WaitForReadyReplicasTimeout        = 20 * time.Minute
+	WaitForReadyReplicasTimeout = 20 * time.Minute
 )
 
 func DefaultRegistry() string {
diff --git a/pkg/crds/config/crd/bases/batch.cortex.dev_batchjobs.yaml b/pkg/crds/config/crd/bases/batch.cortex.dev_batchjobs.yaml
index 63b1987bd9..a60ccbba4a 100644
--- a/pkg/crds/config/crd/bases/batch.cortex.dev_batchjobs.yaml
+++ b/pkg/crds/config/crd/bases/batch.cortex.dev_batchjobs.yaml
@@ -251,16 +251,28 @@ spec:
               worker_counts:
                 description: Detailed worker counts with respective status
                 properties:
+                  creating:
+                    format: int32
+                    type: integer
+                  err_image_pull:
+                    format: int32
+                    type: integer
                   failed:
                     format: int32
                     type: integer
-                  initializing:
+                  killed:
+                    format: int32
+                    type: integer
+                  killed_oom:
+                    format: int32
+                    type: integer
+                  not_ready:
                     format: int32
                     type: integer
                   pending:
                     format: int32
                     type: integer
-                  running:
+                  ready:
                     format: int32
                     type: integer
                   stalled:
@@ -269,6 +281,9 @@ spec:
                   succeeded:
                     format: int32
                     type: integer
+                  terminating:
+                    format: int32
+                    type: integer
                   unknown:
                     format: int32
                     type: integer
diff --git a/pkg/crds/controllers/batch/batchjob_controller_helpers.go b/pkg/crds/controllers/batch/batchjob_controller_helpers.go
index dd052dfc58..0785628452 100644
--- a/pkg/crds/controllers/batch/batchjob_controller_helpers.go
+++ b/pkg/crds/controllers/batch/batchjob_controller_helpers.go
@@ -442,6 +442,21 @@ func (r *BatchJobReconciler) getWorkerJob(ctx context.Context, batchJob batch.Ba
 	return &job, nil
 }
 
+// getWorkerJobPods lists the pods in consts.DefaultNamespace whose jobID, apiName and apiID labels match the batch job.
+func (r *BatchJobReconciler) getWorkerJobPods(ctx context.Context, batchJob batch.BatchJob) ([]kcore.Pod, error) {
+	selector := client.MatchingLabels{
+		"jobID":   batchJob.Name,
+		"apiName": batchJob.Spec.APIName,
+		"apiID":   batchJob.Spec.APIID,
+	}
+
+	var pods kcore.PodList
+	if err := r.List(ctx, &pods, client.InNamespace(consts.DefaultNamespace), selector); err != nil {
+		return nil, err
+	}
+	return pods.Items, nil
+}
+
 func (r *BatchJobReconciler) updateStatus(ctx context.Context, batchJob *batch.BatchJob, statusInfo batchJobStatusInfo) error {
 	batchJob.Status.ID = batchJob.Name
 
@@ -461,6 +476,11 @@ func (r *BatchJobReconciler) updateStatus(ctx context.Context, batchJob *batch.B
 		batchJob.Status.TotalBatchCount = statusInfo.TotalBatchCount
 	}
 
+	workerJobPods, err := r.getWorkerJobPods(ctx, *batchJob)
+	if err != nil {
+		return errors.Wrap(err, "failed to retrieve worker pods")
+	}
+
 	worker := statusInfo.WorkerJob
 	if worker != nil {
 		batchJob.Status.EndTime = worker.Status.CompletionTime // assign right away, because it's a pointer
@@ -486,13 +506,11 @@ func (r *BatchJobReconciler) updateStatus(ctx context.Context, batchJob *batch.B
 				}
 			}
 
-			isWorkerOOM, err := r.checkWorkersOOM(ctx, batchJob)
-			if err != nil {
-				return err
-			}
-
-			if isWorkerOOM {
-				batchJobStatus = status.JobWorkerOOM
+			for i := range workerJobPods {
+				if k8s.WasPodOOMKilled(&workerJobPods[i]) {
+					batchJobStatus = status.JobWorkerOOM
+					break
+				}
 			}
 
 			batchJob.Status.Status = batchJobStatus
@@ -512,11 +530,36 @@ func (r *BatchJobReconciler) updateStatus(ctx context.Context, batchJob *batch.B
 			batchJob.Status.Status = status.JobRunning
 		}
 
-		batchJob.Status.WorkerCounts = &status.WorkerCounts{
-			Running:   worker.Status.Active,
-			Succeeded: worker.Status.Succeeded,
-			Failed:    worker.Status.Failed,
+		// TODO move this to its own function
+		workerCounts := status.WorkerCounts{}
+		for i := range workerJobPods {
+			switch k8s.GetPodStatus(&workerJobPods[i]) {
+			case k8s.PodStatusPending:
+				workerCounts.Pending++
+			case k8s.PodStatusStalled:
+				workerCounts.Stalled++
+			case k8s.PodStatusCreating:
+				workerCounts.Creating++
+			case k8s.PodStatusNotReady:
+				workerCounts.NotReady++
+			case k8s.PodStatusErrImagePull:
+				workerCounts.ErrImagePull++
+			case k8s.PodStatusTerminating:
+				workerCounts.Terminating++
+			case k8s.PodStatusFailed:
+				workerCounts.Failed++
+			case k8s.PodStatusKilled:
+				workerCounts.Killed++
+			case k8s.PodStatusKilledOOM:
+				workerCounts.KilledOOM++
+			case k8s.PodStatusSucceeded:
+				workerCounts.Succeeded++
+			case k8s.PodStatusUnknown:
+				workerCounts.Unknown++
+			}
 		}
+
+		batchJob.Status.WorkerCounts = &workerCounts
 	}
 
 	if err := r.Status().Update(ctx, batchJob); err != nil {
@@ -526,27 +569,6 @@ func (r *BatchJobReconciler) updateStatus(ctx context.Context, batchJob *batch.B
 	return nil
 }
 
-func (r *BatchJobReconciler) checkWorkersOOM(ctx context.Context, batchJob *batch.BatchJob) (bool, error) {
-	workerJobPods := kcore.PodList{}
-	if err := r.List(ctx, &workerJobPods,
-		client.InNamespace(consts.DefaultNamespace),
-		client.MatchingLabels{
-			"jobID":   batchJob.Name,
-			"apiName": batchJob.Spec.APIName,
-			"apiID":   batchJob.Spec.APIID,
-		},
-	); err != nil {
-		return false, err
-	}
-
-	for i := range workerJobPods.Items {
-		if k8s.WasPodOOMKilled(&workerJobPods.Items[i]) {
-			return true, nil
-		}
-	}
-	return false, nil
-}
-
 func (r *BatchJobReconciler) deleteSQSQueue(batchJob batch.BatchJob) error {
 	queueURL := r.getQueueURL(batchJob)
 	input := sqs.DeleteQueueInput{QueueUrl: aws.String(queueURL)}
diff --git a/pkg/lib/k8s/pod.go b/pkg/lib/k8s/pod.go
index e841a7b8a8..2646783e88 100644
--- a/pkg/lib/k8s/pod.go
+++ b/pkg/lib/k8s/pod.go
@@ -23,6 +23,7 @@ import (
 	"time"
 
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
+	"github.com/cortexlabs/cortex/pkg/lib/pointer"
 	"github.com/cortexlabs/cortex/pkg/lib/sets/strset"
 	kcore "k8s.io/api/core/v1"
 	kerrors "k8s.io/apimachinery/pkg/api/errors"
@@ -45,37 +46,50 @@ const (
 	ReasonCompleted = "Completed"
 )
 
+type PodSpec struct {
+	Name        string
+	K8sPodSpec  kcore.PodSpec
+	Labels      map[string]string
+	Annotations map[string]string
+}
+
 type PodStatus string
 
 const (
-	PodStatusUnknown      PodStatus = "Unknown"
 	PodStatusPending      PodStatus = "Pending"
-	PodStatusInitializing PodStatus = "Initializing"
-	PodStatusRunning      PodStatus = "Running"
-	PodStatusErrImagePull PodStatus = "Image pull error"
+	PodStatusCreating     PodStatus = "Creating"
+	PodStatusNotReady     PodStatus = "NotReady"
+	PodStatusReady        PodStatus = "Ready"
+	PodStatusErrImagePull PodStatus = "ErrImagePull"
 	PodStatusTerminating  PodStatus = "Terminating"
-	PodStatusSucceeded    PodStatus = "Succeeded"
 	PodStatusFailed       PodStatus = "Failed"
 	PodStatusKilled       PodStatus = "Killed"
-	PodStatusKilledOOM    PodStatus = "Out of Memory"
+	PodStatusKilledOOM    PodStatus = "KilledOOM"
+	PodStatusStalled      PodStatus = "Stalled"
+
+	PodStatusSucceeded PodStatus = "Succeeded"
+
+	PodStatusUnknown PodStatus = "Unknown"
 )
 
-var _killStatuses = map[int32]bool{
-	137: true, // SIGKILL
-	143: true, // SIGTERM
-	130: true, // SIGINT
-	129: true, // SIGHUP
-}
+var (
+	_killStatuses = map[int32]bool{
+		137: true, // SIGKILL
+		143: true, // SIGTERM
+		130: true, // SIGINT
+		129: true, // SIGHUP
+	}
 
-// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/images/types.go#L27
-var _imagePullErrorStrings = strset.New("ErrImagePull", "ImagePullBackOff", "RegistryUnavailable")
+	_evictedMemoryMessageRegex = regexp.MustCompile(`(?i)low\W+on\W+resource\W+memory`)
 
-type PodSpec struct {
-	Name        string
-	K8sPodSpec  kcore.PodSpec
-	Labels      map[string]string
-	Annotations map[string]string
-}
+	// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/images/types.go#L27
+	_imagePullErrorStrings = strset.New("ErrImagePull", "ImagePullBackOff", "RegistryUnavailable")
+
+	// https://github.com/kubernetes/kubernetes/blob/9f47110aa29094ed2878cf1d85874cb59214664a/staging/src/k8s.io/api/core/v1/types.go#L76-L77
+	_creatingReasons = strset.New("ContainerCreating", "PodInitializing")
+
+	_waitForCreatingPodTimeout = time.Minute * 15
+)
 
 func Pod(spec *PodSpec) *kcore.Pod {
 	pod := &kcore.Pod{
@@ -90,6 +104,26 @@ func Pod(spec *PodSpec) *kcore.Pod {
 	return pod
 }
 
+// GetPodConditionOf returns the status of the pod's first condition of type podType as a bool pointer; the result is nil when the pod is nil, the condition is absent, or its status is neither True nor False.
+func GetPodConditionOf(pod *kcore.Pod, podType kcore.PodConditionType) *bool {
+	if pod == nil {
+		return nil
+	}
+	for _, cond := range pod.Status.Conditions {
+		if cond.Type != podType {
+			continue
+		}
+		switch cond.Status {
+		case kcore.ConditionTrue:
+			return pointer.Bool(true)
+		case kcore.ConditionFalse:
+			return pointer.Bool(false)
+		}
+		return nil
+	}
+	return nil
+}
+
 func (c *Client) CreatePod(pod *kcore.Pod) (*kcore.Pod, error) {
 	pod.TypeMeta = _podTypeMeta
 	pod, err := c.podClient.Create(context.Background(), pod, kmeta.CreateOptions{})
@@ -120,12 +154,28 @@ func (c *Client) ApplyPod(pod *kcore.Pod) (*kcore.Pod, error) {
 }
 
 func IsPodReady(pod *kcore.Pod) bool {
-	if GetPodStatus(pod) != PodStatusRunning {
+	if GetPodStatus(pod) != PodStatusReady {
+		return false
+	}
+
+	// TODO use the GetPodConditionOf func here
+	for _, condition := range pod.Status.Conditions {
+		if condition.Type == kcore.PodReady && condition.Status == kcore.ConditionTrue {
+			return true
+		}
+	}
+
+	return false
+}
+
+func IsPodStalled(pod *kcore.Pod) bool {
+	if GetPodStatus(pod) != PodStatusPending {
 		return false
 	}
 
+	// TODO use the GetPodConditionOf func here
 	for _, condition := range pod.Status.Conditions {
-		if condition.Type == "Ready" && condition.Status == kcore.ConditionTrue {
+		if condition.Type == kcore.PodScheduled && condition.Status == kcore.ConditionFalse && !condition.LastTransitionTime.Time.IsZero() && time.Since(condition.LastTransitionTime.Time) >= _waitForCreatingPodTimeout {
 			return true
 		}
 	}
@@ -137,7 +187,7 @@ func GetPodReadyTime(pod *kcore.Pod) *time.Time {
 	for i := range pod.Status.Conditions {
 		condition := pod.Status.Conditions[i]
 
-		if condition.Type == "Ready" && condition.Status == kcore.ConditionTrue {
+		if condition.Type == kcore.PodReady && condition.Status == kcore.ConditionTrue {
 			if condition.LastTransitionTime.Time.IsZero() {
 				return nil
 			}
@@ -148,8 +198,6 @@ func GetPodReadyTime(pod *kcore.Pod) *time.Time {
 	return nil
 }
 
-var _evictedMemoryMessageRegex = regexp.MustCompile(`(?i)low\W+on\W+resource\W+memory`)
-
 func WasPodOOMKilled(pod *kcore.Pod) bool {
 	if pod.Status.Reason == ReasonEvicted && _evictedMemoryMessageRegex.MatchString(pod.Status.Message) {
 		return true
@@ -176,15 +224,11 @@ func GetPodStatus(pod *kcore.Pod) PodStatus {
 
 	switch pod.Status.Phase {
 	case kcore.PodPending:
-		initPodStatus := PodStatusFromContainerStatuses(pod.Status.InitContainerStatuses)
-		if initPodStatus == PodStatusRunning {
-			return PodStatusInitializing
-		}
-		allPodStatus := PodStatusFromContainerStatuses(append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...))
-		if allPodStatus == PodStatusErrImagePull {
-			return PodStatusErrImagePull
+		podCondition := GetPodConditionOf(pod, kcore.PodScheduled)
+		if podCondition != nil && !*podCondition {
+			return PodStatusStalled
 		}
-		return PodStatusPending
+		return PodStatusFromContainerStatuses(append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...))
 	case kcore.PodSucceeded:
 		return PodStatusSucceeded
 	case kcore.PodFailed:
@@ -215,7 +259,17 @@ func GetPodStatus(pod *kcore.Pod) PodStatus {
 			return PodStatusTerminating
 		}
 
-		return PodStatusFromContainerStatuses(pod.Status.ContainerStatuses)
+		podCondition := GetPodConditionOf(pod, kcore.PodReady)
+		if podCondition != nil && *podCondition {
+			return PodStatusReady
+		}
+
+		status := PodStatusFromContainerStatuses(pod.Status.ContainerStatuses)
+		if status == PodStatusReady || status == PodStatusNotReady {
+			return PodStatusNotReady
+		}
+
+		return status
 	default:
 		return PodStatusUnknown
 	}
@@ -224,7 +278,9 @@ func GetPodStatus(pod *kcore.Pod) PodStatus {
 func PodStatusFromContainerStatuses(containerStatuses []kcore.ContainerStatus) PodStatus {
 	numContainers := len(containerStatuses)
 	numWaiting := 0
-	numRunning := 0
+	numCreating := 0
+	numNotReady := 0
+	numReady := 0
 	numSucceeded := 0
 	numFailed := 0
 	numKilled := 0
@@ -235,9 +291,9 @@ func PodStatusFromContainerStatuses(containerStatuses []kcore.ContainerStatus) P
 	}
 	for _, containerStatus := range containerStatuses {
 		if containerStatus.State.Running != nil && containerStatus.Ready {
-			numRunning++
-		} else if containerStatus.State.Running != nil && containerStatus.RestartCount == 0 {
-			numRunning++
+			numReady++
+		} else if containerStatus.State.Running != nil && !containerStatus.Ready {
+			numNotReady++
 		} else if containerStatus.State.Terminated != nil {
 			exitCode := containerStatus.State.Terminated.ExitCode
 			reason := containerStatus.State.Terminated.Reason
@@ -264,6 +320,8 @@ func PodStatusFromContainerStatuses(containerStatuses []kcore.ContainerStatus) P
 			}
 		} else if containerStatus.State.Waiting != nil && _imagePullErrorStrings.Has(containerStatus.State.Waiting.Reason) {
 			return PodStatusErrImagePull
+		} else if containerStatus.State.Waiting != nil && _creatingReasons.Has(containerStatus.State.Waiting.Reason) {
+			numCreating++
 		} else {
 			// either containerStatus.State.Waiting != nil or all containerStatus.States are nil (which implies waiting)
 			numWaiting++
@@ -279,8 +337,12 @@ func PodStatusFromContainerStatuses(containerStatuses []kcore.ContainerStatus) P
 		return PodStatusPending
 	} else if numSucceeded == numContainers {
 		return PodStatusSucceeded
+	} else if numCreating > 0 {
+		return PodStatusCreating
+	} else if numNotReady > 0 {
+		return PodStatusNotReady
 	} else {
-		return PodStatusRunning
+		return PodStatusReady
 	}
 }
 
diff --git a/pkg/operator/endpoints/describe.go b/pkg/operator/endpoints/describe.go
new file mode 100644
index 0000000000..b574d5eefc
--- /dev/null
+++ b/pkg/operator/endpoints/describe.go
@@ -0,0 +1,36 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package endpoints
+
+import (
+	"net/http"
+
+	"github.com/cortexlabs/cortex/pkg/operator/resources"
+	"github.com/gorilla/mux"
+)
+
+// DescribeAPI handles the /describe/{apiName} route: it responds with the result of
+// resources.DescribeAPI for the named API as JSON, or with the error that call returned.
+func DescribeAPI(w http.ResponseWriter, r *http.Request) {
+	response, err := resources.DescribeAPI(mux.Vars(r)["apiName"])
+	if err != nil {
+		respondError(w, r, err)
+		return
+	}
+
+	respondJSON(w, r, response)
+}
diff --git a/pkg/operator/operator/k8s.go b/pkg/operator/operator/k8s.go
index b85cb81b9e..43e36168c9 100644
--- a/pkg/operator/operator/k8s.go
+++ b/pkg/operator/operator/k8s.go
@@ -22,6 +22,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/config"
 	"github.com/cortexlabs/cortex/pkg/lib/urls"
 	"github.com/cortexlabs/cortex/pkg/types/spec"
+	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 )
 
 // APILoadBalancerURL returns the http endpoint of the ingress load balancer for deployed APIs
@@ -64,8 +65,12 @@ func APIEndpoint(api *spec.API) (string, error) {
 	return urls.Join(baseAPIEndpoint, *api.Networking.Endpoint), nil
 }
 
-func APIEndpointFromPath(apiNetworkingPath string) (string, error) {
-	var err error
+func APIEndpointFromResource(deployedResource *DeployedResource) (string, error) {
+	apiEndpoint, err := userconfig.EndpointFromAnnotation(deployedResource.VirtualService)
+	if err != nil {
+		return "", err
+	}
+
 	baseAPIEndpoint := ""
 
 	baseAPIEndpoint, err = APILoadBalancerURL()
@@ -74,5 +79,5 @@ func APIEndpointFromPath(apiNetworkingPath string) (string, error) {
 	}
 	baseAPIEndpoint = strings.Replace(baseAPIEndpoint, "https://", "http://", 1)
 
-	return urls.Join(baseAPIEndpoint, apiNetworkingPath), nil
+	return urls.Join(baseAPIEndpoint, apiEndpoint), nil
 }
diff --git a/pkg/operator/resources/asyncapi/api.go b/pkg/operator/resources/asyncapi/api.go
index 26553ecab8..9766811445 100644
--- a/pkg/operator/resources/asyncapi/api.go
+++ b/pkg/operator/resources/asyncapi/api.go
@@ -59,6 +59,12 @@ type resources struct {
 	gatewayVirtualService *istioclientnetworking.VirtualService
 }
 
+// asyncDeployments holds the API deployment and the gateway deployment that belong to one async API. TODO remove this
+type asyncDeployments struct {
+	APIDeployment     *kapps.Deployment
+	GatewayDeployment *kapps.Deployment
+}
+
 func getGatewayK8sName(apiName string) string {
 	return "gateway-" + apiName
 }
@@ -309,36 +315,87 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 		return nil, errors.ErrorUnexpected("unable to find gateway deployment", deployedResource.Name)
 	}
 
-	deployment, err := config.K8s.GetDeployment(workloads.K8sName(deployedResource.Name))
+	apiStatus := status.StatusFromDeployment(apiDeployment)
+	apiMetadata, err := spec.MetadataFromDeployment(apiDeployment)
+	if err != nil {
+		return nil, errors.ErrorUnexpected("unable to obtain metadata", deployedResource.Name)
+	}
+
+	api, err := operator.DownloadAPISpec(apiMetadata.Name, apiMetadata.APIID)
+	if err != nil {
+		return nil, err
+	}
+
+	apiEndpoint, err := operator.APIEndpoint(api)
+	if err != nil {
+		return nil, err
+	}
+
+	dashboardURL := pointer.String(getDashboardURL(api.Name))
+
+	return []schema.APIResponse{
+		{
+			Spec:         api,
+			Metadata:     apiMetadata,
+			Status:       apiStatus,
+			Endpoint:     &apiEndpoint,
+			DashboardURL: dashboardURL,
+		},
+	}, nil
+}
+
+func DescribeAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
+	var apiDeployment *kapps.Deployment
+	var gatewayDeployment *kapps.Deployment
+
+	err := parallel.RunFirstErr(
+		func() error {
+			var err error
+			apiDeployment, err = config.K8s.GetDeployment(workloads.K8sName(deployedResource.Name))
+			return err
+		},
+		func() error {
+			var err error
+			gatewayDeployment, err = config.K8s.GetDeployment(getGatewayK8sName(deployedResource.Name))
+			return err
+		},
+	)
 	if err != nil {
 		return nil, err
 	}
 
-	if deployment == nil {
-		return nil, errors.ErrorUnexpected("unable to find deployment", deployedResource.Name)
+	if apiDeployment == nil {
+		return nil, errors.ErrorUnexpected("unable to find api deployment", deployedResource.Name)
 	}
 
-	apiStatus := status.StatusFromDeployment(deployment)
-	apiMetadata, err := spec.MetadataFromDeployment(deployment)
+	if gatewayDeployment == nil {
+		return nil, errors.ErrorUnexpected("unable to find gateway deployment", deployedResource.Name)
+	}
+
+	apiStatus := status.StatusFromDeployment(apiDeployment)
+	apiMetadata, err := spec.MetadataFromDeployment(apiDeployment)
 	if err != nil {
 		return nil, errors.ErrorUnexpected("unable to obtain metadata", deployedResource.Name)
 	}
 
-	api, err := operator.DownloadAPISpec(apiMetadata.Name, apiMetadata.APIID)
+	apiPods, err := config.K8s.ListPodsByLabels(map[string]string{
+		"apiName":          apiDeployment.Labels["apiName"],
+		"cortex.dev/async": "api",
+	})
 	if err != nil {
 		return nil, err
 	}
+	apiStatus.ReplicaCounts = GetReplicaCounts(apiDeployment, apiPods)
 
-	apiEndpoint, err := operator.APIEndpoint(api)
+	apiEndpoint, err := operator.APIEndpointFromResource(deployedResource)
 	if err != nil {
 		return nil, err
 	}
 
-	dashboardURL := pointer.String(getDashboardURL(api.Name))
+	dashboardURL := pointer.String(getDashboardURL(deployedResource.Name))
 
 	return []schema.APIResponse{
 		{
-			Spec:         api,
 			Metadata:     apiMetadata,
 			Status:       apiStatus,
 			Endpoint:     &apiEndpoint,
@@ -584,6 +641,58 @@ func deleteK8sResources(apiName string) error {
 	return err
 }
 
+// groupDeploymentsByAPI groups deployments by their "apiName" label: a deployment whose
+// "cortex.dev/async" label is "api" becomes the group's APIDeployment, any other one its
+// GatewayDeployment. let's do CRDs instead, to avoid this
+func groupDeploymentsByAPI(deployments []kapps.Deployment) map[string]*asyncDeployments {
+	grouped := map[string]*asyncDeployments{}
+	for i := range deployments {
+		current := deployments[i] // fresh copy each iteration, so the stored pointers stay distinct
+		name := current.Labels["apiName"]
+
+		group, found := grouped[name]
+		if !found {
+			group = &asyncDeployments{}
+			grouped[name] = group
+		}
+
+		if current.Labels["cortex.dev/async"] == "api" {
+			group.APIDeployment = &current
+		} else {
+			group.GatewayDeployment = &current
+		}
+	}
+
+	return grouped
+}
+
+// isAPIUpdating returns true if fewer than min_replicas (read from the deployment's
+// autoscaling annotations) are ready and no replicas have failed, i.e. TotalFailed() is 0.
+// Replica counts are computed from the pods carrying the deployment's "apiName" label.
+func isAPIUpdating(deployment *kapps.Deployment) (bool, error) {
+	pods, err := config.K8s.ListPodsByLabel("apiName", deployment.Labels["apiName"])
+	if err != nil {
+		return false, err
+	}
+
+	counts := GetReplicaCounts(deployment, pods)
+
+	autoscaling, err := userconfig.AutoscalingFromAnnotations(deployment)
+	if err != nil {
+		return false, err
+	}
+
+	// failures are only consulted while the ready count is still below min_replicas
+	stillBelowMin := counts.Ready < autoscaling.MinReplicas
+	return stillBelowMin && counts.TotalFailed() == 0, nil
+}
+
+// isPodSpecLatest compares the "podID" and "deploymentID" labels of the deployment's pod template with the pod's. Note: the gateway deployment/pods don't have "podID" or "deploymentID" labels, which is ok since it is always up-to-date
+func isPodSpecLatest(deployment *kapps.Deployment, pod *kcore.Pod) bool {
+	want := deployment.Spec.Template.Labels
+	return want["podID"] == pod.Labels["podID"] && want["deploymentID"] == pod.Labels["deploymentID"]
+}
+
 func getDashboardURL(apiName string) string {
 	loadBalancerURL, err := operator.LoadBalancerURL()
 	if err != nil {
diff --git a/pkg/operator/resources/asyncapi/status.go b/pkg/operator/resources/asyncapi/status.go
index 48189a89cb..0268f26048 100644
--- a/pkg/operator/resources/asyncapi/status.go
+++ b/pkg/operator/resources/asyncapi/status.go
@@ -17,98 +17,13 @@ limitations under the License.
 package asyncapi
 
 import (
-	"time"
-
-	"github.com/cortexlabs/cortex/pkg/config"
-	"github.com/cortexlabs/cortex/pkg/consts"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/types/status"
-	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	kapps "k8s.io/api/apps/v1"
 	kcore "k8s.io/api/core/v1"
 )
 
-type asyncDeployments struct {
-	APIDeployment     *kapps.Deployment
-	GatewayDeployment *kapps.Deployment
-}
-
-// let's do CRDs instead, to avoid this
-func groupDeploymentsByAPI(deployments []kapps.Deployment) map[string]*asyncDeployments {
-	deploymentsByAPI := map[string]*asyncDeployments{}
-	for i := range deployments {
-		deployment := deployments[i]
-		apiName := deployment.Labels["apiName"]
-		asyncType := deployment.Labels["cortex.dev/async"]
-		apiResources, exists := deploymentsByAPI[apiName]
-		if exists {
-			if asyncType == "api" {
-				apiResources.APIDeployment = &deployment
-			} else {
-				apiResources.GatewayDeployment = &deployment
-			}
-		} else {
-			if asyncType == "api" {
-				deploymentsByAPI[apiName] = &asyncDeployments{APIDeployment: &deployment}
-			} else {
-				deploymentsByAPI[apiName] = &asyncDeployments{GatewayDeployment: &deployment}
-			}
-		}
-	}
-	return deploymentsByAPI
-}
-
-func getStatusCode(apiCounts status.ReplicaCounts, gatewayCounts status.ReplicaCounts, apiMinReplicas int32) status.Code {
-	if apiCounts.Updated.Ready >= apiCounts.Requested && gatewayCounts.Updated.Ready >= 1 {
-		return status.Live
-	}
-
-	if apiCounts.Updated.ErrImagePull > 0 || gatewayCounts.Updated.ErrImagePull > 0 {
-		return status.ErrorImagePull
-	}
-
-	if apiCounts.Updated.Failed > 0 || apiCounts.Updated.Killed > 0 ||
-		gatewayCounts.Updated.Failed > 0 || gatewayCounts.Updated.Killed > 0 {
-		return status.Error
-	}
-
-	if apiCounts.Updated.KilledOOM > 0 || gatewayCounts.Updated.KilledOOM > 0 {
-		return status.OOM
-	}
-
-	if apiCounts.Updated.Stalled > 0 || gatewayCounts.Updated.Stalled > 0 {
-		return status.Stalled
-	}
-
-	if apiCounts.Updated.Ready >= apiMinReplicas && gatewayCounts.Updated.Ready >= 1 {
-		return status.Live
-	}
-
-	return status.Updating
-}
-
-// returns true if min_replicas are not ready and no updated replicas have errored
-func isAPIUpdating(deployment *kapps.Deployment) (bool, error) {
-	pods, err := config.K8s.ListPodsByLabel("apiName", deployment.Labels["apiName"])
-	if err != nil {
-		return false, err
-	}
-
-	replicaCounts := getReplicaCounts(deployment, pods)
-
-	autoscalingSpec, err := userconfig.AutoscalingFromAnnotations(deployment)
-	if err != nil {
-		return false, err
-	}
-
-	if replicaCounts.Updated.Ready < autoscalingSpec.MinReplicas && replicaCounts.Updated.TotalFailed() == 0 {
-		return true, nil
-	}
-
-	return false, nil
-}
-
-func getReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) status.ReplicaCounts {
+func GetReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) *status.ReplicaCounts {
 	counts := status.ReplicaCounts{}
 	counts.Requested = *deployment.Spec.Replicas
 
@@ -121,50 +36,44 @@ func getReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) status.Rep
 		addPodToReplicaCounts(&pod, deployment, &counts)
 	}
 
-	return counts
+	return &counts
 }
 
 func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts *status.ReplicaCounts) {
-	var subCounts *status.SubReplicaCounts
+	latest := false
 	if isPodSpecLatest(deployment, pod) {
-		subCounts = &counts.Updated
-	} else {
-		subCounts = &counts.Stale
+		latest = true
 	}
 
-	if k8s.IsPodReady(pod) {
-		subCounts.Ready++
+	isPodReady := k8s.IsPodReady(pod)
+	if latest && isPodReady {
+		counts.Ready++
+		return
+	} else if !latest && isPodReady {
+		counts.ReadyOutOfDate++
 		return
 	}
 
 	switch k8s.GetPodStatus(pod) {
 	case k8s.PodStatusPending:
-		if time.Since(pod.CreationTimestamp.Time) > consts.WaitForInitializingReplicasTimeout {
-			subCounts.Stalled++
-		} else {
-			subCounts.Pending++
-		}
-	case k8s.PodStatusInitializing:
-		subCounts.Initializing++
-	case k8s.PodStatusRunning:
-		subCounts.Initializing++
+		counts.Pending++
+	case k8s.PodStatusStalled:
+		counts.Stalled++
+	case k8s.PodStatusCreating:
+		counts.Creating++
+	case k8s.PodStatusReady:
+		counts.Creating++
 	case k8s.PodStatusErrImagePull:
-		subCounts.ErrImagePull++
+		counts.ErrImagePull++
 	case k8s.PodStatusTerminating:
-		subCounts.Terminating++
+		counts.Terminating++
 	case k8s.PodStatusFailed:
-		subCounts.Failed++
+		counts.Failed++
 	case k8s.PodStatusKilled:
-		subCounts.Killed++
+		counts.Killed++
 	case k8s.PodStatusKilledOOM:
-		subCounts.KilledOOM++
+		counts.KilledOOM++
 	default:
-		subCounts.Unknown++
+		counts.Unknown++
 	}
 }
-
-func isPodSpecLatest(deployment *kapps.Deployment, pod *kcore.Pod) bool {
-	// Note: the gateway deployment/pods don't have "podID" or "deploymentID" labels, which is ok since it is always up-to-date
-	return deployment.Spec.Template.Labels["podID"] == pod.Labels["podID"] &&
-		deployment.Spec.Template.Labels["deploymentID"] == pod.Labels["deploymentID"]
-}
diff --git a/pkg/operator/resources/job/worker_stats.go b/pkg/operator/resources/job/worker_stats.go
index 07628995e4..797d65980e 100644
--- a/pkg/operator/resources/job/worker_stats.go
+++ b/pkg/operator/resources/job/worker_stats.go
@@ -17,9 +17,6 @@ limitations under the License.
 package job
 
 import (
-	"time"
-
-	"github.com/cortexlabs/cortex/pkg/consts"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/types/status"
 	kbatch "k8s.io/api/batch/v1"
@@ -43,34 +40,32 @@ func GetWorkerCountsForJob(k8sJob kbatch.Job, pods []kcore.Pod) status.WorkerCou
 
 func addPodToWorkerCounts(pod *kcore.Pod, workerCounts *status.WorkerCounts) {
 	if k8s.IsPodReady(pod) {
-		workerCounts.Running++
+		workerCounts.Ready++
 		return
 	}
 
 	switch k8s.GetPodStatus(pod) {
 	case k8s.PodStatusPending:
-		if time.Since(pod.CreationTimestamp.Time) > consts.WaitForInitializingReplicasTimeout {
-			workerCounts.Stalled++
-		} else {
-			workerCounts.Pending++
-		}
-	case k8s.PodStatusInitializing:
-		workerCounts.Initializing++
-	case k8s.PodStatusRunning:
-		workerCounts.Initializing++
+		workerCounts.Pending++
+	case k8s.PodStatusStalled:
+		workerCounts.Stalled++
+	case k8s.PodStatusCreating:
+		workerCounts.Creating++
+	case k8s.PodStatusNotReady:
+		workerCounts.NotReady++
 	case k8s.PodStatusErrImagePull:
-		workerCounts.Failed++
+		workerCounts.ErrImagePull++
 	case k8s.PodStatusTerminating:
-		workerCounts.Failed++
+		workerCounts.Terminating++
 	case k8s.PodStatusFailed:
 		workerCounts.Failed++
 	case k8s.PodStatusKilled:
-		workerCounts.Failed++
+		workerCounts.Killed++
 	case k8s.PodStatusKilledOOM:
-		workerCounts.Failed++
+		workerCounts.KilledOOM++
 	case k8s.PodStatusSucceeded:
 		workerCounts.Succeeded++
-	default:
+	case k8s.PodStatusUnknown:
 		workerCounts.Unknown++
 	}
 }
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index d787f5c99d..b7df138a30 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -242,6 +242,45 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 	}, nil
 }
 
+func DescribeAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResponse, error) {
+	deployment, err := config.K8s.GetDeployment(workloads.K8sName(deployedResource.Name))
+	if err != nil {
+		return nil, err
+	}
+
+	if deployment == nil {
+		return nil, errors.ErrorUnexpected("unable to find deployment", deployedResource.Name)
+	}
+
+	apiStatus := status.StatusFromDeployment(deployment)
+	apiMetadata, err := spec.MetadataFromDeployment(deployment)
+	if err != nil {
+		return nil, errors.ErrorUnexpected("unable to obtain metadata", deployedResource.Name)
+	}
+
+	pods, err := config.K8s.ListPodsByLabel("apiName", deployment.Labels["apiName"])
+	if err != nil {
+		return nil, err
+	}
+	apiStatus.ReplicaCounts = GetReplicaCounts(deployment, pods)
+
+	apiEndpoint, err := operator.APIEndpointFromResource(deployedResource)
+	if err != nil {
+		return nil, err
+	}
+
+	dashboardURL := pointer.String(getDashboardURL(deployedResource.Name))
+
+	return []schema.APIResponse{
+		{
+			Metadata:     apiMetadata,
+			Status:       apiStatus,
+			Endpoint:     &apiEndpoint,
+			DashboardURL: dashboardURL,
+		},
+	}, nil
+}
+
 func getK8sResources(apiName string) (*kapps.Deployment, *kcore.Service, *istioclientnetworking.VirtualService, error) {
 	var deployment *kapps.Deployment
 	var service *kcore.Service
@@ -360,14 +399,14 @@ func isAPIUpdating(deployment *kapps.Deployment) (bool, error) {
 		return false, err
 	}
 
-	replicaCounts := getReplicaCounts(deployment, pods)
+	replicaCounts := GetReplicaCounts(deployment, pods)
 
 	autoscalingSpec, err := userconfig.AutoscalingFromAnnotations(deployment)
 	if err != nil {
 		return false, err
 	}
 
-	if replicaCounts.Updated.Ready < autoscalingSpec.MinReplicas && replicaCounts.Updated.TotalFailed() == 0 {
+	if replicaCounts.Ready < autoscalingSpec.MinReplicas && replicaCounts.TotalFailed() == 0 {
 		return true, nil
 	}
 
diff --git a/pkg/operator/resources/realtimeapi/status.go b/pkg/operator/resources/realtimeapi/status.go
index 9952ccd9ca..d021cc19f9 100644
--- a/pkg/operator/resources/realtimeapi/status.go
+++ b/pkg/operator/resources/realtimeapi/status.go
@@ -17,16 +17,13 @@ limitations under the License.
 package realtimeapi
 
 import (
-	"time"
-
-	"github.com/cortexlabs/cortex/pkg/consts"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/types/status"
 	kapps "k8s.io/api/apps/v1"
 	kcore "k8s.io/api/core/v1"
 )
 
-func getReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) status.ReplicaCounts {
+func GetReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) *status.ReplicaCounts {
 	counts := status.ReplicaCounts{}
 	counts.Requested = *deployment.Spec.Replicas
 
@@ -38,72 +35,44 @@ func getReplicaCounts(deployment *kapps.Deployment, pods []kcore.Pod) status.Rep
 		addPodToReplicaCounts(&pods[i], deployment, &counts)
 	}
 
-	return counts
+	return &counts
 }
 
 func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts *status.ReplicaCounts) {
-	var subCounts *status.SubReplicaCounts
+	latest := false
 	if isPodSpecLatest(deployment, pod) {
-		subCounts = &counts.Updated
-	} else {
-		subCounts = &counts.Stale
+		latest = true
 	}
 
-	if k8s.IsPodReady(pod) {
-		subCounts.Ready++
+	isPodReady := k8s.IsPodReady(pod)
+	if latest && isPodReady {
+		counts.Ready++
+		return
+	} else if !latest && isPodReady {
+		counts.ReadyOutOfDate++
 		return
 	}
 
 	switch k8s.GetPodStatus(pod) {
 	case k8s.PodStatusPending:
-		if time.Since(pod.CreationTimestamp.Time) > consts.WaitForInitializingReplicasTimeout {
-			subCounts.Stalled++
-		} else {
-			subCounts.Pending++
-		}
-	case k8s.PodStatusInitializing:
-		subCounts.Initializing++
-	case k8s.PodStatusRunning:
-		subCounts.Initializing++
+		counts.Pending++
+	case k8s.PodStatusStalled:
+		counts.Stalled++
+	case k8s.PodStatusCreating:
+		counts.Creating++
+	case k8s.PodStatusReady:
+		counts.Creating++
 	case k8s.PodStatusErrImagePull:
-		subCounts.ErrImagePull++
+		counts.ErrImagePull++
 	case k8s.PodStatusTerminating:
-		subCounts.Terminating++
+		counts.Terminating++
 	case k8s.PodStatusFailed:
-		subCounts.Failed++
+		counts.Failed++
 	case k8s.PodStatusKilled:
-		subCounts.Killed++
+		counts.Killed++
 	case k8s.PodStatusKilledOOM:
-		subCounts.KilledOOM++
+		counts.KilledOOM++
 	default:
-		subCounts.Unknown++
+		counts.Unknown++
 	}
 }
-
-func getStatusCode(counts *status.ReplicaCounts, minReplicas int32) status.Code {
-	if counts.Updated.Ready >= counts.Requested {
-		return status.Live
-	}
-
-	if counts.Updated.ErrImagePull > 0 {
-		return status.ErrorImagePull
-	}
-
-	if counts.Updated.Failed > 0 || counts.Updated.Killed > 0 {
-		return status.Error
-	}
-
-	if counts.Updated.KilledOOM > 0 {
-		return status.OOM
-	}
-
-	if counts.Updated.Stalled > 0 {
-		return status.Stalled
-	}
-
-	if counts.Updated.Ready >= minReplicas {
-		return status.Live
-	}
-
-	return status.Updating
-}
diff --git a/pkg/operator/resources/resources.go b/pkg/operator/resources/resources.go
index 5350f99e47..445571ad25 100644
--- a/pkg/operator/resources/resources.go
+++ b/pkg/operator/resources/resources.go
@@ -308,9 +308,6 @@ func GetAPIs() ([]schema.APIResponse, error) {
 		}
 	}
 
-	fmt.Println("realtimeAPIDeployments", len(realtimeAPIDeployments))
-	fmt.Println("asyncAPIDeployments", len(asyncAPIDeployments))
-
 	var batchAPIVirtualServices []istioclientnetworking.VirtualService
 	var taskAPIVirtualServices []istioclientnetworking.VirtualService
 	var trafficSplitterVirtualServices []istioclientnetworking.VirtualService
@@ -486,3 +483,33 @@ func checkIfUsedByTrafficSplitter(apiName string) error {
 	}
 	return nil
 }
+
+func DescribeAPI(apiName string) ([]schema.APIResponse, error) {
+	deployedResource, err := GetDeployedResourceByName(apiName)
+	if err != nil {
+		return nil, err
+	}
+
+	var apiResponse []schema.APIResponse
+
+	switch deployedResource.Kind {
+	case userconfig.RealtimeAPIKind:
+		apiResponse, err = realtimeapi.DescribeAPIByName(deployedResource)
+		if err != nil {
+			return nil, err
+		}
+	case userconfig.AsyncAPIKind:
+		apiResponse, err = asyncapi.DescribeAPIByName(deployedResource)
+		if err != nil {
+			return nil, err
+		}
+	default:
+		return nil, ErrorOperationIsOnlySupportedForKind(
+			*deployedResource,
+			userconfig.RealtimeAPIKind,
+			userconfig.AsyncAPIKind,
+		) // unexpected
+	}
+
+	return apiResponse, nil
+}
diff --git a/pkg/types/status/code.go b/pkg/types/status/code.go
deleted file mode 100644
index 41a8a13d91..0000000000
--- a/pkg/types/status/code.go
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
-Copyright 2021 Cortex Labs, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package status
-
-type Code int
-
-const (
-	Unknown Code = iota
-	Stalled
-	Error
-	ErrorImagePull
-	OOM
-	Live
-	Updating
-)
-
-var _codes = []string{
-	"status_unknown",
-	"status_stalled",
-	"status_error",
-	"status_error_image_pull",
-	"status_oom",
-	"status_live",
-	"status_updating",
-}
-
-var _ = [1]int{}[int(Updating)-(len(_codes)-1)] // Ensure list length matches
-
-var _codeMessages = []string{
-	"unknown",               // Unknown
-	"compute unavailable",   // Stalled
-	"error",                 // Error
-	"error (image pull)",    // Live
-	"error (out of memory)", // OOM
-	"live",                  // Live
-	"updating",              // Updating
-}
-
-var _ = [1]int{}[int(Updating)-(len(_codeMessages)-1)] // Ensure list length matches
-
-func (code Code) String() string {
-	if int(code) < 0 || int(code) >= len(_codes) {
-		return _codes[Unknown]
-	}
-	return _codes[code]
-}
-
-func (code Code) Message() string {
-	if int(code) < 0 || int(code) >= len(_codeMessages) {
-		return _codeMessages[Unknown]
-	}
-	return _codeMessages[code]
-}
-
-// MarshalText satisfies TextMarshaler
-func (code Code) MarshalText() ([]byte, error) {
-	return []byte(code.String()), nil
-}
-
-// UnmarshalText satisfies TextUnmarshaler
-func (code *Code) UnmarshalText(text []byte) error {
-	enum := string(text)
-	for i := 0; i < len(_codes); i++ {
-		if enum == _codes[i] {
-			*code = Code(i)
-			return nil
-		}
-	}
-
-	*code = Unknown
-	return nil
-}
-
-// UnmarshalBinary satisfies BinaryUnmarshaler
-// Needed for msgpack
-func (code *Code) UnmarshalBinary(data []byte) error {
-	return code.UnmarshalText(data)
-}
-
-// MarshalBinary satisfies BinaryMarshaler
-func (code Code) MarshalBinary() ([]byte, error) {
-	return []byte(code.String()), nil
-}
diff --git a/pkg/types/status/status.go b/pkg/types/status/status.go
index 8a6a4dd160..e3faba9e9f 100644
--- a/pkg/types/status/status.go
+++ b/pkg/types/status/status.go
@@ -21,37 +21,66 @@ import (
 )
 
 type Status struct {
-	Ready     int32 `json:"ready"`
-	Requested int32 `json:"requested"`
-	UpToDate  int32 `json:"up_to_date"`
+	Ready         int32          `json:"ready"`
+	Requested     int32          `json:"requested"`
+	UpToDate      int32          `json:"up_to_date"`
+	ReplicaCounts *ReplicaCounts `json:"replica_counts,omitempty"`
 }
 
-type ReplicaCounts struct {
-	Updated   SubReplicaCounts `json:"updated"`
-	Stale     SubReplicaCounts `json:"stale"`
-	Requested int32            `json:"requested"`
+type ReplicaCountType string
+
+const (
+	ReplicaCountRequested      ReplicaCountType = "Requested"
+	ReplicaCountPending        ReplicaCountType = "Pending"
+	ReplicaCountCreating       ReplicaCountType = "Creating"
+	ReplicaCountNotReady       ReplicaCountType = "NotReady"
+	ReplicaCountReady          ReplicaCountType = "Ready"
+	ReplicaCountReadyOutOfDate ReplicaCountType = "ReadyOutOfDate"
+	ReplicaCountErrImagePull   ReplicaCountType = "ErrImagePull"
+	ReplicaCountTerminating    ReplicaCountType = "Terminating"
+	ReplicaCountFailed         ReplicaCountType = "Failed"
+	ReplicaCountKilled         ReplicaCountType = "Killed"
+	ReplicaCountKilledOOM      ReplicaCountType = "KilledOOM"
+	ReplicaCountStalled        ReplicaCountType = "Stalled"
+	ReplicaCountUnknown        ReplicaCountType = "Unknown"
+)
+
+var ReplicaCountTypes []ReplicaCountType = []ReplicaCountType{
+	ReplicaCountRequested, ReplicaCountPending, ReplicaCountCreating,
+	ReplicaCountNotReady, ReplicaCountReady, ReplicaCountReadyOutOfDate,
+	ReplicaCountErrImagePull, ReplicaCountTerminating, ReplicaCountFailed,
+	ReplicaCountKilled, ReplicaCountKilledOOM, ReplicaCountStalled,
+	ReplicaCountUnknown,
 }
 
-type SubReplicaCounts struct {
-	Pending      int32 `json:"pending"`
-	Initializing int32 `json:"initializing"`
-	Ready        int32 `json:"ready"`
-	ErrImagePull int32 `json:"err_image_pull"`
-	Terminating  int32 `json:"terminating"`
-	Failed       int32 `json:"failed"`
-	Killed       int32 `json:"killed"`
-	KilledOOM    int32 `json:"killed_oom"`
-	Stalled      int32 `json:"stalled"` // pending for a long time
-	Unknown      int32 `json:"unknown"`
+type ReplicaCounts struct {
+	Requested      int32 `json:"requested"`
+	Pending        int32 `json:"pending"`
+	Creating       int32 `json:"creating"`
+	NotReady       int32 `json:"not_ready"`
+	Ready          int32 `json:"ready"`
+	ReadyOutOfDate int32 `json:"ready_out_of_date"`
+	ErrImagePull   int32 `json:"err_image_pull"`
+	Terminating    int32 `json:"terminating"`
+	Failed         int32 `json:"failed"`
+	Killed         int32 `json:"killed"`
+	KilledOOM      int32 `json:"killed_oom"`
+	Stalled        int32 `json:"stalled"` // pending for a long time
+	Unknown        int32 `json:"unknown"`
 }
 
 // Worker counts don't have as many failure variations because Jobs clean up dead pods, so counting different failure scenarios isn't interesting
 type WorkerCounts struct {
 	Pending      int32 `json:"pending,omitempty"`
-	Initializing int32 `json:"initializing,omitempty"`
-	Running      int32 `json:"running,omitempty"`
+	Creating     int32 `json:"creating,omitempty"`
+	NotReady     int32 `json:"not_ready,omitempty"`
+	Ready        int32 `json:"ready,omitempty"`
 	Succeeded    int32 `json:"succeeded,omitempty"`
+	ErrImagePull int32 `json:"err_image_pull,omitempty"`
+	Terminating  int32 `json:"terminating,omitempty"`
 	Failed       int32 `json:"failed,omitempty"`
+	Killed       int32 `json:"killed,omitempty"`
+	KilledOOM    int32 `json:"killed_oom,omitempty"`
 	Stalled      int32 `json:"stalled,omitempty"` // pending for a long time
 	Unknown      int32 `json:"unknown,omitempty"`
 }
@@ -64,6 +93,36 @@ func StatusFromDeployment(deployment *kapps.Deployment) *Status {
 	}
 }
 
-func (src *SubReplicaCounts) TotalFailed() int32 {
-	return src.Failed + src.ErrImagePull + src.Killed + src.KilledOOM + src.Stalled
+func (counts *ReplicaCounts) GetCountBy(replicaType ReplicaCountType) int32 {
+	switch replicaType {
+	case ReplicaCountRequested:
+		return counts.Requested
+	case ReplicaCountPending:
+		return counts.Pending
+	case ReplicaCountCreating:
+		return counts.Creating
+	case ReplicaCountNotReady:
+		return counts.NotReady
+	case ReplicaCountReady:
+		return counts.Ready
+	case ReplicaCountReadyOutOfDate:
+		return counts.ReadyOutOfDate
+	case ReplicaCountErrImagePull:
+		return counts.ErrImagePull
+	case ReplicaCountTerminating:
+		return counts.Terminating
+	case ReplicaCountFailed:
+		return counts.Failed
+	case ReplicaCountKilled:
+		return counts.Killed
+	case ReplicaCountKilledOOM:
+		return counts.KilledOOM
+	case ReplicaCountStalled:
+		return counts.Stalled
+	}
+	return counts.Unknown
+}
+
+func (counts *ReplicaCounts) TotalFailed() int32 {
+	return counts.ErrImagePull + counts.Failed + counts.Killed + counts.KilledOOM + counts.Unknown
 }
diff --git a/pkg/types/userconfig/api.go b/pkg/types/userconfig/api.go
index 1872187f8c..d1e9f65f7b 100644
--- a/pkg/types/userconfig/api.go
+++ b/pkg/types/userconfig/api.go
@@ -257,6 +257,14 @@ func TrafficSplitterTargetsFromAnnotations(k8sObj kmeta.Object) (int32, error) {
 	return targets, nil
 }
 
+func EndpointFromAnnotation(k8sObj kmeta.Object) (string, error) {
+	endpoint, err := k8s.GetAnnotation(k8sObj, EndpointAnnotationKey)
+	if err != nil {
+		return "", err
+	}
+	return endpoint, nil
+}
+
 func (api *API) UserStr() string {
 	var sb strings.Builder
 	sb.WriteString(fmt.Sprintf("%s: %s\n", NameKey, api.Name))

From fb1512205f6c5f4a07a6e18d8051c40956d0ae2f Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 24 Jul 2021 01:32:50 +0300
Subject: [PATCH 28/40] Fixes

---
 cli/cluster/delete.go                         |  4 +-
 cli/cmd/describe.go                           |  2 +-
 cli/cmd/get.go                                |  2 +-
 cli/cmd/lib_batch_apis.go                     |  4 +-
 cli/cmd/lib_task_apis.go                      |  4 +-
 pkg/lib/k8s/pod.go                            | 32 +++++++++-------
 pkg/operator/resources/asyncapi/api.go        | 37 ++-----------------
 pkg/operator/resources/asyncapi/status.go     |  8 +++-
 pkg/operator/resources/job/batchapi/api.go    |  6 +--
 pkg/operator/resources/job/taskapi/api.go     |  6 +--
 pkg/operator/resources/realtimeapi/api.go     |  6 +--
 pkg/operator/resources/realtimeapi/status.go  |  8 +++-
 pkg/operator/resources/trafficsplitter/api.go |  8 ++--
 pkg/types/status/status.go                    |  2 +-
 14 files changed, 57 insertions(+), 72 deletions(-)

diff --git a/cli/cluster/delete.go b/cli/cluster/delete.go
index 7b1d96d86d..47618b304b 100644
--- a/cli/cluster/delete.go
+++ b/cli/cluster/delete.go
@@ -22,6 +22,7 @@ import (
 
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/json"
+	"github.com/cortexlabs/cortex/pkg/lib/pointer"
 	"github.com/cortexlabs/cortex/pkg/lib/prompt"
 	s "github.com/cortexlabs/cortex/pkg/lib/strings"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
@@ -70,8 +71,7 @@ func getReadyRealtimeAPIReplicasOrNil(operatorConfig OperatorConfig, apiName str
 		return nil
 	}
 
-	totalReady := apiRes.Status.Ready
-	return &totalReady
+	return pointer.Int32(apiRes.Status.Ready)
 }
 
 func StopJob(operatorConfig OperatorConfig, kind userconfig.Kind, apiName string, jobID string) (schema.DeleteResponse, error) {
diff --git a/cli/cmd/describe.go b/cli/cmd/describe.go
index be23ef6532..767045c5a2 100644
--- a/cli/cmd/describe.go
+++ b/cli/cmd/describe.go
@@ -97,7 +97,7 @@ func describeAPI(env cliconfig.Environment, apiName string) (string, error) {
 	}
 
 	if len(apisRes) == 0 {
-		exit.Error(errors.ErrorUnexpected(fmt.Sprintf("unable to find API %s", apiName)))
+		exit.Error(errors.ErrorUnexpected(fmt.Sprintf("unable to find api %s", apiName)))
 	}
 
 	apiRes := apisRes[0]
diff --git a/cli/cmd/get.go b/cli/cmd/get.go
index e0083dfa99..0fac23d076 100644
--- a/cli/cmd/get.go
+++ b/cli/cmd/get.go
@@ -464,7 +464,7 @@ func getAPI(env cliconfig.Environment, apiName string) (string, error) {
 	}
 
 	if len(apisRes) == 0 {
-		exit.Error(errors.ErrorUnexpected(fmt.Sprintf("unable to find API %s", apiName)))
+		exit.Error(errors.ErrorUnexpected(fmt.Sprintf("unable to find api %s", apiName)))
 	}
 
 	apiRes := apisRes[0]
diff --git a/cli/cmd/lib_batch_apis.go b/cli/cmd/lib_batch_apis.go
index 5cebcdd2ba..17cdb1301c 100644
--- a/cli/cmd/lib_batch_apis.go
+++ b/cli/cmd/lib_batch_apis.go
@@ -222,8 +222,8 @@ func getBatchJob(env cliconfig.Environment, apiName string, jobID string) (strin
 			t := table.Table{
 				Headers: []table.Header{
 					{Title: "Requested"},
-					{Title: "Pending", Hidden: job.WorkerCounts.Pending == 0},
-					{Title: "Creating", Hidden: job.WorkerCounts.Creating == 0},
+					{Title: "Pending"},
+					{Title: "Creating"},
 					{Title: "Ready"},
 					{Title: "NotReady"},
 					{Title: "ErrImagePull", Hidden: job.WorkerCounts.ErrImagePull == 0},
diff --git a/cli/cmd/lib_task_apis.go b/cli/cmd/lib_task_apis.go
index 49541aad77..6c3d2b8383 100644
--- a/cli/cmd/lib_task_apis.go
+++ b/cli/cmd/lib_task_apis.go
@@ -182,8 +182,8 @@ func getTaskJob(env cliconfig.Environment, apiName string, jobID string) (string
 			t := table.Table{
 				Headers: []table.Header{
 					{Title: "Requested"},
-					{Title: "Pending", Hidden: job.WorkerCounts.Pending == 0},
-					{Title: "Creating", Hidden: job.WorkerCounts.Creating == 0},
+					{Title: "Pending"},
+					{Title: "Creating"},
 					{Title: "Ready"},
 					{Title: "NotReady"},
 					{Title: "ErrImagePull", Hidden: job.WorkerCounts.ErrImagePull == 0},
diff --git a/pkg/lib/k8s/pod.go b/pkg/lib/k8s/pod.go
index 2646783e88..0d3e0125bd 100644
--- a/pkg/lib/k8s/pod.go
+++ b/pkg/lib/k8s/pod.go
@@ -19,6 +19,7 @@ package k8s
 import (
 	"bytes"
 	"context"
+	"fmt"
 	"regexp"
 	"time"
 
@@ -104,24 +105,26 @@ func Pod(spec *PodSpec) *kcore.Pod {
 	return pod
 }
 
-func GetPodConditionOf(pod *kcore.Pod, podType kcore.PodConditionType) *bool {
+func GetPodConditionOf(pod *kcore.Pod, podType kcore.PodConditionType) (*bool, *kcore.PodCondition) {
 	if pod == nil {
-		return nil
+		return nil, nil
 	}
 
-	var condition *bool
-	for _, podCondition := range pod.Status.Conditions {
-		if podCondition.Type == podType {
-			if podCondition.Status == kcore.ConditionTrue {
-				condition = pointer.Bool(true)
+	var conditionState *bool
+	var condition *kcore.PodCondition
+	for i := range pod.Status.Conditions {
+		if pod.Status.Conditions[i].Type == podType {
+			if pod.Status.Conditions[i].Status == kcore.ConditionTrue {
+				conditionState = pointer.Bool(true)
 			}
-			if podCondition.Status == kcore.ConditionFalse {
-				condition = pointer.Bool(false)
+			if pod.Status.Conditions[i].Status == kcore.ConditionFalse {
+				conditionState = pointer.Bool(false)
 			}
+			condition = &pod.Status.Conditions[i]
 			break
 		}
 	}
-	return condition
+	return conditionState, condition
 }
 
 func (c *Client) CreatePod(pod *kcore.Pod) (*kcore.Pod, error) {
@@ -176,6 +179,7 @@ func IsPodStalled(pod *kcore.Pod) bool {
 	// TODO use the GetPodConditionOf func here
 	for _, condition := range pod.Status.Conditions {
 		if condition.Type == kcore.PodScheduled && condition.Status == kcore.ConditionFalse && !condition.LastTransitionTime.Time.IsZero() && time.Since(condition.LastTransitionTime.Time) >= _waitForCreatingPodTimeout {
+			fmt.Println(time.Since(condition.LastTransitionTime.Time), _waitForCreatingPodTimeout)
 			return true
 		}
 	}
@@ -224,8 +228,8 @@ func GetPodStatus(pod *kcore.Pod) PodStatus {
 
 	switch pod.Status.Phase {
 	case kcore.PodPending:
-		podCondition := GetPodConditionOf(pod, kcore.PodScheduled)
-		if podCondition != nil && !*podCondition {
+		podConditionState, podCondition := GetPodConditionOf(pod, kcore.PodScheduled)
+		if podConditionState != nil && !*podConditionState && !podCondition.LastTransitionTime.Time.IsZero() && time.Since(podCondition.LastTransitionTime.Time) >= _waitForCreatingPodTimeout {
 			return PodStatusStalled
 		}
 		return PodStatusFromContainerStatuses(append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...))
@@ -259,8 +263,8 @@ func GetPodStatus(pod *kcore.Pod) PodStatus {
 			return PodStatusTerminating
 		}
 
-		podCondition := GetPodConditionOf(pod, kcore.PodReady)
-		if podCondition != nil && *podCondition {
+		podConditionState, _ := GetPodConditionOf(pod, kcore.PodReady)
+		if podConditionState != nil && *podConditionState {
 			return PodStatusReady
 		}
 
diff --git a/pkg/operator/resources/asyncapi/api.go b/pkg/operator/resources/asyncapi/api.go
index 9766811445..0c6ba6b190 100644
--- a/pkg/operator/resources/asyncapi/api.go
+++ b/pkg/operator/resources/asyncapi/api.go
@@ -59,12 +59,6 @@ type resources struct {
 	gatewayVirtualService *istioclientnetworking.VirtualService
 }
 
-// TODO remove this
-type asyncDeployments struct {
-	APIDeployment     *kapps.Deployment
-	GatewayDeployment *kapps.Deployment
-}
-
 func getGatewayK8sName(apiName string) string {
 	return "gateway-" + apiName
 }
@@ -274,7 +268,7 @@ func GetAllAPIs(deployments []kapps.Deployment) ([]schema.APIResponse, error) {
 			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 		mappedAsyncAPIs[apiName] = schema.APIResponse{
-			Status:   status.StatusFromDeployment(&deployments[i]),
+			Status:   status.FromDeployment(&deployments[i]),
 			Metadata: metadata,
 		}
 	}
@@ -315,7 +309,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 		return nil, errors.ErrorUnexpected("unable to find gateway deployment", deployedResource.Name)
 	}
 
-	apiStatus := status.StatusFromDeployment(apiDeployment)
+	apiStatus := status.FromDeployment(apiDeployment)
 	apiMetadata, err := spec.MetadataFromDeployment(apiDeployment)
 	if err != nil {
 		return nil, errors.ErrorUnexpected("unable to obtain metadata", deployedResource.Name)
@@ -372,7 +366,7 @@ func DescribeAPIByName(deployedResource *operator.DeployedResource) ([]schema.AP
 		return nil, errors.ErrorUnexpected("unable to find gateway deployment", deployedResource.Name)
 	}
 
-	apiStatus := status.StatusFromDeployment(apiDeployment)
+	apiStatus := status.FromDeployment(apiDeployment)
 	apiMetadata, err := spec.MetadataFromDeployment(apiDeployment)
 	if err != nil {
 		return nil, errors.ErrorUnexpected("unable to obtain metadata", deployedResource.Name)
@@ -641,31 +635,6 @@ func deleteK8sResources(apiName string) error {
 	return err
 }
 
-// let's do CRDs instead, to avoid this
-func groupDeploymentsByAPI(deployments []kapps.Deployment) map[string]*asyncDeployments {
-	deploymentsByAPI := map[string]*asyncDeployments{}
-	for i := range deployments {
-		deployment := deployments[i]
-		apiName := deployment.Labels["apiName"]
-		asyncType := deployment.Labels["cortex.dev/async"]
-		apiResources, exists := deploymentsByAPI[apiName]
-		if exists {
-			if asyncType == "api" {
-				apiResources.APIDeployment = &deployment
-			} else {
-				apiResources.GatewayDeployment = &deployment
-			}
-		} else {
-			if asyncType == "api" {
-				deploymentsByAPI[apiName] = &asyncDeployments{APIDeployment: &deployment}
-			} else {
-				deploymentsByAPI[apiName] = &asyncDeployments{GatewayDeployment: &deployment}
-			}
-		}
-	}
-	return deploymentsByAPI
-}
-
 // returns true if min_replicas are not ready and no updated replicas have errored
 func isAPIUpdating(deployment *kapps.Deployment) (bool, error) {
 	pods, err := config.K8s.ListPodsByLabel("apiName", deployment.Labels["apiName"])
diff --git a/pkg/operator/resources/asyncapi/status.go b/pkg/operator/resources/asyncapi/status.go
index 0268f26048..3a0e4b5c1a 100644
--- a/pkg/operator/resources/asyncapi/status.go
+++ b/pkg/operator/resources/asyncapi/status.go
@@ -54,6 +54,10 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 		return
 	}
 
+	if !latest {
+		return
+	}
+
 	switch k8s.GetPodStatus(pod) {
 	case k8s.PodStatusPending:
 		counts.Pending++
@@ -63,6 +67,8 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 		counts.Creating++
 	case k8s.PodStatusReady:
 		counts.Creating++
+	case k8s.PodStatusNotReady:
+		counts.NotReady++
 	case k8s.PodStatusErrImagePull:
 		counts.ErrImagePull++
 	case k8s.PodStatusTerminating:
@@ -73,7 +79,7 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 		counts.Killed++
 	case k8s.PodStatusKilledOOM:
 		counts.KilledOOM++
-	default:
+	case k8s.PodStatusUnknown:
 		counts.Unknown++
 	}
 }
diff --git a/pkg/operator/resources/job/batchapi/api.go b/pkg/operator/resources/job/batchapi/api.go
index 8d87040eef..6ac1c87219 100644
--- a/pkg/operator/resources/job/batchapi/api.go
+++ b/pkg/operator/resources/job/batchapi/api.go
@@ -140,9 +140,9 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, batchJob
 		apiNameToBatchJobsMap[batchJob.Spec.APIName] = append(apiNameToBatchJobsMap[batchJob.Spec.APIName], &batchJobList[i])
 	}
 
-	for _, virtualService := range virtualServices {
-		apiName := virtualService.Labels["apiName"]
-		metadata, err := spec.MetadataFromVirtualService(&virtualService)
+	for i := range virtualServices {
+		apiName := virtualServices[i].Labels["apiName"]
+		metadata, err := spec.MetadataFromVirtualService(&virtualServices[i])
 		if err != nil {
 			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
diff --git a/pkg/operator/resources/job/taskapi/api.go b/pkg/operator/resources/job/taskapi/api.go
index 6c6afdf425..c5ca6e17fa 100644
--- a/pkg/operator/resources/job/taskapi/api.go
+++ b/pkg/operator/resources/job/taskapi/api.go
@@ -146,10 +146,10 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService, k8sJobs
 		}
 	}
 
-	for _, virtualService := range virtualServices {
-		apiName := virtualService.Labels["apiName"]
+	for i := range virtualServices {
+		apiName := virtualServices[i].Labels["apiName"]
 
-		metadata, err := spec.MetadataFromVirtualService(&virtualService)
+		metadata, err := spec.MetadataFromVirtualService(&virtualServices[i])
 		if err != nil {
 			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
diff --git a/pkg/operator/resources/realtimeapi/api.go b/pkg/operator/resources/realtimeapi/api.go
index b7df138a30..885c661366 100644
--- a/pkg/operator/resources/realtimeapi/api.go
+++ b/pkg/operator/resources/realtimeapi/api.go
@@ -190,7 +190,7 @@ func GetAllAPIs(deployments []kapps.Deployment) ([]schema.APIResponse, error) {
 			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 		mappedRealtimeAPIs[apiName] = schema.APIResponse{
-			Status:   status.StatusFromDeployment(&deployments[i]),
+			Status:   status.FromDeployment(&deployments[i]),
 			Metadata: metadata,
 		}
 	}
@@ -213,7 +213,7 @@ func GetAPIByName(deployedResource *operator.DeployedResource) ([]schema.APIResp
 		return nil, errors.ErrorUnexpected("unable to find deployment", deployedResource.Name)
 	}
 
-	apiStatus := status.StatusFromDeployment(deployment)
+	apiStatus := status.FromDeployment(deployment)
 	apiMetadata, err := spec.MetadataFromDeployment(deployment)
 	if err != nil {
 		return nil, errors.ErrorUnexpected("unable to obtain metadata", deployedResource.Name)
@@ -252,7 +252,7 @@ func DescribeAPIByName(deployedResource *operator.DeployedResource) ([]schema.AP
 		return nil, errors.ErrorUnexpected("unable to find deployment", deployedResource.Name)
 	}
 
-	apiStatus := status.StatusFromDeployment(deployment)
+	apiStatus := status.FromDeployment(deployment)
 	apiMetadata, err := spec.MetadataFromDeployment(deployment)
 	if err != nil {
 		return nil, errors.ErrorUnexpected("unable to obtain metadata", deployedResource.Name)
diff --git a/pkg/operator/resources/realtimeapi/status.go b/pkg/operator/resources/realtimeapi/status.go
index d021cc19f9..160ebea638 100644
--- a/pkg/operator/resources/realtimeapi/status.go
+++ b/pkg/operator/resources/realtimeapi/status.go
@@ -53,6 +53,10 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 		return
 	}
 
+	if !latest {
+		return
+	}
+
 	switch k8s.GetPodStatus(pod) {
 	case k8s.PodStatusPending:
 		counts.Pending++
@@ -62,6 +66,8 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 		counts.Creating++
 	case k8s.PodStatusReady:
 		counts.Creating++
+	case k8s.PodStatusNotReady:
+		counts.NotReady++
 	case k8s.PodStatusErrImagePull:
 		counts.ErrImagePull++
 	case k8s.PodStatusTerminating:
@@ -72,7 +78,7 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 		counts.Killed++
 	case k8s.PodStatusKilledOOM:
 		counts.KilledOOM++
-	default:
+	case k8s.PodStatusUnknown:
 		counts.Unknown++
 	}
 }
diff --git a/pkg/operator/resources/trafficsplitter/api.go b/pkg/operator/resources/trafficsplitter/api.go
index fa02f8ec35..03c89ea4a1 100644
--- a/pkg/operator/resources/trafficsplitter/api.go
+++ b/pkg/operator/resources/trafficsplitter/api.go
@@ -134,15 +134,15 @@ func getTrafficSplitterDestinations(trafficSplitter *spec.API) []k8s.Destination
 // GetAllAPIs returns a list of metadata, in the form of schema.APIResponse, about all the created traffic splitter APIs
 func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schema.APIResponse, error) {
 	var trafficSplitters []schema.APIResponse
-	for _, virtualService := range virtualServices {
-		apiName := virtualService.Labels["apiName"]
+	for i := range virtualServices {
+		apiName := virtualServices[i].Labels["apiName"]
 
-		metadata, err := spec.MetadataFromVirtualService(&virtualService)
+		metadata, err := spec.MetadataFromVirtualService(&virtualServices[i])
 		if err != nil {
 			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 
-		targets, err := userconfig.TrafficSplitterTargetsFromAnnotations(&virtualService)
+		targets, err := userconfig.TrafficSplitterTargetsFromAnnotations(&virtualServices[i])
 		if err != nil {
 			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
diff --git a/pkg/types/status/status.go b/pkg/types/status/status.go
index e3faba9e9f..b9b3258119 100644
--- a/pkg/types/status/status.go
+++ b/pkg/types/status/status.go
@@ -85,7 +85,7 @@ type WorkerCounts struct {
 	Unknown      int32 `json:"unknown,omitempty"`
 }
 
-func StatusFromDeployment(deployment *kapps.Deployment) *Status {
+func FromDeployment(deployment *kapps.Deployment) *Status {
 	return &Status{
 		Ready:     deployment.Status.ReadyReplicas,
 		Requested: deployment.Status.Replicas,

From c5b0e735e9cd8657eeacdee404687be9a5d2b040 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Sat, 24 Jul 2021 02:09:46 +0300
Subject: [PATCH 29/40] Fix terminating status

---
 pkg/operator/resources/asyncapi/status.go    | 11 ++++++++---
 pkg/operator/resources/realtimeapi/status.go | 11 ++++++++---
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/pkg/operator/resources/asyncapi/status.go b/pkg/operator/resources/asyncapi/status.go
index 3a0e4b5c1a..69977c731e 100644
--- a/pkg/operator/resources/asyncapi/status.go
+++ b/pkg/operator/resources/asyncapi/status.go
@@ -54,11 +54,18 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 		return
 	}
 
+	podStatus := k8s.GetPodStatus(pod)
+
+	if podStatus == k8s.PodStatusTerminating {
+		counts.Terminating++
+		return
+	}
+
 	if !latest {
 		return
 	}
 
-	switch k8s.GetPodStatus(pod) {
+	switch podStatus {
 	case k8s.PodStatusPending:
 		counts.Pending++
 	case k8s.PodStatusStalled:
@@ -71,8 +78,6 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 		counts.NotReady++
 	case k8s.PodStatusErrImagePull:
 		counts.ErrImagePull++
-	case k8s.PodStatusTerminating:
-		counts.Terminating++
 	case k8s.PodStatusFailed:
 		counts.Failed++
 	case k8s.PodStatusKilled:
diff --git a/pkg/operator/resources/realtimeapi/status.go b/pkg/operator/resources/realtimeapi/status.go
index 160ebea638..0a88b83d17 100644
--- a/pkg/operator/resources/realtimeapi/status.go
+++ b/pkg/operator/resources/realtimeapi/status.go
@@ -53,11 +53,18 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 		return
 	}
 
+	podStatus := k8s.GetPodStatus(pod)
+
+	if podStatus == k8s.PodStatusTerminating {
+		counts.Terminating++
+		return
+	}
+
 	if !latest {
 		return
 	}
 
-	switch k8s.GetPodStatus(pod) {
+	switch podStatus {
 	case k8s.PodStatusPending:
 		counts.Pending++
 	case k8s.PodStatusStalled:
@@ -70,8 +77,6 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 		counts.NotReady++
 	case k8s.PodStatusErrImagePull:
 		counts.ErrImagePull++
-	case k8s.PodStatusTerminating:
-		counts.Terminating++
 	case k8s.PodStatusFailed:
 		counts.Failed++
 	case k8s.PodStatusKilled:

From 8edee7ab58eee7fc2d731517e3acf5cced3a811c Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Mon, 26 Jul 2021 18:01:01 +0300
Subject: [PATCH 30/40] Fix the worker counts for the batch jobs

---
 pkg/crds/controllers/batch/batchjob_controller_helpers.go | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pkg/crds/controllers/batch/batchjob_controller_helpers.go b/pkg/crds/controllers/batch/batchjob_controller_helpers.go
index 0785628452..191d5a9b0f 100644
--- a/pkg/crds/controllers/batch/batchjob_controller_helpers.go
+++ b/pkg/crds/controllers/batch/batchjob_controller_helpers.go
@@ -447,9 +447,10 @@ func (r *BatchJobReconciler) getWorkerJobPods(ctx context.Context, batchJob batc
 	if err := r.List(ctx, &workerJobPods,
 		client.InNamespace(consts.DefaultNamespace),
 		client.MatchingLabels{
-			"jobID":   batchJob.Name,
-			"apiName": batchJob.Spec.APIName,
-			"apiID":   batchJob.Spec.APIID,
+			"jobID":            batchJob.Name,
+			"apiName":          batchJob.Spec.APIName,
+			"apiID":            batchJob.Spec.APIID,
+			"cortex.dev/batch": "worker",
 		},
 	); err != nil {
 		return nil, err

From a0ec6dfa8588bbf4aeead3885171ff308215e073 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Mon, 26 Jul 2021 22:25:17 +0300
Subject: [PATCH 31/40] Output type (yaml) fixes

---
 cli/cmd/get.go                 | 47 +++++++++++++++++----------
 cli/cmd/lib_batch_apis.go      | 14 +++++---
 cli/cmd/lib_task_apis.go       | 14 +++++---
 pkg/operator/schema/schema.go  | 36 ++++++++++-----------
 pkg/types/spec/api.go          | 22 ++++++-------
 pkg/types/spec/job.go          | 40 +++++++++++------------
 pkg/types/status/job_status.go | 14 ++++----
 pkg/types/status/status.go     | 58 +++++++++++++++++-----------------
 8 files changed, 136 insertions(+), 109 deletions(-)

diff --git a/cli/cmd/get.go b/cli/cmd/get.go
index 0fac23d076..c260d0c5e9 100644
--- a/cli/cmd/get.go
+++ b/cli/cmd/get.go
@@ -35,6 +35,7 @@ import (
 	libtime "github.com/cortexlabs/cortex/pkg/lib/time"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
+	"github.com/cortexlabs/yaml"
 	"github.com/spf13/cobra"
 )
 
@@ -104,7 +105,7 @@ var _getCmd = &cobra.Command{
 					return "", err
 				}
 
-				if _flagOutput == flags.JSONOutputType {
+				if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 					return apiTable, nil
 				}
 
@@ -134,7 +135,7 @@ var _getCmd = &cobra.Command{
 				if err != nil {
 					return "", err
 				}
-				if _flagOutput == flags.JSONOutputType {
+				if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 					return jobTable, nil
 				}
 
@@ -164,7 +165,7 @@ var _getCmd = &cobra.Command{
 						return "", err
 					}
 
-					if _flagOutput == flags.JSONOutputType {
+					if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 						return apiTable, nil
 					}
 
@@ -245,12 +246,16 @@ func getAPIsInAllEnvironments() (string, error) {
 		allAPIsOutput = append(allAPIsOutput, apisOutput)
 	}
 
+	var bytes []byte
 	if _flagOutput == flags.JSONOutputType {
-		bytes, err := libjson.Marshal(allAPIsOutput)
-		if err != nil {
-			return "", err
-		}
-
+		bytes, err = libjson.Marshal(allAPIsOutput)
+	} else if _flagOutput == flags.YAMLOutputType {
+		bytes, err = yaml.Marshal(allAPIsOutput)
+	}
+	if err != nil {
+		return "", err
+	}
+	if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 		return string(bytes), nil
 	}
 
@@ -335,11 +340,16 @@ func getAPIsByEnv(env cliconfig.Environment) (string, error) {
 		return "", err
 	}
 
+	var bytes []byte
 	if _flagOutput == flags.JSONOutputType {
-		bytes, err := libjson.Marshal(apisRes)
-		if err != nil {
-			return "", err
-		}
+		bytes, err = libjson.Marshal(apisRes)
+	} else if _flagOutput == flags.YAMLOutputType {
+		bytes, err = yaml.Marshal(apisRes)
+	}
+	if err != nil {
+		return "", err
+	}
+	if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 		return string(bytes), nil
 	}
 
@@ -455,11 +465,16 @@ func getAPI(env cliconfig.Environment, apiName string) (string, error) {
 		return "", err
 	}
 
+	var bytes []byte
 	if _flagOutput == flags.JSONOutputType {
-		bytes, err := libjson.Marshal(apisRes)
-		if err != nil {
-			return "", err
-		}
+		bytes, err = libjson.Marshal(apisRes)
+	} else if _flagOutput == flags.YAMLOutputType {
+		bytes, err = yaml.Marshal(apisRes)
+	}
+	if err != nil {
+		return "", err
+	}
+	if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 		return string(bytes), nil
 	}
 
diff --git a/cli/cmd/lib_batch_apis.go b/cli/cmd/lib_batch_apis.go
index 17cdb1301c..ebabc29243 100644
--- a/cli/cmd/lib_batch_apis.go
+++ b/cli/cmd/lib_batch_apis.go
@@ -31,6 +31,7 @@ import (
 	libtime "github.com/cortexlabs/cortex/pkg/lib/time"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 	"github.com/cortexlabs/cortex/pkg/types/status"
+	"github.com/cortexlabs/yaml"
 )
 
 const (
@@ -147,11 +148,16 @@ func getBatchJob(env cliconfig.Environment, apiName string, jobID string) (strin
 		return "", err
 	}
 
+	var bytes []byte
 	if _flagOutput == flags.JSONOutputType {
-		bytes, err := libjson.Marshal(resp)
-		if err != nil {
-			return "", err
-		}
+		bytes, err = libjson.Marshal(resp)
+	} else if _flagOutput == flags.YAMLOutputType {
+		bytes, err = yaml.Marshal(resp)
+	}
+	if err != nil {
+		return "", err
+	}
+	if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 		return string(bytes), nil
 	}
 
diff --git a/cli/cmd/lib_task_apis.go b/cli/cmd/lib_task_apis.go
index 6c3d2b8383..295e1af875 100644
--- a/cli/cmd/lib_task_apis.go
+++ b/cli/cmd/lib_task_apis.go
@@ -29,6 +29,7 @@ import (
 	"github.com/cortexlabs/cortex/pkg/lib/table"
 	libtime "github.com/cortexlabs/cortex/pkg/lib/time"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
+	"github.com/cortexlabs/yaml"
 )
 
 const (
@@ -142,11 +143,16 @@ func getTaskJob(env cliconfig.Environment, apiName string, jobID string) (string
 		return "", err
 	}
 
+	var bytes []byte
 	if _flagOutput == flags.JSONOutputType {
-		bytes, err := libjson.Marshal(resp)
-		if err != nil {
-			return "", err
-		}
+		bytes, err = libjson.Marshal(resp)
+	} else if _flagOutput == flags.YAMLOutputType {
+		bytes, err = yaml.Marshal(resp)
+	}
+	if err != nil {
+		return "", err
+	}
+	if _flagOutput == flags.JSONOutputType || _flagOutput == flags.YAMLOutputType {
 		return string(bytes), nil
 	}
 
diff --git a/pkg/operator/schema/schema.go b/pkg/operator/schema/schema.go
index 522a927a2f..70809e1a07 100644
--- a/pkg/operator/schema/schema.go
+++ b/pkg/operator/schema/schema.go
@@ -50,20 +50,20 @@ type NodeInfo struct {
 }
 
 type DeployResult struct {
-	API     *APIResponse `json:"api"`
-	Message string       `json:"message"`
-	Error   string       `json:"error"`
+	API     *APIResponse `json:"api" yaml:"api"`
+	Message string       `json:"message" yaml:"message"`
+	Error   string       `json:"error" yaml:"error"`
 }
 
 type APIResponse struct {
-	Spec             *spec.API               `json:"spec,omitempty"`
-	Metadata         *spec.Metadata          `json:"metadata,omitempty"`
-	Status           *status.Status          `json:"status,omitempty"`
-	Endpoint         *string                 `json:"endpoint,omitempty"`
-	DashboardURL     *string                 `json:"dashboard_url,omitempty"`
-	BatchJobStatuses []status.BatchJobStatus `json:"batch_job_statuses,omitempty"`
-	TaskJobStatuses  []status.TaskJobStatus  `json:"task_job_statuses,omitempty"`
-	APIVersions      []APIVersion            `json:"api_versions,omitempty"`
+	Spec             *spec.API               `json:"spec,omitempty" yaml:"spec,omitempty"`
+	Metadata         *spec.Metadata          `json:"metadata,omitempty"  yaml:"metadata,omitempty"`
+	Status           *status.Status          `json:"status,omitempty"  yaml:"status,omitempty"`
+	Endpoint         *string                 `json:"endpoint,omitempty"  yaml:"endpoint,omitempty"`
+	DashboardURL     *string                 `json:"dashboard_url,omitempty"  yaml:"dashboard_url,omitempty"`
+	BatchJobStatuses []status.BatchJobStatus `json:"batch_job_statuses,omitempty"  yaml:"batch_job_statuses,omitempty"`
+	TaskJobStatuses  []status.TaskJobStatus  `json:"task_job_statuses,omitempty"  yaml:"task_job_statuses,omitempty"`
+	APIVersions      []APIVersion            `json:"api_versions,omitempty"  yaml:"api_versions,omitempty"`
 }
 
 type LogResponse struct {
@@ -71,16 +71,16 @@ type LogResponse struct {
 }
 
 type BatchJobResponse struct {
-	APISpec   spec.API              `json:"api_spec"`
-	JobStatus status.BatchJobStatus `json:"job_status"`
-	Metrics   *metrics.BatchMetrics `json:"metrics,omitempty"`
-	Endpoint  string                `json:"endpoint"`
+	APISpec   spec.API              `json:"api_spec" yaml:"api_spec"`
+	JobStatus status.BatchJobStatus `json:"job_status" yaml:"job_status"`
+	Metrics   *metrics.BatchMetrics `json:"metrics,omitempty" yaml:"metrics,omitempty"`
+	Endpoint  string                `json:"endpoint" yaml:"endpoint"`
 }
 
 type TaskJobResponse struct {
-	APISpec   spec.API             `json:"api_spec"`
-	JobStatus status.TaskJobStatus `json:"job_status"`
-	Endpoint  string               `json:"endpoint"`
+	APISpec   spec.API             `json:"api_spec" yaml:"api_spec"`
+	JobStatus status.TaskJobStatus `json:"job_status" yaml:"job_status"`
+	Endpoint  string               `json:"endpoint" yaml:"endpoint"`
 }
 
 type DeleteResponse struct {
diff --git a/pkg/types/spec/api.go b/pkg/types/spec/api.go
index b229962aee..5b0d39210c 100644
--- a/pkg/types/spec/api.go
+++ b/pkg/types/spec/api.go
@@ -36,23 +36,23 @@ import (
 
 type API struct {
 	*userconfig.API
-	ID           string `json:"id"`
-	SpecID       string `json:"spec_id"`
-	PodID        string `json:"pod_id"`
-	DeploymentID string `json:"deployment_id"`
+	ID           string `json:"id" yaml:"id"`
+	SpecID       string `json:"spec_id" yaml:"spec_id"`
+	PodID        string `json:"pod_id" yaml:"pod_id"`
+	DeploymentID string `json:"deployment_id" yaml:"deployment_id"`
 
-	Key string `json:"key"`
+	Key string `json:"key" yaml:"key"`
 
-	InitialDeploymentTime int64  `json:"initial_deployment_time"`
-	LastUpdated           int64  `json:"last_updated"`
-	MetadataRoot          string `json:"metadata_root"`
+	InitialDeploymentTime int64  `json:"initial_deployment_time" yaml:"initial_deployment_time"`
+	LastUpdated           int64  `json:"last_updated" yaml:"last_updated"`
+	MetadataRoot          string `json:"metadata_root" yaml:"metadata_root"`
 }
 
 type Metadata struct {
 	*userconfig.Resource
-	APIID        string `json:"id"`
-	DeploymentID string `json:"deployment_id,omitempty"`
-	LastUpdated  int64  `json:"last_updated"`
+	APIID        string `json:"id" yaml:"id"`
+	DeploymentID string `json:"deployment_id,omitempty" yaml:"deployment_id,omitempty"`
+	LastUpdated  int64  `json:"last_updated" yaml:"last_updated"`
 }
 
 func MetadataFromDeployment(deployment *kapps.Deployment) (*Metadata, error) {
diff --git a/pkg/types/spec/job.go b/pkg/types/spec/job.go
index 784fb4f199..d6c6cb354d 100644
--- a/pkg/types/spec/job.go
+++ b/pkg/types/spec/job.go
@@ -32,9 +32,9 @@ const (
 )
 
 type JobKey struct {
-	ID      string          `json:"job_id"`
-	APIName string          `json:"api_name"`
-	Kind    userconfig.Kind `json:"kind"`
+	ID      string          `json:"job_id" yaml:"job_id"`
+	APIName string          `json:"api_name" yaml:"api_name"`
+	Kind    userconfig.Kind `json:"kind" yaml:"kind"`
 }
 
 func (j JobKey) UserString() string {
@@ -56,39 +56,39 @@ func (j JobKey) K8sName() string {
 }
 
 type SQSDeadLetterQueue struct {
-	ARN             string `json:"arn"`
-	MaxReceiveCount int    `json:"max_receive_count"`
+	ARN             string `json:"arn" yaml:"arn"`
+	MaxReceiveCount int    `json:"max_receive_count" yaml:"max_receive_count"`
 }
 
 type RuntimeBatchJobConfig struct {
-	Workers            int                    `json:"workers"`
-	SQSDeadLetterQueue *SQSDeadLetterQueue    `json:"sqs_dead_letter_queue"`
-	Config             map[string]interface{} `json:"config"`
-	Timeout            *int                   `json:"timeout"`
+	Workers            int                    `json:"workers" yaml:"workers"`
+	SQSDeadLetterQueue *SQSDeadLetterQueue    `json:"sqs_dead_letter_queue" yaml:"sqs_dead_letter_queue"`
+	Config             map[string]interface{} `json:"config" yaml:"config"`
+	Timeout            *int                   `json:"timeout" yaml:"timeout"`
 }
 
 type RuntimeTaskJobConfig struct {
-	Workers int                    `json:"workers"`
-	Config  map[string]interface{} `json:"config"`
-	Timeout *int                   `json:"timeout"`
+	Workers int                    `json:"workers" yaml:"workers"`
+	Config  map[string]interface{} `json:"config" yaml:"config"`
+	Timeout *int                   `json:"timeout" yaml:"timeout"`
 }
 
 type BatchJob struct {
 	JobKey
 	RuntimeBatchJobConfig
-	APIID           string    `json:"api_id"`
-	SQSUrl          string    `json:"sqs_url"`
-	TotalBatchCount int       `json:"total_batch_count,omitempty"`
-	StartTime       time.Time `json:"start_time,omitempty"`
+	APIID           string    `json:"api_id" yaml:"api_id"`
+	SQSUrl          string    `json:"sqs_url" yaml:"sqs_url"`
+	TotalBatchCount int       `json:"total_batch_count,omitempty" yaml:"total_batch_count,omitempty"`
+	StartTime       time.Time `json:"start_time,omitempty" yaml:"start_time,omitempty"`
 }
 
 type TaskJob struct {
 	JobKey
 	RuntimeTaskJobConfig
-	APIID     string    `json:"api_id"`
-	SpecID    string    `json:"spec_id"`
-	PodID     string    `json:"pod_id"`
-	StartTime time.Time `json:"start_time"`
+	APIID     string    `json:"api_id" yaml:"api_id"`
+	SpecID    string    `json:"spec_id" yaml:"spec_id"`
+	PodID     string    `json:"pod_id" yaml:"pod_id"`
+	StartTime time.Time `json:"start_time" yaml:"start_time"`
 }
 
 // e.g. /<cluster UID>/jobs/<job_api_kind>/<cortex version>/<api_name>
diff --git a/pkg/types/status/job_status.go b/pkg/types/status/job_status.go
index eb299831ba..f106d051a7 100644
--- a/pkg/types/status/job_status.go
+++ b/pkg/types/status/job_status.go
@@ -24,15 +24,15 @@ import (
 
 type BatchJobStatus struct {
 	spec.BatchJob
-	Status         JobCode       `json:"status"`
-	EndTime        *time.Time    `json:"end_time,omitempty"`
-	BatchesInQueue int           `json:"batches_in_queue"`
-	WorkerCounts   *WorkerCounts `json:"worker_counts,omitempty"`
+	Status         JobCode       `json:"status" yaml:"status"`
+	EndTime        *time.Time    `json:"end_time,omitempty" yaml:"end_time,omitempty"`
+	BatchesInQueue int           `json:"batches_in_queue" yaml:"batches_in_queue"`
+	WorkerCounts   *WorkerCounts `json:"worker_counts,omitempty" yaml:"worker_counts,omitempty"`
 }
 
 type TaskJobStatus struct {
 	spec.TaskJob
-	EndTime      *time.Time    `json:"end_time"`
-	Status       JobCode       `json:"status"`
-	WorkerCounts *WorkerCounts `json:"worker_counts"`
+	EndTime      *time.Time    `json:"end_time,omitempty" yaml:"end_time,omitempty"`
+	Status       JobCode       `json:"status" yaml:"status"`
+	WorkerCounts *WorkerCounts `json:"worker_counts,omitempty" yaml:"worker_counts,omitempty"`
 }
diff --git a/pkg/types/status/status.go b/pkg/types/status/status.go
index b9b3258119..5872922b9a 100644
--- a/pkg/types/status/status.go
+++ b/pkg/types/status/status.go
@@ -21,10 +21,10 @@ import (
 )
 
 type Status struct {
-	Ready         int32          `json:"ready"`
-	Requested     int32          `json:"requested"`
-	UpToDate      int32          `json:"up_to_date"`
-	ReplicaCounts *ReplicaCounts `json:"replica_counts,omitempty"`
+	Ready         int32          `json:"ready" yaml:"ready"`
+	Requested     int32          `json:"requested" yaml:"requested"`
+	UpToDate      int32          `json:"up_to_date" yaml:"up_to_date"`
+	ReplicaCounts *ReplicaCounts `json:"replica_counts,omitempty" yaml:"replica_counts,omitempty"`
 }
 
 type ReplicaCountType string
@@ -54,35 +54,35 @@ var ReplicaCountTypes []ReplicaCountType = []ReplicaCountType{
 }
 
 type ReplicaCounts struct {
-	Requested      int32 `json:"requested"`
-	Pending        int32 `json:"pending"`
-	Creating       int32 `json:"creating"`
-	NotReady       int32 `json:"not_ready"`
-	Ready          int32 `json:"ready"`
-	ReadyOutOfDate int32 `json:"ready_out_of_date"`
-	ErrImagePull   int32 `json:"err_image_pull"`
-	Terminating    int32 `json:"terminating"`
-	Failed         int32 `json:"failed"`
-	Killed         int32 `json:"killed"`
-	KilledOOM      int32 `json:"killed_oom"`
-	Stalled        int32 `json:"stalled"` // pending for a long time
-	Unknown        int32 `json:"unknown"`
+	Requested      int32 `json:"requested" yaml:"requested"`
+	Pending        int32 `json:"pending" yaml:"pending"`
+	Creating       int32 `json:"creating" yaml:"creating"`
+	NotReady       int32 `json:"not_ready" yaml:"not_ready"`
+	Ready          int32 `json:"ready" yaml:"ready"`
+	ReadyOutOfDate int32 `json:"ready_out_of_date" yaml:"ready_out_of_date"`
+	ErrImagePull   int32 `json:"err_image_pull" yaml:"err_image_pull"`
+	Terminating    int32 `json:"terminating" yaml:"terminating"`
+	Failed         int32 `json:"failed" yaml:"failed"`
+	Killed         int32 `json:"killed" yaml:"killed"`
+	KilledOOM      int32 `json:"killed_oom" yaml:"killed_oom"`
+	Stalled        int32 `json:"stalled" yaml:"stalled"` // pending for a long time
+	Unknown        int32 `json:"unknown" yaml:"unknown"`
 }
 
 // Worker counts don't have as many failure variations because Jobs clean up dead pods, so counting different failure scenarios isn't interesting
 type WorkerCounts struct {
-	Pending      int32 `json:"pending,omitempty"`
-	Creating     int32 `json:"creating,omitempty"`
-	NotReady     int32 `json:"not_ready,omitempty"`
-	Ready        int32 `json:"ready,omitempty"`
-	Succeeded    int32 `json:"succeeded,omitempty"`
-	ErrImagePull int32 `json:"err_image_pull,omitempty"`
-	Terminating  int32 `json:"terminating,omitempty"`
-	Failed       int32 `json:"failed,omitempty"`
-	Killed       int32 `json:"killed,omitempty"`
-	KilledOOM    int32 `json:"killed_oom,omitempty"`
-	Stalled      int32 `json:"stalled,omitempty"` // pending for a long time
-	Unknown      int32 `json:"unknown,omitempty"`
+	Pending      int32 `json:"pending,omitempty" yaml:"pending,omitempty"`
+	Creating     int32 `json:"creating,omitempty" yaml:"creating,omitempty"`
+	NotReady     int32 `json:"not_ready,omitempty" yaml:"not_ready,omitempty"`
+	Ready        int32 `json:"ready,omitempty" yaml:"ready,omitempty"`
+	Succeeded    int32 `json:"succeeded,omitempty" yaml:"succeeded,omitempty"`
+	ErrImagePull int32 `json:"err_image_pull,omitempty" yaml:"err_image_pull,omitempty"`
+	Terminating  int32 `json:"terminating,omitempty" yaml:"terminating,omitempty"`
+	Failed       int32 `json:"failed,omitempty" yaml:"failed,omitempty"`
+	Killed       int32 `json:"killed,omitempty" yaml:"killed,omitempty"`
+	KilledOOM    int32 `json:"killed_oom,omitempty" yaml:"killed_oom,omitempty"`
+	Stalled      int32 `json:"stalled,omitempty" yaml:"stalled,omitempty"` // pending for a long time
+	Unknown      int32 `json:"unknown,omitempty" yaml:"unknown,omitempty"`
 }
 
 func FromDeployment(deployment *kapps.Deployment) *Status {

From e2b6404cc8aaca8b40df5fe06a1c880203765fb0 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Mon, 26 Jul 2021 22:27:23 +0300
Subject: [PATCH 32/40] Fix: add missing yaml tags to APIVersion

---
 pkg/operator/schema/schema.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/operator/schema/schema.go b/pkg/operator/schema/schema.go
index 70809e1a07..1127d3dbf8 100644
--- a/pkg/operator/schema/schema.go
+++ b/pkg/operator/schema/schema.go
@@ -97,8 +97,8 @@ type ErrorResponse struct {
 }
 
 type APIVersion struct {
-	APIID       string `json:"api_id"`
-	LastUpdated int64  `json:"last_updated"`
+	APIID       string `json:"api_id" yaml:"api_id"`
+	LastUpdated int64  `json:"last_updated" yaml:"last_updated"`
 }
 
 type VerifyCortexResponse struct{}

From f2bc8bb2de4135adc9eb5e190d05e4d31059c8a0 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Tue, 27 Jul 2021 01:10:53 +0300
Subject: [PATCH 33/40] Update docs

---
 dev/generate_cli_md.sh |  1 +
 docs/clients/cli.md    | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/dev/generate_cli_md.sh b/dev/generate_cli_md.sh
index 5715f6fdb8..fdf2566624 100755
--- a/dev/generate_cli_md.sh
+++ b/dev/generate_cli_md.sh
@@ -33,6 +33,7 @@ echo "# CLI commands" >> $out_file
 commands=(
   "deploy"
   "get"
+  "describe"
   "logs"
   "refresh"
   "delete"
diff --git a/docs/clients/cli.md b/docs/clients/cli.md
index be43886dba..b10957bfe4 100644
--- a/docs/clients/cli.md
+++ b/docs/clients/cli.md
@@ -32,6 +32,20 @@ Flags:
   -h, --help            help for get
 ```
 
+## describe
+
+```text
+describe an api
+
+Usage:
+  cortex describe [API_NAME] [flags]
+
+Flags:
+  -e, --env string   environment to use
+  -w, --watch        re-run the command every 2 seconds
+  -h, --help         help for describe
+```
+
 ## logs
 
 ```text

From af364a65b2ccc46b409512e6ffd90730e56aa936 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Tue, 27 Jul 2021 19:20:07 +0300
Subject: [PATCH 34/40] Address PR comments

---
 pkg/activator/activator.go                    |  2 +-
 pkg/activator/helpers.go                      | 22 +------
 .../batch/batchjob_controller_helpers.go      | 61 ++++++++++---------
 pkg/lib/k8s/pod.go                            | 26 +++-----
 pkg/types/status/status.go                    | 32 +++++-----
 pkg/types/userconfig/api.go                   | 14 +++++
 6 files changed, 74 insertions(+), 83 deletions(-)

diff --git a/pkg/activator/activator.go b/pkg/activator/activator.go
index b7c54adc3d..7b68736951 100644
--- a/pkg/activator/activator.go
+++ b/pkg/activator/activator.go
@@ -131,7 +131,7 @@ func (a *activator) getOrCreateAPIActivator(ctx context.Context, apiName string)
 		return nil, errors.WithStack(err)
 	}
 
-	maxQueueLength, maxConcurrency, err := concurrencyFromAnnotations(vs.Annotations)
+	maxQueueLength, maxConcurrency, err := userconfig.ConcurrencyFromAnnotations(vs)
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/activator/helpers.go b/pkg/activator/helpers.go
index 48790b5ac0..5bce2cb7bf 100644
--- a/pkg/activator/helpers.go
+++ b/pkg/activator/helpers.go
@@ -17,8 +17,6 @@ limitations under the License.
 package activator
 
 import (
-	"strconv"
-
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"k8s.io/apimachinery/pkg/api/meta"
@@ -50,8 +48,7 @@ func getAPIMeta(obj interface{}) (apiMeta, error) {
 		return apiMeta{}, errors.ErrorUnexpected("got a virtual service without apiName label")
 	}
 
-	annotations := resource.GetAnnotations()
-	maxQueueLength, maxConcurrency, err := concurrencyFromAnnotations(annotations)
+	maxQueueLength, maxConcurrency, err := userconfig.ConcurrencyFromAnnotations(resource)
 	if err != nil {
 		return apiMeta{}, err
 	}
@@ -60,23 +57,8 @@ func getAPIMeta(obj interface{}) (apiMeta, error) {
 		apiName:        apiName,
 		apiKind:        userconfig.KindFromString(apiKind),
 		labels:         labels,
-		annotations:    annotations,
+		annotations:    resource.GetAnnotations(),
 		maxConcurrency: maxConcurrency,
 		maxQueueLength: maxQueueLength,
 	}, nil
 }
-
-// TODO move this out of here
-func concurrencyFromAnnotations(annotations map[string]string) (int, int, error) {
-	maxQueueLength, err := strconv.Atoi(annotations[userconfig.MaxQueueLengthAnnotationKey])
-	if err != nil {
-		return 0, 0, errors.ErrorUnexpected("failed to parse annotation", userconfig.MaxQueueLengthAnnotationKey)
-	}
-
-	maxConcurrency, err := strconv.Atoi(annotations[userconfig.MaxConcurrencyAnnotationKey])
-	if err != nil {
-		return 0, 0, errors.ErrorUnexpected("failed to parse annotation", userconfig.MaxConcurrencyAnnotationKey)
-	}
-
-	return maxQueueLength, maxConcurrency, err
-}
diff --git a/pkg/crds/controllers/batch/batchjob_controller_helpers.go b/pkg/crds/controllers/batch/batchjob_controller_helpers.go
index 191d5a9b0f..0f11ba67ba 100644
--- a/pkg/crds/controllers/batch/batchjob_controller_helpers.go
+++ b/pkg/crds/controllers/batch/batchjob_controller_helpers.go
@@ -531,35 +531,7 @@ func (r *BatchJobReconciler) updateStatus(ctx context.Context, batchJob *batch.B
 			batchJob.Status.Status = status.JobRunning
 		}
 
-		// TODO move this to its own function
-		workerCounts := status.WorkerCounts{}
-		for i := range workerJobPods {
-			switch k8s.GetPodStatus(&workerJobPods[i]) {
-			case k8s.PodStatusPending:
-				workerCounts.Pending++
-			case k8s.PodStatusStalled:
-				workerCounts.Stalled++
-			case k8s.PodStatusCreating:
-				workerCounts.Creating++
-			case k8s.PodStatusNotReady:
-				workerCounts.NotReady++
-			case k8s.PodStatusErrImagePull:
-				workerCounts.ErrImagePull++
-			case k8s.PodStatusTerminating:
-				workerCounts.Terminating++
-			case k8s.PodStatusFailed:
-				workerCounts.Failed++
-			case k8s.PodStatusKilled:
-				workerCounts.Killed++
-			case k8s.PodStatusKilledOOM:
-				workerCounts.KilledOOM++
-			case k8s.PodStatusSucceeded:
-				workerCounts.Succeeded++
-			case k8s.PodStatusUnknown:
-				workerCounts.Unknown++
-			}
-		}
-
+		workerCounts := getReplicaCounts(workerJobPods)
 		batchJob.Status.WorkerCounts = &workerCounts
 	}
 
@@ -759,3 +731,34 @@ func saveJobStatus(r *BatchJobReconciler, batchJob batch.BatchJob) error {
 		},
 	)
 }
+
+func getReplicaCounts(workerJobPods []kcore.Pod) status.WorkerCounts {
+	workerCounts := status.WorkerCounts{}
+	for i := range workerJobPods {
+		switch k8s.GetPodStatus(&workerJobPods[i]) {
+		case k8s.PodStatusPending:
+			workerCounts.Pending++
+		case k8s.PodStatusStalled:
+			workerCounts.Stalled++
+		case k8s.PodStatusCreating:
+			workerCounts.Creating++
+		case k8s.PodStatusNotReady:
+			workerCounts.NotReady++
+		case k8s.PodStatusErrImagePull:
+			workerCounts.ErrImagePull++
+		case k8s.PodStatusTerminating:
+			workerCounts.Terminating++
+		case k8s.PodStatusFailed:
+			workerCounts.Failed++
+		case k8s.PodStatusKilled:
+			workerCounts.Killed++
+		case k8s.PodStatusKilledOOM:
+			workerCounts.KilledOOM++
+		case k8s.PodStatusSucceeded:
+			workerCounts.Succeeded++
+		case k8s.PodStatusUnknown:
+			workerCounts.Unknown++
+		}
+	}
+	return workerCounts
+}
diff --git a/pkg/lib/k8s/pod.go b/pkg/lib/k8s/pod.go
index 0d3e0125bd..293e88a476 100644
--- a/pkg/lib/k8s/pod.go
+++ b/pkg/lib/k8s/pod.go
@@ -19,7 +19,6 @@ package k8s
 import (
 	"bytes"
 	"context"
-	"fmt"
 	"regexp"
 	"time"
 
@@ -67,10 +66,8 @@ const (
 	PodStatusKilled       PodStatus = "Killed"
 	PodStatusKilledOOM    PodStatus = "KilledOOM"
 	PodStatusStalled      PodStatus = "Stalled"
-
-	PodStatusSucceeded PodStatus = "Succeeded"
-
-	PodStatusUnknown PodStatus = "Unknown"
+	PodStatusSucceeded    PodStatus = "Succeeded"
+	PodStatusUnknown      PodStatus = "Unknown"
 )
 
 var (
@@ -161,11 +158,9 @@ func IsPodReady(pod *kcore.Pod) bool {
 		return false
 	}
 
-	// TODO use the GetPodConditionOf func here
-	for _, condition := range pod.Status.Conditions {
-		if condition.Type == kcore.PodReady && condition.Status == kcore.ConditionTrue {
-			return true
-		}
+	podConditionState, _ := GetPodConditionOf(pod, kcore.PodReady)
+	if podConditionState != nil && *podConditionState {
+		return true
 	}
 
 	return false
@@ -176,12 +171,9 @@ func IsPodStalled(pod *kcore.Pod) bool {
 		return false
 	}
 
-	// TODO use the GetPodConditionOf func here
-	for _, condition := range pod.Status.Conditions {
-		if condition.Type == kcore.PodScheduled && condition.Status == kcore.ConditionFalse && !condition.LastTransitionTime.Time.IsZero() && time.Since(condition.LastTransitionTime.Time) >= _waitForCreatingPodTimeout {
-			fmt.Println(time.Since(condition.LastTransitionTime.Time), _waitForCreatingPodTimeout)
-			return true
-		}
+	podConditionState, podCondition := GetPodConditionOf(pod, kcore.PodScheduled)
+	if podConditionState != nil && !*podConditionState && !podCondition.LastTransitionTime.Time.IsZero() && time.Since(podCondition.LastTransitionTime.Time) >= _waitForCreatingPodTimeout {
+		return true
 	}
 
 	return false
@@ -269,7 +261,7 @@ func GetPodStatus(pod *kcore.Pod) PodStatus {
 		}
 
 		status := PodStatusFromContainerStatuses(pod.Status.ContainerStatuses)
-		if status == PodStatusReady || status == PodStatusNotReady {
+		if status == PodStatusReady {
 			return PodStatusNotReady
 		}
 
diff --git a/pkg/types/status/status.go b/pkg/types/status/status.go
index 5872922b9a..15288fc8d1 100644
--- a/pkg/types/status/status.go
+++ b/pkg/types/status/status.go
@@ -21,28 +21,28 @@ import (
 )
 
 type Status struct {
-	Ready         int32          `json:"ready" yaml:"ready"`
-	Requested     int32          `json:"requested" yaml:"requested"`
-	UpToDate      int32          `json:"up_to_date" yaml:"up_to_date"`
+	Ready         int32          `json:"ready" yaml:"ready"`           // deployment-reported number of ready replicas (latest + out of date)
+	Requested     int32          `json:"requested" yaml:"requested"`   // deployment-reported number of requested replicas
+	UpToDate      int32          `json:"up_to_date" yaml:"up_to_date"` // deployment-reported number of up-to-date replicas (in whichever phase they are found)
 	ReplicaCounts *ReplicaCounts `json:"replica_counts,omitempty" yaml:"replica_counts,omitempty"`
 }
 
 type ReplicaCountType string
 
 const (
-	ReplicaCountRequested      ReplicaCountType = "Requested"
-	ReplicaCountPending        ReplicaCountType = "Pending"
-	ReplicaCountCreating       ReplicaCountType = "Creating"
-	ReplicaCountNotReady       ReplicaCountType = "NotReady"
-	ReplicaCountReady          ReplicaCountType = "Ready"
-	ReplicaCountReadyOutOfDate ReplicaCountType = "ReadyOutOfDate"
-	ReplicaCountErrImagePull   ReplicaCountType = "ErrImagePull"
-	ReplicaCountTerminating    ReplicaCountType = "Terminating"
-	ReplicaCountFailed         ReplicaCountType = "Failed"
-	ReplicaCountKilled         ReplicaCountType = "Killed"
-	ReplicaCountKilledOOM      ReplicaCountType = "KilledOOM"
-	ReplicaCountStalled        ReplicaCountType = "Stalled"
-	ReplicaCountUnknown        ReplicaCountType = "Unknown"
+	ReplicaCountRequested      ReplicaCountType = "Requested"      // requested number of replicas (for up-to-date pods)
+	ReplicaCountPending        ReplicaCountType = "Pending"        // pods that are in the pending state (for up-to-date pods)
+	ReplicaCountCreating       ReplicaCountType = "Creating"       // pods that have their init/non-init containers in the process of being created (for up-to-date pods)
+	ReplicaCountNotReady       ReplicaCountType = "NotReady"       // pods that are not passing the readiness checks (for up-to-date pods)
+	ReplicaCountReady          ReplicaCountType = "Ready"          // pods that are passing the readiness checks (for up-to-date pods)
+	ReplicaCountReadyOutOfDate ReplicaCountType = "ReadyOutOfDate" // pods that are passing the readiness checks (for out-of-date pods)
+	ReplicaCountErrImagePull   ReplicaCountType = "ErrImagePull"   // pods that couldn't pull the containers' images (for up-to-date pods)
+	ReplicaCountTerminating    ReplicaCountType = "Terminating"    // pods that are in a terminating state (for up-to-date pods)
+	ReplicaCountFailed         ReplicaCountType = "Failed"         // pods that have had their containers erroring (for up-to-date pods)
+	ReplicaCountKilled         ReplicaCountType = "Killed"         // pods that have had their container processes killed (for up-to-date pods)
+	ReplicaCountKilledOOM      ReplicaCountType = "KilledOOM"      // pods that have had their containers OOM (for up-to-date pods)
+	ReplicaCountStalled        ReplicaCountType = "Stalled"        // pods that have been in a pending state for more than 15 mins (for up-to-date pods)
+	ReplicaCountUnknown        ReplicaCountType = "Unknown"        // pods that are in an unknown state (for up-to-date pods)
 )
 
 var ReplicaCountTypes []ReplicaCountType = []ReplicaCountType{
diff --git a/pkg/types/userconfig/api.go b/pkg/types/userconfig/api.go
index d1e9f65f7b..a90a29e952 100644
--- a/pkg/types/userconfig/api.go
+++ b/pkg/types/userconfig/api.go
@@ -265,6 +265,20 @@ func EndpointFromAnnotation(k8sObj kmeta.Object) (string, error) {
 	return endpoint, nil
 }
 
+func ConcurrencyFromAnnotations(k8sObj kmeta.Object) (int, int, error) {
+	maxQueueLength, err := k8s.ParseIntAnnotation(k8sObj, MaxQueueLengthAnnotationKey)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	maxConcurrency, err := k8s.ParseIntAnnotation(k8sObj, MaxConcurrencyAnnotationKey)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	return maxQueueLength, maxConcurrency, nil
+}
+
 func (api *API) UserStr() string {
 	var sb strings.Builder
 	sb.WriteString(fmt.Sprintf("%s: %s\n", NameKey, api.Name))

From f891aa368a6c1a9d4a7987992dec55446aafe6e1 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Tue, 27 Jul 2021 20:17:06 +0300
Subject: [PATCH 35/40] Docs updates

---
 docs/workloads/async/statuses.md           | 22 +++++++++++++++++-
 docs/workloads/batch/statuses.md           |  2 +-
 docs/workloads/realtime/statuses.md        | 27 ++++++++++++++--------
 docs/workloads/realtime/troubleshooting.md |  6 ++---
 docs/workloads/task/statuses.md            |  2 +-
 5 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/docs/workloads/async/statuses.md b/docs/workloads/async/statuses.md
index 3ecaeba865..5cd8bd7cb4 100644
--- a/docs/workloads/async/statuses.md
+++ b/docs/workloads/async/statuses.md
@@ -1,4 +1,4 @@
-# Statuses
+# Request statuses
 
 | Status            | Meaning                                                               |
 | :---              | :---                                                                  |
@@ -6,3 +6,23 @@
 | in_progress       | Workload has been pulled by the API and is currently being processed  |
 | completed         | Workload has completed with success                                   |
 | failed            | Workload encountered an error during processing                       |
+
+# Replica states
+
+The replica states of an API can be inspected by running the `cortex describe <api-name>` command. When run, a table is presented that shows how many replicas of the said API are found in each of the following states:
+
+| State | Meaning |
+|:---|:---|
+| Ready | Replica is running and it has passed the readiness checks |
+| ReadyOutOfDate | Replica is running and it has passed the readiness checks (for an out-of-date replica) |
+| NotReady | Replica is running but it's not passing the readiness checks; make sure the server is listening on the designed port of the API |
+| Requested | Requested number of replicas for a given API |
+| Pending | Replica is in a pending state (waiting to get scheduled onto a node) |
+| Creating | Replica is in the process of having its containers created |
+| ErrImagePull | Replica was not created because one of the specified Docker images was inaccessible at runtime; check that your API's docker images exist and are accessible via your cluster's AWS credentials |
+| Failed | Replica couldn't start due to an error; run `cortex logs <name>` to view the logs |
+| Killed | Replica has had one of its containers' process(es) killed |
+| KilledOOM | Replica was terminated due to excessive memory usage; try allocating more memory to the API and re-deploy |
+| Stalled | Replica has been in a pending state for more than 15 minutes; causes like insufficient memory, CPU, GPU or Inf could be culprit; could also be that the node selector on the API is out-of-date |
+| Terminating | Replica is currently in the process of being terminated |
+| Unknown | Replica is in an undefined state; should not be possible |
diff --git a/docs/workloads/batch/statuses.md b/docs/workloads/batch/statuses.md
index 1bcddcd6bd..019ca55789 100644
--- a/docs/workloads/batch/statuses.md
+++ b/docs/workloads/batch/statuses.md
@@ -1,4 +1,4 @@
-# Statuses
+# Job statuses
 
 | Status                   | Meaning |
 | :--- | :--- |
diff --git a/docs/workloads/realtime/statuses.md b/docs/workloads/realtime/statuses.md
index 2ee32aca40..6decef16f0 100644
--- a/docs/workloads/realtime/statuses.md
+++ b/docs/workloads/realtime/statuses.md
@@ -1,10 +1,19 @@
-# Statuses
+# Replica states
 
-| Status                | Meaning |
-| :--- | :--- |
-| live                  | API is deployed and ready to serve requests (at least one replica is running) |
-| updating              | API is updating |
-| error                 | API was not created due to an error; run `cortex logs <name>` to view the logs |
-| error (image pull)    | API was not created because one of the specified Docker images was inaccessible at runtime; check that your API's docker images exist and are accessible via your cluster's AWS credentials |
-| error (out of memory) | API was terminated due to excessive memory usage; try allocating more memory to the API and re-deploying |
-| compute unavailable   | API could not start due to insufficient memory, CPU, GPU, or Inf in the cluster; some replicas may be ready |
+The replica states of an API can be inspected by running the `cortex describe <api-name>` command. When run, a table is presented that shows how many replicas of the said API are found in each of the following states:
+
+| State | Meaning |
+|:---|:---|
+| Ready | Replica is running and it has passed the readiness checks |
+| ReadyOutOfDate | Replica is running and it has passed the readiness checks (for an out-of-date replica) |
+| NotReady | Replica is running but it's not passing the readiness checks; make sure the server is listening on the designed port of the API |
+| Requested | Requested number of replicas for a given API |
+| Pending | Replica is in a pending state (waiting to get scheduled onto a node) |
+| Creating | Replica is in the process of having its containers created |
+| ErrImagePull | Replica was not created because one of the specified Docker images was inaccessible at runtime; check that your API's docker images exist and are accessible via your cluster's AWS credentials |
+| Failed | Replica couldn't start due to an error; run `cortex logs <name>` to view the logs |
+| Killed | Replica has had one of its containers' process(es) killed |
+| KilledOOM | Replica was terminated due to excessive memory usage; try allocating more memory to the API and re-deploy |
+| Stalled | Replica has been in a pending state for more than 15 minutes; causes like insufficient memory, CPU, GPU or Inf could be culprit; could also be that the node selector on the API is out-of-date |
+| Terminating | Replica is currently in the process of being terminated |
+| Unknown | Replica is in an undefined state; should not be possible |
diff --git a/docs/workloads/realtime/troubleshooting.md b/docs/workloads/realtime/troubleshooting.md
index 61de9dfe74..5254d25aaa 100644
--- a/docs/workloads/realtime/troubleshooting.md
+++ b/docs/workloads/realtime/troubleshooting.md
@@ -4,14 +4,14 @@
 
 When making requests to your API, it's possible to get a `no healthy upstream` error message (with HTTP status code `503`). This means that there are currently no live replicas running for your API. This could happen for a few reasons:
 
-1. It's possible that your API is simply not ready yet. You can check the status of your API with `cortex get API_NAME`, and inspect the logs in CloudWatch with the help of `cortex logs API_NAME`.
-1. Your API may have errored during initialization or while responding to a previous request. `cortex get API_NAME` will show the status of your API, and you can view the logs for all replicas by visiting the CloudWatch Insights URL from `cortex logs API_NAME`.
+1. It's possible that your API is simply not ready yet. You can check the number of ready replicas on your API with `cortex get API_NAME`, and inspect the logs in CloudWatch with the help of `cortex logs API_NAME`.
+1. Your API may have errored during initialization or while responding to a previous request. `cortex describe API_NAME` will show the number of replicas that have failed to start on your API, and you can view the logs for all replicas by visiting the CloudWatch Insights URL from `cortex logs API_NAME`.
 
 If you are using API Gateway in front of your API endpoints, it is also possible to receive a `{"message":"Service Unavailable"}` error message (with HTTP status code `503`) after 29 seconds if your request exceeds API Gateway's 29 second timeout. If this is the case, you can either modify your code to take less time, run on faster hardware (e.g. GPUs), or don't use API Gateway (there is no timeout when using the API's endpoint directly).
 
 ## API is stuck updating
 
-If your API is stuck in the "updating" or "compute unavailable" state (which is displayed when running `cortex get`), there are a few possible causes. Here are some things to check:
+If your API has pods stuck in the "pending" or "stalled" states (which are displayed when running `cortex describe API_NAME`), there are a few possible causes. Here are some things to check:
 
 ### Inspect API logs in CloudWatch
 
diff --git a/docs/workloads/task/statuses.md b/docs/workloads/task/statuses.md
index b51eaf010f..0631ab68f2 100644
--- a/docs/workloads/task/statuses.md
+++ b/docs/workloads/task/statuses.md
@@ -1,4 +1,4 @@
-# Statuses
+# Job statuses
 
 | Status                   | Meaning |
 | :--- | :--- |

From 121a66942714a42e4be507fe012cda9ec8b73679 Mon Sep 17 00:00:00 2001
From: David Eliahu <deliahu@users.noreply.github.com>
Date: Tue, 27 Jul 2021 10:26:35 -0700
Subject: [PATCH 36/40] Update statuses.md

---
 docs/workloads/realtime/statuses.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/workloads/realtime/statuses.md b/docs/workloads/realtime/statuses.md
index 6decef16f0..a6a12cf225 100644
--- a/docs/workloads/realtime/statuses.md
+++ b/docs/workloads/realtime/statuses.md
@@ -16,4 +16,4 @@ The replica states of an API can be inspected by running the `cortex describe <a
 | KilledOOM | Replica was terminated due to excessive memory usage; try allocating more memory to the API and re-deploy |
 | Stalled | Replica has been in a pending state for more than 15 minutes; causes like insufficient memory, CPU, GPU or Inf could be culprit; could also be that the node selector on the API is out-of-date |
 | Terminating | Replica is currently in the process of being terminated |
-| Unknown | Replica is in an undefined state; should not be possible |
+| Unknown | Replica is in an unknown state |

From c337f294025b34c6e19ade097e8caa8f89c79c04 Mon Sep 17 00:00:00 2001
From: David Eliahu <deliahu@users.noreply.github.com>
Date: Tue, 27 Jul 2021 10:27:05 -0700
Subject: [PATCH 37/40] Update statuses.md

---
 docs/workloads/async/statuses.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/workloads/async/statuses.md b/docs/workloads/async/statuses.md
index 5cd8bd7cb4..807f97e568 100644
--- a/docs/workloads/async/statuses.md
+++ b/docs/workloads/async/statuses.md
@@ -25,4 +25,4 @@ The replica states of an API can be inspected by running the `cortex describe <a
 | KilledOOM | Replica was terminated due to excessive memory usage; try allocating more memory to the API and re-deploy |
 | Stalled | Replica has been in a pending state for more than 15 minutes; causes like insufficient memory, CPU, GPU or Inf could be culprit; could also be that the node selector on the API is out-of-date |
 | Terminating | Replica is currently in the process of being terminated |
-| Unknown | Replica is in an undefined state; should not be possible |
+| Unknown | Replica is in an unknown state |

From 70a61322893f16764b89f792d84cd0c794ddf033 Mon Sep 17 00:00:00 2001
From: David Eliahu <deliahu@users.noreply.github.com>
Date: Tue, 27 Jul 2021 11:07:27 -0700
Subject: [PATCH 38/40] Update statuses.md

---
 docs/workloads/async/statuses.md | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/docs/workloads/async/statuses.md b/docs/workloads/async/statuses.md
index 807f97e568..9c4787f293 100644
--- a/docs/workloads/async/statuses.md
+++ b/docs/workloads/async/statuses.md
@@ -9,20 +9,19 @@
 
 # Replica states
 
-The replica states of an API can be inspected by running the `cortex describe <api-name>` command. When run, a table is presented that shows how many replicas of the said API are found in each of the following states:
+The replica states of an API can be inspected by running `cortex describe <api-name>`. Here are the possible states for each replica in an API:
 
 | State | Meaning |
 |:---|:---|
 | Ready | Replica is running and it has passed the readiness checks |
 | ReadyOutOfDate | Replica is running and it has passed the readiness checks (for an out-of-date replica) |
 | NotReady | Replica is running but it's not passing the readiness checks; make sure the server is listening on the designed port of the API |
-| Requested | Requested number of replicas for a given API |
 | Pending | Replica is in a pending state (waiting to get scheduled onto a node) |
 | Creating | Replica is in the process of having its containers created |
 | ErrImagePull | Replica was not created because one of the specified Docker images was inaccessible at runtime; check that your API's docker images exist and are accessible via your cluster's AWS credentials |
 | Failed | Replica couldn't start due to an error; run `cortex logs <name>` to view the logs |
-| Killed | Replica has had one of its containers' process(es) killed |
+| Killed | Replica has had one of its containers killed |
 | KilledOOM | Replica was terminated due to excessive memory usage; try allocating more memory to the API and re-deploy |
-| Stalled | Replica has been in a pending state for more than 15 minutes; causes like insufficient memory, CPU, GPU or Inf could be culprit; could also be that the node selector on the API is out-of-date |
+| Stalled | Replica has been in a pending state for more than 15 minutes; see [troubleshooting](../realtime/troubleshooting.md) |
 | Terminating | Replica is currently in the process of being terminated |
 | Unknown | Replica is in an unknown state |

From 4e0dc14d2d87dcc18975ac8df612f8a3710a9b42 Mon Sep 17 00:00:00 2001
From: David Eliahu <deliahu@users.noreply.github.com>
Date: Tue, 27 Jul 2021 11:07:57 -0700
Subject: [PATCH 39/40] Update statuses.md

---
 docs/workloads/realtime/statuses.md | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/docs/workloads/realtime/statuses.md b/docs/workloads/realtime/statuses.md
index a6a12cf225..d4e201bfba 100644
--- a/docs/workloads/realtime/statuses.md
+++ b/docs/workloads/realtime/statuses.md
@@ -1,19 +1,18 @@
 # Replica states
 
-The replica states of an API can be inspected by running the `cortex describe <api-name>` command. When run, a table is presented that shows how many replicas of the said API are found in each of the following states:
+The replica states of an API can be inspected by running `cortex describe <api-name>`. Here are the possible states for each replica in an API:
 
 | State | Meaning |
 |:---|:---|
 | Ready | Replica is running and it has passed the readiness checks |
 | ReadyOutOfDate | Replica is running and it has passed the readiness checks (for an out-of-date replica) |
 | NotReady | Replica is running but it's not passing the readiness checks; make sure the server is listening on the designed port of the API |
-| Requested | Requested number of replicas for a given API |
 | Pending | Replica is in a pending state (waiting to get scheduled onto a node) |
 | Creating | Replica is in the process of having its containers created |
 | ErrImagePull | Replica was not created because one of the specified Docker images was inaccessible at runtime; check that your API's docker images exist and are accessible via your cluster's AWS credentials |
 | Failed | Replica couldn't start due to an error; run `cortex logs <name>` to view the logs |
-| Killed | Replica has had one of its containers' process(es) killed |
+| Killed | Replica has had one of its containers killed |
 | KilledOOM | Replica was terminated due to excessive memory usage; try allocating more memory to the API and re-deploy |
-| Stalled | Replica has been in a pending state for more than 15 minutes; causes like insufficient memory, CPU, GPU or Inf could be culprit; could also be that the node selector on the API is out-of-date |
+| Stalled | Replica has been in a pending state for more than 15 minutes; see [troubleshooting](../realtime/troubleshooting.md) |
 | Terminating | Replica is currently in the process of being terminated |
 | Unknown | Replica is in an unknown state |

From ce6a89f19be1603bda055b2cc5600848e7934d58 Mon Sep 17 00:00:00 2001
From: Robert Lucian Chiriac <robert.lucian.chiriac@gmail.com>
Date: Tue, 27 Jul 2021 22:06:16 +0300
Subject: [PATCH 40/40] Address PR comments

---
 cli/cmd/lib_traffic_splitters.go              |  4 ++--
 pkg/operator/resources/asyncapi/status.go     |  2 +-
 pkg/operator/resources/realtimeapi/status.go  |  2 +-
 pkg/operator/resources/trafficsplitter/api.go | 20 +++++++++----------
 pkg/operator/schema/schema.go                 | 17 ++++++++--------
 pkg/types/status/status.go                    |  2 +-
 pkg/types/userconfig/api.go                   |  4 ++--
 pkg/types/userconfig/config_key.go            |  2 +-
 8 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/cli/cmd/lib_traffic_splitters.go b/cli/cmd/lib_traffic_splitters.go
index 8eaf6b048b..af2b4e4aad 100644
--- a/cli/cmd/lib_traffic_splitters.go
+++ b/cli/cmd/lib_traffic_splitters.go
@@ -110,14 +110,14 @@ func trafficSplitTable(trafficSplitter schema.APIResponse, env cliconfig.Environ
 func trafficSplitterListTable(trafficSplitter []schema.APIResponse, envNames []string) table.Table {
 	rows := make([][]interface{}, 0, len(trafficSplitter))
 	for i, splitAPI := range trafficSplitter {
-		if splitAPI.Metadata == nil || splitAPI.Status == nil {
+		if splitAPI.Metadata == nil || splitAPI.NumTrafficSplitterTargets == nil {
 			continue
 		}
 		lastUpdated := time.Unix(splitAPI.Metadata.LastUpdated, 0)
 		rows = append(rows, []interface{}{
 			envNames[i],
 			splitAPI.Metadata.Name,
-			s.Int32(splitAPI.Status.Ready),
+			s.Int32(*splitAPI.NumTrafficSplitterTargets),
 			libtime.SinceStr(&lastUpdated),
 		})
 	}
diff --git a/pkg/operator/resources/asyncapi/status.go b/pkg/operator/resources/asyncapi/status.go
index 69977c731e..7035c31c01 100644
--- a/pkg/operator/resources/asyncapi/status.go
+++ b/pkg/operator/resources/asyncapi/status.go
@@ -73,7 +73,7 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 	case k8s.PodStatusCreating:
 		counts.Creating++
 	case k8s.PodStatusReady:
-		counts.Creating++
+		counts.Ready++
 	case k8s.PodStatusNotReady:
 		counts.NotReady++
 	case k8s.PodStatusErrImagePull:
diff --git a/pkg/operator/resources/realtimeapi/status.go b/pkg/operator/resources/realtimeapi/status.go
index 0a88b83d17..a90c42f387 100644
--- a/pkg/operator/resources/realtimeapi/status.go
+++ b/pkg/operator/resources/realtimeapi/status.go
@@ -72,7 +72,7 @@ func addPodToReplicaCounts(pod *kcore.Pod, deployment *kapps.Deployment, counts
 	case k8s.PodStatusCreating:
 		counts.Creating++
 	case k8s.PodStatusReady:
-		counts.Creating++
+		counts.Ready++
 	case k8s.PodStatusNotReady:
 		counts.NotReady++
 	case k8s.PodStatusErrImagePull:
diff --git a/pkg/operator/resources/trafficsplitter/api.go b/pkg/operator/resources/trafficsplitter/api.go
index 03c89ea4a1..4881f724e3 100644
--- a/pkg/operator/resources/trafficsplitter/api.go
+++ b/pkg/operator/resources/trafficsplitter/api.go
@@ -26,11 +26,11 @@ import (
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
 	"github.com/cortexlabs/cortex/pkg/lib/k8s"
 	"github.com/cortexlabs/cortex/pkg/lib/parallel"
+	"github.com/cortexlabs/cortex/pkg/lib/pointer"
 	"github.com/cortexlabs/cortex/pkg/operator/lib/routines"
 	"github.com/cortexlabs/cortex/pkg/operator/operator"
 	"github.com/cortexlabs/cortex/pkg/operator/schema"
 	"github.com/cortexlabs/cortex/pkg/types/spec"
-	"github.com/cortexlabs/cortex/pkg/types/status"
 	"github.com/cortexlabs/cortex/pkg/types/userconfig"
 	"github.com/cortexlabs/cortex/pkg/workloads"
 	istioclientnetworking "istio.io/client-go/pkg/apis/networking/v1beta1"
@@ -142,21 +142,19 @@ func GetAllAPIs(virtualServices []istioclientnetworking.VirtualService) ([]schem
 			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 
+		if metadata.Kind != userconfig.TrafficSplitterKind {
+			continue
+		}
+
 		targets, err := userconfig.TrafficSplitterTargetsFromAnnotations(&virtualServices[i])
 		if err != nil {
 			return nil, errors.Wrap(err, fmt.Sprintf("api %s", apiName))
 		}
 
-		if metadata.Kind == userconfig.TrafficSplitterKind {
-			trafficSplitters = append(trafficSplitters, schema.APIResponse{
-				Metadata: metadata,
-				Status: &status.Status{
-					Ready:     targets,
-					Requested: targets,
-					UpToDate:  targets,
-				},
-			})
-		}
+		trafficSplitters = append(trafficSplitters, schema.APIResponse{
+			Metadata:                  metadata,
+			NumTrafficSplitterTargets: pointer.Int32(targets),
+		})
 	}
 
 	return trafficSplitters, nil
diff --git a/pkg/operator/schema/schema.go b/pkg/operator/schema/schema.go
index 1127d3dbf8..1ee895cace 100644
--- a/pkg/operator/schema/schema.go
+++ b/pkg/operator/schema/schema.go
@@ -56,14 +56,15 @@ type DeployResult struct {
 }
 
 type APIResponse struct {
-	Spec             *spec.API               `json:"spec,omitempty" yaml:"spec,omitempty"`
-	Metadata         *spec.Metadata          `json:"metadata,omitempty"  yaml:"metadata,omitempty"`
-	Status           *status.Status          `json:"status,omitempty"  yaml:"status,omitempty"`
-	Endpoint         *string                 `json:"endpoint,omitempty"  yaml:"endpoint,omitempty"`
-	DashboardURL     *string                 `json:"dashboard_url,omitempty"  yaml:"dashboard_url,omitempty"`
-	BatchJobStatuses []status.BatchJobStatus `json:"batch_job_statuses,omitempty"  yaml:"batch_job_statuses,omitempty"`
-	TaskJobStatuses  []status.TaskJobStatus  `json:"task_job_statuses,omitempty"  yaml:"task_job_statuses,omitempty"`
-	APIVersions      []APIVersion            `json:"api_versions,omitempty"  yaml:"api_versions,omitempty"`
+	Spec                      *spec.API               `json:"spec,omitempty" yaml:"spec,omitempty"`
+	Metadata                  *spec.Metadata          `json:"metadata,omitempty"  yaml:"metadata,omitempty"`
+	Status                    *status.Status          `json:"status,omitempty"  yaml:"status,omitempty"`
+	NumTrafficSplitterTargets *int32                  `json:"num_traffic_splitter_targets,omitempty" yaml:"num_traffic_splitter_targets,omitempty"`
+	Endpoint                  *string                 `json:"endpoint,omitempty"  yaml:"endpoint,omitempty"`
+	DashboardURL              *string                 `json:"dashboard_url,omitempty"  yaml:"dashboard_url,omitempty"`
+	BatchJobStatuses          []status.BatchJobStatus `json:"batch_job_statuses,omitempty"  yaml:"batch_job_statuses,omitempty"`
+	TaskJobStatuses           []status.TaskJobStatus  `json:"task_job_statuses,omitempty"  yaml:"task_job_statuses,omitempty"`
+	APIVersions               []APIVersion            `json:"api_versions,omitempty"  yaml:"api_versions,omitempty"`
 }
 
 type LogResponse struct {
diff --git a/pkg/types/status/status.go b/pkg/types/status/status.go
index 15288fc8d1..e0de4943ef 100644
--- a/pkg/types/status/status.go
+++ b/pkg/types/status/status.go
@@ -61,7 +61,7 @@ type ReplicaCounts struct {
 	Ready          int32 `json:"ready" yaml:"ready"`
 	ReadyOutOfDate int32 `json:"ready_out_of_date" yaml:"ready_out_of_date"`
 	ErrImagePull   int32 `json:"err_image_pull" yaml:"err_image_pull"`
-	Terminating    int32 `json:"terminating" yaml:"terminating"`
+	Terminating    int32 `json:"terminating" yaml:"terminating"` // includes up-to-date and out-of-date pods
 	Failed         int32 `json:"failed" yaml:"failed"`
 	Killed         int32 `json:"killed" yaml:"killed"`
 	KilledOOM      int32 `json:"killed_oom" yaml:"killed_oom"`
diff --git a/pkg/types/userconfig/api.go b/pkg/types/userconfig/api.go
index a90a29e952..c524c599e0 100644
--- a/pkg/types/userconfig/api.go
+++ b/pkg/types/userconfig/api.go
@@ -156,7 +156,7 @@ func (api *API) ToK8sAnnotations() map[string]string {
 	annotations := map[string]string{}
 
 	if len(api.APIs) > 0 {
-		annotations[NumberOfTrafficSplitterTargets] = s.Int32(int32(len(api.APIs)))
+		annotations[NumTrafficSplitterTargetsAnnotationKey] = s.Int32(int32(len(api.APIs)))
 	}
 
 	if api.Pod != nil && api.Kind == RealtimeAPIKind {
@@ -250,7 +250,7 @@ func AutoscalingFromAnnotations(k8sObj kmeta.Object) (*Autoscaling, error) {
 }
 
 func TrafficSplitterTargetsFromAnnotations(k8sObj kmeta.Object) (int32, error) {
-	targets, err := k8s.ParseInt32Annotation(k8sObj, NumberOfTrafficSplitterTargets)
+	targets, err := k8s.ParseInt32Annotation(k8sObj, NumTrafficSplitterTargetsAnnotationKey)
 	if err != nil {
 		return 0, err
 	}
diff --git a/pkg/types/userconfig/config_key.go b/pkg/types/userconfig/config_key.go
index 263f764bd6..5cbe3b2dda 100644
--- a/pkg/types/userconfig/config_key.go
+++ b/pkg/types/userconfig/config_key.go
@@ -91,7 +91,7 @@ const (
 	EndpointAnnotationKey                     = "networking.cortex.dev/endpoint"
 	MaxConcurrencyAnnotationKey               = "pod.cortex.dev/max-concurrency"
 	MaxQueueLengthAnnotationKey               = "pod.cortex.dev/max-queue-length"
-	NumberOfTrafficSplitterTargets            = "apis.cortex.dev/traffic-splitter-targets"
+	NumTrafficSplitterTargetsAnnotationKey    = "apis.cortex.dev/traffic-splitter-targets"
 	MinReplicasAnnotationKey                  = "autoscaling.cortex.dev/min-replicas"
 	MaxReplicasAnnotationKey                  = "autoscaling.cortex.dev/max-replicas"
 	TargetInFlightAnnotationKey               = "autoscaling.cortex.dev/target-in-flight"