Skip to content

minor changes for printing things #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
dist/
.python-version
25 changes: 18 additions & 7 deletions src/codeflare_sdk/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@ def down(self, namespace='default'):
oc.invoke("delete", ["AppWrapper", self.app_wrapper_name])

def status(self, print_to_console=True):
cluster = _ray_cluster_status(self.config.name)
cluster = _ray_cluster_status(self.config.name)
if cluster:
#overriding the number of gpus with requested
cluster.worker_gpu = self.config.gpu
if print_to_console:
pretty_print.print_clusters([cluster])
return cluster.status
Expand Down Expand Up @@ -92,6 +94,8 @@ def is_ready(self, print_to_console=True):
status = CodeFlareClusterStatus.FAILED

if print_to_console:
#overriding the number of gpus with requested
cluster.worker_gpu = self.config.gpu
pretty_print.print_clusters([cluster])
return status, ready

Expand Down Expand Up @@ -123,11 +127,16 @@ def _app_wrapper_status(name, namespace='default') -> Optional[AppWrapper]:

def _ray_cluster_status(name, namespace='default') -> Optional[RayCluster]:
# FIXME should we check the appwrapper first
with oc.project(namespace), oc.timeout(10*60):
cluster = oc.selector(f'rayclusters/{name}').object()

if cluster:
return _map_to_ray_cluster(cluster)
cluster = None
try:
with oc.project(namespace), oc.timeout(10*60):
cluster = oc.selector(f'rayclusters/{name}').object()

if cluster:
return _map_to_ray_cluster(cluster)
except:
pass
return cluster


def _get_ray_clusters(namespace='default') -> List[RayCluster]:
Expand Down Expand Up @@ -161,14 +170,16 @@ def _map_to_ray_cluster(cluster) -> RayCluster:
cluster_model = cluster.model
return RayCluster(
name=cluster.name(), status=RayClusterStatus(cluster_model.status.state.lower()),
#for now we are not using autoscaling so same replicas is fine
min_workers=cluster_model.spec.workerGroupSpecs[0].replicas,
max_workers=cluster_model.spec.workerGroupSpecs[0].replicas,
worker_mem_max=cluster_model.spec.workerGroupSpecs[
0].template.spec.containers[0].resources.limits.memory,
worker_mem_min=cluster_model.spec.workerGroupSpecs[
0].template.spec.containers[0].resources.requests.memory,
worker_cpu=cluster_model.spec.workerGroupSpecs[0].template.spec.containers[0].resources.limits.cpu,
worker_gpu=0)
worker_gpu=0, #hard to detect currently how many gpus, can override it with what the user asked for
namespace=cluster.namespace())


def _map_to_app_wrapper(cluster) -> AppWrapper:
Expand Down
1 change: 1 addition & 0 deletions src/codeflare_sdk/cluster/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class RayCluster:
worker_mem_max: str
worker_cpu: int
worker_gpu: int
namespace: str

@dataclass
class AppWrapper:
Expand Down
10 changes: 6 additions & 4 deletions src/codeflare_sdk/utils/pretty_print.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ def print_app_wrappers_status(app_wrappers:List[AppWrapper]):
name = app_wrapper.name
status = app_wrapper.status.value

table = Table(box=None, title="[bold] :rocket: List of CodeFlare clusters in queue:rocket:")
table = Table(box=box.ASCII_DOUBLE_HEAD, title="[bold] :rocket: List of CodeFlare clusters in queue:rocket:")
table.add_row("") #empty row for spacing
table.add_column("Name", style="cyan", no_wrap=True)
table.add_column("Status", style="magenta")
table.add_row("[bold underline]"+name,status)
table.add_row(name,status)
table.add_row("") #empty row for spacing
console.print(Panel.fit(table))

Expand All @@ -47,7 +48,7 @@ def print_clusters(clusters:List[RayCluster], verbose=True):
maxcount = str(cluster.max_workers)
memory = cluster.worker_mem_min+"~"+cluster.worker_mem_max
cpu = str(cluster.worker_cpu)
gpu = str(cluster.worker_mem_max)
gpu = str(cluster.worker_gpu)
#owned = bool(cluster["userOwned"])
owned = True

Expand All @@ -59,7 +60,8 @@ def print_clusters(clusters:List[RayCluster], verbose=True):
table0.add_row("")
table0.add_row("[bold underline]"+name,status)
table0.add_row()
table0.add_row(f"[bold]URI:[/bold] ray://{name}-head-svc:1001") #format that is used to generate the name of the service
#fixme hardcoded to default for now
table0.add_row(f"[bold]URI:[/bold] ray://{cluster.name}-head-svc.{cluster.namespace}.svc:10001") #format that is used to generate the name of the service
table0.add_row()
table0.add_row(f"[link={dashboard} blue underline]Dashboard:link:[/link]")
table0.add_row("") #empty row for spacing
Expand Down