Skip to content

task(RHOAIENG-26192): Removed dep fields head_gpus + num_gpus #835

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions docs/sphinx/user-docs/cluster-configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,3 @@ deprecated.
- ``worker_memory_requests``
* - ``max_memory``
- ``worker_memory_limits``
* - ``head_gpus``
- ``head_extended_resource_requests``
* - ``num_gpus``
- ``worker_extended_resource_requests``
27 changes: 0 additions & 27 deletions src/codeflare_sdk/ray/cluster/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ class ClusterConfiguration:
The number of CPUs to allocate to the head node.
head_memory:
The amount of memory to allocate to the head node.
head_gpus:
The number of GPUs to allocate to the head node. (Deprecated, use head_extended_resource_requests)
head_extended_resource_requests:
A dictionary of extended resource requests for the head node. ex: {"nvidia.com/gpu": 1}
head_tolerations:
Expand All @@ -70,8 +68,6 @@ class ClusterConfiguration:
The minimum amount of memory to allocate to each worker.
max_memory:
The maximum amount of memory to allocate to each worker.
num_gpus:
The number of GPUs to allocate to each worker. (Deprecated, use worker_extended_resource_requests)
worker_tolerations:
List of tolerations for worker nodes.
appwrapper:
Expand Down Expand Up @@ -120,7 +116,6 @@ class ClusterConfiguration:
head_memory_requests: Union[int, str] = 8
head_memory_limits: Union[int, str] = 8
head_memory: Optional[Union[int, str]] = None # Deprecating
head_gpus: Optional[int] = None # Deprecating
head_extended_resource_requests: Dict[str, Union[str, int]] = field(
default_factory=dict
)
Expand All @@ -134,7 +129,6 @@ class ClusterConfiguration:
worker_memory_limits: Union[int, str] = 2
min_memory: Optional[Union[int, str]] = None # Deprecating
max_memory: Optional[Union[int, str]] = None # Deprecating
num_gpus: Optional[int] = None # Deprecating
worker_tolerations: Optional[List[V1Toleration]] = None
appwrapper: bool = False
envs: Dict[str, str] = field(default_factory=dict)
Expand Down Expand Up @@ -195,7 +189,6 @@ def __post_init__(self):
self._memory_to_string()
self._str_mem_no_unit_add_GB()
self._cpu_to_resource()
self._gpu_to_resource()
self._combine_extended_resource_mapping()
self._validate_extended_resource_requests(self.head_extended_resource_requests)
self._validate_extended_resource_requests(
Expand Down Expand Up @@ -227,26 +220,6 @@ def _validate_extended_resource_requests(self, extended_resources: Dict[str, int
f"extended resource '{k}' not found in extended_resource_mapping, available resources are {list(self.extended_resource_mapping.keys())}, to add more supported resources use extended_resource_mapping. i.e. extended_resource_mapping = {{'{k}': 'FOO_BAR'}}"
)

def _gpu_to_resource(self):
    """Migrate the deprecated GPU fields onto the extended-resource dicts.

    ``head_gpus`` and ``num_gpus`` are legacy knobs; each one that is set
    gets copied into the corresponding extended-resource-request dict under
    the ``nvidia.com/gpu`` key, after emitting a deprecation warning.

    Raises:
        ValueError: if the target dict already carries an explicit
            ``nvidia.com/gpu`` entry — the two sources would conflict, and
            silently overwriting either value could hide a misconfiguration.
    """
    gpu_key = "nvidia.com/gpu"
    # Deprecated head-node field -> head_extended_resource_requests.
    if self.head_gpus:
        # Warn first so the user sees the deprecation even when the
        # conflict check below raises.
        warnings.warn(
            f"head_gpus is being deprecated, replacing with head_extended_resource_requests['nvidia.com/gpu'] = {self.head_gpus}"
        )
        if gpu_key in self.head_extended_resource_requests:
            raise ValueError(
                "nvidia.com/gpu already exists in head_extended_resource_requests"
            )
        self.head_extended_resource_requests[gpu_key] = self.head_gpus
    # Deprecated worker field -> worker_extended_resource_requests.
    if self.num_gpus:
        warnings.warn(
            f"num_gpus is being deprecated, replacing with worker_extended_resource_requests['nvidia.com/gpu'] = {self.num_gpus}"
        )
        if gpu_key in self.worker_extended_resource_requests:
            raise ValueError(
                "nvidia.com/gpu already exists in worker_extended_resource_requests"
            )
        self.worker_extended_resource_requests[gpu_key] = self.num_gpus

def _str_mem_no_unit_add_GB(self):
if isinstance(self.head_memory, str) and self.head_memory.isdecimal():
self.head_memory = f"{self.head_memory}G"
Expand Down
4 changes: 0 additions & 4 deletions src/codeflare_sdk/ray/cluster/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,6 @@ def test_config_creation_wrong_type():
def test_cluster_config_deprecation_conversion(mocker):
config = ClusterConfiguration(
name="test",
num_gpus=2,
head_gpus=1,
head_cpus=3,
head_memory=16,
min_memory=3,
Expand All @@ -152,8 +150,6 @@ def test_cluster_config_deprecation_conversion(mocker):
assert config.head_cpu_limits == 3
assert config.head_memory_requests == "16G"
assert config.head_memory_limits == "16G"
assert config.worker_extended_resource_requests == {"nvidia.com/gpu": 2}
assert config.head_extended_resource_requests == {"nvidia.com/gpu": 1}
assert config.worker_memory_requests == "3G"
assert config.worker_memory_limits == "4G"
assert config.worker_cpu_requests == 1
Expand Down