diff --git a/README.md b/README.md index fedd64c46..bd6fb2868 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ We use pre-commit to make sure the code is consistently formatted. To make sure - To run the unit tests, run `pytest -v tests/unit_test.py` - Any new test functions/scripts can be added into the `tests` folder - NOTE: Functional tests coming soon, will live in `tests/func_test.py` +- To test the CLI, run `codeflare` followed by any command. To see the list of available commands, simply run `codeflare` #### Code Coverage diff --git a/pyproject.toml b/pyproject.toml index e4224fd99..2c03a102a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ kubernetes = ">= 25.3.0, < 27" codeflare-torchx = "0.6.0.dev0" cryptography = "40.0.2" executing = "1.2.0" +click = "8.0.4" [tool.poetry.group.docs] optional = true @@ -39,3 +40,10 @@ pdoc3 = "0.10.0" pytest = "7.4.0" coverage = "7.2.7" pytest-mock = "3.11.1" + +[tool.poetry.scripts] +codeflare = "codeflare_sdk.cli.codeflare_cli:cli" + +[build-system] +requires = ["poetry_core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/requirements.txt b/requirements.txt index 2a48812aa..c5d04bdc7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ codeflare-torchx==0.6.0.dev0 pydantic<2 # 2.0+ broke ray[default] see detail: https://github.com/ray-project/ray/pull/37000 cryptography==40.0.2 executing==1.2.0 +click==8.0.4 diff --git a/src/codeflare_sdk.egg-info/SOURCES.txt b/src/codeflare_sdk.egg-info/SOURCES.txt index 73e4e1243..b3c481f89 100644 --- a/src/codeflare_sdk.egg-info/SOURCES.txt +++ b/src/codeflare_sdk.egg-info/SOURCES.txt @@ -16,3 +16,7 @@ src/codeflare_sdk/job/jobs.py src/codeflare_sdk/utils/__init__.py src/codeflare_sdk/utils/generate_yaml.py src/codeflare_sdk/utils/pretty_print.py +src/codeflare_sdk/cli/__init__.py +src/codeflare_sdk/cli/codeflare_cli.py +src/codeflare_sdk/cli/commands/define.py +src/codeflare_sdk/cli/cli_utils.py diff --git a/src/codeflare_sdk/cli/__init__.py
b/src/codeflare_sdk/cli/__init__.py new file mode 100644 index 000000000..e69de29bb
diff --git a/src/codeflare_sdk/cli/cli_utils.py b/src/codeflare_sdk/cli/cli_utils.py
new file mode 100644
index 000000000..7152cc397
--- /dev/null
+++ b/src/codeflare_sdk/cli/cli_utils.py
@@ -0,0 +1,21 @@
+import ast
+import click
+
+
+class PythonLiteralOption(click.Option):
+    """Click option whose raw text is parsed as a Python literal (list, dict, ...)."""
+
+    def type_cast_value(self, ctx, value):
+        """Convert the raw option text with ``ast.literal_eval``.
+
+        Returns ``None`` when no value was supplied (``value`` is falsy).
+        Raises ``click.BadParameter`` when the text is not a valid literal.
+        """
+        if not value:
+            return None
+        # Catch only what literal_eval is documented to raise, so that
+        # KeyboardInterrupt and SystemExit still propagate untouched.
+        try:
+            return ast.literal_eval(value)
+        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as err:
+            raise click.BadParameter(value) from err
diff --git a/src/codeflare_sdk/cli/codeflare_cli.py b/src/codeflare_sdk/cli/codeflare_cli.py
new file mode 100644
index 000000000..3083a40d0
--- /dev/null
+++ b/src/codeflare_sdk/cli/codeflare_cli.py
@@ -0,0 +1,51 @@
+import click
+import sys
+import os
+
+cmd_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "commands"))
+
+
+class CodeflareCLI(click.MultiCommand):
+    """Multi-command that discovers its subcommands from files in ``cmd_folder``."""
+
+    def list_commands(self, ctx):
+        """Return the sorted names of the ``*.py`` command files, without extension."""
+        return sorted(
+            filename[:-3]
+            for filename in os.listdir(cmd_folder)
+            if filename.endswith(".py") and filename != "__init__.py"
+        )
+
+    def get_command(self, ctx, name):
+        """Execute ``<name>.py`` from ``cmd_folder`` and return its ``cli`` object.
+
+        Returns ``None`` when ``name`` is not a known command file or the file
+        does not define ``cli``.
+        """
+        # Refuse names that list_commands did not discover, so that a name
+        # containing path separators (e.g. "../x") cannot execute a file
+        # outside cmd_folder.
+        if name not in self.list_commands(ctx):
+            return None
+        ns = {}
+        fn = os.path.join(cmd_folder, name + ".py")
+        try:
+            with open(fn) as f:
+                code = compile(f.read(), fn, "exec")
+        except FileNotFoundError:
+            # File vanished between listing and opening.
+            return None
+        # The compiled object is a module body, so exec (not eval) is the
+        # matching builtin.
+        exec(code, ns, ns)
+        return ns.get("cli")
+
+
+@click.command(cls=CodeflareCLI)
+@click.pass_context
+def cli(ctx):
+    pass
+
+
+if __name__ == "__main__":
+    cli()
diff --git a/src/codeflare_sdk/cli/commands/define.py b/src/codeflare_sdk/cli/commands/define.py
new file mode 100644
index 000000000..16b6fa480
--- /dev/null
+++ b/src/codeflare_sdk/cli/commands/define.py
@@ -0,0 +1,36 @@
+import click
+
+from codeflare_sdk.cluster.cluster import Cluster
+from codeflare_sdk.cluster.config import ClusterConfiguration
+from codeflare_sdk.cli.cli_utils import PythonLiteralOption
+
+
+@click.group()
+def cli():
+    """Define a resource with parameter specifications"""
+    pass
+
+
+@cli.command()
+@click.option("--name", type=str, required=True)
+@click.option("--namespace", "-n", type=str)
+@click.option("--head_info", cls=PythonLiteralOption, type=list)
+@click.option("--machine_types", cls=PythonLiteralOption, type=list)
+@click.option("--min_cpus", type=int)
+@click.option("--max_cpus", type=int)
+@click.option("--min_worker", type=int)
+@click.option("--max_worker", type=int)
+@click.option("--min_memory", type=int)
+@click.option("--max_memory", type=int)
+@click.option("--gpu", type=int)
+@click.option("--template", type=str)
+@click.option("--instascale", type=bool)
+@click.option("--envs", cls=PythonLiteralOption, type=dict)
+@click.option("--image", type=str)
+@click.option("--local_interactive", type=bool)
+@click.option("--image_pull_secrets", cls=PythonLiteralOption, type=list)
+def raycluster(**kwargs):
+    """Define a RayCluster with parameter specifications"""
+    supplied = {opt: val for opt, val in kwargs.items() if val is not None}
+    cluster_config = ClusterConfiguration(**supplied)
+    Cluster(cluster_config)  # Creates yaml file
diff --git a/tests/cli-test-case.yaml b/tests/cli-test-case.yaml new file mode 100644 index 000000000..0788996a7 --- /dev/null +++ b/tests/cli-test-case.yaml @@ -0,0 +1,195 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + labels: + orderedinstance: cpu.small_gpu.large + name: cli-test-cluster + namespace: ns +spec: + priority: 9 + resources: + GenericItems: + - custompodresources: + - limits: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + replicas: 1 + requests: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + - limits: + cpu: 4 + memory: 6G + nvidia.com/gpu: 7 + replicas: 2 + requests: + cpu: 3 + memory: 5G + nvidia.com/gpu: 7 + generictemplate: + apiVersion: ray.io/v1alpha1 + kind: RayCluster + metadata: + labels: + appwrapper.mcad.ibm.com: cli-test-cluster + controller-tools.k8s.io: '1.0' + name: cli-test-cluster + namespace: ns + spec: + autoscalerOptions: + idleTimeoutSeconds: 60 + imagePullPolicy:
Always + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + upscalingMode: Default + enableInTreeAutoscaling: false + headGroupSpec: + rayStartParams: + block: 'true' + dashboard-host: 0.0.0.0 + num-gpus: '0' + serviceType: ClusterIP + template: + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: cli-test-cluster + operator: In + values: + - cli-test-cluster + containers: + - env: + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: /home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: ray-head + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + resources: + limits: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + requests: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + imagePullSecrets: + - name: cli-test-pull-secret + rayVersion: 2.1.0 + workerGroupSpecs: + - groupName: small-group-cli-test-cluster + maxReplicas: 2 + minReplicas: 2 + rayStartParams: + block: 'true' + num-gpus: '7' + replicas: 2 + template: + metadata: + annotations: + key: value + labels: + key: value + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: cli-test-cluster + operator: In + values: + - cli-test-cluster + containers: + - env: + - name: MY_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: RAY_USE_TLS + value: '0' + - name: RAY_TLS_SERVER_CERT + value: /home/ray/workspace/tls/server.crt + - name: RAY_TLS_SERVER_KEY + value: 
/home/ray/workspace/tls/server.key + - name: RAY_TLS_CA_CERT + value: /home/ray/workspace/tls/ca.crt + image: quay.io/project-codeflare/ray:2.5.0-py38-cu116 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - ray stop + name: machine-learning + resources: + limits: + cpu: 4 + memory: 6G + nvidia.com/gpu: 7 + requests: + cpu: 3 + memory: 5G + nvidia.com/gpu: 7 + imagePullSecrets: + - name: cli-test-pull-secret + initContainers: + - command: + - sh + - -c + - until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; + do echo waiting for myservice; sleep 2; done + image: busybox:1.28 + name: init-myservice + replicas: 1 + - generictemplate: + apiVersion: route.openshift.io/v1 + kind: Route + metadata: + labels: + odh-ray-cluster-service: cli-test-cluster-head-svc + name: ray-dashboard-cli-test-cluster + namespace: ns + spec: + port: + targetPort: dashboard + to: + kind: Service + name: cli-test-cluster-head-svc + replica: 1 + Items: [] diff --git a/tests/unit_test.py b/tests/unit_test.py index 21c1adf24..9eddd3cfc 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -17,6 +17,7 @@ import filecmp import os import re +from click.testing import CliRunner parent = Path(__file__).resolve().parents[1] sys.path.append(str(parent) + "/src") @@ -63,6 +64,7 @@ generate_tls_cert, export_env, ) +from codeflare_sdk.cli.codeflare_cli import cli import openshift from openshift.selector import Selector @@ -75,6 +77,37 @@ import yaml
+# CLI testing
+def test_cli_working():
+    # Invoking the root command without arguments must exit with status 0.
+    outcome = CliRunner().invoke(cli)
+    assert outcome.exit_code == 0
+
+
+def test_cluster_definition_cli():
+    # Define a cluster through the CLI and compare the YAML it writes with the stored fixture.
+    define_cluster_command = """
+        define raycluster
+        --name=cli-test-cluster
+        --namespace=ns
+        --min_worker=1
+        --max_worker=2
+        --min_cpus=3
+        --max_cpus=4
+        --min_memory=5
+        --max_memory=6
+        --gpu=7
+        --instascale=True
+        --machine_types='["cpu.small", "gpu.large"]'
+        --image_pull_secrets='["cli-test-pull-secret"]'
+        """
+    outcome = CliRunner().invoke(cli, define_cluster_command)
+    assert outcome.output == "Written to: cli-test-cluster.yaml\n"
+    generated = "cli-test-cluster.yaml"
+    expected = f"{parent}/tests/cli-test-case.yaml"
+    assert filecmp.cmp(generated, expected, shallow=True)
+
+
# For mocking openshift client results fake_res = openshift.Result("fake") @@ -2221,3 +2254,4 @@ def test_cleanup(): os.remove("unit-test-default-cluster.yaml") os.remove("test.yaml") os.remove("raytest2.yaml") + os.remove("cli-test-cluster.yaml")