Skip to content

added GPU environment support #11

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions sagemaker_studio_image_build/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,16 @@ def delete_zip_file(bucket, key):
s3.delete_object(Bucket=bucket, Key=key)


def build_image(repository, role, bucket, compute_type, extra_args, log=True):
def build_image(repository, role, bucket, compute_type, environment, extra_args, log=True):
bucket, key = upload_zip_file(repository, bucket, " ".join(extra_args))
try:
from sagemaker_studio_image_build.codebuild import TempCodeBuildProject

with TempCodeBuildProject(f"{bucket}/{key}", role, repository=repository, compute_type=compute_type) as p:
with TempCodeBuildProject(f"{bucket}/{key}",
role,
repository=repository,
compute_type=compute_type,
environment=environment) as p:
p.build(log)
finally:
delete_zip_file(bucket, key)
12 changes: 9 additions & 3 deletions sagemaker_studio_image_build/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def build_image(args, extra_args):
validate_args(args, extra_args)

builder.build_image(
args.repository, get_role(args), args.bucket, args.compute_type, extra_args, log=not args.no_logs
args.repository, get_role(args), args.bucket, args.compute_type, args.environment, extra_args, log=not args.no_logs
)


Expand All @@ -72,11 +72,17 @@ def main():
)
build_parser.add_argument(
"--compute-type",
help="The CodeBuild compute type (default: BUILD_GENERAL1_SMALL)",
help="The CodeBuild compute type (default: BUILD_GENERAL1_SMALL for CPU, BUILD_GENERAL1_LARGE for GPU)",
choices=["BUILD_GENERAL1_SMALL", "BUILD_GENERAL1_MEDIUM",
"BUILD_GENERAL1_LARGE", "BUILD_GENERAL1_2XLARGE"]
"BUILD_GENERAL1_LARGE", "BUILD_GENERAL1_2XLARGE"],
default="BUILD_GENERAL1_SMALL"
)
build_parser.add_argument(
"--environment",
help="The CodeBuild environment (default: LINUX_CONTAINER)",
choices=["LINUX_CONTAINER", "LINUX_GPU_CONTAINER"],
default="LINUX_CONTAINER"
)
build_parser.add_argument(
"--role",
help=f"The IAM role name for CodeBuild to use (default: the Studio execution role).",
Expand Down
7 changes: 5 additions & 2 deletions sagemaker_studio_image_build/codebuild.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,17 @@


class TempCodeBuildProject:
def __init__(self, s3_location, role, repository=None, compute_type=None):
def __init__(self, s3_location, role, repository=None, compute_type=None, environment=None):
self.s3_location = s3_location
self.role = role

self.session = boto3.session.Session()
self.domain_id, self.user_profile_name = self._get_studio_metadata()
self.repo_name = None
self.compute_type = compute_type or "BUILD_GENERAL1_SMALL"
self.environment = environment or "LINUX_CONTAINER"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This default is being specified in multiple places - in the argument parsing and here. From what I can tell, the environment param should always be non-null when we get to this point, so we can remove this or check.

if self.environment=="LINUX_GPU_CONTAINER" and self.compute_type=="BUILD_GENERAL1_SMALL":
self.compute_type="BUILD_GENERAL1_LARGE"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: Why is this required?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BUILD_GENERAL1_LARGE will automatically switch to a GPU instance if LINUX_GPU_CONTAINER is set. Small or medium will not.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. In that case, I recommend we make this explicit - Fail if the user is requested GPU and Small or Medium, because these are incompatible configuration options


if repository:
self.repo_name, self.tag = repository.split(":", maxsplit=1)
Expand Down Expand Up @@ -61,7 +64,7 @@ def __enter__(self):
"source": {"type": "S3", "location": self.s3_location},
"artifacts": {"type": "NO_ARTIFACTS"},
"environment": {
"type": "LINUX_CONTAINER",
"type": self.environment,
"image": "aws/codebuild/standard:4.0",
"computeType": self.compute_type,
"environmentVariables": [
Expand Down