Amazon SageMaker Service Update: Amazon SageMaker now supports Asynchronous Inference endpoints. Adds PlatformIdentifier field that allows Notebook Instance creation with different platform selections. Increases the maximum number of containers in multi-container endpoints to 15. Adds more instance types to InstanceType field.

AWS · AWS · commit 3f42a94d5b0b · 2021-08-18T18:12:51.000Z
diff --git a/.changes/next-release/feature-AmazonSageMakerService-51253f7.json b/.changes/next-release/feature-AmazonSageMakerService-51253f7.json
@@ -0,0 +1,6 @@
+{
+    "type": "feature",
+    "category": "Amazon SageMaker Service",
+    "contributor": "",
+    "description": "Amazon SageMaker now supports Asynchronous Inference endpoints. Adds PlatformIdentifier field that allows Notebook Instance creation with different platform selections. Increases the maximum number of containers in multi-container endpoints to 15. Adds more instance types to InstanceType field."
+}
diff --git a/services/sagemaker/src/main/resources/codegen-resources/service-2.json b/services/sagemaker/src/main/resources/codegen-resources/service-2.json
@@ -3602,6 +3602,64 @@
       },
       "documentation":"<p>Lists a summary of the properties of an association. An association is an entity that links other lineage or experiment entities. An example would be an association between a training job and a model.</p>"
     },
+    "AsyncInferenceClientConfig":{
+      "type":"structure",
+      "members":{
+        "MaxConcurrentInvocationsPerInstance":{
+          "shape":"MaxConcurrentInvocationsPerInstance",
+          "documentation":"<p>The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.</p>"
+        }
+      },
+      "documentation":"<p>Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.</p>"
+    },
+    "AsyncInferenceConfig":{
+      "type":"structure",
+      "required":["OutputConfig"],
+      "members":{
+        "ClientConfig":{
+          "shape":"AsyncInferenceClientConfig",
+          "documentation":"<p>Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.</p>"
+        },
+        "OutputConfig":{
+          "shape":"AsyncInferenceOutputConfig",
+          "documentation":"<p>Specifies the configuration for asynchronous inference invocation outputs.</p>"
+        }
+      },
+      "documentation":"<p>Specifies configuration for how an endpoint performs asynchronous inference.</p>"
+    },
+    "AsyncInferenceNotificationConfig":{
+      "type":"structure",
+      "members":{
+        "SuccessTopic":{
+          "shape":"SnsTopicArn",
+          "documentation":"<p>Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.</p>"
+        },
+        "ErrorTopic":{
+          "shape":"SnsTopicArn",
+          "documentation":"<p>Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.</p>"
+        }
+      },
+      "documentation":"<p>Specifies the configuration for notifications of inference results for asynchronous inference.</p>"
+    },
+    "AsyncInferenceOutputConfig":{
+      "type":"structure",
+      "required":["S3OutputPath"],
+      "members":{
+        "KmsKeyId":{
+          "shape":"KmsKeyId",
+          "documentation":"<p>The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.</p>"
+        },
+        "S3OutputPath":{
+          "shape":"DestinationS3Uri",
+          "documentation":"<p>The Amazon S3 location to upload inference responses to.</p>"
+        },
+        "NotificationConfig":{
+          "shape":"AsyncInferenceNotificationConfig",
+          "documentation":"<p>Specifies the configuration for notifications of inference results for asynchronous inference.</p>"
+        }
+      },
+      "documentation":"<p>Specifies the configuration for asynchronous inference invocation outputs.</p>"
+    },
     "AthenaCatalog":{
       "type":"string",
       "documentation":"<p>The name of the data catalog used in Athena query execution.</p>",
@@ -4891,7 +4949,7 @@
     "ContainerDefinitionList":{
       "type":"list",
       "member":{"shape":"ContainerDefinition"},
-      "max":5
+      "max":15
     },
     "ContainerEntrypoint":{
       "type":"list",
@@ -5664,6 +5722,10 @@
         "KmsKeyId":{
           "shape":"KmsKeyId",
           "documentation":"<p>The Amazon Resource Name (ARN) of a Amazon Web Services Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.</p> <p>The KmsKeyId can be any of the following formats: </p> <ul> <li> <p>Key ID: <code>1234abcd-12ab-34cd-56ef-1234567890ab</code> </p> </li> <li> <p>Key ARN: <code>arn:aws:kms:us-west-2:111122223333:key/1234abcd-12ab-34cd-56ef-1234567890ab</code> </p> </li> <li> <p>Alias name: <code>alias/ExampleAlias</code> </p> </li> <li> <p>Alias name ARN: <code>arn:aws:kms:us-west-2:111122223333:alias/ExampleAlias</code> </p> </li> </ul> <p>The KMS key policy must grant permission to the IAM role that you specify in your <code>CreateEndpoint</code>, <code>UpdateEndpoint</code> requests. For more information, refer to the Amazon Web Services Key Management Service section<a href=\"https://docs.aws.amazon.com/kms/latest/developerguide/key-policies.html\"> Using Key Policies in Amazon Web Services KMS </a> </p> <note> <p>Certain Nitro-based instances include local storage, dependent on the instance type. Local storage volumes are encrypted using a hardware module on the instance. You can't request a <code>KmsKeyId</code> when using an instance type with local storage. If any of the models that you specify in the <code>ProductionVariants</code> parameter use nitro-based instances with local storage, do not specify a value for the <code>KmsKeyId</code> parameter. If you specify a value for <code>KmsKeyId</code> when using any nitro-based instances with local storage, the call to <code>CreateEndpointConfig</code> fails.</p> <p>For a list of instance types that support local instance storage, see <a href=\"https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html#instance-store-volumes\">Instance Store Volumes</a>.</p> <p>For more information about local instance storage encryption, see <a href=\"https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ssd-instance-store.html\">SSD Instance Store Volumes</a>.</p> </note>"
+        },
+        "AsyncInferenceConfig":{
+          "shape":"AsyncInferenceConfig",
+          "documentation":"<p>Specifies configuration for how an endpoint performs asynchronous inference. This is a required field in order for your Endpoint to be invoked using <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpointAsync.html\"> <code>InvokeEndpointAsync</code> </a>.</p>"
         }
       }
     },
@@ -6445,6 +6507,10 @@
         "RootAccess":{
           "shape":"RootAccess",
           "documentation":"<p>Whether root access is enabled or disabled for users of the notebook instance. The default value is <code>Enabled</code>.</p> <note> <p>Lifecycle configurations need root access to be able to set up a notebook instance. Because of this, lifecycle configurations associated with a notebook instance always run with root access even if you disable root access for users.</p> </note>"
+        },
+        "PlatformIdentifier":{
+          "shape":"PlatformIdentifier",
+          "documentation":"<p>The platform identifier of the notebook instance runtime environment.</p>"
         }
       }
     },
@@ -8995,6 +9061,10 @@
         "CreationTime":{
           "shape":"Timestamp",
           "documentation":"<p>A timestamp that shows when the endpoint configuration was created.</p>"
+        },
+        "AsyncInferenceConfig":{
+          "shape":"AsyncInferenceConfig",
+          "documentation":"<p>Returns the asynchronous inference configuration that was specified for the endpoint configuration created using the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateEndpointConfig.html\"> <code>CreateEndpointConfig</code> </a> API.</p>"
         }
       }
     },
@@ -9055,6 +9125,10 @@
         "LastDeploymentConfig":{
           "shape":"DeploymentConfig",
           "documentation":"<p>The most recent deployment configuration for the endpoint.</p>"
+        },
+        "AsyncInferenceConfig":{
+          "shape":"AsyncInferenceConfig",
+          "documentation":"<p>Returns the asynchronous inference configuration that was specified for the endpoint configuration created using the <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateEndpointConfig.html\"> <code>CreateEndpointConfig</code> </a> API.</p>"
         }
       }
     },
@@ -10148,6 +10222,10 @@
         "RootAccess":{
           "shape":"RootAccess",
           "documentation":"<p>Whether root access is enabled or disabled for users of the notebook instance.</p> <note> <p>Lifecycle configurations need root access to be able to set up a notebook instance. Because of this, lifecycle configurations associated with a notebook instance always run with root access even if you disable root access for users.</p> </note>"
+        },
+        "PlatformIdentifier":{
+          "shape":"PlatformIdentifier",
+          "documentation":"<p>The platform identifier of the notebook instance runtime environment.</p>"
         }
       }
     },
@@ -13468,6 +13546,10 @@
       },
       "documentation":"<p>Defines how to perform inference generation after a training job is run.</p>"
     },
+    "InitialTaskCount":{
+      "type":"integer",
+      "min":1
+    },
     "InputConfig":{
       "type":"structure",
       "required":[
@@ -13534,6 +13616,14 @@
         "ml.m5.4xlarge",
         "ml.m5.12xlarge",
         "ml.m5.24xlarge",
+        "ml.m5d.large",
+        "ml.m5d.xlarge",
+        "ml.m5d.2xlarge",
+        "ml.m5d.4xlarge",
+        "ml.m5d.8xlarge",
+        "ml.m5d.12xlarge",
+        "ml.m5d.16xlarge",
+        "ml.m5d.24xlarge",
         "ml.c4.xlarge",
         "ml.c4.2xlarge",
         "ml.c4.4xlarge",
@@ -13553,7 +13643,22 @@
         "ml.p2.16xlarge",
         "ml.p3.2xlarge",
         "ml.p3.8xlarge",
-        "ml.p3.16xlarge"
+        "ml.p3.16xlarge",
+        "ml.p3dn.24xlarge",
+        "ml.g4dn.xlarge",
+        "ml.g4dn.2xlarge",
+        "ml.g4dn.4xlarge",
+        "ml.g4dn.8xlarge",
+        "ml.g4dn.12xlarge",
+        "ml.g4dn.16xlarge",
+        "ml.r5.large",
+        "ml.r5.xlarge",
+        "ml.r5.2xlarge",
+        "ml.r5.4xlarge",
+        "ml.r5.8xlarge",
+        "ml.r5.12xlarge",
+        "ml.r5.16xlarge",
+        "ml.r5.24xlarge"
       ]
     },
     "Integer":{"type":"integer"},
@@ -16830,6 +16935,11 @@
       "type":"integer",
       "min":1
     },
+    "MaxConcurrentInvocationsPerInstance":{
+      "type":"integer",
+      "max":1000,
+      "min":1
+    },
     "MaxConcurrentTaskCount":{
       "type":"integer",
       "max":1000,
@@ -17368,7 +17478,7 @@
     "ModelPackageContainerDefinitionList":{
       "type":"list",
       "member":{"shape":"ModelPackageContainerDefinition"},
-      "max":5,
+      "max":15,
       "min":1
     },
     "ModelPackageGroup":{
@@ -18611,10 +18721,10 @@
       "members":{
         "NotificationTopicArn":{
           "shape":"NotificationTopicArn",
-          "documentation":"<p>The ARN for the SNS topic to which notifications should be published.</p>"
+          "documentation":"<p>The ARN for the Amazon SNS topic to which notifications should be published.</p>"
         }
       },
-      "documentation":"<p>Configures SNS notifications of available or expiring work items for work teams.</p>"
+      "documentation":"<p>Configures Amazon SNS notifications of available or expiring work items for work teams.</p>"
     },
     "NotificationTopicArn":{
       "type":"string",
@@ -19372,6 +19482,11 @@
       "max":100,
       "min":0
     },
+    "PlatformIdentifier":{
+      "type":"string",
+      "max":15,
+      "pattern":"^(notebook-al1-v1|notebook-al2-v1)$"
+    },
     "PolicyString":{
       "type":"string",
       "max":20480,
@@ -19880,7 +19995,7 @@
           "documentation":"<p>The name of the model that you want to host. This is the name that you specified when creating the model.</p>"
         },
         "InitialInstanceCount":{
-          "shape":"TaskCount",
+          "shape":"InitialTaskCount",
           "documentation":"<p>Number of instances to launch initially.</p>"
         },
         "InstanceType":{
@@ -21792,7 +21907,7 @@
     },
     "TaskCount":{
       "type":"integer",
-      "min":1
+      "min":0
     },
     "TaskDescription":{
       "type":"string",