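Fix torchao API names listed in the GPU quantization tutorial (use ``int8_weight_only()`` and ``int4_weight_only()``)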

HDCharles · HDCharles · commit eded873a8f3b · 2024-09-30T21:26:17.000-07:00
Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
diff --git a/prototype_source/gpu_quantization_torchao_tutorial.py b/prototype_source/gpu_quantization_torchao_tutorial.py
@@ -157,8 +157,8 @@ def get_sam_model(only_one_block=False, batchsize=1):
 # weight data, rather than doing less computation. The torchao APIs:
 #
 # ``int8_dynamic_activation_int8_weight()``,
-# ``int8_dynamic_activation_int8_semi_sparse_weight`` or
-# ``int8_dynamic_activation_int4_weight``
+# ``int8_weight_only()`` or
+# ``int4_weight_only()``
 #
 # can be used to easily apply the desired quantization technique and then
 # once the model is compiled with ``torch.compile`` with ``max-autotune``, quantization is

Original file line number	Diff line number	Diff line change
`@@ -157,8 +157,8 @@ def get_sam_model(only_one_block=False, batchsize=1):`
`157`	`157`	`# weight data, rather than doing less computation. The torchao APIs:`
`158`	`158`	`#`
`159`	`159`	# ``int8_dynamic_activation_int8_weight()``,
`160`		-# ``int8_dynamic_activation_int8_semi_sparse_weight`` or
`161`		-# ``int8_dynamic_activation_int4_weight``
	`160`	+# ``int8_weight_only()`` or
	`161`	+# ``int4_weight_only()``
`162`	`162`	`#`
`163`	`163`	`# can be used to easily apply the desired quantization technique and then`
`164`	`164`	# once the model is compiled with ``torch.compile`` with ``max-autotune``, quantization is