13 | 13 | # illustrate how to use the new features in torchvision in order to train
14 | 14 | # an object detection and instance segmentation model on a custom dataset.
15 | 15 | #
| 16 | +#
| 17 | +# .. note::
| 18 | +#
| 19 | +#    This tutorial works only with torchvision version >= 0.16 or a nightly build.
| 20 | +#
| 21 | +#
16 | 22 | # Defining the Dataset
17 | 23 | # --------------------
18 | 24 | #
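Since the new note pins a minimum version, a runtime guard makes the failure mode explicit. This is a hypothetical addition, not part of the diff; it only assumes ``torchvision.__version__`` starts with a numeric ``major.minor`` prefix:

```python
from torchvision import __version__ as tv_version

# Compare only the numeric major.minor prefix; this also handles
# nightly strings such as "0.17.0.dev20230901".
major, minor = (int(p) for p in tv_version.split(".")[:2])
if (major, minor) < (0, 16):
    raise RuntimeError(f"This tutorial needs torchvision >= 0.16, found {tv_version}")
```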
106 | 112 | # :class:`torchvision.tv_tensors.BoundingBoxes` and masks into :class:`torchvision.tv_tensors.Mask`.
107 | 113 | # As ``torchvision.TVTensor`` classes are :class:`torch.Tensor` subclasses, wrapped objects are also tensors and inherit the plain
108 | 114 | # :class:`torch.Tensor` API. For more information about torchvision ``tv_tensors`` see
109 | | -# `this documentation <https://pytorch.org/vision/main/auto_examples/v2_transforms/plot_transforms_v2.html#sphx-glr-auto-examples-v2-transforms-plot-transforms-v2-py>`_.
| 115 | +# `this documentation <https://pytorch.org/vision/main/auto_examples/transforms/plot_transforms_getting_started.html#what-are-tvtensors>`_.
110 | 116 |
111 | 117 | import os
112 | 118 | import torch
113 | 119 |
114 | 120 | from torchvision.io import read_image
115 | 121 | from torchvision.ops.boxes import masks_to_boxes
116 | | -from torchvision import tv_tensors as dp
| 122 | +from torchvision import tv_tensors
117 | 123 | from torchvision.transforms.v2 import functional as F
118 | 124 |
119 | 125 |
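For context on the rename from ``dp`` to ``tv_tensors``: the wrappers stay ordinary tensors. A minimal sketch with dummy data (illustrative only, not from the tutorial) of what the renamed import provides:

```python
import torch
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F

img = tv_tensors.Image(torch.rand(3, 224, 224))  # an Image is a Tensor subclass
boxes = tv_tensors.BoundingBoxes(
    torch.tensor([[10.0, 20.0, 110.0, 120.0]]),  # one box
    format="XYXY",                               # (x1, y1, x2, y2) corner coordinates
    canvas_size=F.get_size(img),                 # (H, W) of the image the box lives on
)

print(isinstance(img, torch.Tensor))    # True: the plain Tensor API is inherited
print(boxes.format, boxes.canvas_size)  # the metadata carried by the wrapper
```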
@@ -154,11 +160,11 @@ def __getitem__(self, idx):
154 | 160 |         iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
155 | 161 |
156 | 162 |         # Wrap sample and targets into torchvision tv_tensors:
157 | | -        img = dp.Image(img)
| 163 | +        img = tv_tensors.Image(img)
158 | 164 |
159 | 165 |         target = {}
160 | | -        target["boxes"] = dp.BoundingBoxes(boxes, format="XYXY", canvas_size=F.get_size(img))
161 | | -        target["masks"] = dp.Mask(masks)
| 166 | +        target["boxes"] = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=F.get_size(img))
| 167 | +        target["masks"] = tv_tensors.Mask(masks)
162 | 168 |         target["labels"] = labels
163 | 169 |         target["image_id"] = image_id
164 | 170 |         target["area"] = area
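Because ``__getitem__`` now returns tv_tensors, the v2 transforms can update the image, boxes, and masks together. A quick sanity check, assuming the tutorial's ``PennFudanDataset`` class and its ``data/PennFudanPed`` layout:

```python
dataset = PennFudanDataset("data/PennFudanPed", get_transform(train=True))
img, target = dataset[0]

print(type(img))               # <class 'torchvision.tv_tensors.Image'>
print(target["boxes"].format)  # BoundingBoxFormat.XYXY
print(target["masks"].shape)   # (num_objs, H, W): one binary mask per instance
```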
@@ -362,11 +368,13 @@ def get_transform(train):
362 | 368 | images = list(image for image in images)
363 | 369 | targets = [{k: v for k, v in t.items()} for t in targets]
364 | 370 | output = model(images, targets)  # Returns losses and detections
| 371 | +print(output)
| 372 | +
365 | 373 | # For inference
366 | 374 | model.eval()
367 | 375 | x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
368 | 376 | predictions = model(x)  # Returns predictions
369 | | -print(predictions[0]["boxes"].shape, predictions[0]["labels"], predictions[0]["scores"])
| 377 | +print(predictions[0])
370 | 378 |
371 | 379 |
372 | 380 | ######################################################################
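The two new ``print`` calls surface the models' two output modes: a loss dict in training mode and a list of per-image prediction dicts in eval mode. A sketch of how both are typically consumed, assuming an ``optimizer`` built from ``model.parameters()``:

```python
# Training mode: a dict of losses (for Mask R-CNN: loss_classifier,
# loss_box_reg, loss_mask, loss_objectness, loss_rpn_box_reg).
model.train()
loss_dict = model(images, targets)
loss = sum(v for v in loss_dict.values())
optimizer.zero_grad()
loss.backward()
optimizer.step()

# Eval mode: one dict per input image, detections sorted by descending score.
model.eval()
with torch.no_grad():
    preds = model(x)
print(preds[0]["boxes"].shape)   # (N, 4) boxes in XYXY format
print(preds[0]["labels"].shape)  # (N,) predicted class ids
print(preds[0]["scores"].shape)  # (N,) confidence scores
```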
@@ -453,13 +461,35 @@ def get_transform(train):
453 | 461 | #
454 | 462 | # .. image:: ../../_static/img/tv_tutorial/tv_image05.png
455 | 463 | #
456 | | -# The trained model predicts 9
457 | | -# instances of person in this image, let’s see a couple of them:
458 | | -#
459 | | -# .. image:: ../../_static/img/tv_tutorial/tv_image06.png
460 | | -#
461 | | -# .. image:: ../../_static/img/tv_tutorial/tv_image07.png
462 | | -#
| 464 | +import matplotlib.pyplot as plt
| 465 | +
| 466 | +from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
| 467 | +
| 468 | +
| 469 | +image = read_image("../_static/img/tv_tutorial/tv_image05.png")
| 470 | +eval_transform = get_transform(train=False)
| 471 | +
| 472 | +model.eval()
| 473 | +with torch.no_grad():
| 474 | +    x = eval_transform(image)
| 475 | +    # convert RGBA -> RGB and move to device
| 476 | +    x = x[:3, ...].to(device)
| 477 | +    predictions = model([x, ])
| 478 | +    pred = predictions[0]
| 479 | +
| 480 | +image = (255.0 * (image - image.min()) / (image.max() - image.min())).to(torch.uint8)
| 481 | +image = image[:3, ...]
| 482 | +pred_labels = [f"pedestrian: {score:.3f}" for label, score in zip(pred["labels"], pred["scores"])]
| 483 | +pred_boxes = pred["boxes"].long()
| 484 | +output_image = draw_bounding_boxes(image, pred_boxes, pred_labels, colors="red")
| 485 | +
| 486 | +masks = (pred["masks"] > 0.7).squeeze(1)
| 487 | +output_image = draw_segmentation_masks(output_image, masks, alpha=0.5, colors="blue")
| 488 | +
| 489 | +plt.figure()
| 490 | +plt.imshow(output_image.permute(1, 2, 0))
| 491 | +
| 492 | +######################################################################
463 | 493 | # The results look good!
464 | 494 | #
465 | 495 | # Wrapping up
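One optional refinement to the new visualization block above, not part of this diff: filter out low-confidence detections before drawing so the overlay is not cluttered by near-duplicate boxes. The 0.8 cutoff is an arbitrary example value:

```python
keep = pred["scores"] > 0.8  # boolean mask over the N detections
output_image = draw_bounding_boxes(
    image,
    pred["boxes"][keep].long(),
    [f"pedestrian: {s:.3f}" for s in pred["scores"][keep]],
    colors="red",
)
masks = (pred["masks"][keep] > 0.7).squeeze(1)  # (kept, H, W) binary masks
output_image = draw_segmentation_masks(output_image, masks, alpha=0.5, colors="blue")
```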