
Commit a02c120

[skip-ci] More updates for the tutorial

1 parent 1976daa commit a02c120

File tree

3 files changed: +46 -15 lines changed

  .jenkins/validate_tutorials_built.py
  Makefile
  intermediate_source/torchvision_tutorial.py

.jenkins/validate_tutorials_built.py

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@
     "recipes_source/recipes/Captum_Recipe",
     "intermediate_source/flask_rest_api_tutorial",
     "intermediate_source/text_to_speech_with_torchaudio",
-    "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
+    "intermediate_source/tensorboard_profiler_tutorial" # reenable after 2.0 release.
 ]
 
 def tutorial_source_dirs() -> List[Path]:
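For context, the list touched here is the script's skip list of tutorials excluded from the build check. A minimal sketch of how such a list is typically consumed (the helper below is illustrative, not the actual code in validate_tutorials_built.py):

from pathlib import Path
from typing import List

# Skip list mirroring the entries shown in the diff above
NOT_RUN: List[str] = [
    "intermediate_source/tensorboard_profiler_tutorial",  # reenable after 2.0 release.
]

def runnable_tutorials(sources: List[Path]) -> List[Path]:
    # Drop any tutorial whose extension-less path appears on the skip list
    skipped = set(NOT_RUN)
    return [p for p in sources if str(p.with_suffix("")) not in skipped]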

Makefile

Lines changed: 2 additions & 1 deletion
@@ -111,7 +111,6 @@ download:
 	unzip -o $(DATADIR)/deu-eng.zip -d beginner_source/data/
 
 	# Download PennFudanPed dataset for intermediate_source/torchvision_tutorial.py
-	# and detection reference scripts: engine.py, utils.py ...
 	wget https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip -P $(DATADIR)
 	unzip -o $(DATADIR)/PennFudanPed.zip -d intermediate_source/data/
 

@@ -131,3 +130,5 @@ html-noplot:
 clean-cache:
 	make clean
 	rm -rf advanced beginner intermediate recipes
+	# remove additional python files downloaded for torchvision_tutorial.py
+	rm -rf intermediate_source/engine.py intermediate_source/utils.py intermediate_source/transforms.py intermediate_source/coco_eval.py intermediate_source/coco_utils.py
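The same cleanup in Python, for anyone not going through make (a sketch, not part of the commit; the file list is copied from the rule above):

from pathlib import Path

# Remove the detection helper scripts downloaded next to the tutorial,
# mirroring the new lines in `clean-cache`
for name in ("engine.py", "utils.py", "transforms.py", "coco_eval.py", "coco_utils.py"):
    Path("intermediate_source", name).unlink(missing_ok=True)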

intermediate_source/torchvision_tutorial.py

Lines changed: 43 additions & 13 deletions
@@ -13,6 +13,12 @@
 # illustrate how to use the new features in torchvision in order to train
 # an object detection and instance segmentation model on a custom dataset.
 #
+#
+# .. note ::
+#
+#    This tutorial works only with torchvision version >=0.16 or nightly.
+#
+#
 # Defining the Dataset
 # --------------------
 #
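A quick way to verify the requirement stated in the new note (an illustrative snippet, not part of the commit):

import torchvision

# tv_tensors and the transforms.v2 API used by this tutorial ship with torchvision >= 0.16
print(torchvision.__version__)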
@@ -106,14 +112,14 @@
 # :class:`torchvision.tv_tensors.BoundingBoxes` and masks into :class:`torchvision.tv_tensors.Mask`.
 # As ``torchvision.TVTensor`` are :class:`torch.Tensor` subclasses, wrapped objects are also tensors and inherit the plain
 # :class:`torch.Tensor` API. For more information about torchvision ``tv_tensors`` see
-# `this documentation <https://pytorch.org/vision/main/auto_examples/v2_transforms/plot_transforms_v2.html#sphx-glr-auto-examples-v2-transforms-plot-transforms-v2-py>`_.
+# `this documentation <https://pytorch.org/vision/main/auto_examples/transforms/plot_transforms_getting_started.html#what-are-tvtensors>`_.
 
 import os
 import torch
 
 from torchvision.io import read_image
 from torchvision.ops.boxes import masks_to_boxes
-from torchvision import tv_tensors as dp
+from torchvision import tv_tensors
 from torchvision.transforms.v2 import functional as F
 
 
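To make the "TVTensors are torch.Tensor subclasses" point concrete, a minimal sketch (assumes torchvision >= 0.16 per the note above; not part of the commit):

import torch
from torchvision import tv_tensors

img = tv_tensors.Image(torch.rand(3, 64, 64))
print(isinstance(img, torch.Tensor))  # True: TVTensors subclass torch.Tensor
print(img.shape, img.dtype)           # the plain Tensor API still applies

boxes = tv_tensors.BoundingBoxes(
    torch.tensor([[10.0, 10.0, 30.0, 40.0]]),
    format="XYXY",         # same format string used in the dataset below
    canvas_size=(64, 64),  # (height, width) of the underlying image
)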

@@ -154,11 +160,11 @@ def __getitem__(self, idx):
         iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
 
         # Wrap sample and targets into torchvision tv_tensors:
-        img = dp.Image(img)
+        img = tv_tensors.Image(img)
 
         target = {}
-        target["boxes"] = dp.BoundingBoxes(boxes, format="XYXY", canvas_size=F.get_size(img))
-        target["masks"] = dp.Mask(masks)
+        target["boxes"] = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=F.get_size(img))
+        target["masks"] = tv_tensors.Mask(masks)
         target["labels"] = labels
         target["image_id"] = image_id
         target["area"] = area
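Related: the ``boxes`` wrapped above are typically derived from the instance masks via ``masks_to_boxes``, which this file imports at the top. A self-contained sketch of that call on dummy data (not part of the commit):

import torch
from torchvision.ops.boxes import masks_to_boxes

masks = torch.zeros((2, 64, 64), dtype=torch.bool)  # two dummy instance masks
masks[0, 10:30, 10:40] = True
masks[1, 40:60, 5:25] = True
print(masks_to_boxes(masks))  # one XYXY box per mask, shape (2, 4)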
@@ -362,11 +368,13 @@ def get_transform(train):
 images = list(image for image in images)
 targets = [{k: v for k, v in t.items()} for t in targets]
 output = model(images, targets) # Returns losses and detections
+print(output)
+
 # For inference
 model.eval()
 x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
 predictions = model(x) # Returns predictions
-print(predictions[0]["boxes"].shape, predictions[0]["labels"], predictions[0]["scores"])
+print(predictions[0])
 
 
 ######################################################################
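The two ``print`` calls added here surface the model's two forward modes: a loss dict in training mode and per-image prediction dicts in eval mode. A standalone sketch with torchvision's stock Mask R-CNN showing both (illustrative, not the tutorial's exact model):

import torch
import torchvision

model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights=None, num_classes=2)

images = [torch.rand(3, 300, 400)]
targets = [{
    "boxes": torch.tensor([[50.0, 50.0, 150.0, 150.0]]),    # XYXY, float
    "labels": torch.tensor([1]),                             # int64 class ids
    "masks": torch.zeros((1, 300, 400), dtype=torch.uint8),  # per-instance masks
}]

model.train()
losses = model(images, targets)
print(sorted(losses))  # loss_box_reg, loss_classifier, loss_mask, loss_objectness, loss_rpn_box_reg

model.eval()
with torch.no_grad():
    preds = model(images)
print(sorted(preds[0]))  # boxes, labels, masks, scores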
@@ -453,13 +461,35 @@ def get_transform(train):
 #
 # .. image:: ../../_static/img/tv_tutorial/tv_image05.png
 #
-# The trained model predicts 9
-# instances of person in this image, let’s see a couple of them:
-#
-# .. image:: ../../_static/img/tv_tutorial/tv_image06.png
-#
-# .. image:: ../../_static/img/tv_tutorial/tv_image07.png
-#
+import matplotlib.pyplot as plt
+
+from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
+
+
+image = read_image("../_static/img/tv_tutorial/tv_image05.png")
+eval_transform = get_transform(train=False)
+
+model.eval()
+with torch.no_grad():
+    x = eval_transform(image)
+    # convert RGBA -> RGB and move to device
+    x = x[:3, ...].to(device)
+    predictions = model([x, ])
+    pred = predictions[0]
+
+image = (255.0 * (image - image.min()) / (image.max() - image.min())).to(torch.uint8)
+image = image[:3, ...]
+pred_labels = [f"pedestrian: {score:.3f}" for label, score in zip(pred["labels"], pred["scores"])]
+pred_boxes = pred["boxes"].long()
+output_image = draw_bounding_boxes(image, pred_boxes, pred_labels, colors="red")
+
+masks = (pred["masks"] > 0.7).squeeze(1)
+output_image = draw_segmentation_masks(output_image, masks, alpha=0.5, colors="blue")
+
+
+plt.figure()
+plt.imshow(output_image.permute(1, 2, 0))
+
+######################################################################
 # The results look good!
 #
 # Wrapping up
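Side note on the new visualization block (illustrative, not part of the commit): ``draw_bounding_boxes`` and ``draw_segmentation_masks`` take CHW image tensors, classically ``uint8`` in 0-255, which is why the code rescales the image before drawing. In a headless run one would typically also persist the figure:

import matplotlib.pyplot as plt  # already imported in the block above

plt.axis("off")
plt.savefig("prediction.png", bbox_inches="tight")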
