From c928e3601739aa9d5dacebd013d3846c57384dcd Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Tue, 8 Mar 2022 19:52:00 +0000 Subject: [PATCH 1/4] Use rand_image_flip Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- .../torch/camelyon_train_evaluate_pytorch_gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py b/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py index d442a408d5..f8ef39b26b 100644 --- a/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py +++ b/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py @@ -214,7 +214,7 @@ def main(cfg): [ ToCupy(), RandCuCIM(name="color_jitter", brightness=64.0 / 255.0, contrast=0.75, saturation=0.25, hue=0.04), - RandCuCIM(name="image_flip", apply_prob=cfg["prob"], spatial_axis=-1), + RandCuCIM(name="rand_image_flip", prob=cfg["prob"], spatial_axis=-1), RandCuCIM(name="rand_image_rotate_90", prob=cfg["prob"], max_k=3, spatial_axis=(-2, -1)), CastToType(dtype=np.float32), RandCuCIM(name="rand_zoom", prob=cfg["prob"], min_zoom=0.9, max_zoom=1.1), From 682dabfdecba14799bd8b0a96586fd350169fecc Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Thu, 10 Mar 2022 17:08:33 +0000 Subject: [PATCH 2/4] Fix the epoch/iteration counting Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- .../torch/camelyon_train_evaluate_pytorch_gpu.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py b/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py index f8ef39b26b..58713e6aac 100644 --- a/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py +++ b/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py @@ -78,12 +78,16 @@ def training( writer: SummaryWriter, print_step, ): + summary["epoch"] += 1 + model.train() n_steps = len(dataloader) iter_data = iter(dataloader) for step in range(n_steps): + summary["step"] += 1 + batch = next(iter_data) x = batch["image"].to(device) y = batch["label"].to(device) @@ -120,9 +124,6 @@ def training( f"train_loss: {loss_data:.5f}, train_acc: {acc_data:.3f}" ) - summary["step"] += 1 - - summary["epoch"] += 1 return summary @@ -347,7 +348,7 @@ def main(cfg): # ------------------------------------------------------------------------- # Training/Evaluating # ------------------------------------------------------------------------- - train_counter = {"n_epochs": cfg["n_epochs"], "epoch": 1, "step": 1} + train_counter = {"n_epochs": cfg["n_epochs"], "epoch": 0, "step": 0} total_valid_time, total_train_time = 0.0, 0.0 t_start = time.perf_counter() From f140f147c909468d11f91e44e96fdb1e110b3b7d Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Thu, 10 Mar 2022 17:09:33 +0000 Subject: [PATCH 3/4] Change to .pt for all checkpoints Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- .../torch/camelyon_train_evaluate_pytorch_gpu.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py b/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py index 58713e6aac..31abdf0326 100644 --- a/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py +++ b/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py @@ -422,12 +422,12 @@ def main(cfg): # Save the best and final model if cfg["validate"] is True: copyfile( - os.path.join(log_dir, f"model_epoch_{metric_summary['best_epoch']}.pth"), - os.path.join(log_dir, "model_best.pth"), + os.path.join(log_dir, f"model_epoch_{metric_summary['best_epoch']}.pt"), + os.path.join(log_dir, "model_best.pt"), ) copyfile( - os.path.join(log_dir, f"model_epoch_{cfg['n_epochs']}.pth"), - os.path.join(log_dir, "model_final.pth"), + os.path.join(log_dir, f"model_epoch_{cfg['n_epochs']}.pt"), + os.path.join(log_dir, "model_final.pt"), ) # Final prints From 6132f686c6d4ceddbd492620d1b57ed4fc02aacc Mon Sep 17 00:00:00 2001 From: Behrooz <3968947+drbeh@users.noreply.github.com> Date: Mon, 21 Mar 2022 20:13:28 +0000 Subject: [PATCH 4/4] Use a new per-image colorjitter and fix minor issues Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com> --- .../torch/camelyon_train_evaluate_pytorch_gpu.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py b/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py index 31abdf0326..c6bb4c110e 100644 --- a/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py +++ b/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py @@ -214,7 +214,7 @@ def main(cfg): preprocess_gpu_train = Compose( [ ToCupy(), - RandCuCIM(name="color_jitter", brightness=64.0 / 255.0, contrast=0.75, saturation=0.25, hue=0.04), + RandCuCIM(name="rand_color_jitter", prob=cfg["prob"], brightness=64.0 / 255.0, contrast=0.75, saturation=0.25, hue=0.04), RandCuCIM(name="rand_image_flip", prob=cfg["prob"], spatial_axis=-1), RandCuCIM(name="rand_image_rotate_90", prob=cfg["prob"], max_k=3, spatial_axis=(-2, -1)), CastToType(dtype=np.float32), @@ -404,7 +404,7 @@ def main(cfg): writer.add_scalar("valid/accuracy", valid_acc, train_counter["epoch"]) logging.info( - f"[Epoch: {train_counter['epoch']}/{cfg['n_epochs']}] loss: {valid_loss:.3f}, accuracy: {valid_acc:.2f}, " + f"[Epoch: {train_counter['epoch']}/{cfg['n_epochs']}] loss: {valid_loss:.3f}, accuracy: {valid_acc:.3f}, " f"time: {t_valid - t_epoch:.1f}s (train: {train_time:.1f}s, valid: {valid_time:.1f}s)" ) else: @@ -479,7 +479,7 @@ def parse_arguments(): parser.add_argument("--optimized", action="store_true", help="use optimized parameters") parser.add_argument("-b", "--backend", type=str, dest="backend", help="backend for transforms") - parser.add_argument("--cpu", type=int, default=10, dest="num_workers", help="number of workers") + parser.add_argument("--cpu", type=int, default=8, dest="num_workers", help="number of workers") parser.add_argument("--gpu", type=str, default="0", dest="gpu", help="which gpu to use") args = parser.parse_args()