@@ -78,12 +78,16 @@ def training(
78
78
writer : SummaryWriter ,
79
79
print_step ,
80
80
):
81
+ summary ["epoch" ] += 1
82
+
81
83
model .train ()
82
84
83
85
n_steps = len (dataloader )
84
86
iter_data = iter (dataloader )
85
87
86
88
for step in range (n_steps ):
89
+ summary ["step" ] += 1
90
+
87
91
batch = next (iter_data )
88
92
x = batch ["image" ].to (device )
89
93
y = batch ["label" ].to (device )
@@ -120,9 +124,6 @@ def training(
120
124
f"train_loss: { loss_data :.5f} , train_acc: { acc_data :.3f} "
121
125
)
122
126
123
- summary ["step" ] += 1
124
-
125
- summary ["epoch" ] += 1
126
127
return summary
127
128
128
129
@@ -213,8 +214,8 @@ def main(cfg):
213
214
preprocess_gpu_train = Compose (
214
215
[
215
216
ToCupy (),
216
- RandCuCIM (name = "color_jitter" , brightness = 64.0 / 255.0 , contrast = 0.75 , saturation = 0.25 , hue = 0.04 ),
217
- RandCuCIM (name = "image_flip " , apply_prob = cfg ["prob" ], spatial_axis = - 1 ),
217
+ RandCuCIM (name = "rand_color_jitter" , prob = cfg [ "prob" ] , brightness = 64.0 / 255.0 , contrast = 0.75 , saturation = 0.25 , hue = 0.04 ),
218
+ RandCuCIM (name = "rand_image_flip " , prob = cfg ["prob" ], spatial_axis = - 1 ),
218
219
RandCuCIM (name = "rand_image_rotate_90" , prob = cfg ["prob" ], max_k = 3 , spatial_axis = (- 2 , - 1 )),
219
220
CastToType (dtype = np .float32 ),
220
221
RandCuCIM (name = "rand_zoom" , prob = cfg ["prob" ], min_zoom = 0.9 , max_zoom = 1.1 ),
@@ -347,7 +348,7 @@ def main(cfg):
347
348
# -------------------------------------------------------------------------
348
349
# Training/Evaluating
349
350
# -------------------------------------------------------------------------
350
- train_counter = {"n_epochs" : cfg ["n_epochs" ], "epoch" : 1 , "step" : 1 }
351
+ train_counter = {"n_epochs" : cfg ["n_epochs" ], "epoch" : 0 , "step" : 0 }
351
352
352
353
total_valid_time , total_train_time = 0.0 , 0.0
353
354
t_start = time .perf_counter ()
@@ -403,7 +404,7 @@ def main(cfg):
403
404
writer .add_scalar ("valid/accuracy" , valid_acc , train_counter ["epoch" ])
404
405
405
406
logging .info (
406
- f"[Epoch: { train_counter ['epoch' ]} /{ cfg ['n_epochs' ]} ] loss: { valid_loss :.3f} , accuracy: { valid_acc :.2f } , "
407
+ f"[Epoch: { train_counter ['epoch' ]} /{ cfg ['n_epochs' ]} ] loss: { valid_loss :.3f} , accuracy: { valid_acc :.3f } , "
407
408
f"time: { t_valid - t_epoch :.1f} s (train: { train_time :.1f} s, valid: { valid_time :.1f} s)"
408
409
)
409
410
else :
@@ -421,12 +422,12 @@ def main(cfg):
421
422
# Save the best and final model
422
423
if cfg ["validate" ] is True :
423
424
copyfile (
424
- os .path .join (log_dir , f"model_epoch_{ metric_summary ['best_epoch' ]} .pth " ),
425
- os .path .join (log_dir , "model_best.pth " ),
425
+ os .path .join (log_dir , f"model_epoch_{ metric_summary ['best_epoch' ]} .pt " ),
426
+ os .path .join (log_dir , "model_best.pt " ),
426
427
)
427
428
copyfile (
428
- os .path .join (log_dir , f"model_epoch_{ cfg ['n_epochs' ]} .pth " ),
429
- os .path .join (log_dir , "model_final.pth " ),
429
+ os .path .join (log_dir , f"model_epoch_{ cfg ['n_epochs' ]} .pt " ),
430
+ os .path .join (log_dir , "model_final.pt " ),
430
431
)
431
432
432
433
# Final prints
@@ -478,7 +479,7 @@ def parse_arguments():
478
479
parser .add_argument ("--optimized" , action = "store_true" , help = "use optimized parameters" )
479
480
parser .add_argument ("-b" , "--backend" , type = str , dest = "backend" , help = "backend for transforms" )
480
481
481
- parser .add_argument ("--cpu" , type = int , default = 10 , dest = "num_workers" , help = "number of workers" )
482
+ parser .add_argument ("--cpu" , type = int , default = 8 , dest = "num_workers" , help = "number of workers" )
482
483
parser .add_argument ("--gpu" , type = str , default = "0" , dest = "gpu" , help = "which gpu to use" )
483
484
484
485
args = parser .parse_args ()
0 commit comments