@@ -68,22 +68,20 @@ def build_vocab(filepath, tokenizer):
de_vocab = build_vocab(train_filepaths[0], de_tokenizer)
en_vocab = build_vocab(train_filepaths[1], en_tokenizer)

-def data_process(raw_de_iter, raw_en_iter):
-  data_ = []
+def data_process(filepaths):
+  raw_de_iter, raw_en_iter = iter(io.open(filepaths[0])), iter(io.open(filepaths[1]))
+  data = []
  for (raw_de, raw_en) in zip(raw_de_iter, raw_en_iter):
    de_tensor_ = torch.tensor([de_vocab[token] for token in de_tokenizer(raw_de)],
                              dtype=torch.long)
    en_tensor_ = torch.tensor([en_vocab[token] for token in en_tokenizer(raw_en)],
                              dtype=torch.long)
-    data_.append((de_tensor_, en_tensor_))
-  return data_
-
-train_data = data_process(iter(io.open(train_filepaths[0])),
-                          iter(io.open(train_filepaths[1])))
-val_data = data_process(iter(io.open(val_filepaths[0])),
-                        iter(io.open(val_filepaths[1])))
-test_data = data_process(iter(io.open(test_filepaths[0])),
-                         iter(io.open(test_filepaths[1])))
+    data.append((de_tensor_, en_tensor_))
+  return data
+
+train_data = data_process(train_filepaths)
+val_data = data_process(val_filepaths)
+test_data = data_process(test_filepaths)


######################################################################
# ``DataLoader``
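
The point of the refactor is visible at the call sites: each split now passes a single (source, target) filepath pair, and data_process opens and iterates the files itself instead of receiving two pre-opened iterators. A minimal, self-contained sketch of that pattern follows; the whitespace tokenizer, toy vocab, and filenames are illustrative stand-ins, not the tutorial's spaCy tokenizers or torchtext vocabs.

import io
import torch

def toy_tokenizer(line):
    # Placeholder tokenizer: split a sentence on whitespace.
    return line.strip().split()

# Placeholder vocab; unknown tokens fall back to index 0 (<unk>).
toy_vocab = {"<unk>": 0, "ein": 1, "haus": 2, "a": 3, "house": 4}

def data_process(filepaths):
    # File iteration now lives inside the function, so callers pass a
    # (source_path, target_path) pair rather than two open iterators.
    raw_src_iter = iter(io.open(filepaths[0], encoding="utf8"))
    raw_tgt_iter = iter(io.open(filepaths[1], encoding="utf8"))
    data = []
    for raw_src, raw_tgt in zip(raw_src_iter, raw_tgt_iter):
        src_tensor = torch.tensor(
            [toy_vocab.get(tok, 0) for tok in toy_tokenizer(raw_src)],
            dtype=torch.long)
        tgt_tensor = torch.tensor(
            [toy_vocab.get(tok, 0) for tok in toy_tokenizer(raw_tgt)],
            dtype=torch.long)
        data.append((src_tensor, tgt_tensor))
    return data

# One filepath pair per split, mirroring the new call sites in the diff
# (filenames here are hypothetical):
# train_data = data_process(("train.de", "train.en"))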