Skip to content

Commit 21a1115

Browse files
author
Guanheng Zhang
committed
minor changes with review's feedback
1 parent c63ee4f commit 21a1115

File tree

1 file changed

+9
-11
lines changed

1 file changed

+9
-11
lines changed

beginner_source/torchtext_translation_tutorial.py

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -68,22 +68,20 @@ def build_vocab(filepath, tokenizer):
6868
de_vocab = build_vocab(train_filepaths[0], de_tokenizer)
6969
en_vocab = build_vocab(train_filepaths[1], en_tokenizer)
7070

71-
def data_process(raw_de_iter, raw_en_iter):
72-
data_ = []
71+
def data_process(filepaths):
72+
raw_de_iter, raw_en_iter = iter(io.open(filepaths[0])), iter(io.open(filepaths[1]))
73+
data = []
7374
for (raw_de, raw_en) in zip(raw_de_iter, raw_en_iter):
7475
de_tensor_ = torch.tensor([de_vocab[token] for token in de_tokenizer(raw_de)],
7576
dtype=torch.long)
7677
en_tensor_ = torch.tensor([en_vocab[token] for token in en_tokenizer(raw_en)],
7778
dtype=torch.long)
78-
data_.append((de_tensor_, en_tensor_))
79-
return data_
80-
81-
train_data = data_process(iter(io.open(train_filepaths[0])),
82-
iter(io.open(train_filepaths[1])))
83-
val_data = data_process(iter(io.open(val_filepaths[0])),
84-
iter(io.open(val_filepaths[1])))
85-
test_data = data_process(iter(io.open(test_filepaths[0])),
86-
iter(io.open(test_filepaths[1])))
79+
data.append((de_tensor_, en_tensor_))
80+
return data
81+
82+
train_data = data_process(train_filepaths)
83+
val_data = data_process(val_filepaths)
84+
test_data = data_process(test_filepaths)
8785

8886
######################################################################
8987
# ``DataLoader``

0 commit comments

Comments
 (0)