Open
Description
Please refer to this tutorial.
I'm not sure that the function batchify
is correct.
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def batchify(data: torch.Tensor, bsz: int) -> torch.Tensor:
    """Arrange a flat token stream into ``bsz`` parallel columns.

    The first ``(N // bsz) * bsz`` elements are kept and any trailing
    elements that would not fit cleanly are dropped. The kept elements are
    cut into ``bsz`` contiguous runs of length ``N // bsz``, and each run
    becomes one *column* of the result. Consecutive elements of ``data``
    therefore run down the rows of a column, not across a row:

        >>> batchify(torch.arange(10), 3).tolist()
        [[0, 3, 6], [1, 4, 7], [2, 5, 8]]

    Args:
        data: Tensor, shape ``[N]``
        bsz: int, batch size (the number of columns); must be positive

    Returns:
        Tensor of shape ``[N // bsz, bsz]``, moved to ``device``.

    Raises:
        ValueError: If ``bsz`` is not positive.
    """
    if bsz <= 0:
        # Fail with a clear message instead of a bare ZeroDivisionError
        # (bsz == 0) or an invalid-shape error from view() (bsz < 0).
        raise ValueError(f"bsz must be a positive integer, got {bsz}")

    seq_len = data.size(0) // bsz
    data = data[:seq_len * bsz]
    # view() fills row by row, so each of the bsz rows holds one contiguous
    # run of the input; the transpose turns those runs into columns.
    data = data.view(bsz, seq_len).t().contiguous()
    return data.to(device)
# Demo: split a stream of 100 consecutive integers into 20 columns and print
# the flat input, a dashed separator, and the batched result.
batch_size = 20
raw_data = torch.arange(100)
train_data = batchify(raw_data, bsz=batch_size)
print(raw_data, '- ' * 40, train_data, sep='\n')
>>> tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
90, 91, 92, 93, 94, 95, 96, 97, 98, 99])
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
>>> tensor([[ 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85,
90, 95],
[ 1, 6, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86,
91, 96],
[ 2, 7, 12, 17, 22, 27, 32, 37, 42, 47, 52, 57, 62, 67, 72, 77, 82, 87,
92, 97],
[ 3, 8, 13, 18, 23, 28, 33, 38, 43, 48, 53, 58, 63, 68, 73, 78, 83, 88,
93, 98],
[ 4, 9, 14, 19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 69, 74, 79, 84, 89,
94, 99]])
With batch_size=20, train_data would divide the 100 tokens into 20 sequences of length 5.
Thus, I expect that train_data
should be
tensor([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9],
... ,
[95, 96, 97, 98, 99]])
cc @suraj813