Open
Description
Add Link
https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html
Describe the bug
Following the tutorial, I wrote the code below and found that a segmentation fault occurs when the input tensor (and the GraphModule model) is moved to cuda:0.
# main.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
# LeNet Model definition
class Net(nn.Module):
    """LeNet-style CNN producing log-probabilities over 10 classes.

    Two 5x5 convolutions (each followed by 2x2 max-pooling and ReLU) feed
    two fully connected layers. ``fc1`` expects 320 input features, i.e.
    20 channels x 4 x 4, which is what a single-channel 28x28 image yields
    after the two conv/pool stages.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc1_drop = nn.Dropout()
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 1, H, W)``; ``H = W = 28`` gives the
                320 features that ``fc1`` requires.

        Returns:
            Tensor of shape ``(batch, 10)`` holding ``log_softmax`` outputs.

        Raises:
            RuntimeError: If the flattened feature count does not match
                ``fc1`` (for example, when the spatial size is not 28x28).
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten everything except the batch dimension. Unlike
        # ``reshape(-1, 320)`` this never folds surplus features into extra
        # batch rows, so a wrong input size fails loudly at ``fc1``.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc1_drop(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
def save_quant():
    """Quantize the pretrained LeNet and save the result to ``./output``.

    Loads weights from ``./lenet_mnist_model.pth``, moves the model to
    ``cuda:0``, switches it to eval mode, and runs the Intel Neural
    Compressor ``Quantization`` flow configured by ``./conf.yaml``. The MNIST
    test split (batch size 1) is used for both calibration and evaluation.
    """
    net = Net()
    weights = torch.load('./lenet_mnist_model.pth')
    net.load_state_dict(weights)
    print(net)
    net = net.to("cuda:0")
    net.eval()

    from torchvision import datasets, transforms

    to_tensor = transforms.Compose([transforms.ToTensor(), ])
    mnist_test = datasets.MNIST(
        './data', train=False, download=True, transform=to_tensor
    )
    test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=1)

    # Launch code for Intel Neural Compressor.
    from neural_compressor.experimental import Quantization

    quantizer = Quantization("./conf.yaml")
    quantizer.model = net
    quantizer.calib_dataloader = test_loader
    quantizer.eval_dataloader = test_loader
    quantized = quantizer()
    # The move to CUDA below is left disabled, as in the original report:
    # quantized = quantized.to("cuda:0")
    quantized.save('./output')
def good_quant():
    """Load the saved int8 model and run one random sample without moving devices.

    Restores the quantized model from ``./output`` onto a fresh ``Net``,
    builds a random float32 tensor of shape ``(1, 1, 28, 28)``, prints the
    tensor's device, and prints the model output.
    """
    from neural_compressor.utils.pytorch import load

    int8_model = load('./output', Net())
    sample = np.random.rand(1, 1, 28, 28).astype(np.float32)
    first = torch.from_numpy(sample)
    print(f"when tensor on={first.device}")
    print(int8_model(first))
def bad_quant():
    """Load the saved int8 model and run one random sample on ``cuda:0``.

    Same flow as the CPU variant, except that both the restored model and the
    input tensor are moved to ``cuda:0`` before inference. Per the accompanying
    report, this is the path that ends in a segmentation fault.
    """
    from neural_compressor.utils.pytorch import load

    device = "cuda:0"
    int8_model = load('./output', Net())
    int8_model = int8_model.to(device)
    sample = np.random.rand(1, 1, 28, 28).astype(np.float32)
    first = torch.from_numpy(sample).to(device)
    print(f"when tensor on={first.device}")
    print(int8_model(first))
if __name__ == "__main__":
    # Run the repro only when executed as a script, so importing this module
    # does not trigger dataset downloads, quantization, or CUDA inference.
    # Order matters: the quantized model must be saved before it is loaded.
    save_quant()
    good_quant()
    bad_quant()
and the yaml file:
# conf.yaml
version: 2.0
model:
  name: LeNet
  framework: pytorch_fx
evaluation:
  accuracy:
    metric:
      topk: 1
tuning:
  accuracy_criterion:
    relative: 0.01
Expected Result: the quantized model runs smoothly and efficiently on the GPU
Actual Result: segmentation fault
Segmentation fault (core dumped)
Describe your environment
- Platform(Linux)
- CUDA(11, 12)
- PyTorch 2.0.1+cu117
cc @ezyang @gchanan @zou3519 @kadeng @frank-wei @jgong5 @mingfeima @XiaobingSuper @sanchitintel @ashokei @jingxu10 @jerryzh168 @jianyuh @raghuramank100 @jamesr66a @vkuzo @Xia-Weiwen @leslie-fang-intel