Skip to content

Speedup DenseOps compilation #25

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
.coverage
.hypothesis
.mypy_cache
*.so*
*/*.pyc
*/*.so*
*/**/__pycache__
Expand Down Expand Up @@ -91,9 +92,9 @@ torch/version.py
intel_pytorch_extension_py/version.py
torch_ipex/csrc/version.cpp
torch_ipex/csrc/aten_ipex_sparse_type_default.*
torch_ipex/csrc/cpu/SparseOPs.*
torch_ipex/csrc/cpu/SparseOPs*
torch_ipex/csrc/cpu/OPs.*
torch_ipex/csrc/cpu/DenseOPs.*
torch_ipex/csrc/cpu/DenseOPs*

cscope.*

Expand Down
4 changes: 1 addition & 3 deletions intel_pytorch_extension_py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,4 @@
from .version import __version__
from .optim import *
from .ops import *
import _torch_ipex as core

core._initialize_aten_bindings()
import _torch_ipex as core
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better integrate these two packages into one, so that end user only needs to import one package.

15 changes: 15 additions & 0 deletions scripts/cpu/common/codegen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import os

def write_or_skip(filepath, content):
    """Write *content* to *filepath*, skipping the write when unchanged.

    Leaving an up-to-date file untouched preserves its mtime, so
    timestamp-based build systems will not needlessly recompile the
    generated sources that depend on it.
    """
    existing = None
    try:
        with open(filepath, 'r') as fh:
            existing = fh.read()
    except IOError:
        # File does not exist yet (or is unreadable): treat as "changed".
        pass

    if existing == content:
        print('skipped writing', filepath)
    else:
        print('writing', filepath)
        with open(filepath, 'w') as fh:
            fh.write(content)
60 changes: 33 additions & 27 deletions scripts/cpu/gen-dense-cpu-ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import sys
import json

from common.codegen import write_or_skip
from common.cpp_sig_parser import CPPSig
from common.aten_sig_parser import AtenSig

Expand Down Expand Up @@ -92,6 +93,12 @@
.op(torch::RegisterOperators::options().schema("{}")
.impl_unboxedOnlyKernel<{}, &{}>(at::DispatchKey::DPCPPTensorId)
.aliasAnalysis(c10::AliasAnalysisKind::FROM_SCHEMA))"""

_REG_BLOCK = """
namespace {{
static auto dispatch = torch::RegisterOperators(){reg_ops};
}}"""

_H_HEADER = """// Autogenerated file by {gen}. Do not edit directly!
#pragma once

Expand All @@ -105,8 +112,6 @@ class AtenIpexCPUDefault {{
{hfuncs}
}};

void RegisterIpexDenseOPs();

}} // namespace cpu

}} // namespace torch_ipex
Expand Down Expand Up @@ -145,9 +150,7 @@ def __init__(self, reg_dec_file_path, func_file_path, op_h_file_path, op_cpp_fil
self._reg_dec_file_path = reg_dec_file_path
self._func_file_path = func_file_path
self._op_h_file_path = op_h_file_path
self._op_h_file = None
self._op_cpp_file_path = op_cpp_file_path
self._op_cpp_file = None
self._sigs = []
self._err_info = []
self._func_data = ''
Expand Down Expand Up @@ -223,9 +226,6 @@ def prepare_functions(self):
with open(self._func_file_path, 'r') as ff:
self._func_data = ff.read()

self._op_h_file = open(self._op_h_file_path, 'w')
self._op_cpp_file = open(self._op_cpp_file_path, 'w')

print('Extracted {} functions ({} errors) from {}'.format(
len(self._sigs),
len(self._err_info),
Expand Down Expand Up @@ -452,22 +452,37 @@ def gen_fallback_post_code(self, cpp_sig):
def gen_head_dec_code(self, cpp_func_str_h):
return ' static {};\n'.format(cpp_func_str_h)

def gen_cpu_ops_shard(self, func_defs, cpp_path, header_path, num_shards=1):
    """Emit the generated dense-CPU ops as one header plus *num_shards* .cpp files.

    Sharding the definitions lets the build compile them in parallel,
    which is the point of this change (speeding up DenseOps compilation).

    Args:
        func_defs: list of dicts with 'dec' (header declaration),
            'reg' (operator registration snippet) and 'def' (definition) strings.
        cpp_path: base path of the .cpp output; shard i is written to
            '<stem>_<i><ext>'.
        header_path: path of the generated header file.
        num_shards: number of .cpp shards to split the definitions into.
    """
    head_file_content = _H_HEADER.format(gen=os.path.basename(sys.argv[0]),
                                         hfuncs=''.join(f['dec'] for f in func_defs))
    write_or_skip(header_path, head_file_content)

    # Round-robin assignment keeps the shards roughly the same size.
    shards = [[] for _ in range(num_shards)]
    for idx, func in enumerate(func_defs):
        shards[idx % num_shards].append(func)

    # Hoisted out of the loop: the stem/extension of cpp_path never change.
    filename, ext = os.path.splitext(cpp_path)
    for idx, shard in enumerate(shards):
        regs_code = _REG_BLOCK.format(reg_ops=''.join(f['reg'] for f in shard))
        defs_code = ''.join(f['def'] for f in shard)

        # Fix: derive the shard path from cpp_path's stem; the previous
        # hard-coded '(unknown)' placeholder ignored the computed filename
        # and wrote every generator's shards to the same bogus paths.
        shard_filepath = f'{filename}_{idx}{ext}'
        shard_content = _CPP_HEADER.format(gen=os.path.basename(sys.argv[0]),
                                           funcs=defs_code, regs=regs_code)
        write_or_skip(shard_filepath, shard_content)

def gen_code(self):
self.prepare_functions()
assert len(self._err_info) == 0

def is_conv_overrideable_func(fname):
return fname in ['convolution_overrideable', 'convolution_backward_overrideable']

func_decs = []
func_regs = []
func_defs = []
for cpp_sig, aten_sig, cpp_func_sig_str, aten_func_sig_str in self._sigs:
for cpp_sig, _, cpp_func_sig_str, aten_func_sig_str in self._sigs:
cpp_func_str_h, cpp_func_str_cpp = self.gen_func_signature(cpp_func_sig_str)
# Gen declaration code for head file
func_decs.append(self.gen_head_dec_code(cpp_func_str_h))
func_dec = self.gen_head_dec_code(cpp_func_str_h)

func_regs.append(_REG_PATTERN.format(aten_func_sig_str, self.get_func_dec(cpp_sig), "AtenIpexCPUDefault::" + cpp_sig.def_name))
func_reg = _REG_PATTERN.format(aten_func_sig_str, self.get_func_dec(cpp_sig), "AtenIpexCPUDefault::" + cpp_sig.def_name)

# Gen definition code for cpp file
code = '{} {{\n'.format(cpp_func_str_cpp)
Expand All @@ -480,23 +495,14 @@ def is_conv_overrideable_func(fname):
code += self.gen_fallback_code(cpp_sig)
code += self.gen_fallback_post_code(cpp_sig)

code += '}\n'

code += '\n'

func_defs.append(code)

head_file_content = _H_HEADER.format(gen=os.path.basename(sys.argv[0]), hfuncs=''.join(func_decs))

regs_code = 'void RegisterIpexDenseOPs() {\n'
regs_code += ' static auto dispatch = torch::RegisterOperators()\n'
regs_code += ''.join(func_regs)
regs_code += ';\n}\n'
code += '}\n\n'

source_file_content = _CPP_HEADER.format(gen=os.path.basename(sys.argv[0]), funcs=''.join(func_defs), regs=regs_code)
print(head_file_content, file=self._op_h_file)
print(source_file_content, file=self._op_cpp_file)
func_defs.append({'dec': func_dec, 'reg': func_reg, 'def': code})

self.gen_cpu_ops_shard(func_defs,
cpp_path=self._op_cpp_file_path,
header_path=self._op_h_file_path,
num_shards=8)

if __name__ == '__main__':
arg_parser = argparse.ArgumentParser()
Expand Down
76 changes: 43 additions & 33 deletions scripts/cpu/gen-sparse-cpu-ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import sys
import json

from common.codegen import write_or_skip
from common.cpp_sig_parser import CPPSig
from common.aten_sig_parser import AtenSig

Expand Down Expand Up @@ -47,6 +48,13 @@
.op(torch::RegisterOperators::options().schema("{}")
.impl_unboxedOnlyKernel<{}, &{}>(at::DispatchKey::SparseDPCPPTensorId)
.aliasAnalysis(c10::AliasAnalysisKind::FROM_SCHEMA))"""

_REG_BLOCK = """
namespace {{
static auto dispatch = torch::RegisterOperators(){reg_ops};
}}"""


_H_HEADER = """// Autogenerated file by {gen}. Do not edit directly!
#pragma once

Expand All @@ -60,8 +68,6 @@ class AtenIpexCPUSparse {{
{hfuncs}
}};

void RegisterIpexSparseOPs();

}} // namespace cpu

}} // namespace torch_ipex
Expand Down Expand Up @@ -100,9 +106,7 @@ def __init__(self, reg_dec_file_path, func_file_path, sparse_dec_file_path, spar
self._sparse_dec_file_path = sparse_dec_file_path
self._sparse_attr_file_path = sparse_attr_file_path
self._op_h_file_path = op_h_file_path
self._op_h_file = None
self._op_cpp_file_path = op_cpp_file_path
self._op_cpp_file = None
self._sigs = []
self._sparse_attr_data = ''
self._sparse_sigs = []
Expand Down Expand Up @@ -155,8 +159,8 @@ def prepare_functions(self):
continue
cpp_func_sig_str = m.group(1)
_sparse_sig_strs.append(cpp_func_sig_str)
print(cpp_func_sig_str)
print("********************")
# print(cpp_func_sig_str)
# print("********************")

# Parse SparseAttrType.h
with open(self._sparse_attr_file_path, 'r') as ff:
Expand Down Expand Up @@ -202,9 +206,6 @@ def prepare_functions(self):
self._err_info.append((cpp_func_sig, str(e)))
print('Error parsing "{}": {}'.format(cpp_func_sig, e), file=sys.stderr)

self._op_h_file = open(self._op_h_file_path, 'w')
self._op_cpp_file = open(self._op_cpp_file_path, 'w')

print('Extracted {} functions ({} errors) from {}'.format(
len(self._sigs),
len(self._err_info),
Expand Down Expand Up @@ -369,44 +370,53 @@ def gen_fallback_post_code(self, cpp_sig):
def gen_head_dec_code(self, cpp_func_str_h):
return ' static {};\n'.format(cpp_func_str_h)

def gen_cpu_ops_shard(self, func_defs, cpp_path, header_path, num_shards=1):
    """Emit the generated sparse-CPU ops as one header plus *num_shards* .cpp files.

    Mirrors the dense generator's sharding so both scripts share the same
    output layout; sharding lets the build compile the pieces in parallel.

    Args:
        func_defs: list of dicts with 'dec' (header declaration),
            'reg' (operator registration snippet) and 'def' (definition) strings.
        cpp_path: base path of the .cpp output; shard i is written to
            '<stem>_<i><ext>'.
        header_path: path of the generated header file.
        num_shards: number of .cpp shards to split the definitions into.
    """
    head_file_content = _H_HEADER.format(gen=os.path.basename(sys.argv[0]),
                                         hfuncs=''.join(f['dec'] for f in func_defs))
    write_or_skip(header_path, head_file_content)

    # Round-robin assignment keeps the shards roughly the same size.
    shards = [[] for _ in range(num_shards)]
    for idx, func in enumerate(func_defs):
        shards[idx % num_shards].append(func)

    # Hoisted out of the loop: the stem/extension of cpp_path never change.
    filename, ext = os.path.splitext(cpp_path)
    for idx, shard in enumerate(shards):
        regs_code = _REG_BLOCK.format(reg_ops=''.join(f['reg'] for f in shard))
        defs_code = ''.join(f['def'] for f in shard)

        # Fix: derive the shard path from cpp_path's stem; the previous
        # hard-coded '(unknown)' placeholder ignored the computed filename
        # and wrote every generator's shards to the same bogus paths.
        shard_filepath = f'{filename}_{idx}{ext}'
        shard_content = _CPP_HEADER.format(gen=os.path.basename(sys.argv[0]),
                                           funcs=defs_code, regs=regs_code)
        write_or_skip(shard_filepath, shard_content)

def gen_code(self):
self.prepare_functions()
assert len(self._err_info) == 0

func_decs = []
func_regs = []
func_defs = []
for cpp_sparse_sig, _, cpp_sparse_func_sig_str, aten_func_sig_str in self._sigs:
func_regs.append(_REG_PATTERN.format(aten_func_sig_str, self.get_func_dec(cpp_sparse_sig), "AtenIpexCPUSparse::" + cpp_sparse_sig.def_name))
# Gen declaration code for head file
cpp_func_str_h, cpp_func_str_cpp = self.gen_func_signature(cpp_sparse_func_sig_str)
func_decs.append(self.gen_head_dec_code(cpp_func_str_h))
func_dec = self.gen_head_dec_code(cpp_func_str_h)

# Since we have pre-defined attr OPs, we don't need to regenerate it
if self.is_sparse_attr_function(cpp_sparse_sig.def_name):
continue
func_reg = _REG_PATTERN.format(aten_func_sig_str, self.get_func_dec(cpp_sparse_sig), "AtenIpexCPUSparse::" + cpp_sparse_sig.def_name)

# Gen definition code for cpp file
code = '{} {{\n'.format(cpp_func_str_cpp)
code += self.gen_fallback_prepare_code(cpp_sparse_sig)
code += self.gen_fallback_code(cpp_sparse_sig)
code += self.gen_fallback_post_code(cpp_sparse_sig)

code += '}\n\n'

func_defs.append(code)
code = ''
# Since we have pre-defined attr OPs, we don't need to regenerate it
if not self.is_sparse_attr_function(cpp_sparse_sig.def_name):

head_file_content = _H_HEADER.format(gen=os.path.basename(sys.argv[0]), hfuncs=''.join(func_decs))
# Gen definition code for cpp file
code += '{} {{\n'.format(cpp_func_str_cpp)
code += self.gen_fallback_prepare_code(cpp_sparse_sig)
code += self.gen_fallback_code(cpp_sparse_sig)
code += self.gen_fallback_post_code(cpp_sparse_sig)

regs_code = 'void RegisterIpexSparseOPs() {\n'
regs_code += ' static auto dispatch = torch::RegisterOperators()\n'
regs_code += ''.join(func_regs)
regs_code += ';\n}\n'
code += '}\n\n'

source_file_content = _CPP_HEADER.format(gen=os.path.basename(sys.argv[0]), funcs=''.join(func_defs), regs=regs_code)
print(head_file_content, file=self._op_h_file)
print(source_file_content, file=self._op_cpp_file)
func_defs.append({'dec': func_dec, 'reg': func_reg, 'def': code})

self.gen_cpu_ops_shard(func_defs,
cpp_path=self._op_cpp_file_path,
header_path=self._op_h_file_path,
num_shards=1)

if __name__ == '__main__':
arg_parser = argparse.ArgumentParser()
Expand Down
16 changes: 13 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@ def run(self):
class DPCPPBuild(build_ext, object):
def run(self):
print("run")

# Generate the code before globbing!
generate_ipex_cpu_aten_code(base_dir)

cmake = find_executable('cmake3') or find_executable('cmake')
if cmake is None:
raise RuntimeError(
Expand All @@ -170,6 +174,7 @@ def build_extension(self, ext):
os.mkdir(ext.build_dir)

build_type = 'Release'
use_ninja = False

if _check_env_flag('DEBUG'):
build_type = 'Debug'
Expand All @@ -193,6 +198,10 @@ def build_extension(self, ext):
if _check_env_flag("DPCPP_ENABLE_PROFILING"):
cmake_args += ['-DDPCPP_ENABLE_PROFILING=1']

if _check_env_flag("USE_NINJA"):
use_ninja = True
cmake_args += ['-GNinja']

build_args = ['-j', str(multiprocessing.cpu_count())]

env = os.environ.copy()
Expand All @@ -203,7 +212,10 @@ def build_extension(self, ext):
check_call([self.cmake, ext.project_dir] + cmake_args, cwd=ext.build_dir, env=env)

# build_args += ['VERBOSE=1']
check_call(['make'] + build_args, cwd=ext.build_dir, env=env)
if use_ninja:
check_call(['ninja'] + build_args, cwd=ext.build_dir, env=env)
else:
check_call(['make'] + build_args, cwd=ext.build_dir, env=env)


ipex_git_sha, torch_git_sha = get_git_head_sha(base_dir)
Expand All @@ -212,8 +224,6 @@ def build_extension(self, ext):
# Generate version info (torch_xla.__version__)
create_version_files(base_dir, version, ipex_git_sha, torch_git_sha)

# Generate the code before globbing!
generate_ipex_cpu_aten_code(base_dir)

# Constant known variables used throughout this file

Expand Down
1 change: 0 additions & 1 deletion tests/cpu/common_device_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
import unittest
import torch
import _torch_ipex as ipex
ipex._initialize_aten_bindings()
import copy
from common_utils import TestCase, TEST_WITH_ROCM, TEST_MKL, \
skipCUDANonDefaultStreamIf
Expand Down
1 change: 0 additions & 1 deletion tests/cpu/test_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import torch
import _torch_ipex as ipex
ipex._initialize_aten_bindings()

import torch.nn as nn
import torch.backends.cudnn as cudnn
Expand Down
1 change: 0 additions & 1 deletion tests/cpu/test_lazy_reorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import sys
import torch
import _torch_ipex as ipex
ipex._initialize_aten_bindings()
import intel_pytorch_extension

import torch.nn as nn
Expand Down
1 change: 0 additions & 1 deletion tests/cpu/test_rn50_cpu_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@

import torch
import _torch_ipex as ipex
ipex._initialize_aten_bindings()
import intel_pytorch_extension

import torch.nn as nn
Expand Down
Loading