diff --git a/.gitignore b/.gitignore
index e4bd68a61..83b135eb0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@
 .coverage
 .hypothesis
 .mypy_cache
+*.so*
 */*.pyc
 */*.so*
 */**/__pycache__
@@ -91,9 +92,9 @@
 torch/version.py
 intel_pytorch_extension_py/version.py
 torch_ipex/csrc/version.cpp
 torch_ipex/csrc/aten_ipex_sparse_type_default.*
-torch_ipex/csrc/cpu/SparseOPs.*
+torch_ipex/csrc/cpu/SparseOPs*
 torch_ipex/csrc/cpu/OPs.*
-torch_ipex/csrc/cpu/DenseOPs.*
+torch_ipex/csrc/cpu/DenseOPs*
 
 cscope.*
diff --git a/intel_pytorch_extension_py/__init__.py b/intel_pytorch_extension_py/__init__.py
index 82c769a12..0d9bb30d6 100644
--- a/intel_pytorch_extension_py/__init__.py
+++ b/intel_pytorch_extension_py/__init__.py
@@ -3,6 +3,4 @@
 from .version import __version__
 from .optim import *
 from .ops import *
-import _torch_ipex as core
-
-core._initialize_aten_bindings()
+import _torch_ipex as core
\ No newline at end of file
diff --git a/scripts/cpu/common/codegen.py b/scripts/cpu/common/codegen.py
new file mode 100644
index 000000000..70e52521a
--- /dev/null
+++ b/scripts/cpu/common/codegen.py
@@ -0,0 +1,15 @@
+import os
+
+def write_or_skip(filepath, content):
+    try:
+        with open(filepath, 'r') as f:
+            old_content = f.read()
+    except IOError:
+        old_content = None
+
+    if old_content != content:
+        with open(filepath, 'w') as f:
+            print('writing', filepath)
+            f.write(content)
+    else:
+        print('skipped writing', filepath)
\ No newline at end of file
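Note on the new `write_or_skip` helper above: it compares the new content against what is already on disk and only rewrites the file when something actually changed, so unchanged generated files keep their mtime and make/ninja skip recompiling their dependents. A minimal sketch of the behavior (the `demo.cpp` path and the `sys.path` tweak are illustrative, not part of the patch):

```python
# Illustrative only: exercises write_or_skip from scripts/cpu/common/codegen.py.
import os
import sys
import time

sys.path.append('scripts/cpu')            # assumed checkout-relative path
from common.codegen import write_or_skip

write_or_skip('demo.cpp', 'int main() { return 0; }\n')  # prints: writing demo.cpp
first_mtime = os.path.getmtime('demo.cpp')

time.sleep(1)
write_or_skip('demo.cpp', 'int main() { return 0; }\n')  # prints: skipped writing demo.cpp
assert os.path.getmtime('demo.cpp') == first_mtime       # mtime preserved, no rebuild
```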
diff --git a/scripts/cpu/gen-dense-cpu-ops.py b/scripts/cpu/gen-dense-cpu-ops.py
index 4096416fc..dce834882 100755
--- a/scripts/cpu/gen-dense-cpu-ops.py
+++ b/scripts/cpu/gen-dense-cpu-ops.py
@@ -9,6 +9,7 @@
 import sys
 import json
 
+from common.codegen import write_or_skip
 from common.cpp_sig_parser import CPPSig
 from common.aten_sig_parser import AtenSig
 
@@ -92,6 +93,12 @@
   .op(torch::RegisterOperators::options().schema("{}")
     .impl_unboxedOnlyKernel<{}, &{}>(at::DispatchKey::DPCPPTensorId)
     .aliasAnalysis(c10::AliasAnalysisKind::FROM_SCHEMA))"""
+
+_REG_BLOCK = """
+namespace {{
+  static auto dispatch = torch::RegisterOperators(){reg_ops};
+}}"""
+
 _H_HEADER = """// Autogenerated file by {gen}. Do not edit directly!
 
 #pragma once
@@ -105,8 +112,6 @@
 class AtenIpexCPUDefault {{
 {hfuncs}
 }};
 
-void RegisterIpexDenseOPs();
-
 }} // namespace cpu
 }} // namespace torch_ipex
@@ -145,9 +150,7 @@ def __init__(self, reg_dec_file_path, func_file_path, op_h_file_path, op_cpp_fil
         self._reg_dec_file_path = reg_dec_file_path
         self._func_file_path = func_file_path
         self._op_h_file_path = op_h_file_path
-        self._op_h_file = None
         self._op_cpp_file_path = op_cpp_file_path
-        self._op_cpp_file = None
         self._sigs = []
         self._err_info = []
         self._func_data = ''
@@ -223,9 +226,6 @@ def prepare_functions(self):
         with open(self._func_file_path, 'r') as ff:
             self._func_data = ff.read()
 
-        self._op_h_file = open(self._op_h_file_path, 'w')
-        self._op_cpp_file = open(self._op_cpp_file_path, 'w')
-
         print('Extracted {} functions ({} errors) from {}'.format(
             len(self._sigs),
             len(self._err_info),
@@ -452,6 +452,23 @@ def gen_fallback_post_code(self, cpp_sig):
     def gen_head_dec_code(self, cpp_func_str_h):
         return ' static {};\n'.format(cpp_func_str_h)
 
+    def gen_cpu_ops_shard(self, func_defs, cpp_path, header_path, num_shards=1):
+        head_file_content = _H_HEADER.format(gen=os.path.basename(sys.argv[0]), hfuncs=''.join([f['dec'] for f in func_defs]))
+        write_or_skip(header_path, head_file_content)
+
+        shards = [[] for _ in range(num_shards)]
+        for idx, func in enumerate(func_defs):
+            shards[idx % num_shards].append(func)
+
+        for idx, shard in enumerate(shards):
+            regs_code = _REG_BLOCK.format(reg_ops=''.join([f['reg'] for f in shard]))
+            defs_code = ''.join([f['def'] for f in shard])
+
+            filename, ext = os.path.splitext(cpp_path)
+            shard_filepath = f'{filename}_{idx}{ext}'
+            shard_content = _CPP_HEADER.format(gen=os.path.basename(sys.argv[0]), funcs=defs_code, regs=regs_code)
+            write_or_skip(shard_filepath, shard_content)
+
     def gen_code(self):
         self.prepare_functions()
         assert len(self._err_info) == 0
@@ -459,15 +476,13 @@ def is_conv_overrideable_func(fname):
             return fname in ['convolution_overrideable', 'convolution_backward_overrideable']
 
-        func_decs = []
-        func_regs = []
         func_defs = []
-        for cpp_sig, aten_sig, cpp_func_sig_str, aten_func_sig_str in self._sigs:
+        for cpp_sig, _, cpp_func_sig_str, aten_func_sig_str in self._sigs:
             cpp_func_str_h, cpp_func_str_cpp = self.gen_func_signature(cpp_func_sig_str)
 
             # Gen declaration code for head file
-            func_decs.append(self.gen_head_dec_code(cpp_func_str_h))
+            func_dec = self.gen_head_dec_code(cpp_func_str_h)
 
-            func_regs.append(_REG_PATTERN.format(aten_func_sig_str, self.get_func_dec(cpp_sig), "AtenIpexCPUDefault::" + cpp_sig.def_name))
+            func_reg = _REG_PATTERN.format(aten_func_sig_str, self.get_func_dec(cpp_sig), "AtenIpexCPUDefault::" + cpp_sig.def_name)
 
             # Gen definition code for cpp file
             code = '{} {{\n'.format(cpp_func_str_cpp)
@@ -480,23 +495,14 @@ def is_conv_overrideable_func(fname):
             code += self.gen_fallback_code(cpp_sig)
             code += self.gen_fallback_post_code(cpp_sig)
 
-            code += '}\n'
-
-            code += '\n'
-
-            func_defs.append(code)
-
-        head_file_content = _H_HEADER.format(gen=os.path.basename(sys.argv[0]), hfuncs=''.join(func_decs))
-
-        regs_code = 'void RegisterIpexDenseOPs() {\n'
-        regs_code += ' static auto dispatch = torch::RegisterOperators()\n'
-        regs_code += ''.join(func_regs)
-        regs_code += ';\n}\n'
+            code += '}\n\n'
 
-        source_file_content = _CPP_HEADER.format(gen=os.path.basename(sys.argv[0]), funcs=''.join(func_defs), regs=regs_code)
-        print(head_file_content, file=self._op_h_file)
-        print(source_file_content, file=self._op_cpp_file)
+            func_defs.append({'dec': func_dec, 'reg': func_reg, 'def': code})
+
+        self.gen_cpu_ops_shard(func_defs,
+                               cpp_path=self._op_cpp_file_path,
+                               header_path=self._op_h_file_path,
+                               num_shards=8)
 
 
 if __name__ == '__main__':
     arg_parser = argparse.ArgumentParser()
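The dense generator now shards its output across eight `DenseOPs_<idx>.cpp` files (which is why `.gitignore` switches from `DenseOPs.*` to `DenseOPs*`), so the heaviest generated translation unit can compile in parallel. A self-contained sketch of the round-robin split and shard naming used by `gen_cpu_ops_shard`, with made-up op names and an illustrative output path:

```python
# Round-robin sharding as in gen_cpu_ops_shard above; 'funcs' and the path
# are invented examples, but the splitting and naming mirror the diff.
import os

def shard_round_robin(items, num_shards):
    shards = [[] for _ in range(num_shards)]
    for idx, item in enumerate(items):
        shards[idx % num_shards].append(item)
    return shards

funcs = [f'op_{i}' for i in range(10)]
for idx, shard in enumerate(shard_round_robin(funcs, 4)):
    filename, ext = os.path.splitext('DenseOPs.cpp')
    print(f'{filename}_{idx}{ext}', shard)
# DenseOPs_0.cpp ['op_0', 'op_4', 'op_8']
# DenseOPs_1.cpp ['op_1', 'op_5', 'op_9']
# DenseOPs_2.cpp ['op_2', 'op_6']
# DenseOPs_3.cpp ['op_3', 'op_7']
```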
diff --git a/scripts/cpu/gen-sparse-cpu-ops.py b/scripts/cpu/gen-sparse-cpu-ops.py
index 3f99c9e18..e09941a9b 100755
--- a/scripts/cpu/gen-sparse-cpu-ops.py
+++ b/scripts/cpu/gen-sparse-cpu-ops.py
@@ -9,6 +9,7 @@
 import sys
 import json
 
+from common.codegen import write_or_skip
 from common.cpp_sig_parser import CPPSig
 from common.aten_sig_parser import AtenSig
 
@@ -47,6 +48,13 @@
   .op(torch::RegisterOperators::options().schema("{}")
     .impl_unboxedOnlyKernel<{}, &{}>(at::DispatchKey::SparseDPCPPTensorId)
     .aliasAnalysis(c10::AliasAnalysisKind::FROM_SCHEMA))"""
+
+_REG_BLOCK = """
+namespace {{
+  static auto dispatch = torch::RegisterOperators(){reg_ops};
+}}"""
+
+
 _H_HEADER = """// Autogenerated file by {gen}. Do not edit directly!
 
 #pragma once
@@ -60,8 +68,6 @@
 class AtenIpexCPUSparse {{
 {hfuncs}
 }};
 
-void RegisterIpexSparseOPs();
-
 }} // namespace cpu
 }} // namespace torch_ipex
@@ -100,9 +106,7 @@ def __init__(self, reg_dec_file_path, func_file_path, sparse_dec_file_path, spar
         self._sparse_dec_file_path = sparse_dec_file_path
         self._sparse_attr_file_path = sparse_attr_file_path
         self._op_h_file_path = op_h_file_path
-        self._op_h_file = None
         self._op_cpp_file_path = op_cpp_file_path
-        self._op_cpp_file = None
         self._sigs = []
         self._sparse_attr_data = ''
         self._sparse_sigs = []
@@ -155,8 +159,8 @@ def prepare_functions(self):
                 continue
             cpp_func_sig_str = m.group(1)
             _sparse_sig_strs.append(cpp_func_sig_str)
-            print(cpp_func_sig_str)
-            print("********************")
+            # print(cpp_func_sig_str)
+            # print("********************")
 
         # Parse SparseAttrType.h
         with open(self._sparse_attr_file_path, 'r') as ff:
@@ -202,9 +206,6 @@ def prepare_functions(self):
                 self._err_info.append((cpp_func_sig, str(e)))
                 print('Error parsing "{}": {}'.format(cpp_func_sig, e), file=sys.stderr)
 
-        self._op_h_file = open(self._op_h_file_path, 'w')
-        self._op_cpp_file = open(self._op_cpp_file_path, 'w')
-
         print('Extracted {} functions ({} errors) from {}'.format(
             len(self._sigs),
             len(self._err_info),
@@ -369,44 +370,53 @@ def gen_fallback_post_code(self, cpp_sig):
     def gen_head_dec_code(self, cpp_func_str_h):
         return ' static {};\n'.format(cpp_func_str_h)
 
+    def gen_cpu_ops_shard(self, func_defs, cpp_path, header_path, num_shards=1):
+        head_file_content = _H_HEADER.format(gen=os.path.basename(sys.argv[0]), hfuncs=''.join([f['dec'] for f in func_defs]))
+        write_or_skip(header_path, head_file_content)
+
+        shards = [[] for _ in range(num_shards)]
+        for idx, func in enumerate(func_defs):
+            shards[idx % num_shards].append(func)
+
+        for idx, shard in enumerate(shards):
+            regs_code = _REG_BLOCK.format(reg_ops=''.join([f['reg'] for f in shard]))
+            defs_code = ''.join([f['def'] for f in shard])
+
+            filename, ext = os.path.splitext(cpp_path)
+            shard_filepath = f'{filename}_{idx}{ext}'
+            shard_content = _CPP_HEADER.format(gen=os.path.basename(sys.argv[0]), funcs=defs_code, regs=regs_code)
+            write_or_skip(shard_filepath, shard_content)
+
     def gen_code(self):
         self.prepare_functions()
         assert len(self._err_info) == 0
 
-        func_decs = []
-        func_regs = []
         func_defs = []
         for cpp_sparse_sig, _, cpp_sparse_func_sig_str, aten_func_sig_str in self._sigs:
-            func_regs.append(_REG_PATTERN.format(aten_func_sig_str, self.get_func_dec(cpp_sparse_sig), "AtenIpexCPUSparse::" + cpp_sparse_sig.def_name))
             # Gen declaration code for head file
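The sparse generator (continued below) gets the same sharding machinery but keeps `num_shards=1`. In both generators, registration moves from an exported `RegisterIpex*OPs()` function into the `_REG_BLOCK` template, whose anonymous-namespace static registers the operators when the shared library loads. A sketch of what one rendered registration block looks like; the op schema and kernel names here are placeholders, not real generated output:

```python
# Renders the new _REG_BLOCK template with one fake entry to show the C++
# that lands in each generated shard.
_REG_BLOCK = """
namespace {{
  static auto dispatch = torch::RegisterOperators(){reg_ops};
}}"""

fake_reg_op = '''
  .op(torch::RegisterOperators::options().schema("aten::fake_op(Tensor self) -> Tensor")
    .impl_unboxedOnlyKernel<at::Tensor(const at::Tensor&), &AtenIpexCPUDefault::fake_op>(at::DispatchKey::DPCPPTensorId)
    .aliasAnalysis(c10::AliasAnalysisKind::FROM_SCHEMA))'''

print(_REG_BLOCK.format(reg_ops=fake_reg_op))
# ->
# namespace {
#   static auto dispatch = torch::RegisterOperators()
#   .op(torch::RegisterOperators::options().schema("aten::fake_op(Tensor self) -> Tensor")
#     .impl_unboxedOnlyKernel<...>(at::DispatchKey::DPCPPTensorId)
#     .aliasAnalysis(c10::AliasAnalysisKind::FROM_SCHEMA));
# }
```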
             cpp_func_str_h, cpp_func_str_cpp = self.gen_func_signature(cpp_sparse_func_sig_str)
-            func_decs.append(self.gen_head_dec_code(cpp_func_str_h))
+            func_dec = self.gen_head_dec_code(cpp_func_str_h)
 
-            # Since we have pre-defined attr OPs, we don't need to regenerate it
-            if self.is_sparse_attr_function(cpp_sparse_sig.def_name):
-                continue
+            func_reg = _REG_PATTERN.format(aten_func_sig_str, self.get_func_dec(cpp_sparse_sig), "AtenIpexCPUSparse::" + cpp_sparse_sig.def_name)
 
-            # Gen definition code for cpp file
-            code = '{} {{\n'.format(cpp_func_str_cpp)
-            code += self.gen_fallback_prepare_code(cpp_sparse_sig)
-            code += self.gen_fallback_code(cpp_sparse_sig)
-            code += self.gen_fallback_post_code(cpp_sparse_sig)
-
-            code += '}\n\n'
-
-            func_defs.append(code)
+            code = ''
+            # Since we have pre-defined attr OPs, we don't need to regenerate it
+            if not self.is_sparse_attr_function(cpp_sparse_sig.def_name):
 
-        head_file_content = _H_HEADER.format(gen=os.path.basename(sys.argv[0]), hfuncs=''.join(func_decs))
+                # Gen definition code for cpp file
+                code += '{} {{\n'.format(cpp_func_str_cpp)
+                code += self.gen_fallback_prepare_code(cpp_sparse_sig)
+                code += self.gen_fallback_code(cpp_sparse_sig)
+                code += self.gen_fallback_post_code(cpp_sparse_sig)
 
-        regs_code = 'void RegisterIpexSparseOPs() {\n'
-        regs_code += ' static auto dispatch = torch::RegisterOperators()\n'
-        regs_code += ''.join(func_regs)
-        regs_code += ';\n}\n'
+                code += '}\n\n'
 
-        source_file_content = _CPP_HEADER.format(gen=os.path.basename(sys.argv[0]), funcs=''.join(func_defs), regs=regs_code)
-        print(head_file_content, file=self._op_h_file)
-        print(source_file_content, file=self._op_cpp_file)
+            func_defs.append({'dec': func_dec, 'reg': func_reg, 'def': code})
+
+        self.gen_cpu_ops_shard(func_defs,
+                               cpp_path=self._op_cpp_file_path,
+                               header_path=self._op_h_file_path,
+                               num_shards=1)
 
 
 if __name__ == '__main__':
     arg_parser = argparse.ArgumentParser()
diff --git a/setup.py b/setup.py
index 7a6a13904..663f573eb 100644
--- a/setup.py
+++ b/setup.py
@@ -151,6 +151,10 @@ def run(self):
 class DPCPPBuild(build_ext, object):
     def run(self):
         print("run")
+
+        # Generate the code before globbing!
+        generate_ipex_cpu_aten_code(base_dir)
+
         cmake = find_executable('cmake3') or find_executable('cmake')
         if cmake is None:
             raise RuntimeError(
@@ -170,6 +174,7 @@ def build_extension(self, ext):
             os.mkdir(ext.build_dir)
 
         build_type = 'Release'
+        use_ninja = False
 
         if _check_env_flag('DEBUG'):
             build_type = 'Debug'
@@ -193,6 +198,10 @@ def build_extension(self, ext):
         if _check_env_flag("DPCPP_ENABLE_PROFILING"):
             cmake_args += ['-DDPCPP_ENABLE_PROFILING=1']
 
+        if _check_env_flag("USE_NINJA"):
+            use_ninja = True
+            cmake_args += ['-GNinja']
+
         build_args = ['-j', str(multiprocessing.cpu_count())]
 
         env = os.environ.copy()
@@ -203,7 +212,10 @@ def build_extension(self, ext):
         check_call([self.cmake, ext.project_dir] + cmake_args, cwd=ext.build_dir, env=env)
 
         # build_args += ['VERBOSE=1']
-        check_call(['make'] + build_args, cwd=ext.build_dir, env=env)
+        if use_ninja:
+            check_call(['ninja'] + build_args, cwd=ext.build_dir, env=env)
+        else:
+            check_call(['make'] + build_args, cwd=ext.build_dir, env=env)
 
 
 ipex_git_sha, torch_git_sha = get_git_head_sha(base_dir)
@@ -212,8 +224,6 @@
 # Generate version info (torch_xla.__version__)
 create_version_files(base_dir, version, ipex_git_sha, torch_git_sha)
 
-# Generate the code before globbing!
-generate_ipex_cpu_aten_code(base_dir)
 
 
 # Constant known variables used throughout this file
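`setup.py` now runs code generation inside `DPCPPBuild.run()` so the generated sources exist before any file globbing, and it can optionally drive the build with Ninja. `_check_env_flag` already exists in `setup.py` but its body is outside this diff; the sketch below assumes a conventional truthy-string check:

```python
# Assumed shape of setup.py's existing _check_env_flag helper (its definition
# is not part of this diff); it gates DEBUG, DPCPP_ENABLE_PROFILING, and the
# new USE_NINJA switch.
import os

def _check_env_flag(name, default=''):
    return os.getenv(name, default).upper() in ['ON', '1', 'YES', 'TRUE', 'Y']

# e.g. `USE_NINJA=1 python setup.py install` configures CMake with -GNinja and
# then runs `ninja -j <cpu_count>` instead of `make -j <cpu_count>`.
```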
diff --git a/tests/cpu/common_device_type.py b/tests/cpu/common_device_type.py
index ee6349eef..a68cfe42d 100644
--- a/tests/cpu/common_device_type.py
+++ b/tests/cpu/common_device_type.py
@@ -50,7 +50,6 @@
 import unittest
 import torch
 import _torch_ipex as ipex
-ipex._initialize_aten_bindings()
 import copy
 from common_utils import TestCase, TEST_WITH_ROCM, TEST_MKL, \
     skipCUDANonDefaultStreamIf
diff --git a/tests/cpu/test_conf.py b/tests/cpu/test_conf.py
index 2df27c2e9..5ca82af61 100644
--- a/tests/cpu/test_conf.py
+++ b/tests/cpu/test_conf.py
@@ -5,7 +5,6 @@
 
 import torch
 import _torch_ipex as ipex
-ipex._initialize_aten_bindings()
 import torch.nn as nn
 import torch.backends.cudnn as cudnn
 
diff --git a/tests/cpu/test_lazy_reorder.py b/tests/cpu/test_lazy_reorder.py
index 34d4f1fc0..fb26d9437 100644
--- a/tests/cpu/test_lazy_reorder.py
+++ b/tests/cpu/test_lazy_reorder.py
@@ -12,7 +12,6 @@
 import sys
 import torch
 import _torch_ipex as ipex
-ipex._initialize_aten_bindings()
 import intel_pytorch_extension
 
 import torch.nn as nn
diff --git a/tests/cpu/test_rn50_cpu_ops.py b/tests/cpu/test_rn50_cpu_ops.py
index c53c78a3f..2f5a8ec8a 100644
--- a/tests/cpu/test_rn50_cpu_ops.py
+++ b/tests/cpu/test_rn50_cpu_ops.py
@@ -56,7 +56,6 @@
 
 import torch
 import _torch_ipex as ipex
-ipex._initialize_aten_bindings()
 import intel_pytorch_extension
 
 import torch.nn as nn
diff --git a/torch_ipex/csrc/aten_ipex_type.cpp b/torch_ipex/csrc/aten_ipex_type.cpp
index bb1c08285..e1def87d1 100644
--- a/torch_ipex/csrc/aten_ipex_type.cpp
+++ b/torch_ipex/csrc/aten_ipex_type.cpp
@@ -10,18 +10,4 @@
 
 namespace torch_ipex {
 
-namespace {
-
-void AtenInitialize() {
-  cpu::RegisterIpexDenseOPs();
-  cpu::RegisterIpexSparseOPs();
-}
-
-} // namespace
-
-void AtenIpexType::InitializeAtenBindings() {
-  static std::once_flag once;
-  std::call_once(once, []() { AtenInitialize(); });
-}
-
 } // namespace torch_ipex
diff --git a/torch_ipex/csrc/aten_ipex_type.h b/torch_ipex/csrc/aten_ipex_type.h
index e42bc083d..281d0b3f2 100644
--- a/torch_ipex/csrc/aten_ipex_type.h
+++ b/torch_ipex/csrc/aten_ipex_type.h
@@ -6,8 +6,6 @@ namespace torch_ipex {
 
 // Base ATEN Type class where the IPE specific overrides should be defined.
 class AtenIpexType {
- public:
-  static void InitializeAtenBindings();
 };
 
 } // namespace torch_ipex
diff --git a/torch_ipex/csrc/init_python_bindings.cpp b/torch_ipex/csrc/init_python_bindings.cpp
index 6cb5b7c2f..d2632b359 100644
--- a/torch_ipex/csrc/init_python_bindings.cpp
+++ b/torch_ipex/csrc/init_python_bindings.cpp
@@ -54,8 +54,6 @@ dil::dims getDilTensorStrides(const at::Tensor &tensor) {
 /// ****************************
 
 void InitIpexModuleBindings(py::module m) {
-  m.def("_initialize_aten_bindings",
-        []() { AtenIpexType::InitializeAtenBindings(); });
   m.def("_get_git_revs", []() { return GetRevisions(); });
   m.def("enable_auto_dnnl", []() { AutoOptConfig::singleton().set_auto_dnnl(true); });
   m.def("disable_auto_dnnl", []() { AutoOptConfig::singleton().set_auto_dnnl(false); });
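Net effect for users and the test suite: with registration now static, the `_initialize_aten_bindings` Python binding and its C++ backing are deleted everywhere, including the test imports above, and importing the module is enough:

```python
# Before this patch, callers did:
#   import _torch_ipex as ipex
#   ipex._initialize_aten_bindings()
# Now each generated shard registers its kernels when the extension loads,
# so the import alone suffices (the old call would raise AttributeError).
import _torch_ipex as ipex
```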