
Commit dcd1a3d

task: benchmarking with asv
1 parent 67317b0 commit dcd1a3d

File tree

6 files changed: +370, -0 lines changed


.gitignore

Lines changed: 3 additions & 0 deletions
@@ -102,3 +102,6 @@ dpctl/tensor/_usmarray.h
 
 # moved cmake scripts
 dpctl/resources/cmake
+
+# asv artifacts
+*.asv*

benchmarks/README.md

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
# dpctl benchmarks

Benchmarking dpctl using Airspeed Velocity.
Read more about ASV [here](https://asv.readthedocs.io/en/stable/index.html).

## Usage
This suite was written with our CI in mind. As such, you will see that `asv.conf.json` is minimal, without any environment information supplied.
The expectation is that users execute `asv run` with an existing environment.

You should therefore have conda or mamba installed, and create a `dpctl-benchmarking` environment like so:
```
conda create --name dpctl-benchmarking python=$PYTHON_VERSION dpctl asv libmambapy conda dpcpp_linux-64 --override-channels \
    -c https://software.repos.intel.com/python/conda \
    -c conda-forge -y
```

Then, activate the environment and instruct `asv run` to use it for the benchmarks by pointing it to the environment's Python binary, like so:
```
conda activate dpctl-benchmarking
asv run --environment existing:/full/mamba/path/envs/dpctl-benchmarking/bin/python
```

## Writing new benchmarks
Read ASV's guidelines for writing benchmarks [here](https://asv.readthedocs.io/en/stable/writing_benchmarks.html). A minimal, illustrative sketch of a new benchmark module follows.
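The module name, class, and timed operation below are illustrative only (they are not part of this commit); any module placed under `benchmarks/benchmarks/` whose classes define `time_*` methods will be discovered and timed by `asv run`:
```
import dpctl
import dpctl.tensor as dpt


class Unary:
    """Illustrative timing benchmark for a unary elementwise call."""

    def setup(self):
        # asv calls setup() before timing each method
        self.q = dpctl.SyclQueue()
        self.x = dpt.ones(10**6, dtype=dpt.float32, sycl_queue=self.q)

    def time_sqrt(self):
        # methods prefixed with time_ are timed by asv
        dpt.sqrt(self.x)
        self.q.wait()
```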

benchmarks/asv.conf.json

Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
{
    // The version of the config file format. Do not change, unless
    // you know what you are doing.
    "version": 1,

    // The name of the project being benchmarked
    "project": "dpctl",

    // The project's homepage
    "project_url": "https://github.com/IntelPython/dpctl",

    // The URL or local path of the source code repository for the
    // project being benchmarked
    "repo": "..",

    // The Python project's subdirectory in your repo. If missing or
    // the empty string, the project is assumed to be located at the root
    // of the repository.
    // "repo_subdir": "",

    // Customizable commands for building the project.
    // See asv.conf.json documentation.
    "build_command": [],

    // Customizable commands for installing and uninstalling the project.
    // See asv.conf.json documentation.
    // "install_command": ["in-dir={env_dir} conda install dpctl --yes"],
    // "uninstall_command": ["return-code=any conda uninstall dpctl --yes"],

    // List of branches to benchmark. If not provided, defaults to "main"
    // (for git) or "default" (for mercurial).
    "branches": ["HEAD"], // for git
    // "branches": ["default"], // for mercurial

    // The DVCS being used. If not set, it will be automatically
    // determined from "repo" by looking at the protocol in the URL
    // (if remote), or by looking for special directories, such as
    // ".git" (if local).
    "dvcs": "git",

    // The tool to use to create environments. May be "conda",
    // "virtualenv", "mamba" (above 3.8)
    // or other value depending on the plugins in use.
    // If missing or the empty string, the tool will be automatically
    // determined by looking for tools on the PATH environment
    // variable.
    "environment_type": "conda",

    // timeout in seconds for installing any dependencies in environment
    // defaults to 10 min
    //"install_timeout": 600,

    // the base URL to show a commit for the project.
    // "show_commit_url": "http://github.com/owner/project/commit/",

    // The Pythons you'd like to test against. If not provided, defaults
    // to the current version of Python used to run `asv`.
    // "pythons": ["3.8", "3.12"],

    // The list of conda channel names to be searched for benchmark
    // dependency packages in the specified order
    // "conda_channels": [
    //     "https://af01p-igk.devtools.intel.com/artifactory/api/conda/idp-conda-pkgserver-igk-local/gold",
    //     "https://af01p-igk.devtools.intel.com/artifactory/api/conda/idp-conda-pkgserver-igk-local/tools",
    //     "conda-forge"
    // ],

    // A conda environment file that is used for environment creation.
    // "conda_environment_file": "environment.yml",

    // The matrix of dependencies to test. Each key of the "req"
    // requirements dictionary is the name of a package (in PyPI) and
    // the values are version numbers. An empty list or empty string
    // indicates to just test against the default (latest)
    // version. null indicates that the package is to not be
    // installed. If the package to be tested is only available from
    // PyPi, and the 'environment_type' is conda, then you can preface
    // the package name by 'pip+', and the package will be installed
    // via pip (with all the conda available packages installed first,
    // followed by the pip installed packages).
    //
    // The ``@env`` and ``@env_nobuild`` keys contain the matrix of
    // environment variables to pass to build and benchmark commands.
    // An environment will be created for every combination of the
    // cartesian product of the "@env" variables in this matrix.
    // Variables in "@env_nobuild" will be passed to every environment
    // during the benchmark phase, but will not trigger creation of
    // new environments. A value of ``null`` means that the variable
    // will not be set for the current combination.
    //
    // "matrix": {
    //     "env": {"PACKAGE_PATH": [""]}
    // },

    // Combinations of libraries/python versions can be excluded/included
    // from the set to test. Each entry is a dictionary containing additional
    // key-value pairs to include/exclude.
    //
    // An exclude entry excludes entries where all values match. The
    // values are regexps that should match the whole string.
    //
    // An include entry adds an environment. Only the packages listed
    // are installed. The 'python' key is required. The exclude rules
    // do not apply to includes.
    //
    // In addition to package names, the following keys are available:
    //
    // - python
    //     Python version, as in the *pythons* variable above.
    // - environment_type
    //     Environment type, as above.
    // - sys_platform
    //     Platform, as in sys.platform. Possible values for the common
    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
    // - req
    //     Required packages
    // - env
    //     Environment variables
    // - env_nobuild
    //     Non-build environment variables
    //
    // "exclude": [
    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
    //     {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda
    //     {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1
    // ],
    //
    // "include": [
    //     // additional env for python3.12
    //     {"python": "3.12", "req": {"numpy": "1.26"}, "env_nobuild": {"FOO": "123"}},
    //     // additional env if run on windows+conda
    //     {"platform": "win32", "environment_type": "conda", "python": "3.12", "req": {"libpython": ""}},
    // ],

    // The directory (relative to the current directory) that benchmarks are
    // stored in. If not provided, defaults to "benchmarks"
    "benchmark_dir": "benchmarks",

    // The directory (relative to the current directory) to cache the Python
    // environments in. If not provided, defaults to "env"
    "env_dir": ".asv/env",

    // The directory (relative to the current directory) that raw benchmark
    // results are stored in. If not provided, defaults to "results".
    "results_dir": ".asv/results",

    // The directory (relative to the current directory) that the html tree
    // should be written to. If not provided, defaults to "html".
    "html_dir": ".asv/html",

    // The number of characters to retain in the commit hashes.
    // "hash_length": 8,

    // `asv` will cache results of the recent builds in each
    // environment, making them faster to install next time. This is
    // the number of builds to keep, per environment.
    // "build_cache_size": 2,

    // The commits after which the regression search in `asv publish`
    // should start looking for regressions. Dictionary whose keys are
    // regexps matching to benchmark names, and values corresponding to
    // the commit (exclusive) after which to start looking for
    // regressions. The default is to start from the first commit
    // with results. If the commit is `null`, regression detection is
    // skipped for the matching benchmark.
    //
    // "regressions_first_commits": {
    //     "some_benchmark": "352cdf",  // Consider regressions only after this commit
    //     "another_benchmark": null,   // Skip regression detection altogether
    // },

    // The thresholds for relative change in results, after which `asv
    // publish` starts reporting regressions. Dictionary of the same
    // form as in ``regressions_first_commits``, with values
    // indicating the thresholds. If multiple entries match, the
    // maximum is taken. If no entry matches, the default is 5%.
    //
    // "regressions_thresholds": {
    //     "some_benchmark": 0.01,   // Threshold of 1%
    //     "another_benchmark": 0.5, // Threshold of 50%
    // },
}

benchmarks/benchmarks/__init__.py

Whitespace-only changes.

benchmarks/benchmarks/binary.py

Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
import dpctl
import dpctl.tensor as dpt

SHARED_QUEUE = dpctl.SyclQueue(property="enable_profiling")


class Binary:
    """Benchmark class for binary operations on SYCL devices."""

    timeout = 300.0

    def setup(self):
        """Setup the benchmark environment."""
        self.q = SHARED_QUEUE
        self.iterations = 1
        self.n_values = 10**8

    def run_bench(self, q, reps, n_max, dtype1, dtype2, op):
        """Run the benchmark for a specific function and dtype combination."""

        def get_sizes(n):
            s = []
            m = 8192
            while m < n:
                s.append(m)
                m *= 2
            s.append(n)
            return s

        x1 = dpt.ones(n_max, dtype=dtype1, sycl_queue=q)
        x2 = dpt.ones(n_max, dtype=dtype2, sycl_queue=q)
        r = op(x1, x2)

        max_bytes = x1.nbytes + x2.nbytes + r.nbytes
        times_res = []

        for n in get_sizes(n_max):
            x1_n = x1[:n]
            x2_n = x2[:n]
            r_n = r[:n]
            n_bytes = x1_n.nbytes + x2_n.nbytes + r_n.nbytes

            n_iters = int((max_bytes / n_bytes) * reps)

            while True:
                timer = dpctl.SyclTimer(
                    device_timer="order_manager", time_scale=1e9
                )
                with timer(q):
                    for _ in range(n_iters):
                        op(x1_n, x2_n, out=r_n)

                dev_dt = timer.dt.device_dt
                if dev_dt > 0:
                    times_res.append((n, dev_dt / n_iters))
                    break

        return times_res


binary_instance = Binary()
binary_instance.q = SHARED_QUEUE
binary_instance.iterations = 1
binary_instance.n_values = 10**8

function_list = [
    dpt.add,
    dpt.multiply,
    dpt.divide,
    dpt.subtract,
    dpt.floor_divide,
    dpt.remainder,
    dpt.hypot,
    dpt.logaddexp,
    dpt.pow,
    dpt.atan2,
    dpt.nextafter,
    dpt.copysign,
    dpt.less,
    dpt.less_equal,
    dpt.greater,
    dpt.greater_equal,
    dpt.equal,
    dpt.not_equal,
    dpt.minimum,
    dpt.maximum,
    dpt.bitwise_and,
    dpt.bitwise_or,
    dpt.bitwise_xor,
    dpt.bitwise_left_shift,
    dpt.bitwise_right_shift,
    dpt.logical_and,
    dpt.logical_or,
    dpt.logical_xor,
]

# Generate dtype combinations for each function
dtypes = {}
for fn in function_list:
    dtypes[fn] = [list(map(dpt.dtype, sig.split("->")[0])) for sig in fn.types]


# Dynamically create benchmark methods at the module level
def generate_benchmark_functions():
    """Dynamically create benchmark functions for each
    function and dtype combination.
    """
    for fn in function_list:
        fn_name = fn.name_
        for dtype1, dtype2 in dtypes[fn]:
            # Create unique function names
            method_name = f"time_{fn_name}_{dtype1.name}_{dtype2.name}"

            def benchmark_method(self, fn=fn, dtype1=dtype1, dtype2=dtype2):
                return self.run_bench(
                    self.q,
                    self.iterations,
                    self.n_values,
                    dtype1,
                    dtype2,
                    fn,
                )

            # Attach the new method to the Binary class
            benchmark_method.__name__ = method_name
            setattr(Binary, method_name, benchmark_method)


# Generate the benchmark functions
generate_benchmark_functions()
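
A quick, illustrative way to see which benchmarks the generation above produces (not part of this commit; it assumes the module is importable as `benchmarks.binary` from the `benchmarks/` directory):
```
# Sanity check: list a few of the dynamically attached time_* methods.
from benchmarks.binary import Binary

generated = sorted(n for n in dir(Binary) if n.startswith("time_"))
print(len(generated), generated[:3])
```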

benchmarks/benchmarks/ef_bench_add.py

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
import dpctl
import dpctl.tensor as dpt
import dpctl.tensor._tensor_elementwise_impl as tei
import dpctl.utils as dpu


class EfBenchAdd:

    def time_ef_bench_add(self):
        q = dpctl.SyclQueue(property="enable_profiling")
        n = 2**26
        reps = 50

        dt = dpt.int8
        x1 = dpt.ones(n, dtype=dt, sycl_queue=q)
        x2 = dpt.ones(n, dtype=dt, sycl_queue=q)

        # op1 allocates the output once; op2 is the low-level kernel
        # that writes into an existing destination array
        op1, op2 = dpt.add, tei._add

        r = op1(x1, x2)

        timer = dpctl.SyclTimer(device_timer="order_manager", time_scale=1e9)

        # per-queue order manager: each submission depends on prior work
        m = dpu.SequentialOrderManager[q]
        with timer(q):
            for _ in range(reps):
                deps = m.submitted_events
                ht_e, c_e = op2(
                    src1=x1, src2=x2, dst=r, sycl_queue=q, depends=deps
                )
                m.add_event_pair(ht_e, c_e)
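
For reference, the device time accumulated by the `SyclTimer` above can be read after the `with` block, in the same way `binary.py` does; the lines below are an illustrative follow-up, not part of the committed file:
```
# Would live inside time_ef_bench_add, after the timed loop.
# time_scale=1e9 makes both durations report in nanoseconds.
host_ns = timer.dt.host_dt
device_ns = timer.dt.device_dt
print(f"average device time per add: {device_ns / reps:.1f} ns")
```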
