Commit ca00e65

task: benchmarking with asv
1 parent 67317b0 commit ca00e65

File tree: 6 files changed, +237 -0 lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
```diff
@@ -102,3 +102,6 @@ dpctl/tensor/_usmarray.h
 
 # moved cmake scripts
 dpctl/resources/cmake
+
+# asv artifacts
+*.asv*
```

benchmarks/README.md

Lines changed: 20 additions & 0 deletions
# dpctl benchmarks

Benchmarking dpctl using Airspeed Velocity.
Read more about ASV [here](https://asv.readthedocs.io/en/stable/index.html).

## Usage

The benchmarks are intended to be run against an existing environment; accordingly, `asv.conf.json` is kept minimal and does not supply any environment information.
Users are expected to execute `asv run` against that existing environment.

As such, you should have conda or mamba installed and create an environment [following these instructions](https://intelpython.github.io/dpctl/latest/beginners_guides/installation.html#dpctl-installation).
Additionally, install `asv` and `libmambapy` into the environment.

Then activate the environment and instruct `asv run` to use it for the benchmarks by pointing to the environment's Python binary, like so:

```
conda activate dpctl_env
asv run --environment existing:/full/mamba/path/envs/dpctl_env/bin/python
```

## Writing new benchmarks

Read ASV's guidelines for writing benchmarks [here](https://asv.readthedocs.io/en/stable/writing_benchmarks.html).

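As a follow-up to the usage above (not part of this commit), ASV can render the collected results once a run finishes: `asv publish` collates the raw results into the static HTML report directory configured below, and `asv preview` serves that report locally.

```
asv publish    # collate results from .asv/results into .asv/html
asv preview    # serve the generated report on a local web server
```
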
benchmarks/asv.conf.json

Lines changed: 53 additions & 0 deletions
```jsonc
{
    // The version of the config file format. Do not change, unless
    // you know what you are doing.
    "version": 1,

    // The name of the project being benchmarked
    "project": "dpctl",

    // The project's homepage
    "project_url": "https://github.com/IntelPython/dpctl",

    // The URL or local path of the source code repository for the
    // project being benchmarked
    "repo": "..",

    // Customizable commands for building the project.
    // See asv.conf.json documentation.
    "build_command": [],

    // List of branches to benchmark. If not provided, defaults to "main"
    // (for git) or "default" (for mercurial).
    "branches": ["HEAD"],

    // The DVCS being used. If not set, it will be automatically
    // determined from "repo" by looking at the protocol in the URL
    // (if remote), or by looking for special directories, such as
    // ".git" (if local).
    "dvcs": "git",

    // The tool to use to create environments. May be "conda",
    // "virtualenv", "mamba" (above 3.8)
    // or other value depending on the plugins in use.
    // If missing or the empty string, the tool will be automatically
    // determined by looking for tools on the PATH environment
    // variable.
    "environment_type": "conda",

    // The directory (relative to the current directory) that benchmarks are
    // stored in. If not provided, defaults to "benchmarks"
    "benchmark_dir": "benchmarks",

    // The directory (relative to the current directory) to cache the Python
    // environments in. If not provided, defaults to "env"
    "env_dir": ".asv/env",

    // The directory (relative to the current directory) that raw benchmark
    // results are stored in. If not provided, defaults to "results".
    "results_dir": ".asv/results",

    // The directory (relative to the current directory) that the html tree
    // should be written to. If not provided, defaults to "html".
    "html_dir": ".asv/html"
}
```

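Since `repo` points to `..` and the cache and result directories are, per the comments, resolved relative to the current directory, this configuration assumes `asv` is invoked from inside `benchmarks/`, where the config file lives. A sketch, reusing the placeholder environment path from the README:

```
cd benchmarks    # asv picks up asv.conf.json from the working directory
asv run --environment existing:/full/mamba/path/envs/dpctl_env/bin/python
```
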
benchmarks/benchmarks/__init__.py

Whitespace-only changes.

benchmarks/benchmarks/binary.py

Lines changed: 130 additions & 0 deletions
```python
import dpctl
import dpctl.tensor as dpt

SHARED_QUEUE = dpctl.SyclQueue(property="enable_profiling")


class Binary:
    """Benchmark class for binary operations on SYCL devices."""

    timeout = 300.0

    def setup(self):
        """Setup the benchmark environment."""
        self.q = SHARED_QUEUE
        self.iterations = 1
        self.n_values = 10**8

    def run_bench(self, q, reps, n_max, dtype1, dtype2, op):
        """Run the benchmark for a specific function and dtype combination."""

        def get_sizes(n):
            s = []
            m = 8192
            while m < n:
                s.append(m)
                m *= 2
            s.append(n)
            return s

        x1 = dpt.ones(n_max, dtype=dtype1, sycl_queue=q)
        x2 = dpt.ones(n_max, dtype=dtype2, sycl_queue=q)
        r = op(x1, x2)

        max_bytes = x1.nbytes + x2.nbytes + r.nbytes
        times_res = []

        for n in get_sizes(n_max):
            x1_n = x1[:n]
            x2_n = x2[:n]
            r_n = r[:n]
            n_bytes = x1_n.nbytes + x2_n.nbytes + r_n.nbytes

            n_iters = int((max_bytes / n_bytes) * reps)

            while True:
                timer = dpctl.SyclTimer(
                    device_timer="order_manager", time_scale=1e9
                )
                with timer(q):
                    for _ in range(n_iters):
                        op(x1_n, x2_n, out=r_n)

                dev_dt = timer.dt.device_dt
                if dev_dt > 0:
                    times_res.append((n, dev_dt / n_iters))
                    break

        return times_res


binary_instance = Binary()
binary_instance.q = SHARED_QUEUE
binary_instance.iterations = 1
binary_instance.n_values = 10**8

function_list = [
    dpt.add,
    dpt.multiply,
    dpt.divide,
    dpt.subtract,
    dpt.floor_divide,
    dpt.remainder,
    dpt.hypot,
    dpt.logaddexp,
    dpt.pow,
    dpt.atan2,
    dpt.nextafter,
    dpt.copysign,
    dpt.less,
    dpt.less_equal,
    dpt.greater,
    dpt.greater_equal,
    dpt.equal,
    dpt.not_equal,
    dpt.minimum,
    dpt.maximum,
    dpt.bitwise_and,
    dpt.bitwise_or,
    dpt.bitwise_xor,
    dpt.bitwise_left_shift,
    dpt.bitwise_right_shift,
    dpt.logical_and,
    dpt.logical_or,
    dpt.logical_xor,
]

# Generate dtype combinations for each function
dtypes = {}
for fn in function_list:
    dtypes[fn] = [list(map(dpt.dtype, sig.split("->")[0])) for sig in fn.types]


# Dynamically create benchmark methods at the module level
def generate_benchmark_functions():
    """Dynamically create benchmark functions for each
    function and dtype combination.
    """
    for fn in function_list:
        fn_name = fn.name_
        for dtype1, dtype2 in dtypes[fn]:
            # Create unique function names
            method_name = f"time_{fn_name}_{dtype1.name}_{dtype2.name}"

            def benchmark_method(self, fn=fn, dtype1=dtype1, dtype2=dtype2):
                return self.run_bench(
                    self.q,
                    self.iterations,
                    self.n_values,
                    dtype1,
                    dtype2,
                    fn,
                )

            # Attach the new method to the Binary class
            benchmark_method.__name__ = method_name
            setattr(Binary, method_name, benchmark_method)


# Generate the benchmark functions
generate_benchmark_functions()
```

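For a quick sanity check outside of ASV, the dynamically generated methods can be called directly. A minimal sketch, assuming a default SYCL device is available, that the module is importable as `binary`, and that `dpt.add` advertises an `int8`/`int8` signature (so that a `time_add_int8_int8` method gets generated):

```python
# Sketch (not part of this commit): run one generated benchmark by hand.
import binary  # assumes benchmarks/benchmarks is on sys.path

b = binary.Binary()
b.setup()  # binds the shared profiling queue and problem size

# Names follow the "time_{fn_name}_{dtype1.name}_{dtype2.name}" pattern above.
times = b.time_add_int8_int8()
for n, per_call in times:
    print(f"n={n}: {per_call:.1f} ns/call")  # ns because time_scale=1e9
```
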
benchmarks/benchmarks/ef_bench_add.py

Lines changed: 31 additions & 0 deletions
```python
import dpctl
import dpctl.tensor as dpt
import dpctl.tensor._tensor_elementwise_impl as tei
import dpctl.utils as dpu


class EfBenchAdd:

    def time_ef_bench_add(self):
        q = dpctl.SyclQueue(property="enable_profiling")
        n = 2**26
        reps = 50

        dt = dpt.int8
        x1 = dpt.ones(n, dtype=dt, sycl_queue=q)
        x2 = dpt.ones(n, dtype=dt, sycl_queue=q)

        op1, op2 = dpt.add, tei._add

        r = op1(x1, x2)

        timer = dpctl.SyclTimer(device_timer="order_manager", time_scale=1e9)

        m = dpu.SequentialOrderManager[q]
        with timer(q):
            for _ in range(reps):
                deps = m.submitted_events
                ht_e, c_e = op2(
                    src1=x1, src2=x2, dst=r, sycl_queue=q, depends=deps
                )
                m.add_event_pair(ht_e, c_e)
```

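For context, the benchmark above drives the low-level `tei._add` implementation and manages dependencies by hand through `SequentialOrderManager`. A hedged sketch of the equivalent measurement through the public `dpt.add` call, which handles ordering internally, could look like this (not part of the commit):

```python
# Sketch: the same timing loop through the public dpt.add API, useful for
# comparing Python-level dispatch overhead against the tei._add path above.
import dpctl
import dpctl.tensor as dpt

q = dpctl.SyclQueue(property="enable_profiling")
n, reps = 2**26, 50

x1 = dpt.ones(n, dtype=dpt.int8, sycl_queue=q)
x2 = dpt.ones(n, dtype=dpt.int8, sycl_queue=q)
r = dpt.add(x1, x2)  # allocate the output once, then reuse it via out=

timer = dpctl.SyclTimer(device_timer="order_manager", time_scale=1e9)
with timer(q):
    for _ in range(reps):
        dpt.add(x1, x2, out=r)

# Durations are in nanoseconds because time_scale=1e9.
print(timer.dt.host_dt, timer.dt.device_dt)
```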