@@ -1834,6 +1834,7 @@ struct ggml_compute_threadpool {
 struct ggml_compute_state {
     ggml_thread_t thrd;
     bool cpumask[GGML_N_CORES_MAX];
+    bool mask_specified;
     int ith;
     struct ggml_compute_threadpool * threadpool;
     enum ggml_status ec;
@@ -19472,13 +19473,6 @@ static bool __thread_priority(int32_t prio) {
 
 #endif
 
-static void __init_stack(size_t size) {
-    void* ptr = alloca(size);
-    if (ptr) {
-        memset(ptr, 0, size);
-    }
-}
-
 #ifdef __aarch64__
 
 static inline void __cpu_relax(void) {
@@ -19553,8 +19547,6 @@ struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_p
 
     threadpool->workers = workers;
 
-    __init_stack(2ULL * 1024 * 1024);
-
     int cpumask_iter = 0;
 
     __process_priority(tpp->prio);
@@ -19566,12 +19558,12 @@ struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_p
             .ith = j,
             .threadpool = threadpool,
             .ec = GGML_STATUS_SUCCESS,
+            .mask_specified = false
         };
 
         if (tpp->mask_specified) {
             __cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
-        } else {
-            workers[j].cpumask[j] = true;
+            workers[j].mask_specified = true;
         }
 
         // Spin threads for all secondary workers
@@ -19841,12 +19833,9 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
     struct ggml_compute_state * state = (struct ggml_compute_state *) data;
     struct ggml_compute_threadpool * threadpool = state->threadpool;
 
-#ifndef __aarch64__
-    __init_stack(2ULL * 1024 * 1024);
-#endif
-
     __thread_priority(threadpool->prio);
-    __thread_affinity(state->cpumask);
+    if (state->mask_specified)
+        __thread_affinity(state->cpumask);
 
     // Indicate that we're ready to go
     atomic_fetch_add(&threadpool->n_ready, 1);
@@ -20096,7 +20085,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
     bool disposable_threadpool = false;
 
     if (threadpool == NULL) {
-        //GGML_PRINT("NOTE: Threadpool is not specified. Will create a disposable threadpool\n");
+        // GGML_PRINT("NOTE: Threadpool is not specified. Will create a disposable threadpool\n");
         struct ggml_threadpool_params tpp = {
             .mask_specified = false,
             .n_threads = n_threads,
@@ -20118,7 +20107,8 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
     }
 
     // Update main thread affinity to match the current threadpool
-    __thread_affinity(threadpool->workers[0].cpumask);
+    if (threadpool->workers[0].mask_specified)
+        __thread_affinity(threadpool->workers[0].cpumask);
 
     // Set up work
     threadpool->cgraph = cgraph;
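
A minimal caller-side sketch (not part of this diff) of how the new mask_specified flag behaves. It assumes the ggml_threadpool_params fields referenced in the hunks above (mask_specified, n_threads, prio, cpumask, strict_cpu) and the ggml_create_threadpool() entry point; the concrete values used here are illustrative only.

    // No explicit CPU mask: mask_specified stays false for every worker, so
    // neither the secondary workers nor the main thread call __thread_affinity()
    // and thread placement is left entirely to the OS scheduler.
    struct ggml_threadpool_params tpp = {
        .mask_specified = false,   // assumption: other fields default as shown
        .n_threads      = 8,
        .prio           = 0,
        .strict_cpu     = false,
    };
    struct ggml_compute_threadpool * threadpool = ggml_create_threadpool(&tpp);

    // If tpp.cpumask (up to GGML_N_CORES_MAX entries) were filled in and
    // tpp.mask_specified set to true, each worker would instead get its mask via
    // __cpumask_next() and pin itself with __thread_affinity(), as in the hunks above.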