Skip to content

Commit 6fbe989

Browse files
committed
resolving unaligned loop and casting issues
1 parent 78a6931 commit 6fbe989

File tree

2 files changed

+54
-36
lines changed

2 files changed

+54
-36
lines changed

quaddtype/quaddtype/src/casts.cpp

Lines changed: 49 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,13 @@ extern "C" {
1414
}
1515
#include "sleef.h"
1616
#include "sleefquad.h"
17-
#include <vector>
1817

1918
#include "scalar.h"
2019
#include "casts.h"
2120
#include "dtype.h"
2221

22+
#define NUM_CASTS 29 // 14 to_casts + 14 from_casts + 1 quad_to_quad
23+
2324
static NPY_CASTING
2425
quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self),
2526
PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]),
@@ -51,14 +52,13 @@ quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
5152
char *in_ptr = data[0];
5253
char *out_ptr = data[1];
5354

54-
while (N--) {
55-
Sleef_quad *in = (Sleef_quad *)in_ptr;
56-
Sleef_quad *out = (Sleef_quad *)out_ptr;
57-
58-
*out = *in;
55+
npy_intp in_stride = strides[0];
56+
npy_intp out_stride = strides[1];
5957

60-
in_ptr += strides[0];
61-
out_ptr += strides[1];
58+
while (N--) {
59+
memcpy(out_ptr, in_ptr, sizeof(Sleef_quad));
60+
in_ptr += in_stride;
61+
out_ptr += out_stride;
6262
}
6363
return 0;
6464
}
@@ -172,7 +172,7 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta
172172
}
173173

174174
loop_descrs[0] = PyArray_GetDefaultDescr(dtypes[0]);
175-
*view_offset = 0;
175+
// *view_offset = 0;
176176
return NPY_SAFE_CASTING;
177177
}
178178

@@ -187,9 +187,12 @@ numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[],
187187
char *out_ptr = data[1];
188188

189189
while (N--) {
190-
T in_val = *(T *)in_ptr;
191-
Sleef_quad *out_val = (Sleef_quad *)out_ptr;
192-
*out_val = to_quad<T>(in_val);
190+
T in_val;
191+
Sleef_quad out_val;
192+
193+
memcpy(&in_val, in_ptr, sizeof(T));
194+
out_val = to_quad<T>(in_val);
195+
memcpy(out_ptr, &out_val, sizeof(Sleef_quad));
193196

194197
in_ptr += strides[0];
195198
out_ptr += strides[1];
@@ -298,8 +301,8 @@ quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta
298301
loop_descrs[0] = given_descrs[0];
299302

300303
loop_descrs[1] = PyArray_GetDefaultDescr(dtypes[1]);
301-
*view_offset = 0;
302-
return NPY_SAME_KIND_CASTING;
304+
// *view_offset = 0;
305+
return NPY_UNSAFE_CASTING;
303306
}
304307

305308
template <typename T>
@@ -323,7 +326,16 @@ quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[],
323326
return 0;
324327
}
325328

326-
static std::vector<PyArrayMethod_Spec *> specs;
329+
static PyArrayMethod_Spec *specs[NUM_CASTS + 1]; // +1 for NULL terminator
330+
static size_t spec_count = 0;
331+
332+
void
333+
add_spec(PyArrayMethod_Spec *spec)
334+
{
335+
if (spec_count < NUM_CASTS) {
336+
specs[spec_count++] = spec;
337+
}
338+
}
327339

328340
// functions to add casts
329341
template <typename T>
@@ -337,15 +349,16 @@ add_cast_from(PyArray_DTypeMeta *to)
337349
{NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop<T>},
338350
{0, nullptr}};
339351

340-
specs.push_back(new PyArrayMethod_Spec{
352+
PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
341353
.name = "cast_QuadPrec_to_NumPy",
342354
.nin = 1,
343355
.nout = 1,
344-
.casting = NPY_SAME_KIND_CASTING,
356+
.casting = NPY_UNSAFE_CASTING,
345357
.flags = (NPY_ARRAYMETHOD_FLAGS)0,
346358
.dtypes = dtypes,
347359
.slots = slots,
348-
});
360+
};
361+
add_spec(spec);
349362
}
350363

351364
template <typename T>
@@ -359,34 +372,40 @@ add_cast_to(PyArray_DTypeMeta *from)
359372
{NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop<T>},
360373
{0, nullptr}};
361374

362-
specs.push_back(new PyArrayMethod_Spec{
375+
PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{
363376
.name = "cast_NumPy_to_QuadPrec",
364377
.nin = 1,
365378
.nout = 1,
366379
.casting = NPY_SAFE_CASTING,
367380
.flags = (NPY_ARRAYMETHOD_FLAGS)0,
368381
.dtypes = dtypes,
369382
.slots = slots,
370-
});
383+
};
384+
385+
add_spec(spec);
371386
}
372387

373388
PyArrayMethod_Spec **
374389
init_casts_internal(void)
375390
{
376391
PyArray_DTypeMeta **quad2quad_dtypes = new PyArray_DTypeMeta *[2]{nullptr, nullptr};
392+
PyType_Slot *quad2quad_slots = new PyType_Slot[4]{
393+
{NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors},
394+
{NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop},
395+
{NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop},
396+
{0, nullptr}};
377397

378-
specs.push_back(new PyArrayMethod_Spec{
398+
PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{
379399
.name = "cast_QuadPrec_to_QuadPrec",
380400
.nin = 1,
381401
.nout = 1,
382402
.casting = NPY_SAME_KIND_CASTING,
383403
.flags = NPY_METH_SUPPORTS_UNALIGNED,
384404
.dtypes = quad2quad_dtypes,
385-
.slots = new PyType_Slot[4]{
386-
{NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors},
387-
{NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop},
388-
{NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop},
389-
{0, NULL}}});
405+
.slots = quad2quad_slots,
406+
};
407+
408+
add_spec(quad2quad_spec);
390409

391410
add_cast_to<npy_bool>(&PyArray_BoolDType);
392411
add_cast_to<npy_byte>(&PyArray_ByteDType);
@@ -418,8 +437,8 @@ init_casts_internal(void)
418437
add_cast_from<double>(&PyArray_DoubleDType);
419438
add_cast_from<long double>(&PyArray_LongDoubleDType);
420439

421-
specs.push_back(nullptr);
422-
return specs.data();
440+
specs[spec_count] = nullptr;
441+
return specs;
423442
}
424443

425444
PyArrayMethod_Spec **
@@ -428,7 +447,7 @@ init_casts(void)
428447
try {
429448
return init_casts_internal();
430449
}
431-
catch (const std::exception &e) {
450+
catch (int e) {
432451
PyErr_NoMemory();
433452
return nullptr;
434453
}
@@ -445,5 +464,5 @@ free_casts(void)
445464
delete cast->slots;
446465
delete cast;
447466
}
448-
specs.clear();
467+
spec_count = 0;
449468
}

quaddtype/quaddtype/src/casts.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
11
#ifndef _QUADDTYPE_CASTS_H
22
#define _QUADDTYPE_CASTS_H
33

4-
#include<Python.h>
4+
#include <Python.h>
55
#include "numpy/dtype_api.h"
66

7-
87
#ifdef __cplusplus
98
extern "C" {
109
#endif
1110

12-
extern PyArrayMethod_Spec QuadtoQuadCastSpec;
13-
14-
PyArrayMethod_Spec ** init_casts(void);
11+
PyArrayMethod_Spec **
12+
init_casts(void);
1513

16-
void free_casts(void);
14+
void
15+
free_casts(void);
1716

1817
#ifdef __cplusplus
1918
}

0 commit comments

Comments
 (0)