From 0a36a9d70d9e79f5ad07dddb68ebffaadcb616d1 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sun, 25 Aug 2024 12:02:33 +0530 Subject: [PATCH 01/32] added readme --- quaddtype/README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/quaddtype/README.md b/quaddtype/README.md index e69de29b..97425387 100644 --- a/quaddtype/README.md +++ b/quaddtype/README.md @@ -0,0 +1,17 @@ +# Numpy-QuadDType + +## Installation + +``` +pip install numpy==2.1.0 +pip install -i https://test.pypi.org/simple/ quaddtype +``` + +## Usage + +``` +import numpy as np +from quaddtype import QuadPrecDType, QuadPrecision + +np.array([1,2,3], dtype=QuadPrecDType()) +``` From d56e59278b3dceabb6cc9bd373872d9f392f52ed Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sat, 31 Aug 2024 22:37:00 +0530 Subject: [PATCH 02/32] renaming module and adding initial longdouble backend support --- quaddtype/meson.build | 38 +- quaddtype/numpy_quaddtype/__init__.py | 23 + quaddtype/numpy_quaddtype/src/casts.cpp | 477 ++++++++++++++++++ .../src/casts.h | 0 quaddtype/numpy_quaddtype/src/dtype.c | 241 +++++++++ .../src/dtype.h | 18 +- .../src/ops.hpp | 0 quaddtype/numpy_quaddtype/src/quad_common.h | 17 + .../src/quaddtype_main.c | 26 +- quaddtype/numpy_quaddtype/src/scalar.c | 159 ++++++ .../src/scalar.h | 15 +- .../src/scalar_ops.cpp | 0 .../src/scalar_ops.h | 0 .../src/umath.cpp | 0 .../src/umath.h | 0 quaddtype/pyproject.toml | 4 +- quaddtype/quaddtype/__init__.py | 1 - quaddtype/quaddtype/src/casts.cpp | 468 ----------------- quaddtype/quaddtype/src/dtype.c | 216 -------- quaddtype/quaddtype/src/scalar.c | 126 ----- quaddtype/reinstall.sh | 2 +- 21 files changed, 969 insertions(+), 862 deletions(-) create mode 100644 quaddtype/numpy_quaddtype/__init__.py create mode 100644 quaddtype/numpy_quaddtype/src/casts.cpp rename quaddtype/{quaddtype => numpy_quaddtype}/src/casts.h (100%) create mode 100644 quaddtype/numpy_quaddtype/src/dtype.c rename quaddtype/{quaddtype => numpy_quaddtype}/src/dtype.h (53%) rename quaddtype/{quaddtype => numpy_quaddtype}/src/ops.hpp (100%) create mode 100644 quaddtype/numpy_quaddtype/src/quad_common.h rename quaddtype/{quaddtype => numpy_quaddtype}/src/quaddtype_main.c (68%) create mode 100644 quaddtype/numpy_quaddtype/src/scalar.c rename quaddtype/{quaddtype => numpy_quaddtype}/src/scalar.h (55%) rename quaddtype/{quaddtype => numpy_quaddtype}/src/scalar_ops.cpp (100%) rename quaddtype/{quaddtype => numpy_quaddtype}/src/scalar_ops.h (100%) rename quaddtype/{quaddtype => numpy_quaddtype}/src/umath.cpp (100%) rename quaddtype/{quaddtype => numpy_quaddtype}/src/umath.h (100%) delete mode 100644 quaddtype/quaddtype/__init__.py delete mode 100644 quaddtype/quaddtype/src/casts.cpp delete mode 100644 quaddtype/quaddtype/src/dtype.c delete mode 100644 quaddtype/quaddtype/src/scalar.c diff --git a/quaddtype/meson.build b/quaddtype/meson.build index f1f9eab1..50dc67b1 100644 --- a/quaddtype/meson.build +++ b/quaddtype/meson.build @@ -1,12 +1,12 @@ -project('quaddtype', 'c', 'cpp', default_options : ['cpp_std=c++17', 'b_pie=true']) +project('numpy_quaddtype', 'c', 'cpp', default_options : ['cpp_std=c++17', 'b_pie=true']) py_mod = import('python') py = py_mod.find_installation() c = meson.get_compiler('c') -sleef_dep = c.find_library('sleef') -sleefquad_dep = c.find_library('sleefquad') +sleef_dep = c.find_library('sleef', dirs:['/usr/local/lib']) +sleefquad_dep = c.find_library('sleefquad', dirs:['/usr/local/lib']) incdir_numpy = run_command(py, [ @@ -19,30 +19,30 @@ incdir_numpy = run_command(py, includes = include_directories( [ incdir_numpy, - 'quaddtype/src', + 'numpy_quaddtype/src', ] ) srcs = [ - 'quaddtype/src/casts.h', - 'quaddtype/src/casts.cpp', - 'quaddtype/src/scalar.h', - 'quaddtype/src/scalar.c', - 'quaddtype/src/dtype.h', - 'quaddtype/src/dtype.c', - 'quaddtype/src/quaddtype_main.c', - 'quaddtype/src/scalar_ops.h', - 'quaddtype/src/scalar_ops.cpp', - 'quaddtype/src/ops.hpp', - 'quaddtype/src/umath.h', - 'quaddtype/src/umath.cpp' + 'numpy_quaddtype/src/casts.h', + 'numpy_quaddtype/src/casts.cpp', + 'numpy_quaddtype/src/scalar.h', + 'numpy_quaddtype/src/scalar.c', + 'numpy_quaddtype/src/dtype.h', + 'numpy_quaddtype/src/dtype.c', + 'numpy_quaddtype/src/quaddtype_main.c', + # 'numpy_quaddtype/src/scalar_ops.h', + # 'numpy_quaddtype/src/scalar_ops.cpp', + # 'numpy_quaddtype/src/ops.hpp', + # 'numpy_quaddtype/src/umath.h', + # 'numpy_quaddtype/src/umath.cpp' ] py.install_sources( [ - 'quaddtype/__init__.py', + 'numpy_quaddtype/__init__.py', ], - subdir: 'quaddtype', + subdir: 'numpy_quaddtype', pure: false ) @@ -51,6 +51,6 @@ srcs, c_args: ['-g', '-O0', '-lsleef', '-lsleefquad'], dependencies: [sleef_dep, sleefquad_dep], install: true, -subdir: 'quaddtype', +subdir: 'numpy_quaddtype', include_directories: includes ) \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py new file mode 100644 index 00000000..5ee5ab30 --- /dev/null +++ b/quaddtype/numpy_quaddtype/__init__.py @@ -0,0 +1,23 @@ +from ._quaddtype_main import ( + QuadPrecision, + QuadPrecDType +) + +__all__ = ['QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision', + 'SleefQuadPrecDType', 'LongDoubleQuadPrecDType'] + + +def SleefQuadPrecision(value): + return QuadPrecision(value, backend='sleef') + + +def LongDoubleQuadPrecision(value): + return QuadPrecision(value, backend='longdouble') + + +def SleefQuadPrecDType(): + return QuadPrecDType(backend='sleef') + + +def LongDoubleQuadPrecDType(): + return QuadPrecDType(backend='longdouble') diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp new file mode 100644 index 00000000..44c316e0 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -0,0 +1,477 @@ +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + +extern "C" { +#include + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/dtype_api.h" +} +#include "sleef.h" +#include "sleefquad.h" + +#include "quad_common.h" +#include "scalar.h" +#include "casts.h" +#include "dtype.h" + +#define NUM_CASTS 29 // 14 to_casts + 14 from_casts + 1 quad_to_quad + +static NPY_CASTING +quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + QuadPrecDTypeObject *given_descrs[2], + QuadPrecDTypeObject *loop_descrs[2], npy_intp *view_offset) +{ + if (given_descrs[0]->backend != given_descrs[1]->backend) { + return NPY_UNSAFE_CASTING; + } + + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + Py_INCREF(given_descrs[0]); + loop_descrs[1] = given_descrs[0]; + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + *view_offset = 0; + return NPY_NO_CASTING; +} + +static int +quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + size_t elem_size = (descr->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + while (N--) { + memcpy(out_ptr, in_ptr, elem_size); + in_ptr += in_stride; + out_ptr += out_stride; + } + return 0; +} + +// Casting from other types to QuadDType + +// template +// static inline Sleef_quad +// to_quad(T x); + +// template <> +// inline Sleef_quad +// to_quad(npy_bool x) +// { +// return x ? Sleef_cast_from_doubleq1(1.0) : Sleef_cast_from_doubleq1(0.0); +// } +// template <> +// inline Sleef_quad +// to_quad(npy_byte x) +// { +// return Sleef_cast_from_int64q1(x); +// } +// // template <> +// // inline Sleef_quad +// // to_quad(npy_ubyte x) +// // { +// // return Sleef_cast_from_uint64q1(x); +// // } +// template <> +// inline Sleef_quad +// to_quad(npy_short x) +// { +// return Sleef_cast_from_int64q1(x); +// } +// template <> +// inline Sleef_quad +// to_quad(npy_ushort x) +// { +// return Sleef_cast_from_uint64q1(x); +// } +// template <> +// inline Sleef_quad +// to_quad(npy_int x) +// { +// return Sleef_cast_from_int64q1(x); +// } +// template <> +// inline Sleef_quad +// to_quad(npy_uint x) +// { +// return Sleef_cast_from_uint64q1(x); +// } +// template <> +// inline Sleef_quad +// to_quad(npy_long x) +// { +// return Sleef_cast_from_int64q1(x); +// } +// template <> +// inline Sleef_quad +// to_quad(npy_ulong x) +// { +// return Sleef_cast_from_uint64q1(x); +// } +// template <> +// inline Sleef_quad +// to_quad(npy_longlong x) +// { +// return Sleef_cast_from_int64q1(x); +// } +// template <> +// inline Sleef_quad +// to_quad(npy_ulonglong x) +// { +// return Sleef_cast_from_uint64q1(x); +// } +// template <> +// inline Sleef_quad +// to_quad(float x) +// { +// return Sleef_cast_from_doubleq1(x); +// } +// template <> +// inline Sleef_quad +// to_quad(double x) +// { +// return Sleef_cast_from_doubleq1(x); +// } +// template <> +// inline Sleef_quad +// to_quad(long double x) +// { +// return Sleef_cast_from_doubleq1(x); +// } + +// template +// static NPY_CASTING +// numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], +// PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], +// npy_intp *view_offset) +// { +// if (given_descrs[1] == NULL) { +// loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(); +// if (loop_descrs[1] == nullptr) { +// return (NPY_CASTING)-1; +// } +// } +// else { +// Py_INCREF(given_descrs[1]); +// loop_descrs[1] = given_descrs[1]; +// } + +// loop_descrs[0] = PyArray_GetDefaultDescr(dtypes[0]); +// // *view_offset = 0; +// return NPY_SAFE_CASTING; +// } + +// template +// static int +// numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], +// npy_intp const dimensions[], npy_intp const strides[], +// void *NPY_UNUSED(auxdata)) +// { +// npy_intp N = dimensions[0]; +// char *in_ptr = data[0]; +// char *out_ptr = data[1]; + +// while (N--) { +// T in_val; +// Sleef_quad out_val; + +// memcpy(&in_val, in_ptr, sizeof(T)); +// out_val = to_quad(in_val); +// memcpy(out_ptr, &out_val, sizeof(Sleef_quad)); + +// in_ptr += strides[0]; +// out_ptr += strides[1]; +// } +// return 0; +// } + +// // Casting from QuadDType to other types + +// template +// static inline T +// from_quad(Sleef_quad x); + +// template <> +// inline npy_bool +// from_quad(Sleef_quad x) +// { +// return Sleef_cast_to_int64q1(x) != 0; +// } +// template <> +// inline npy_byte +// from_quad(Sleef_quad x) +// { +// return (npy_byte)Sleef_cast_to_int64q1(x); +// } +// // template <> +// // inline npy_ubyte +// // from_quad(Sleef_quad x) +// // { +// // return (npy_ubyte)Sleef_cast_to_uint64q1(x); +// // } +// template <> +// inline npy_short +// from_quad(Sleef_quad x) +// { +// return (npy_short)Sleef_cast_to_int64q1(x); +// } +// template <> +// inline npy_ushort +// from_quad(Sleef_quad x) +// { +// return (npy_ushort)Sleef_cast_to_uint64q1(x); +// } +// template <> +// inline npy_int +// from_quad(Sleef_quad x) +// { +// return (npy_int)Sleef_cast_to_int64q1(x); +// } +// template <> +// inline npy_uint +// from_quad(Sleef_quad x) +// { +// return (npy_uint)Sleef_cast_to_uint64q1(x); +// } +// template <> +// inline npy_long +// from_quad(Sleef_quad x) +// { +// return (npy_long)Sleef_cast_to_int64q1(x); +// } +// template <> +// inline npy_ulong +// from_quad(Sleef_quad x) +// { +// return (npy_ulong)Sleef_cast_to_uint64q1(x); +// } +// template <> +// inline npy_longlong +// from_quad(Sleef_quad x) +// { +// return Sleef_cast_to_int64q1(x); +// } +// template <> +// inline npy_ulonglong +// from_quad(Sleef_quad x) +// { +// return Sleef_cast_to_uint64q1(x); +// } +// template <> +// inline float +// from_quad(Sleef_quad x) +// { +// return Sleef_cast_to_doubleq1(x); +// } +// template <> +// inline double +// from_quad(Sleef_quad x) +// { +// return Sleef_cast_to_doubleq1(x); +// } +// template <> +// inline long double +// from_quad(Sleef_quad x) +// { +// return Sleef_cast_to_doubleq1(x); +// } + +// template +// static NPY_CASTING +// quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], +// PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], +// npy_intp *view_offset) +// { +// Py_INCREF(given_descrs[0]); +// loop_descrs[0] = given_descrs[0]; + +// loop_descrs[1] = PyArray_GetDefaultDescr(dtypes[1]); +// // *view_offset = 0; +// return NPY_UNSAFE_CASTING; +// } + +// template +// static int +// quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[], +// npy_intp const dimensions[], npy_intp const strides[], +// void *NPY_UNUSED(auxdata)) +// { +// npy_intp N = dimensions[0]; +// char *in_ptr = data[0]; +// char *out_ptr = data[1]; + +// while (N--) { +// Sleef_quad in_val = *(Sleef_quad *)in_ptr; +// T *out_val = (T *)out_ptr; +// *out_val = from_quad(in_val); + +// in_ptr += strides[0]; +// out_ptr += strides[1]; +// } +// return 0; +// } + +static PyArrayMethod_Spec *specs[NUM_CASTS + 1]; // +1 for NULL terminator +static size_t spec_count = 0; + +void +add_spec(PyArrayMethod_Spec *spec) +{ + if (spec_count < NUM_CASTS) { + specs[spec_count++] = spec; + } +} + +// // functions to add casts +// template +// void +// add_cast_from(PyArray_DTypeMeta *to) +// { +// PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{nullptr, to}; + +// PyType_Slot *slots = new PyType_Slot[3]{ +// {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors}, +// {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop}, +// {0, nullptr}}; + +// PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ +// .name = "cast_QuadPrec_to_NumPy", +// .nin = 1, +// .nout = 1, +// .casting = NPY_UNSAFE_CASTING, +// .flags = (NPY_ARRAYMETHOD_FLAGS)0, +// .dtypes = dtypes, +// .slots = slots, +// }; +// add_spec(spec); +// } + +// template +// void +// add_cast_to(PyArray_DTypeMeta *from) +// { +// PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, nullptr}; + +// PyType_Slot *slots = new PyType_Slot[3]{ +// {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors}, +// {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop}, +// {0, nullptr}}; + +// PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ +// .name = "cast_NumPy_to_QuadPrec", +// .nin = 1, +// .nout = 1, +// .casting = NPY_SAFE_CASTING, +// .flags = (NPY_ARRAYMETHOD_FLAGS)0, +// .dtypes = dtypes, +// .slots = slots, +// }; + +// add_spec(spec); +// } + +PyArrayMethod_Spec ** +init_casts_internal(void) +{ + PyArray_DTypeMeta **quad2quad_dtypes = + new PyArray_DTypeMeta *[2]{&QuadPrecDType, &QuadPrecDType}; + PyType_Slot *quad2quad_slots = new PyType_Slot[4]{ + {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop}, + {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop}, + {0, nullptr}}; + + PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{ + .name = "cast_QuadPrec_to_QuadPrec", + .nin = 1, + .nout = 1, + .casting = NPY_NO_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = quad2quad_dtypes, + .slots = quad2quad_slots, + }; + + add_spec(quad2quad_spec); + + // add_cast_to(&PyArray_BoolDType); + // add_cast_to(&PyArray_ByteDType); + // add_cast_to(&PyArray_UByteDType); + // add_cast_to(&PyArray_ShortDType); + // add_cast_to(&PyArray_UShortDType); + // add_cast_to(&PyArray_IntDType); + // add_cast_to(&PyArray_UIntDType); + // add_cast_to(&PyArray_LongDType); + // add_cast_to(&PyArray_ULongDType); + // add_cast_to(&PyArray_LongLongDType); + // add_cast_to(&PyArray_ULongLongDType); + // add_cast_to(&PyArray_FloatDType); + // add_cast_to(&PyArray_DoubleDType); + // add_cast_to(&PyArray_LongDoubleDType); + + // add_cast_from(&PyArray_BoolDType); + // add_cast_from(&PyArray_ByteDType); + // add_cast_from(&PyArray_UByteDType); + // add_cast_from(&PyArray_ShortDType); + // add_cast_from(&PyArray_UShortDType); + // add_cast_from(&PyArray_IntDType); + // add_cast_from(&PyArray_UIntDType); + // add_cast_from(&PyArray_LongDType); + // add_cast_from(&PyArray_ULongDType); + // add_cast_from(&PyArray_LongLongDType); + // add_cast_from(&PyArray_ULongLongDType); + // add_cast_from(&PyArray_FloatDType); + // add_cast_from(&PyArray_DoubleDType); + // add_cast_from(&PyArray_LongDoubleDType); + + specs[spec_count] = nullptr; + return specs; +} + +PyArrayMethod_Spec ** +init_casts(void) +{ + try { + return init_casts_internal(); + } + catch (int e) { + PyErr_NoMemory(); + return nullptr; + } +} + +void +free_casts(void) +{ + for (auto cast : specs) { + if (cast == nullptr) { + continue; + } + delete[] cast->dtypes; + delete[] cast->slots; + delete cast; + } + spec_count = 0; +} \ No newline at end of file diff --git a/quaddtype/quaddtype/src/casts.h b/quaddtype/numpy_quaddtype/src/casts.h similarity index 100% rename from quaddtype/quaddtype/src/casts.h rename to quaddtype/numpy_quaddtype/src/casts.h diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c new file mode 100644 index 00000000..a00c37c9 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -0,0 +1,241 @@ +#include +#include +#include + +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/dtype_api.h" + +#include "scalar.h" +#include "casts.h" +#include "dtype.h" + +static inline int +quad_load(void *x, char *data_ptr, QuadBackendType backend) +{ + if (data_ptr == NULL || x == NULL) { + return -1; + } + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)x = *(Sleef_quad *)data_ptr; + } + else { + *(long double *)x = *(long double *)data_ptr; + } + return 0; +} + +static inline int +quad_store(char *data_ptr, void *x, QuadBackendType backend) +{ + if (data_ptr == NULL || x == NULL) { + return -1; + } + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)data_ptr = *(Sleef_quad *)x; + } + else { + *(long double *)data_ptr = *(long double *)x; + } + return 0; +} + +QuadPrecDTypeObject * +new_quaddtype_instance(QuadBackendType backend) +{ + QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new( + (PyTypeObject *)&QuadPrecDType, NULL, NULL); + if (new == NULL) { + return NULL; + } + new->base.elsize = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + new->base.alignment = (backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double); + new->backend = backend; + + return new; +} + +static QuadPrecDTypeObject * +ensure_canonical(QuadPrecDTypeObject *self) +{ + Py_INCREF(self); + return self; +} + +static QuadPrecDTypeObject * +common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) +{ + if (dtype1->backend != dtype2->backend) { + PyErr_SetString(PyExc_TypeError, + "Cannot combine QuadPrecDType instances with different backends"); + return NULL; + } + Py_INCREF(dtype1); + return dtype1; +} + +static PyArray_DTypeMeta * +common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) +{ + // Promote integer and floating-point types to QuadPrecDType + if (other->type_num >= 0 && + (PyTypeNum_ISINTEGER(other->type_num) || PyTypeNum_ISFLOAT(other->type_num))) { + Py_INCREF(cls); + return cls; + } + // Don't promote complex types + if (PyTypeNum_ISCOMPLEX(other->type_num)) { + Py_INCREF(Py_NotImplemented); + return (PyArray_DTypeMeta *)Py_NotImplemented; + } + + Py_INCREF(Py_NotImplemented); + return (PyArray_DTypeMeta *)Py_NotImplemented; +} + +static PyArray_Descr * +quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj) +{ + if (Py_TYPE(obj) != &QuadPrecision_Type) { + PyErr_SetString(PyExc_TypeError, "Can only store QuadPrecision in a QuadPrecDType array."); + return NULL; + } + QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; + return (PyArray_Descr *)new_quaddtype_instance(quad_obj->backend); +} + +static int +quadprec_setitem(QuadPrecDTypeObject *descr, PyObject *obj, char *dataptr) +{ + QuadPrecisionObject *value; + if (PyObject_TypeCheck(obj, &QuadPrecision_Type)) { + Py_INCREF(obj); + value = (QuadPrecisionObject *)obj; + } + else { + value = QuadPrecision_from_object(obj, descr->backend); + if (value == NULL) { + return -1; + } + } + + if (quad_store(dataptr, &value->value, descr->backend) < 0) { + Py_DECREF(value); + char error_msg[100]; + snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void *)dataptr); + PyErr_SetString(PyExc_ValueError, error_msg); + return -1; + } + + Py_DECREF(value); + return 0; +} + +static PyObject * +quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr) +{ + QuadPrecisionObject *new = QuadPrecision_raw_new(descr->backend); + if (!new) { + return NULL; + } + if (quad_load(&new->value, dataptr, descr->backend) < 0) { + Py_DECREF(new); + char error_msg[100]; + snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void *)dataptr); + PyErr_SetString(PyExc_ValueError, error_msg); + return NULL; + } + return (PyObject *)new; +} + +static PyArray_Descr * +quadprec_default_descr(PyArray_DTypeMeta *NPY_UNUSED(cls)) +{ + return (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF); +} + +static PyType_Slot QuadPrecDType_Slots[] = { + {NPY_DT_ensure_canonical, &ensure_canonical}, + {NPY_DT_common_instance, &common_instance}, + {NPY_DT_common_dtype, &common_dtype}, + {NPY_DT_discover_descr_from_pyobject, &quadprec_discover_descriptor_from_pyobject}, + {NPY_DT_setitem, &quadprec_setitem}, + {NPY_DT_getitem, &quadprec_getitem}, + {NPY_DT_default_descr, &quadprec_default_descr}, + {0, NULL}}; + +static PyObject * +QuadPrecDType_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"backend", NULL}; + const char *backend_str = "sleef"; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s", kwlist, &backend_str)) { + return NULL; + } + + QuadBackendType backend = BACKEND_SLEEF; + if (strcmp(backend_str, "longdouble") == 0) { + backend = BACKEND_LONGDOUBLE; + } + else if (strcmp(backend_str, "sleef") != 0) { + PyErr_SetString(PyExc_ValueError, "Invalid backend. Use 'sleef' or 'longdouble'."); + return NULL; + } + + return (PyObject *)new_quaddtype_instance(backend); +} + +static PyObject * +QuadPrecDType_repr(QuadPrecDTypeObject *self) +{ + const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble"; + return PyUnicode_FromFormat("QuadPrecDType(backend='%s')", backend_str); +} + +PyArray_DTypeMeta QuadPrecDType = { + {{ + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "QuadPrecDType.QuadPrecDType", + .tp_basicsize = sizeof(QuadPrecDTypeObject), + .tp_new = QuadPrecDType_new, + .tp_repr = (reprfunc)QuadPrecDType_repr, + .tp_str = (reprfunc)QuadPrecDType_repr, + }}, +}; + +int +init_quadprec_dtype(void) +{ + PyArrayMethod_Spec **casts = init_casts(); + if (!casts) + return -1; + + PyArrayDTypeMeta_Spec QuadPrecDType_DTypeSpec = { + .flags = NPY_DT_NUMERIC, + .casts = casts, + .typeobj = &QuadPrecision_Type, + .slots = QuadPrecDType_Slots, + }; + + ((PyObject *)&QuadPrecDType)->ob_type = &PyArrayDTypeMeta_Type; + + ((PyTypeObject *)&QuadPrecDType)->tp_base = &PyArrayDescr_Type; + + if (PyType_Ready((PyTypeObject *)&QuadPrecDType) < 0) { + return -1; + } + + if (PyArrayInitDTypeMeta_FromSpec(&QuadPrecDType, &QuadPrecDType_DTypeSpec) < 0) { + return -1; + } + + free_casts(); + + return 0; +} \ No newline at end of file diff --git a/quaddtype/quaddtype/src/dtype.h b/quaddtype/numpy_quaddtype/src/dtype.h similarity index 53% rename from quaddtype/quaddtype/src/dtype.h rename to quaddtype/numpy_quaddtype/src/dtype.h index 162d5714..243018d3 100644 --- a/quaddtype/quaddtype/src/dtype.h +++ b/quaddtype/numpy_quaddtype/src/dtype.h @@ -1,25 +1,23 @@ #ifndef _QUADDTYPE_DTYPE_H #define _QUADDTYPE_DTYPE_H + #ifdef __cplusplus extern "C" { #endif -#include -#include -#include -#include - -#include "scalar.h" +#include +#include +#include +#include "quad_common.h" -typedef struct -{ +typedef struct { PyArray_Descr base; - + QuadBackendType backend; } QuadPrecDTypeObject; extern PyArray_DTypeMeta QuadPrecDType; -QuadPrecDTypeObject * new_quaddtype_instance(void); +QuadPrecDTypeObject * new_quaddtype_instance(QuadBackendType backend); int init_quadprec_dtype(void); diff --git a/quaddtype/quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp similarity index 100% rename from quaddtype/quaddtype/src/ops.hpp rename to quaddtype/numpy_quaddtype/src/ops.hpp diff --git a/quaddtype/numpy_quaddtype/src/quad_common.h b/quaddtype/numpy_quaddtype/src/quad_common.h new file mode 100644 index 00000000..fcdf6f2b --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/quad_common.h @@ -0,0 +1,17 @@ +#ifndef _QUADDTYPE_COMMON_H +#define _QUADDTYPE_COMMON_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + BACKEND_SLEEF = 0, + BACKEND_LONGDOUBLE +} QuadBackendType; + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/quaddtype/quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c similarity index 68% rename from quaddtype/quaddtype/src/quaddtype_main.c rename to quaddtype/numpy_quaddtype/src/quaddtype_main.c index 098c0749..ce263e49 100644 --- a/quaddtype/quaddtype/src/quaddtype_main.c +++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c @@ -9,13 +9,15 @@ #include "numpy/dtype_api.h" #include "numpy/ufuncobject.h" +#include "scalar.h" #include "dtype.h" -#include "umath.h" +// #include "umath.h" +#include "quad_common.h" static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, .m_name = "_quaddtype_main", - .m_doc = "Quad (128-bit) floating point Data Type for Numpy", + .m_doc = "Quad (128-bit) floating point Data Type for NumPy with multiple backends", .m_size = -1, }; @@ -25,31 +27,29 @@ PyInit__quaddtype_main(void) import_array(); import_umath(); PyObject *m = PyModule_Create(&moduledef); - if (!m) - { + if (!m) { return NULL; } if (init_quadprecision_scalar() < 0) goto error; - - if(PyModule_AddObject(m, "QuadPrecision", (PyObject *)&QuadPrecision_Type) < 0) - goto error; - if(init_quadprec_dtype() < 0) + if (PyModule_AddObject(m, "QuadPrecision", (PyObject *)&QuadPrecision_Type) < 0) goto error; - if(PyModule_AddObject(m, "QuadPrecDType", (PyObject *)&QuadPrecDType) < 0) + if (init_quadprec_dtype() < 0) goto error; - if (init_quad_umath() < 0) { + if (PyModule_AddObject(m, "QuadPrecDType", (PyObject *)&QuadPrecDType) < 0) goto error; - } + + // if (init_quad_umath() < 0) { + // goto error; + // } return m; - error: - Py_DECREF(m); + Py_XDECREF(m); return NULL; } \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c new file mode 100644 index 00000000..a3101286 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -0,0 +1,159 @@ +#include +#include +#include +#include + +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY + +#include "numpy/arrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/dtype_api.h" + +#include "scalar.h" +// #include "scalar_ops.h" + +QuadPrecisionObject * +QuadPrecision_raw_new(QuadBackendType backend) +{ + QuadPrecisionObject *new = PyObject_New(QuadPrecisionObject, &QuadPrecision_Type); + if (!new) + return NULL; + new->backend = backend; + if (backend == BACKEND_SLEEF) { + new->value.sleef_value = Sleef_cast_from_doubleq1(0.0); + } + else { + new->value.longdouble_value = 0.0L; + } + return new; +} + +QuadPrecisionObject * +QuadPrecision_from_object(PyObject *value, QuadBackendType backend) +{ + QuadPrecisionObject *self = QuadPrecision_raw_new(backend); + if (!self) + return NULL; + + if (PyFloat_Check(value)) { + double dval = PyFloat_AsDouble(value); + if (backend == BACKEND_SLEEF) { + self->value.sleef_value = Sleef_cast_from_doubleq1(dval); + } + else { + self->value.longdouble_value = (long double)dval; + } + } + else if (PyUnicode_CheckExact(value)) { + const char *s = PyUnicode_AsUTF8(value); + char *endptr = NULL; + if (backend == BACKEND_SLEEF) { + self->value.sleef_value = Sleef_strtoq(s, &endptr); + } + else { + self->value.longdouble_value = strtold(s, &endptr); + } + if (*endptr != '\0' || endptr == s) { + PyErr_SetString(PyExc_ValueError, "Unable to parse string to QuadPrecision"); + Py_DECREF(self); + return NULL; + } + } + else if (PyLong_Check(value)) { + long long val = PyLong_AsLongLong(value); + if (val == -1 && PyErr_Occurred()) { + PyErr_SetString(PyExc_OverflowError, "Overflow Error, value out of range"); + Py_DECREF(self); + return NULL; + } + if (backend == BACKEND_SLEEF) { + self->value.sleef_value = Sleef_cast_from_int64q1(val); + } + else { + self->value.longdouble_value = (long double)val; + } + } + else { + PyErr_SetString(PyExc_TypeError, "QuadPrecision value must be a float, int or string"); + Py_DECREF(self); + return NULL; + } + + return self; +} + +static PyObject * +QuadPrecision_new(PyTypeObject *cls, PyObject *args, PyObject *kwargs) +{ + PyObject *value; + const char *backend_str = "sleef"; + static char *kwlist[] = {"value", "backend", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s", kwlist, &value, &backend_str)) { + return NULL; + } + + QuadBackendType backend = BACKEND_SLEEF; + if (strcmp(backend_str, "longdouble") == 0) { + backend = BACKEND_LONGDOUBLE; + } + else if (strcmp(backend_str, "sleef") != 0) { + PyErr_SetString(PyExc_ValueError, "Invalid backend. Use 'sleef' or 'longdouble'."); + return NULL; + } + + return (PyObject *)QuadPrecision_from_object(value, backend); +} + +static PyObject * +QuadPrecision_str(QuadPrecisionObject *self) +{ + char buffer[128]; + if (self->backend == BACKEND_SLEEF) { + Sleef_snprintf(buffer, sizeof(buffer), "%.*Qe", SLEEF_QUAD_DIG, self->value.sleef_value); + } + else { + snprintf(buffer, sizeof(buffer), "%.35Le", self->value.longdouble_value); + } + return PyUnicode_FromString(buffer); +} + +static PyObject * +QuadPrecision_repr(QuadPrecisionObject *self) +{ + PyObject *str = QuadPrecision_str(self); + if (str == NULL) { + return NULL; + } + const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble"; + PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S', backend='%s')", str, backend_str); + Py_DECREF(str); + return res; +} + +static void +QuadPrecision_dealloc(QuadPrecisionObject *self) +{ + Py_TYPE(self)->tp_free((PyObject *)self); +} + +PyTypeObject QuadPrecision_Type = { + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecision", + .tp_basicsize = sizeof(QuadPrecisionObject), + .tp_itemsize = 0, + .tp_new = QuadPrecision_new, + .tp_dealloc = (destructor)QuadPrecision_dealloc, + .tp_repr = (reprfunc)QuadPrecision_repr, + .tp_str = (reprfunc)QuadPrecision_str, + // .tp_as_number = &quad_as_scalar, + // .tp_richcompare = (richcmpfunc)quad_richcompare + +}; + +int +init_quadprecision_scalar(void) +{ + return PyType_Ready(&QuadPrecision_Type); +} \ No newline at end of file diff --git a/quaddtype/quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h similarity index 55% rename from quaddtype/quaddtype/src/scalar.h rename to quaddtype/numpy_quaddtype/src/scalar.h index 962a1fec..344fef09 100644 --- a/quaddtype/quaddtype/src/scalar.h +++ b/quaddtype/numpy_quaddtype/src/scalar.h @@ -7,23 +7,26 @@ extern "C" { #include #include +#include "quad_common.h" -typedef struct { - Sleef_quad value; -} quad_field; +typedef union { + Sleef_quad sleef_value; + long double longdouble_value; +} quad_value; typedef struct { PyObject_HEAD - quad_field quad; + quad_value value; + QuadBackendType backend; } QuadPrecisionObject; extern PyTypeObject QuadPrecision_Type; QuadPrecisionObject * -QuadPrecision_raw_new(void); +QuadPrecision_raw_new(QuadBackendType backend); QuadPrecisionObject * -QuadPrecision_from_object(PyObject *value); +QuadPrecision_from_object(PyObject *value, QuadBackendType backend); int init_quadprecision_scalar(void); diff --git a/quaddtype/quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp similarity index 100% rename from quaddtype/quaddtype/src/scalar_ops.cpp rename to quaddtype/numpy_quaddtype/src/scalar_ops.cpp diff --git a/quaddtype/quaddtype/src/scalar_ops.h b/quaddtype/numpy_quaddtype/src/scalar_ops.h similarity index 100% rename from quaddtype/quaddtype/src/scalar_ops.h rename to quaddtype/numpy_quaddtype/src/scalar_ops.h diff --git a/quaddtype/quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp similarity index 100% rename from quaddtype/quaddtype/src/umath.cpp rename to quaddtype/numpy_quaddtype/src/umath.cpp diff --git a/quaddtype/quaddtype/src/umath.h b/quaddtype/numpy_quaddtype/src/umath.h similarity index 100% rename from quaddtype/quaddtype/src/umath.h rename to quaddtype/numpy_quaddtype/src/umath.h diff --git a/quaddtype/pyproject.toml b/quaddtype/pyproject.toml index 68b61e7f..836b73f2 100644 --- a/quaddtype/pyproject.toml +++ b/quaddtype/pyproject.toml @@ -9,7 +9,7 @@ requires = [ build-backend = "mesonpy" [project] -name = "quaddtype" +name = "numpy_quaddtype" description = "Quad (128-bit) float dtype for numpy" version = "0.0.1" readme = 'README.md' @@ -22,4 +22,4 @@ dependencies = [ [project.optional-dependencies] test = [ "pytest", -] +] \ No newline at end of file diff --git a/quaddtype/quaddtype/__init__.py b/quaddtype/quaddtype/__init__.py deleted file mode 100644 index 9f246f73..00000000 --- a/quaddtype/quaddtype/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from ._quaddtype_main import QuadPrecDType, QuadPrecision \ No newline at end of file diff --git a/quaddtype/quaddtype/src/casts.cpp b/quaddtype/quaddtype/src/casts.cpp deleted file mode 100644 index c096fe5c..00000000 --- a/quaddtype/quaddtype/src/casts.cpp +++ /dev/null @@ -1,468 +0,0 @@ -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NPY_TARGET_VERSION NPY_2_0_API_VERSION -#define NO_IMPORT_ARRAY -#define NO_IMPORT_UFUNC - -extern "C" { -#include - -#include "numpy/arrayobject.h" -#include "numpy/ndarraytypes.h" -#include "numpy/dtype_api.h" -} -#include "sleef.h" -#include "sleefquad.h" - -#include "scalar.h" -#include "casts.h" -#include "dtype.h" - -#define NUM_CASTS 29 // 14 to_casts + 14 from_casts + 1 quad_to_quad - -static NPY_CASTING -quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), - PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), - QuadPrecDTypeObject *given_descrs[2], - QuadPrecDTypeObject *loop_descrs[2], npy_intp *view_offset) -{ - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - - if (given_descrs[1] == NULL) { - Py_INCREF(given_descrs[0]); - loop_descrs[1] = given_descrs[0]; - } - else { - Py_INCREF(given_descrs[1]); - loop_descrs[1] = given_descrs[1]; - } - - *view_offset = 0; - return NPY_NO_CASTING; -} - -static int -quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) -{ - npy_intp N = dimensions[0]; - char *in_ptr = data[0]; - char *out_ptr = data[1]; - - npy_intp in_stride = strides[0]; - npy_intp out_stride = strides[1]; - - while (N--) { - memcpy(out_ptr, in_ptr, sizeof(Sleef_quad)); - in_ptr += in_stride; - out_ptr += out_stride; - } - return 0; -} - -// Casting from other types to QuadDType - -template -static inline Sleef_quad -to_quad(T x); - -template <> -inline Sleef_quad -to_quad(npy_bool x) -{ - return x ? Sleef_cast_from_doubleq1(1.0) : Sleef_cast_from_doubleq1(0.0); -} -template <> -inline Sleef_quad -to_quad(npy_byte x) -{ - return Sleef_cast_from_int64q1(x); -} -// template <> -// inline Sleef_quad -// to_quad(npy_ubyte x) -// { -// return Sleef_cast_from_uint64q1(x); -// } -template <> -inline Sleef_quad -to_quad(npy_short x) -{ - return Sleef_cast_from_int64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_ushort x) -{ - return Sleef_cast_from_uint64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_int x) -{ - return Sleef_cast_from_int64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_uint x) -{ - return Sleef_cast_from_uint64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_long x) -{ - return Sleef_cast_from_int64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_ulong x) -{ - return Sleef_cast_from_uint64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_longlong x) -{ - return Sleef_cast_from_int64q1(x); -} -template <> -inline Sleef_quad -to_quad(npy_ulonglong x) -{ - return Sleef_cast_from_uint64q1(x); -} -template <> -inline Sleef_quad -to_quad(float x) -{ - return Sleef_cast_from_doubleq1(x); -} -template <> -inline Sleef_quad -to_quad(double x) -{ - return Sleef_cast_from_doubleq1(x); -} -template <> -inline Sleef_quad -to_quad(long double x) -{ - return Sleef_cast_from_doubleq1(x); -} - -template -static NPY_CASTING -numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], - PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], - npy_intp *view_offset) -{ - if (given_descrs[1] == NULL) { - loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(); - if (loop_descrs[1] == nullptr) { - return (NPY_CASTING)-1; - } - } - else { - Py_INCREF(given_descrs[1]); - loop_descrs[1] = given_descrs[1]; - } - - loop_descrs[0] = PyArray_GetDefaultDescr(dtypes[0]); - // *view_offset = 0; - return NPY_SAFE_CASTING; -} - -template -static int -numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) -{ - npy_intp N = dimensions[0]; - char *in_ptr = data[0]; - char *out_ptr = data[1]; - - while (N--) { - T in_val; - Sleef_quad out_val; - - memcpy(&in_val, in_ptr, sizeof(T)); - out_val = to_quad(in_val); - memcpy(out_ptr, &out_val, sizeof(Sleef_quad)); - - in_ptr += strides[0]; - out_ptr += strides[1]; - } - return 0; -} - -// Casting from QuadDType to other types - -template -static inline T -from_quad(Sleef_quad x); - -template <> -inline npy_bool -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_int64q1(x) != 0; -} -template <> -inline npy_byte -from_quad(Sleef_quad x) -{ - return (npy_byte)Sleef_cast_to_int64q1(x); -} -// template <> -// inline npy_ubyte -// from_quad(Sleef_quad x) -// { -// return (npy_ubyte)Sleef_cast_to_uint64q1(x); -// } -template <> -inline npy_short -from_quad(Sleef_quad x) -{ - return (npy_short)Sleef_cast_to_int64q1(x); -} -template <> -inline npy_ushort -from_quad(Sleef_quad x) -{ - return (npy_ushort)Sleef_cast_to_uint64q1(x); -} -template <> -inline npy_int -from_quad(Sleef_quad x) -{ - return (npy_int)Sleef_cast_to_int64q1(x); -} -template <> -inline npy_uint -from_quad(Sleef_quad x) -{ - return (npy_uint)Sleef_cast_to_uint64q1(x); -} -template <> -inline npy_long -from_quad(Sleef_quad x) -{ - return (npy_long)Sleef_cast_to_int64q1(x); -} -template <> -inline npy_ulong -from_quad(Sleef_quad x) -{ - return (npy_ulong)Sleef_cast_to_uint64q1(x); -} -template <> -inline npy_longlong -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_int64q1(x); -} -template <> -inline npy_ulonglong -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_uint64q1(x); -} -template <> -inline float -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_doubleq1(x); -} -template <> -inline double -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_doubleq1(x); -} -template <> -inline long double -from_quad(Sleef_quad x) -{ - return Sleef_cast_to_doubleq1(x); -} - -template -static NPY_CASTING -quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], - PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], - npy_intp *view_offset) -{ - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - - loop_descrs[1] = PyArray_GetDefaultDescr(dtypes[1]); - // *view_offset = 0; - return NPY_UNSAFE_CASTING; -} - -template -static int -quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) -{ - npy_intp N = dimensions[0]; - char *in_ptr = data[0]; - char *out_ptr = data[1]; - - while (N--) { - Sleef_quad in_val = *(Sleef_quad *)in_ptr; - T *out_val = (T *)out_ptr; - *out_val = from_quad(in_val); - - in_ptr += strides[0]; - out_ptr += strides[1]; - } - return 0; -} - -static PyArrayMethod_Spec *specs[NUM_CASTS + 1]; // +1 for NULL terminator -static size_t spec_count = 0; - -void -add_spec(PyArrayMethod_Spec *spec) -{ - if (spec_count < NUM_CASTS) { - specs[spec_count++] = spec; - } -} - -// functions to add casts -template -void -add_cast_from(PyArray_DTypeMeta *to) -{ - PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{nullptr, to}; - - PyType_Slot *slots = new PyType_Slot[3]{ - {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop}, - {0, nullptr}}; - - PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ - .name = "cast_QuadPrec_to_NumPy", - .nin = 1, - .nout = 1, - .casting = NPY_UNSAFE_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)0, - .dtypes = dtypes, - .slots = slots, - }; - add_spec(spec); -} - -template -void -add_cast_to(PyArray_DTypeMeta *from) -{ - PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, nullptr}; - - PyType_Slot *slots = new PyType_Slot[3]{ - {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop}, - {0, nullptr}}; - - PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ - .name = "cast_NumPy_to_QuadPrec", - .nin = 1, - .nout = 1, - .casting = NPY_SAFE_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)0, - .dtypes = dtypes, - .slots = slots, - }; - - add_spec(spec); -} - -PyArrayMethod_Spec ** -init_casts_internal(void) -{ - PyArray_DTypeMeta **quad2quad_dtypes = new PyArray_DTypeMeta *[2]{nullptr, nullptr}; - PyType_Slot *quad2quad_slots = new PyType_Slot[4]{ - {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop}, - {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop}, - {0, nullptr}}; - - PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{ - .name = "cast_QuadPrec_to_QuadPrec", - .nin = 1, - .nout = 1, - .casting = NPY_NO_CASTING, - .flags = NPY_METH_SUPPORTS_UNALIGNED, - .dtypes = quad2quad_dtypes, - .slots = quad2quad_slots, - }; - - add_spec(quad2quad_spec); - - add_cast_to(&PyArray_BoolDType); - add_cast_to(&PyArray_ByteDType); - add_cast_to(&PyArray_UByteDType); - add_cast_to(&PyArray_ShortDType); - add_cast_to(&PyArray_UShortDType); - add_cast_to(&PyArray_IntDType); - add_cast_to(&PyArray_UIntDType); - add_cast_to(&PyArray_LongDType); - add_cast_to(&PyArray_ULongDType); - add_cast_to(&PyArray_LongLongDType); - add_cast_to(&PyArray_ULongLongDType); - add_cast_to(&PyArray_FloatDType); - add_cast_to(&PyArray_DoubleDType); - add_cast_to(&PyArray_LongDoubleDType); - - add_cast_from(&PyArray_BoolDType); - add_cast_from(&PyArray_ByteDType); - add_cast_from(&PyArray_UByteDType); - add_cast_from(&PyArray_ShortDType); - add_cast_from(&PyArray_UShortDType); - add_cast_from(&PyArray_IntDType); - add_cast_from(&PyArray_UIntDType); - add_cast_from(&PyArray_LongDType); - add_cast_from(&PyArray_ULongDType); - add_cast_from(&PyArray_LongLongDType); - add_cast_from(&PyArray_ULongLongDType); - add_cast_from(&PyArray_FloatDType); - add_cast_from(&PyArray_DoubleDType); - add_cast_from(&PyArray_LongDoubleDType); - - specs[spec_count] = nullptr; - return specs; -} - -PyArrayMethod_Spec ** -init_casts(void) -{ - try { - return init_casts_internal(); - } - catch (int e) { - PyErr_NoMemory(); - return nullptr; - } -} - -void -free_casts(void) -{ - for (auto cast : specs) { - if (cast == nullptr) { - continue; - } - delete cast->dtypes; - delete cast->slots; - delete cast; - } - spec_count = 0; -} diff --git a/quaddtype/quaddtype/src/dtype.c b/quaddtype/quaddtype/src/dtype.c deleted file mode 100644 index a6f485bf..00000000 --- a/quaddtype/quaddtype/src/dtype.c +++ /dev/null @@ -1,216 +0,0 @@ -#include -#include -#include - -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NPY_TARGET_VERSION NPY_2_0_API_VERSION -#define NO_IMPORT_ARRAY -#define NO_IMPORT_UFUNC -#include "numpy/arrayobject.h" -#include "numpy/ndarraytypes.h" -#include "numpy/dtype_api.h" - -#include "scalar.h" -#include "casts.h" -#include "dtype.h" - -static inline int quad_load(Sleef_quad *x, char *data_ptr) -{ - if (data_ptr == NULL || x == NULL) - { - return -1; - } - *x = *(Sleef_quad *)data_ptr; - return 0; -} - -static inline int quad_store(char *data_ptr, Sleef_quad x) -{ - if (data_ptr == NULL) - { - return -1; - } - *(Sleef_quad *)data_ptr = x; - return 0; -} - -QuadPrecDTypeObject * new_quaddtype_instance(void) -{ - QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new((PyTypeObject *)&QuadPrecDType, NULL, NULL); - if (new == NULL) { - return NULL; - } - new->base.elsize = sizeof(Sleef_quad); - new->base.alignment = _Alignof(Sleef_quad); - - return new; -} - -static QuadPrecDTypeObject * ensure_canonical(QuadPrecDTypeObject *self) -{ - Py_INCREF(self); - return self; -} - -static QuadPrecDTypeObject * common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) -{ - Py_INCREF(dtype1); - return dtype1; -} - - -static PyArray_DTypeMeta * common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) -{ - // Promote integer and floating-point types to QuadPrecDType - if (other->type_num >= 0 && - (PyTypeNum_ISINTEGER(other->type_num) || - PyTypeNum_ISFLOAT(other->type_num))) { - Py_INCREF(cls); - return cls; - } - // Don't promote complex types - if (PyTypeNum_ISCOMPLEX(other->type_num)) { - Py_INCREF(Py_NotImplemented); - return (PyArray_DTypeMeta *)Py_NotImplemented; - } - - Py_INCREF(Py_NotImplemented); - return (PyArray_DTypeMeta *)Py_NotImplemented; -} - -static PyArray_Descr * -quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj) -{ - if (Py_TYPE(obj) != &QuadPrecision_Type) - { - PyErr_SetString(PyExc_TypeError, "Can only store QuadPrecision in a QuadPrecDType array."); - return NULL; - } - return (PyArray_Descr *)new_quaddtype_instance(); -} - -static int quadprec_setitem(QuadPrecDTypeObject *descr, PyObject *obj, char *dataptr) -{ - QuadPrecisionObject *value; - if (PyObject_TypeCheck(obj, &QuadPrecision_Type)) - { - Py_INCREF(obj); - value = (QuadPrecisionObject *)obj; - } - else - { - value = QuadPrecision_from_object(obj); - if (value == NULL) { - return -1; - } - } - - if (quad_store(dataptr, value->quad.value) < 0) - { - Py_DECREF(value); - char error_msg[100]; - snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void*)dataptr); - PyErr_SetString(PyExc_ValueError, error_msg); - return -1; - } - - Py_DECREF(value); - return 0; -} - -static PyObject * quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr) -{ - QuadPrecisionObject *new = QuadPrecision_raw_new(); - if (!new) - { - return NULL; - } - if (quad_load(&new->quad.value, dataptr) < 0) - { - Py_DECREF(new); - char error_msg[100]; - snprintf(error_msg, sizeof(error_msg), "Invalid memory location %p", (void*)dataptr); - PyErr_SetString(PyExc_ValueError, error_msg); - return NULL; - } - return (PyObject *)new; -} - -static PyArray_Descr *quadprec_default_descr(PyArray_DTypeMeta *NPY_UNUSED(cls)) -{ - return (PyArray_Descr *)new_quaddtype_instance(); -} - -static PyType_Slot QuadPrecDType_Slots[] = -{ - {NPY_DT_ensure_canonical, &ensure_canonical}, - {NPY_DT_common_instance, &common_instance}, - {NPY_DT_common_dtype, &common_dtype}, - {NPY_DT_discover_descr_from_pyobject, &quadprec_discover_descriptor_from_pyobject}, - {NPY_DT_setitem, &quadprec_setitem}, - {NPY_DT_getitem, &quadprec_getitem}, - {NPY_DT_default_descr, &quadprec_default_descr}, - {0, NULL} -}; - - -static PyObject * QuadPrecDType_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds) -{ - if (PyTuple_GET_SIZE(args) != 0 || (kwds != NULL && PyDict_Size(kwds) != 0)) { - PyErr_SetString(PyExc_TypeError, - "QuadPrecDType takes no arguments"); - return NULL; - } - - return (PyObject *)new_quaddtype_instance(); -} - -static PyObject * QuadPrecDType_repr(QuadPrecDTypeObject *self) -{ - return PyUnicode_FromString("QuadPrecDType()"); -} - -PyArray_DTypeMeta QuadPrecDType = { - {{ - PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "QuadPrecDType.QuadPrecDType", - .tp_basicsize = sizeof(QuadPrecDTypeObject), - .tp_new = QuadPrecDType_new, - .tp_repr = (reprfunc)QuadPrecDType_repr, - .tp_str = (reprfunc)QuadPrecDType_repr, - }}, -}; - -int init_quadprec_dtype(void) -{ - PyArrayMethod_Spec **casts = init_casts(); - if (!casts) - return -1; - - PyArrayDTypeMeta_Spec QuadPrecDType_DTypeSpec = { - .flags = NPY_DT_NUMERIC, - .casts = casts, - .typeobj = &QuadPrecision_Type, - .slots = QuadPrecDType_Slots, - }; - - ((PyObject *)&QuadPrecDType)->ob_type = &PyArrayDTypeMeta_Type; - - ((PyTypeObject *)&QuadPrecDType)->tp_base = &PyArrayDescr_Type; - - if (PyType_Ready((PyTypeObject *)&QuadPrecDType) < 0) - { - return -1; - } - - if (PyArrayInitDTypeMeta_FromSpec(&QuadPrecDType, &QuadPrecDType_DTypeSpec) < 0) - { - return -1; - } - - free_casts(); - - return 0; -} \ No newline at end of file diff --git a/quaddtype/quaddtype/src/scalar.c b/quaddtype/quaddtype/src/scalar.c deleted file mode 100644 index 15f727d8..00000000 --- a/quaddtype/quaddtype/src/scalar.c +++ /dev/null @@ -1,126 +0,0 @@ -#include -#include -#include - -#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API -#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION -#define NO_IMPORT_ARRAY - -#include "numpy/arrayobject.h" -#include "numpy/ndarraytypes.h" -#include "numpy/dtype_api.h" - -#include "scalar.h" -#include "scalar_ops.h" - - -// static PyTypeObject QuadPrecision_Type; - - -QuadPrecisionObject * QuadPrecision_raw_new(void) -{ - QuadPrecisionObject * new = PyObject_New(QuadPrecisionObject, &QuadPrecision_Type); - if(!new) - return NULL; - new->quad.value = Sleef_cast_from_doubleq1(0.0); // initialize to 0 - return new; -} - -QuadPrecisionObject * QuadPrecision_from_object(PyObject * value) -{ - QuadPrecisionObject *self = QuadPrecision_raw_new(); - if(!self) - return NULL; - - if(PyFloat_Check(value)) - self->quad.value = Sleef_cast_from_doubleq1(PyFloat_AsDouble(value)); - - else if(PyUnicode_CheckExact(value)) - { - const char * s = PyUnicode_AsUTF8(value); - char *endptr = NULL; - self->quad.value = Sleef_strtoq(s, &endptr); - if (*endptr != '\0' || endptr == s) - { - PyErr_SetString(PyExc_ValueError, "Unable to parse string to QuadPrecision"); - Py_DECREF(self); - return NULL; - } - } - else if(PyLong_Check(value)) - { - long int val = PyLong_AsLong(value); - if (val == -1) - { - PyErr_SetString(PyExc_OverflowError, "Overflow Error, value out of range"); - Py_DECREF(self); - return NULL; - } - self->quad.value = Sleef_cast_from_int64q1(val); - } - else - { - PyErr_SetString(PyExc_TypeError, "QuadPrecision value must be a float, int or string"); - Py_DECREF(self); - return NULL; - } - - return self; -} - -static PyObject * -QuadPrecision_new(PyTypeObject *cls, PyObject *args, PyObject *kwargs) -{ - PyObject *value; - - if (!PyArg_ParseTuple(args, "O", &value)) { - return NULL; - } - - return (PyObject *)QuadPrecision_from_object(value); -} - -static PyObject * QuadPrecision_str(QuadPrecisionObject * self) -{ - char buffer[128]; - Sleef_snprintf(buffer, sizeof(buffer), "%.*Qe", SLEEF_QUAD_DIG, self->quad.value); - return PyUnicode_FromString(buffer); -} - -static PyObject * QuadPrecision_repr(QuadPrecisionObject* self) -{ - PyObject *str = QuadPrecision_str(self); - if (str == NULL) { - return NULL; - } - PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S')", str); - Py_DECREF(str); - return res; -} - -static void -quad_dealloc(QuadPrecDTypeObject *self) -{ - Py_TYPE(self)->tp_free((PyObject *)self); -} - -PyTypeObject QuadPrecision_Type = -{ - PyVarObject_HEAD_INIT(NULL, 0) - .tp_name = "QuadPrecType.QuadPrecision", - .tp_basicsize = sizeof(QuadPrecisionObject), - .tp_itemsize = 0, - .tp_new = QuadPrecision_new, - .tp_dealloc = (destructor)quad_dealloc, - .tp_repr = (reprfunc)QuadPrecision_repr, - .tp_str = (reprfunc)QuadPrecision_str, - .tp_as_number = &quad_as_scalar, - .tp_richcompare = (richcmpfunc)quad_richcompare - -}; - -int -init_quadprecision_scalar(void) -{ - return PyType_Ready(&QuadPrecision_Type); -} \ No newline at end of file diff --git a/quaddtype/reinstall.sh b/quaddtype/reinstall.sh index 31479511..afec2b59 100755 --- a/quaddtype/reinstall.sh +++ b/quaddtype/reinstall.sh @@ -8,6 +8,6 @@ then fi #meson setup build -Db_sanitize=address,undefined -python -m pip uninstall -y quaddtype +python -m pip uninstall -y quaddtype numpy_quaddtype python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug" #python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' \ No newline at end of file From 8408227b4fe44c309c330fcafd349c1e209a8176 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sun, 1 Sep 2024 04:17:31 +0530 Subject: [PATCH 03/32] added ld backend support to casting --- quaddtype/numpy_quaddtype/src/casts.cpp | 860 ++++++++++++-------- quaddtype/numpy_quaddtype/src/dtype.c | 2 +- quaddtype/numpy_quaddtype/src/quad_common.h | 2 +- quaddtype/numpy_quaddtype/src/scalar.c | 25 +- 4 files changed, 546 insertions(+), 343 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index 44c316e0..473fd9af 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -56,12 +56,20 @@ quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], npy_intp N = dimensions[0]; char *in_ptr = data[0]; char *out_ptr = data[1]; - npy_intp in_stride = strides[0]; npy_intp out_stride = strides[1]; - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - size_t elem_size = (descr->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1]; + + if (descr_in->backend != descr_out->backend) { + PyErr_SetString(PyExc_TypeError, + "Cannot convert between different quad-precision backends"); + return -1; + } + + size_t elem_size = + (descr_in->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); while (N--) { memcpy(out_ptr, in_ptr, elem_size); @@ -73,266 +81,442 @@ quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], // Casting from other types to QuadDType -// template -// static inline Sleef_quad -// to_quad(T x); - -// template <> -// inline Sleef_quad -// to_quad(npy_bool x) -// { -// return x ? Sleef_cast_from_doubleq1(1.0) : Sleef_cast_from_doubleq1(0.0); -// } -// template <> -// inline Sleef_quad -// to_quad(npy_byte x) -// { -// return Sleef_cast_from_int64q1(x); -// } -// // template <> -// // inline Sleef_quad -// // to_quad(npy_ubyte x) -// // { -// // return Sleef_cast_from_uint64q1(x); -// // } -// template <> -// inline Sleef_quad -// to_quad(npy_short x) -// { -// return Sleef_cast_from_int64q1(x); -// } -// template <> -// inline Sleef_quad -// to_quad(npy_ushort x) -// { -// return Sleef_cast_from_uint64q1(x); -// } -// template <> -// inline Sleef_quad -// to_quad(npy_int x) -// { -// return Sleef_cast_from_int64q1(x); -// } -// template <> -// inline Sleef_quad -// to_quad(npy_uint x) -// { -// return Sleef_cast_from_uint64q1(x); -// } -// template <> -// inline Sleef_quad -// to_quad(npy_long x) -// { -// return Sleef_cast_from_int64q1(x); -// } -// template <> -// inline Sleef_quad -// to_quad(npy_ulong x) -// { -// return Sleef_cast_from_uint64q1(x); -// } -// template <> -// inline Sleef_quad -// to_quad(npy_longlong x) -// { -// return Sleef_cast_from_int64q1(x); -// } -// template <> -// inline Sleef_quad -// to_quad(npy_ulonglong x) -// { -// return Sleef_cast_from_uint64q1(x); -// } -// template <> -// inline Sleef_quad -// to_quad(float x) -// { -// return Sleef_cast_from_doubleq1(x); -// } -// template <> -// inline Sleef_quad -// to_quad(double x) -// { -// return Sleef_cast_from_doubleq1(x); -// } -// template <> -// inline Sleef_quad -// to_quad(long double x) -// { -// return Sleef_cast_from_doubleq1(x); -// } - -// template -// static NPY_CASTING -// numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], -// PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], -// npy_intp *view_offset) -// { -// if (given_descrs[1] == NULL) { -// loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(); -// if (loop_descrs[1] == nullptr) { -// return (NPY_CASTING)-1; -// } -// } -// else { -// Py_INCREF(given_descrs[1]); -// loop_descrs[1] = given_descrs[1]; -// } - -// loop_descrs[0] = PyArray_GetDefaultDescr(dtypes[0]); -// // *view_offset = 0; -// return NPY_SAFE_CASTING; -// } - -// template -// static int -// numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], -// npy_intp const dimensions[], npy_intp const strides[], -// void *NPY_UNUSED(auxdata)) -// { -// npy_intp N = dimensions[0]; -// char *in_ptr = data[0]; -// char *out_ptr = data[1]; - -// while (N--) { -// T in_val; -// Sleef_quad out_val; - -// memcpy(&in_val, in_ptr, sizeof(T)); -// out_val = to_quad(in_val); -// memcpy(out_ptr, &out_val, sizeof(Sleef_quad)); - -// in_ptr += strides[0]; -// out_ptr += strides[1]; -// } -// return 0; -// } - -// // Casting from QuadDType to other types - -// template -// static inline T -// from_quad(Sleef_quad x); - -// template <> -// inline npy_bool -// from_quad(Sleef_quad x) -// { -// return Sleef_cast_to_int64q1(x) != 0; -// } -// template <> -// inline npy_byte -// from_quad(Sleef_quad x) -// { -// return (npy_byte)Sleef_cast_to_int64q1(x); -// } -// // template <> -// // inline npy_ubyte -// // from_quad(Sleef_quad x) -// // { -// // return (npy_ubyte)Sleef_cast_to_uint64q1(x); -// // } -// template <> -// inline npy_short -// from_quad(Sleef_quad x) -// { -// return (npy_short)Sleef_cast_to_int64q1(x); -// } -// template <> -// inline npy_ushort -// from_quad(Sleef_quad x) -// { -// return (npy_ushort)Sleef_cast_to_uint64q1(x); -// } -// template <> -// inline npy_int -// from_quad(Sleef_quad x) -// { -// return (npy_int)Sleef_cast_to_int64q1(x); -// } -// template <> -// inline npy_uint -// from_quad(Sleef_quad x) -// { -// return (npy_uint)Sleef_cast_to_uint64q1(x); -// } -// template <> -// inline npy_long -// from_quad(Sleef_quad x) -// { -// return (npy_long)Sleef_cast_to_int64q1(x); -// } -// template <> -// inline npy_ulong -// from_quad(Sleef_quad x) -// { -// return (npy_ulong)Sleef_cast_to_uint64q1(x); -// } -// template <> -// inline npy_longlong -// from_quad(Sleef_quad x) -// { -// return Sleef_cast_to_int64q1(x); -// } -// template <> -// inline npy_ulonglong -// from_quad(Sleef_quad x) -// { -// return Sleef_cast_to_uint64q1(x); -// } -// template <> -// inline float -// from_quad(Sleef_quad x) -// { -// return Sleef_cast_to_doubleq1(x); -// } -// template <> -// inline double -// from_quad(Sleef_quad x) -// { -// return Sleef_cast_to_doubleq1(x); -// } -// template <> -// inline long double -// from_quad(Sleef_quad x) -// { -// return Sleef_cast_to_doubleq1(x); -// } - -// template -// static NPY_CASTING -// quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], -// PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], -// npy_intp *view_offset) -// { -// Py_INCREF(given_descrs[0]); -// loop_descrs[0] = given_descrs[0]; - -// loop_descrs[1] = PyArray_GetDefaultDescr(dtypes[1]); -// // *view_offset = 0; -// return NPY_UNSAFE_CASTING; -// } - -// template -// static int -// quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[], -// npy_intp const dimensions[], npy_intp const strides[], -// void *NPY_UNUSED(auxdata)) -// { -// npy_intp N = dimensions[0]; -// char *in_ptr = data[0]; -// char *out_ptr = data[1]; - -// while (N--) { -// Sleef_quad in_val = *(Sleef_quad *)in_ptr; -// T *out_val = (T *)out_ptr; -// *out_val = from_quad(in_val); - -// in_ptr += strides[0]; -// out_ptr += strides[1]; -// } -// return 0; -// } +template +static inline quad_value +to_quad(T x, QuadBackendType backend); + +template <> +inline quad_value +to_quad(npy_bool x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = x ? Sleef_cast_from_doubleq1(1.0) : Sleef_cast_from_doubleq1(0.0); + } + else { + result.longdouble_value = x ? 1.0L : 0.0L; + } + return result; +} + +template <> +inline quad_value +to_quad(npy_byte x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_int64q1(x); + } + else { + result.longdouble_value = (long double)x; + } + return result; +} + +template <> +inline quad_value +to_quad(npy_short x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_int64q1(x); + } + else { + result.longdouble_value = (long double)x; + } + return result; +} + +template <> +inline quad_value +to_quad(npy_ushort x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_uint64q1(x); + } + else { + result.longdouble_value = (long double)x; + } + return result; +} + +template <> +inline quad_value +to_quad(npy_int x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_int64q1(x); + } + else { + result.longdouble_value = (long double)x; + } + return result; +} + +template <> +inline quad_value +to_quad(npy_uint x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_uint64q1(x); + } + else { + result.longdouble_value = (long double)x; + } + return result; +} + +template <> +inline quad_value +to_quad(npy_long x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_int64q1(x); + } + else { + result.longdouble_value = (long double)x; + } + return result; +} + +template <> +inline quad_value +to_quad(npy_ulong x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_uint64q1(x); + } + else { + result.longdouble_value = (long double)x; + } + return result; +} + +template <> +inline quad_value +to_quad(npy_longlong x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_int64q1(x); + } + else { + result.longdouble_value = (long double)x; + } + return result; +} + +template <> +inline quad_value +to_quad(npy_ulonglong x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_uint64q1(x); + } + else { + result.longdouble_value = (long double)x; + } + return result; +} +template <> +inline quad_value +to_quad(float x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_doubleq1(x); + } + else { + result.longdouble_value = (long double)x; + } + return result; +} + +template <> +inline quad_value +to_quad(double x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_doubleq1(x); + } + else { + result.longdouble_value = (long double)x; + } + return result; +} + +template <> +inline quad_value +to_quad(long double x, QuadBackendType backend) +{ + quad_value result; + if (backend == BACKEND_SLEEF) { + result.sleef_value = Sleef_cast_from_doubleq1(x); + } + else { + result.longdouble_value = x; + } + return result; +} + +template +static NPY_CASTING +numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) +{ + if (given_descrs[1] == NULL) { + loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF); + if (loop_descrs[1] == nullptr) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + loop_descrs[0] = PyArray_GetDefaultDescr(dtypes[0]); + return NPY_SAFE_CASTING; +} + +template +static int +numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1]; + QuadBackendType backend = descr_out->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + while (N--) { + T in_val; + quad_value out_val; + + memcpy(&in_val, in_ptr, sizeof(T)); + out_val = to_quad(in_val, backend); + memcpy(out_ptr, &out_val, elem_size); + + in_ptr += strides[0]; + out_ptr += strides[1]; + } + return 0; +} + +// Casting from QuadDType to other types + +template +static inline T +from_quad(quad_value x, QuadBackendType backend); + +template <> +inline npy_bool +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return Sleef_cast_to_int64q1(x.sleef_value) != 0; + } + else { + return x.longdouble_value != 0; + } +} + +template <> +inline npy_byte +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return (npy_byte)Sleef_cast_to_int64q1(x.sleef_value); + } + else { + return (npy_byte)x.longdouble_value; + } +} + +template <> +inline npy_short +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return (npy_short)Sleef_cast_to_int64q1(x.sleef_value); + } + else { + return (npy_short)x.longdouble_value; + } +} + +template <> +inline npy_ushort +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return (npy_ushort)Sleef_cast_to_uint64q1(x.sleef_value); + } + else { + return (npy_ushort)x.longdouble_value; + } +} + +template <> +inline npy_int +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return (npy_int)Sleef_cast_to_int64q1(x.sleef_value); + } + else { + return (npy_int)x.longdouble_value; + } +} + +template <> +inline npy_uint +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return (npy_uint)Sleef_cast_to_uint64q1(x.sleef_value); + } + else { + return (npy_uint)x.longdouble_value; + } +} + +template <> +inline npy_long +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return (npy_long)Sleef_cast_to_int64q1(x.sleef_value); + } + else { + return (npy_long)x.longdouble_value; + } +} + +template <> +inline npy_ulong +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return (npy_ulong)Sleef_cast_to_uint64q1(x.sleef_value); + } + else { + return (npy_ulong)x.longdouble_value; + } +} + +template <> +inline npy_longlong +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return Sleef_cast_to_int64q1(x.sleef_value); + } + else { + return (npy_longlong)x.longdouble_value; + } +} + +template <> +inline npy_ulonglong +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return Sleef_cast_to_uint64q1(x.sleef_value); + } + else { + return (npy_ulonglong)x.longdouble_value; + } +} + +template <> +inline float +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return (float)Sleef_cast_to_doubleq1(x.sleef_value); + } + else { + return (float)x.longdouble_value; + } +} + +template <> +inline double +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return Sleef_cast_to_doubleq1(x.sleef_value); + } + else { + return (double)x.longdouble_value; + } +} + +template <> +inline long double +from_quad(quad_value x, QuadBackendType backend) +{ + if (backend == BACKEND_SLEEF) { + return (long double)Sleef_cast_to_doubleq1(x.sleef_value); + } + else { + return x.longdouble_value; + } +} + +template +static NPY_CASTING +quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + loop_descrs[1] = PyArray_GetDefaultDescr(dtypes[1]); + return NPY_UNSAFE_CASTING; +} + +template +static int +quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + + QuadPrecDTypeObject *quad_descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = quad_descr->backend; + + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + while (N--) { + quad_value in_val; + memcpy(&in_val, in_ptr, elem_size); + + T out_val = from_quad(in_val, backend); + memcpy(out_ptr, &out_val, sizeof(T)); + + in_ptr += strides[0]; + out_ptr += strides[1]; + } + return 0; +} static PyArrayMethod_Spec *specs[NUM_CASTS + 1]; // +1 for NULL terminator static size_t spec_count = 0; @@ -345,53 +529,53 @@ add_spec(PyArrayMethod_Spec *spec) } } -// // functions to add casts -// template -// void -// add_cast_from(PyArray_DTypeMeta *to) -// { -// PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{nullptr, to}; - -// PyType_Slot *slots = new PyType_Slot[3]{ -// {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors}, -// {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop}, -// {0, nullptr}}; - -// PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ -// .name = "cast_QuadPrec_to_NumPy", -// .nin = 1, -// .nout = 1, -// .casting = NPY_UNSAFE_CASTING, -// .flags = (NPY_ARRAYMETHOD_FLAGS)0, -// .dtypes = dtypes, -// .slots = slots, -// }; -// add_spec(spec); -// } - -// template -// void -// add_cast_to(PyArray_DTypeMeta *from) -// { -// PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, nullptr}; - -// PyType_Slot *slots = new PyType_Slot[3]{ -// {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors}, -// {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop}, -// {0, nullptr}}; - -// PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ -// .name = "cast_NumPy_to_QuadPrec", -// .nin = 1, -// .nout = 1, -// .casting = NPY_SAFE_CASTING, -// .flags = (NPY_ARRAYMETHOD_FLAGS)0, -// .dtypes = dtypes, -// .slots = slots, -// }; - -// add_spec(spec); -// } +// functions to add casts +template +void +add_cast_from(PyArray_DTypeMeta *to) +{ + PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, to}; + + PyType_Slot *slots = new PyType_Slot[3]{ + {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop}, + {0, nullptr}}; + + PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ + .name = "cast_QuadPrec_to_NumPy", + .nin = 1, + .nout = 1, + .casting = NPY_UNSAFE_CASTING, + .flags = (NPY_ARRAYMETHOD_FLAGS)0, + .dtypes = dtypes, + .slots = slots, + }; + add_spec(spec); +} + +template +void +add_cast_to(PyArray_DTypeMeta *from) +{ + PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, &QuadPrecDType}; + + PyType_Slot *slots = new PyType_Slot[3]{ + {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop}, + {0, nullptr}}; + + PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ + .name = "cast_NumPy_to_QuadPrec", + .nin = 1, + .nout = 1, + .casting = NPY_SAFE_CASTING, + .flags = (NPY_ARRAYMETHOD_FLAGS)0, + .dtypes = dtypes, + .slots = slots, + }; + + add_spec(spec); +} PyArrayMethod_Spec ** init_casts_internal(void) @@ -416,35 +600,33 @@ init_casts_internal(void) add_spec(quad2quad_spec); - // add_cast_to(&PyArray_BoolDType); - // add_cast_to(&PyArray_ByteDType); - // add_cast_to(&PyArray_UByteDType); - // add_cast_to(&PyArray_ShortDType); - // add_cast_to(&PyArray_UShortDType); - // add_cast_to(&PyArray_IntDType); - // add_cast_to(&PyArray_UIntDType); - // add_cast_to(&PyArray_LongDType); - // add_cast_to(&PyArray_ULongDType); - // add_cast_to(&PyArray_LongLongDType); - // add_cast_to(&PyArray_ULongLongDType); - // add_cast_to(&PyArray_FloatDType); - // add_cast_to(&PyArray_DoubleDType); - // add_cast_to(&PyArray_LongDoubleDType); - - // add_cast_from(&PyArray_BoolDType); - // add_cast_from(&PyArray_ByteDType); - // add_cast_from(&PyArray_UByteDType); - // add_cast_from(&PyArray_ShortDType); - // add_cast_from(&PyArray_UShortDType); - // add_cast_from(&PyArray_IntDType); - // add_cast_from(&PyArray_UIntDType); - // add_cast_from(&PyArray_LongDType); - // add_cast_from(&PyArray_ULongDType); - // add_cast_from(&PyArray_LongLongDType); - // add_cast_from(&PyArray_ULongLongDType); - // add_cast_from(&PyArray_FloatDType); - // add_cast_from(&PyArray_DoubleDType); - // add_cast_from(&PyArray_LongDoubleDType); + add_cast_to(&PyArray_BoolDType); + add_cast_to(&PyArray_ByteDType); + add_cast_to(&PyArray_ShortDType); + add_cast_to(&PyArray_UShortDType); + add_cast_to(&PyArray_IntDType); + add_cast_to(&PyArray_UIntDType); + add_cast_to(&PyArray_LongDType); + add_cast_to(&PyArray_ULongDType); + add_cast_to(&PyArray_LongLongDType); + add_cast_to(&PyArray_ULongLongDType); + add_cast_to(&PyArray_FloatDType); + add_cast_to(&PyArray_DoubleDType); + add_cast_to(&PyArray_LongDoubleDType); + + add_cast_from(&PyArray_BoolDType); + add_cast_from(&PyArray_ByteDType); + add_cast_from(&PyArray_ShortDType); + add_cast_from(&PyArray_UShortDType); + add_cast_from(&PyArray_IntDType); + add_cast_from(&PyArray_UIntDType); + add_cast_from(&PyArray_LongDType); + add_cast_from(&PyArray_ULongDType); + add_cast_from(&PyArray_LongLongDType); + add_cast_from(&PyArray_ULongLongDType); + add_cast_from(&PyArray_FloatDType); + add_cast_from(&PyArray_DoubleDType); + add_cast_from(&PyArray_LongDoubleDType); specs[spec_count] = nullptr; return specs; diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c index a00c37c9..9e496dd8 100644 --- a/quaddtype/numpy_quaddtype/src/dtype.c +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -201,7 +201,7 @@ QuadPrecDType_repr(QuadPrecDTypeObject *self) PyArray_DTypeMeta QuadPrecDType = { {{ - PyVarObject_HEAD_INIT(NULL, 0).tp_name = "QuadPrecDType.QuadPrecDType", + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecDType", .tp_basicsize = sizeof(QuadPrecDTypeObject), .tp_new = QuadPrecDType_new, .tp_repr = (reprfunc)QuadPrecDType_repr, diff --git a/quaddtype/numpy_quaddtype/src/quad_common.h b/quaddtype/numpy_quaddtype/src/quad_common.h index fcdf6f2b..190573af 100644 --- a/quaddtype/numpy_quaddtype/src/quad_common.h +++ b/quaddtype/numpy_quaddtype/src/quad_common.h @@ -6,7 +6,7 @@ extern "C" { #endif typedef enum { - BACKEND_SLEEF = 0, + BACKEND_SLEEF, BACKEND_LONGDOUBLE } QuadBackendType; diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index a3101286..8c267d42 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -75,8 +75,29 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend) self->value.longdouble_value = (long double)val; } } - else { - PyErr_SetString(PyExc_TypeError, "QuadPrecision value must be a float, int or string"); + else + { + PyObject *type_str = PyObject_Str((PyObject *)Py_TYPE(value)); + if (type_str != NULL) { + const char *type_cstr = PyUnicode_AsUTF8(type_str); + if (type_cstr != NULL) { + PyErr_Format( + PyExc_TypeError, + "QuadPrecision value must be a float, int or string, but got %s instead", + type_cstr); + } + else { + PyErr_SetString(PyExc_TypeError, + "QuadPrecision value must be a float, int or string, but got an " + "unknown type instead"); + } + Py_DECREF(type_str); + } + else { + PyErr_SetString(PyExc_TypeError, + "QuadPrecision value must be a float, int or string, but got an " + "unknown type instead"); + } Py_DECREF(self); return NULL; } From a652985a16316cf6f56154c4af345133a4239140 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sun, 1 Sep 2024 12:34:59 +0530 Subject: [PATCH 04/32] added multi backend support to scalar operations --- quaddtype/meson.build | 10 +- quaddtype/numpy_quaddtype/src/ops.hpp | 220 ++++++++++++++++++- quaddtype/numpy_quaddtype/src/scalar.c | 6 +- quaddtype/numpy_quaddtype/src/scalar_ops.cpp | 169 +++++++++----- quaddtype/reinstall.sh | 2 +- 5 files changed, 332 insertions(+), 75 deletions(-) diff --git a/quaddtype/meson.build b/quaddtype/meson.build index 50dc67b1..1b9f6248 100644 --- a/quaddtype/meson.build +++ b/quaddtype/meson.build @@ -5,8 +5,8 @@ py = py_mod.find_installation() c = meson.get_compiler('c') -sleef_dep = c.find_library('sleef', dirs:['/usr/local/lib']) -sleefquad_dep = c.find_library('sleefquad', dirs:['/usr/local/lib']) +sleef_dep = c.find_library('sleef') +sleefquad_dep = c.find_library('sleefquad') incdir_numpy = run_command(py, [ @@ -31,9 +31,9 @@ srcs = [ 'numpy_quaddtype/src/dtype.h', 'numpy_quaddtype/src/dtype.c', 'numpy_quaddtype/src/quaddtype_main.c', - # 'numpy_quaddtype/src/scalar_ops.h', - # 'numpy_quaddtype/src/scalar_ops.cpp', - # 'numpy_quaddtype/src/ops.hpp', + 'numpy_quaddtype/src/scalar_ops.h', + 'numpy_quaddtype/src/scalar_ops.cpp', + 'numpy_quaddtype/src/ops.hpp', # 'numpy_quaddtype/src/umath.h', # 'numpy_quaddtype/src/umath.cpp' ] diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp index 6a3511c4..3e290da6 100644 --- a/quaddtype/numpy_quaddtype/src/ops.hpp +++ b/quaddtype/numpy_quaddtype/src/ops.hpp @@ -1,9 +1,11 @@ #include #include +#include -typedef int (*unary_op_def)(Sleef_quad *, Sleef_quad *); +// Unary Quad Operations +typedef int (*unary_op_quad_def)(Sleef_quad *, Sleef_quad *); -static inline int +static int quad_negative(Sleef_quad *op, Sleef_quad *out) { *out = Sleef_negq1(*op); @@ -108,8 +110,116 @@ quad_exp2(Sleef_quad *op, Sleef_quad *out) return 0; } -// binary ops -typedef int (*binop_def)(Sleef_quad *, Sleef_quad *, Sleef_quad *); +// Unary long double operations +typedef int (*unary_op_longdouble_def)(long double *, long double *); + +static int +ld_negative(long double *op, long double *out) +{ + *out = -(*op); + return 0; +} + +static int +ld_positive(long double *op, long double *out) +{ + *out = *op; + return 0; +} + +static inline int +ld_absolute(long double *op, long double *out) +{ + *out = fabsl(*op); + return 0; +} + +static inline int +ld_rint(long double *op, long double *out) +{ + *out = rintl(*op); + return 0; +} + +static inline int +ld_trunc(long double *op, long double *out) +{ + *out = truncl(*op); + return 0; +} + +static inline int +ld_floor(long double *op, long double *out) +{ + *out = floorl(*op); + return 0; +} + +static inline int +ld_ceil(long double *op, long double *out) +{ + *out = ceill(*op); + return 0; +} + +static inline int +ld_sqrt(long double *op, long double *out) +{ + *out = sqrtl(*op); + return 0; +} + +static inline int +ld_square(long double *op, long double *out) +{ + *out = (*op) * (*op); + return 0; +} + +static inline int +ld_log(long double *op, long double *out) +{ + *out = logl(*op); + return 0; +} + +static inline int +ld_log2(long double *op, long double *out) +{ + *out = log2l(*op); + return 0; +} + +static inline int +ld_log10(long double *op, long double *out) +{ + *out = log10l(*op); + return 0; +} + +static inline int +ld_log1p(long double *op, long double *out) +{ + *out = log1pl(*op); + return 0; +} + +static inline int +ld_exp(long double *op, long double *out) +{ + *out = expl(*op); + return 0; +} + +static inline int +ld_exp2(long double *op, long double *out) +{ + *out = exp2l(*op); + return 0; +} + +// Binary Quad operations +typedef int (*binary_op_quad_def)(Sleef_quad *, Sleef_quad *, Sleef_quad *); static inline int quad_add(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) @@ -167,8 +277,67 @@ quad_maximum(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) return 0; } -// comparison functions -typedef npy_bool (*cmp_def)(const Sleef_quad *, const Sleef_quad *); +// Binary long double operations +typedef int (*binary_op_longdouble_def)(long double *, long double *, long double *); + +static inline int +ld_add(long double *out, long double *in1, long double *in2) +{ + *out = (*in1) + (*in2); + return 0; +} + +static inline int +ld_sub(long double *out, long double *in1, long double *in2) +{ + *out = (*in1) - (*in2); + return 0; +} + +static inline int +ld_mul(long double *res, long double *a, long double *b) +{ + *res = (*a) * (*b); + return 0; +} + +static inline int +ld_div(long double *res, long double *a, long double *b) +{ + *res = (*a) / (*b); + return 0; +} + +static inline int +ld_pow(long double *res, long double *a, long double *b) +{ + *res = powl(*a, *b); + return 0; +} + +static inline int +ld_mod(long double *res, long double *a, long double *b) +{ + *res = fmodl(*a, *b); + return 0; +} + +static inline int +ld_minimum(long double *out, long double *in1, long double *in2) +{ + *out = (*in1 < *in2) ? *in1 : *in2; + return 0; +} + +static inline int +ld_maximum(long double *out, long double *in1, long double *in2) +{ + *out = (*in1 > *in2) ? *in1 : *in2; + return 0; +} + +// comparison quad functions +typedef npy_bool (*cmp_quad_def)(const Sleef_quad *, const Sleef_quad *); static inline npy_bool quad_equal(const Sleef_quad *a, const Sleef_quad *b) @@ -204,4 +373,43 @@ static inline npy_bool quad_greaterequal(const Sleef_quad *a, const Sleef_quad *b) { return Sleef_icmpgeq1(*a, *b); +} + +// comparison quad functions +typedef npy_bool (*cmp_londouble_def)(const long double *, const double *); + +static inline npy_bool +ld_equal(const long double *a, const long double *b) +{ + return *a == *b; +} + +static inline npy_bool +ld_notequal(const long double *a, const long double *b) +{ + return *a != *b; +} + +static inline npy_bool +ld_less(const long double *a, const long double *b) +{ + return *a < *b; +} + +static inline npy_bool +ld_lessequal(const long double *a, const long double *b) +{ + return *a <= *b; +} + +static inline npy_bool +ld_greater(const long double *a, const long double *b) +{ + return *a > *b; +} + +static inline npy_bool +ld_greaterequal(const long double *a, const long double *b) +{ + return *a >= *b; } \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index 8c267d42..0b2967e5 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -12,7 +12,7 @@ #include "numpy/dtype_api.h" #include "scalar.h" -// #include "scalar_ops.h" +#include "scalar_ops.h" QuadPrecisionObject * QuadPrecision_raw_new(QuadBackendType backend) @@ -168,8 +168,8 @@ PyTypeObject QuadPrecision_Type = { .tp_dealloc = (destructor)QuadPrecision_dealloc, .tp_repr = (reprfunc)QuadPrecision_repr, .tp_str = (reprfunc)QuadPrecision_str, - // .tp_as_number = &quad_as_scalar, - // .tp_richcompare = (richcmpfunc)quad_richcompare + .tp_as_number = &quad_as_scalar, + .tp_richcompare = (richcmpfunc)quad_richcompare }; diff --git a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp index a51bc69a..156e4288 100644 --- a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp +++ b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp @@ -16,27 +16,38 @@ extern "C" { #include "scalar.h" #include "ops.hpp" #include "scalar_ops.h" +#include "quad_common.h" -template +template static PyObject * quad_unary_func(QuadPrecisionObject *self) { - QuadPrecisionObject *res = QuadPrecision_raw_new(); + QuadPrecisionObject *res = QuadPrecision_raw_new(self->backend); if (!res) { return NULL; } - unary_op(&self->quad.value, &res->quad.value); + if (self->backend == BACKEND_SLEEF) { + sleef_op(&self->value.sleef_value, &res->value.sleef_value); + } + else { + longdouble_op(&self->value.longdouble_value, &res->value.longdouble_value); + } return (PyObject *)res; } PyObject * quad_nonzero(QuadPrecisionObject *self) { - return PyBool_FromLong(Sleef_icmpneq1(self->quad.value, Sleef_cast_from_int64q1(0))); + if (self->backend == BACKEND_SLEEF) { + return PyBool_FromLong(Sleef_icmpneq1(self->value.sleef_value, Sleef_cast_from_int64q1(0))); + } + else { + return PyBool_FromLong(self->value.longdouble_value != 0.0L); + } } -template +template static PyObject * quad_binary_func(PyObject *op1, PyObject *op2) { @@ -44,65 +55,68 @@ quad_binary_func(PyObject *op1, PyObject *op2) PyObject *other; QuadPrecisionObject *other_quad = NULL; int is_forward; + QuadBackendType backend; if (PyObject_TypeCheck(op1, &QuadPrecision_Type)) { is_forward = 1; self = (QuadPrecisionObject *)op1; other = Py_NewRef(op2); + backend = self->backend; } else { is_forward = 0; self = (QuadPrecisionObject *)op2; other = Py_NewRef(op1); + backend = self->backend; } if (PyObject_TypeCheck(other, &QuadPrecision_Type)) { Py_INCREF(other); other_quad = (QuadPrecisionObject *)other; + if (other_quad->backend != backend) { + PyErr_SetString(PyExc_TypeError, "Cannot mix QuadPrecision backends"); + Py_DECREF(other); + return NULL; + } } else if (PyLong_Check(other) || PyFloat_Check(other)) { - other_quad = QuadPrecision_raw_new(); + other_quad = QuadPrecision_from_object(other, backend); if (!other_quad) { Py_DECREF(other); return NULL; } - - if (PyLong_Check(other)) { - long long value = PyLong_AsLongLong(other); - if (value == -1 && PyErr_Occurred()) { - Py_DECREF(other); - Py_DECREF(other_quad); - return NULL; - } - other_quad->quad.value = Sleef_cast_from_int64q1(value); - } - else { - double value = PyFloat_AsDouble(other); - if (value == -1.0 && PyErr_Occurred()) { - Py_DECREF(other); - Py_DECREF(other_quad); - return NULL; - } - other_quad->quad.value = Sleef_cast_from_doubleq1(value); - } } else { Py_DECREF(other); Py_RETURN_NOTIMPLEMENTED; } - QuadPrecisionObject *res = QuadPrecision_raw_new(); + QuadPrecisionObject *res = QuadPrecision_raw_new(backend); if (!res) { Py_DECREF(other_quad); Py_DECREF(other); return NULL; } - if (is_forward) { - binary_op(&res->quad.value, &self->quad.value, &other_quad->quad.value); + if (backend == BACKEND_SLEEF) { + if (is_forward) { + sleef_op(&res->value.sleef_value, &self->value.sleef_value, + &other_quad->value.sleef_value); + } + else { + sleef_op(&res->value.sleef_value, &other_quad->value.sleef_value, + &self->value.sleef_value); + } } else { - binary_op(&res->quad.value, &other_quad->quad.value, &self->quad.value); + if (is_forward) { + longdouble_op(&res->value.longdouble_value, &self->value.longdouble_value, + &other_quad->value.longdouble_value); + } + else { + longdouble_op(&res->value.longdouble_value, &other_quad->value.longdouble_value, + &self->value.longdouble_value); + } } Py_DECREF(other_quad); @@ -115,13 +129,20 @@ PyObject * quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op) { QuadPrecisionObject *other_quad = NULL; + QuadBackendType backend = self->backend; if (PyObject_TypeCheck(other, &QuadPrecision_Type)) { Py_INCREF(other); other_quad = (QuadPrecisionObject *)other; + if (other_quad->backend != backend) { + PyErr_SetString(PyExc_TypeError, + "Cannot compare QuadPrecision objects with different backends"); + Py_DECREF(other_quad); + return NULL; + } } else if (PyLong_CheckExact(other) || PyFloat_CheckExact(other)) { - other_quad = QuadPrecision_from_object(other); + other_quad = QuadPrecision_from_object(other, backend); if (other_quad == NULL) { return NULL; } @@ -129,29 +150,57 @@ quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op) else { Py_RETURN_NOTIMPLEMENTED; } + int cmp; - switch (cmp_op) { - case Py_LT: - cmp = Sleef_icmpltq1(self->quad.value, other_quad->quad.value); - break; - case Py_LE: - cmp = Sleef_icmpleq1(self->quad.value, other_quad->quad.value); - break; - case Py_EQ: - cmp = Sleef_icmpeqq1(self->quad.value, other_quad->quad.value); - break; - case Py_NE: - cmp = Sleef_icmpneq1(self->quad.value, other_quad->quad.value); - break; - case Py_GT: - cmp = Sleef_icmpgtq1(self->quad.value, other_quad->quad.value); - break; - case Py_GE: - cmp = Sleef_icmpgeq1(self->quad.value, other_quad->quad.value); - break; - default: - Py_DECREF(other_quad); - Py_RETURN_NOTIMPLEMENTED; + if (backend == BACKEND_SLEEF) { + switch (cmp_op) { + case Py_LT: + cmp = Sleef_icmpltq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + case Py_LE: + cmp = Sleef_icmpleq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + case Py_EQ: + cmp = Sleef_icmpeqq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + case Py_NE: + cmp = Sleef_icmpneq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + case Py_GT: + cmp = Sleef_icmpgtq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + case Py_GE: + cmp = Sleef_icmpgeq1(self->value.sleef_value, other_quad->value.sleef_value); + break; + default: + Py_DECREF(other_quad); + Py_RETURN_NOTIMPLEMENTED; + } + } + else { + switch (cmp_op) { + case Py_LT: + cmp = self->value.longdouble_value < other_quad->value.longdouble_value; + break; + case Py_LE: + cmp = self->value.longdouble_value <= other_quad->value.longdouble_value; + break; + case Py_EQ: + cmp = self->value.longdouble_value == other_quad->value.longdouble_value; + break; + case Py_NE: + cmp = self->value.longdouble_value != other_quad->value.longdouble_value; + break; + case Py_GT: + cmp = self->value.longdouble_value > other_quad->value.longdouble_value; + break; + case Py_GE: + cmp = self->value.longdouble_value >= other_quad->value.longdouble_value; + break; + default: + Py_DECREF(other_quad); + Py_RETURN_NOTIMPLEMENTED; + } } Py_DECREF(other_quad); @@ -159,13 +208,13 @@ quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op) } PyNumberMethods quad_as_scalar = { - .nb_add = (binaryfunc)quad_binary_func, - .nb_subtract = (binaryfunc)quad_binary_func, - .nb_multiply = (binaryfunc)quad_binary_func, - .nb_power = (ternaryfunc)quad_binary_func, - .nb_negative = (unaryfunc)quad_unary_func, - .nb_positive = (unaryfunc)quad_unary_func, - .nb_absolute = (unaryfunc)quad_unary_func, + .nb_add = (binaryfunc)quad_binary_func, + .nb_subtract = (binaryfunc)quad_binary_func, + .nb_multiply = (binaryfunc)quad_binary_func, + .nb_power = (ternaryfunc)quad_binary_func, + .nb_negative = (unaryfunc)quad_unary_func, + .nb_positive = (unaryfunc)quad_unary_func, + .nb_absolute = (unaryfunc)quad_unary_func, .nb_bool = (inquiry)quad_nonzero, - .nb_true_divide = (binaryfunc)quad_binary_func, + .nb_true_divide = (binaryfunc)quad_binary_func, }; \ No newline at end of file diff --git a/quaddtype/reinstall.sh b/quaddtype/reinstall.sh index afec2b59..f552d7a7 100755 --- a/quaddtype/reinstall.sh +++ b/quaddtype/reinstall.sh @@ -8,6 +8,6 @@ then fi #meson setup build -Db_sanitize=address,undefined -python -m pip uninstall -y quaddtype numpy_quaddtype +python -m pip uninstall -y numpy_quaddtype python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug" #python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' \ No newline at end of file From 7fd8da6627a9be5be350128529e2678188e285cd Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sun, 1 Sep 2024 21:14:11 +0530 Subject: [PATCH 05/32] failing: londouble with different dtype --- quaddtype/numpy_quaddtype/src/dtype.c | 8 +- quaddtype/numpy_quaddtype/src/dtype.h | 6 +- quaddtype/numpy_quaddtype/src/ops.hpp | 2 +- quaddtype/numpy_quaddtype/src/quad_common.h | 1 + .../numpy_quaddtype/src/quaddtype_main.c | 8 +- quaddtype/numpy_quaddtype/src/umath.cpp | 323 +++++++++++------- quaddtype/reinstall.sh | 4 +- temp.py | 207 +++++++++++ 8 files changed, 420 insertions(+), 139 deletions(-) create mode 100644 temp.py diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c index 9e496dd8..b655f112 100644 --- a/quaddtype/numpy_quaddtype/src/dtype.c +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -57,7 +57,6 @@ new_quaddtype_instance(QuadBackendType backend) new->base.elsize = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); new->base.alignment = (backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double); new->backend = backend; - return new; } @@ -73,7 +72,7 @@ common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) { if (dtype1->backend != dtype2->backend) { PyErr_SetString(PyExc_TypeError, - "Cannot combine QuadPrecDType instances with different backends"); + "Cannot find common instance for QuadPrecDTypes with different backends"); return NULL; } Py_INCREF(dtype1); @@ -189,7 +188,8 @@ QuadPrecDType_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds) return NULL; } - return (PyObject *)new_quaddtype_instance(backend); + return (PyObject *)quadprec_discover_descriptor_from_pyobject( + &QuadPrecDType, (PyObject *)QuadPrecision_raw_new(backend)); } static PyObject * @@ -217,7 +217,7 @@ init_quadprec_dtype(void) return -1; PyArrayDTypeMeta_Spec QuadPrecDType_DTypeSpec = { - .flags = NPY_DT_NUMERIC, + .flags = NPY_DT_PARAMETRIC | NPY_DT_NUMERIC, .casts = casts, .typeobj = &QuadPrecision_Type, .slots = QuadPrecDType_Slots, diff --git a/quaddtype/numpy_quaddtype/src/dtype.h b/quaddtype/numpy_quaddtype/src/dtype.h index 243018d3..77a65449 100644 --- a/quaddtype/numpy_quaddtype/src/dtype.h +++ b/quaddtype/numpy_quaddtype/src/dtype.h @@ -17,9 +17,11 @@ typedef struct { extern PyArray_DTypeMeta QuadPrecDType; -QuadPrecDTypeObject * new_quaddtype_instance(QuadBackendType backend); +QuadPrecDTypeObject * +new_quaddtype_instance(QuadBackendType backend); -int init_quadprec_dtype(void); +int +init_quadprec_dtype(void); #ifdef __cplusplus } diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp index 3e290da6..9b3eeed1 100644 --- a/quaddtype/numpy_quaddtype/src/ops.hpp +++ b/quaddtype/numpy_quaddtype/src/ops.hpp @@ -376,7 +376,7 @@ quad_greaterequal(const Sleef_quad *a, const Sleef_quad *b) } // comparison quad functions -typedef npy_bool (*cmp_londouble_def)(const long double *, const double *); +typedef npy_bool (*cmp_londouble_def)(const long double *, const long double *); static inline npy_bool ld_equal(const long double *a, const long double *b) diff --git a/quaddtype/numpy_quaddtype/src/quad_common.h b/quaddtype/numpy_quaddtype/src/quad_common.h index 190573af..bc578a29 100644 --- a/quaddtype/numpy_quaddtype/src/quad_common.h +++ b/quaddtype/numpy_quaddtype/src/quad_common.h @@ -6,6 +6,7 @@ extern "C" { #endif typedef enum { + BACKEND_INVALID = -1, BACKEND_SLEEF, BACKEND_LONGDOUBLE } QuadBackendType; diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c index ce263e49..994d1eba 100644 --- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c +++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c @@ -11,7 +11,7 @@ #include "scalar.h" #include "dtype.h" -// #include "umath.h" +#include "umath.h" #include "quad_common.h" static struct PyModuleDef moduledef = { @@ -43,9 +43,9 @@ PyInit__quaddtype_main(void) if (PyModule_AddObject(m, "QuadPrecDType", (PyObject *)&QuadPrecDType) < 0) goto error; - // if (init_quad_umath() < 0) { - // goto error; - // } + if (init_quad_umath() < 0) { + goto error; + } return m; diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 9007f890..0d64ff7a 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -1,5 +1,3 @@ -#include "scalar.h" - #define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API #define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API #define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION @@ -17,11 +15,40 @@ extern "C" { #include "numpy/dtype_api.h" } +#include "quad_common.h" +#include "scalar.h" #include "dtype.h" #include "umath.h" #include "ops.hpp" -template +static NPY_CASTING +quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], + npy_intp *NPY_UNUSED(view_offset)) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + Py_INCREF(given_descrs[0]); + loop_descrs[1] = given_descrs[0]; + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)loop_descrs[1]; + + if (descr_in->backend != descr_out->backend) { + return NPY_UNSAFE_CASTING; + } + + return NPY_NO_CASTING; +} + +template int quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const data[], npy_intp const dimensions[], npy_intp const strides[], @@ -33,11 +60,20 @@ quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const d npy_intp in_stride = strides[0]; npy_intp out_stride = strides[1]; - Sleef_quad in, out; + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + quad_value in, out; while (N--) { - memcpy(&in, in_ptr, sizeof(Sleef_quad)); - unary_op(&in, &out); - memcpy(out_ptr, &out, sizeof(Sleef_quad)); + memcpy(&in, in_ptr, elem_size); + if (backend == BACKEND_SLEEF) { + sleef_op(&in.sleef_value, &out.sleef_value); + } + else { + longdouble_op(&in.longdouble_value, &out.longdouble_value); + } + memcpy(out_ptr, &out, elem_size); in_ptr += in_stride; out_ptr += out_stride; @@ -45,26 +81,7 @@ quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const d return 0; } -static NPY_CASTING -quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], - PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], - npy_intp *NPY_UNUSED(view_offset)) -{ - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - - if (given_descrs[1] == NULL) { - Py_INCREF(given_descrs[0]); - loop_descrs[1] = given_descrs[0]; - return NPY_NO_CASTING; - } - Py_INCREF(given_descrs[1]); - loop_descrs[1] = given_descrs[1]; - - return NPY_NO_CASTING; -} - -template +template int create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) { @@ -77,7 +94,8 @@ create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) PyType_Slot slots[] = { {NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_generic_unary_op_strided_loop}, + {NPY_METH_strided_loop, + (void *)&quad_generic_unary_op_strided_loop}, {0, NULL}}; PyArrayMethod_Spec Spec = { @@ -100,46 +118,46 @@ create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) int init_quad_unary_ops(PyObject *numpy) { - if (create_quad_unary_ufunc(numpy, "negative") < 0) { + if (create_quad_unary_ufunc(numpy, "negative") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "absolute") < 0) { + if (create_quad_unary_ufunc(numpy, "absolute") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "rint") < 0) { + if (create_quad_unary_ufunc(numpy, "rint") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "trunc") < 0) { + if (create_quad_unary_ufunc(numpy, "trunc") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "floor") < 0) { + if (create_quad_unary_ufunc(numpy, "floor") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "ceil") < 0) { + if (create_quad_unary_ufunc(numpy, "ceil") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "sqrt") < 0) { + if (create_quad_unary_ufunc(numpy, "sqrt") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "square") < 0) { + if (create_quad_unary_ufunc(numpy, "square") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "log") < 0) { + if (create_quad_unary_ufunc(numpy, "log") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "log2") < 0) { + if (create_quad_unary_ufunc(numpy, "log2") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "log10") < 0) { + if (create_quad_unary_ufunc(numpy, "log10") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "log1p") < 0) { + if (create_quad_unary_ufunc(numpy, "log1p") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "exp") < 0) { + if (create_quad_unary_ufunc(numpy, "exp") < 0) { return -1; } - if (create_quad_unary_ufunc(numpy, "exp2") < 0) { + if (create_quad_unary_ufunc(numpy, "exp2") < 0) { return -1; } return 0; @@ -147,33 +165,6 @@ init_quad_unary_ops(PyObject *numpy) // Binary ufuncs -template -int -quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in1_ptr = data[0], *in2_ptr = data[1]; - char *out_ptr = data[2]; - npy_intp in1_stride = strides[0]; - npy_intp in2_stride = strides[1]; - npy_intp out_stride = strides[2]; - - Sleef_quad in1, in2, out; - while (N--) { - memcpy(&in1, in1_ptr, sizeof(Sleef_quad)); - memcpy(&in2, in2_ptr, sizeof(Sleef_quad)); - binop(&out, &in1, &in2); - memcpy(out_ptr, &out, sizeof(Sleef_quad)); - - in1_ptr += in1_stride; - in2_ptr += in2_stride; - out_ptr += out_stride; - } - return 0; -} - static NPY_CASTING quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], PyArray_Descr *const given_descrs[], @@ -184,22 +175,72 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp Py_INCREF(given_descrs[1]); loop_descrs[1] = given_descrs[1]; + QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; + + if (descr_in1->backend != descr_in2->backend) { + PyErr_SetString(PyExc_TypeError, + "Cannot operate on QuadPrecision objects with different backends"); + return (NPY_CASTING)-1; + } + if (given_descrs[2] == NULL) { - PyArray_Descr *out_descr = (PyArray_Descr *)new_quaddtype_instance(); - if (!out_descr) { + loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(descr_in1->backend); + if (!loop_descrs[2]) { return (NPY_CASTING)-1; } - Py_INCREF(given_descrs[0]); - loop_descrs[2] = out_descr; } else { Py_INCREF(given_descrs[2]); loop_descrs[2] = given_descrs[2]; } + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)loop_descrs[2]; + if (descr_out->backend != descr_in1->backend) { + PyErr_SetString(PyExc_TypeError, + "Output QuadPrecision object must have the same backend as inputs"); + return (NPY_CASTING)-1; + } + return NPY_NO_CASTING; } +template +int +quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + quad_value in1, in2, out; + while (N--) { + memcpy(&in1, in1_ptr, elem_size); + memcpy(&in2, in2_ptr, elem_size); + if (backend == BACKEND_SLEEF) { + sleef_op(&out.sleef_value, &in1.sleef_value, &in2.sleef_value); + } + else { + longdouble_op(&out.longdouble_value, &in1.longdouble_value, &in2.longdouble_value); + } + memcpy(out_ptr, &out, elem_size); + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + // helper debugging function static const char * get_dtype_name(PyArray_DTypeMeta *dtype) @@ -258,15 +299,16 @@ static int quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) { - // printf("quad_ufunc_promoter called for ufunc: %s\n", ufunc->name); - // printf("Entering quad_ufunc_promoter\n"); - // printf("Ufunc name: %s\n", ufunc->name); - // printf("nin: %d, nargs: %d\n", ufunc->nin, ufunc->nargs); + printf("quad_ufunc_promoter called for ufunc: %s\n", ufunc->name); + printf("Entering quad_ufunc_promoter\n"); + printf("Ufunc name: %s\n", ufunc->name); + printf("nin: %d, nargs: %d\n", ufunc->nin, ufunc->nargs); int nin = ufunc->nin; int nargs = ufunc->nargs; PyArray_DTypeMeta *common = NULL; bool has_quad = false; + QuadBackendType backend = BACKEND_INVALID; // Initialize to an invalid state // Handle the special case for reductions if (op_dtypes[0] == NULL) { @@ -274,55 +316,65 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], for (int i = 0; i < 3; i++) { Py_INCREF(op_dtypes[1]); new_op_dtypes[i] = op_dtypes[1]; - // printf("new_op_dtypes[%d] set to %s\n", i, get_dtype_name(new_op_dtypes[i])); + printf("new_op_dtypes[%d] set to %s\n", i, get_dtype_name(new_op_dtypes[i])); } return 0; } // Check if any input or signature is QuadPrecision - for (int i = 0; i < nargs; i++) { - if ((i < nin && op_dtypes[i] == &QuadPrecDType) || (signature[i] == &QuadPrecDType)) { + for (int i = 0; i < nin; i++) { + if (op_dtypes[i] == &QuadPrecDType) { has_quad = true; - // printf("QuadPrecision detected in input %d or signature\n", i); + // todo: Why below line always picking the backend as SLEEF + QuadPrecDTypeObject *descr = + (QuadPrecDTypeObject *)PyArray_GetDefaultDescr(op_dtypes[i]); + + const char *s = (descr->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; + printf("QuadPrecision detected in input %d or signature with backend: %s\n", i, s); + if (backend == BACKEND_INVALID) + backend = descr->backend; + else if (backend != BACKEND_INVALID && backend != descr->backend) { + PyErr_SetString(PyExc_TypeError, + "Cannot mix QuadPrecDType arrays with different backends"); + return -1; + } + Py_DECREF(descr); break; } } if (has_quad) { - // If QuadPrecision is involved, use it for all arguments common = &QuadPrecDType; - // printf("Using QuadPrecDType as common type\n"); + printf("Using QuadPrecDType as common type\n"); } else { - // Check if output signature is homogeneous for (int i = nin; i < nargs; i++) { if (signature[i] != NULL) { if (common == NULL) { Py_INCREF(signature[i]); common = signature[i]; - // printf("Common type set to %s from signature\n", get_dtype_name(common)); + printf("Common type set to %s from signature\n", get_dtype_name(common)); } else if (common != signature[i]) { Py_CLEAR(common); // Not homogeneous, unset common - // printf("Output signature not homogeneous, cleared common type\n"); + printf("Output signature not homogeneous, cleared common type\n"); break; } } } - - // If no common output dtype, use standard promotion for inputs + } + // If no common output dtype, use standard promotion for inputs + if (common == NULL) { + printf("Using standard promotion for inputs\n"); + common = PyArray_PromoteDTypeSequence(nin, op_dtypes); if (common == NULL) { - // printf("Using standard promotion for inputs\n"); - common = PyArray_PromoteDTypeSequence(nin, op_dtypes); - if (common == NULL) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) { - PyErr_Clear(); // Do not propagate normal promotion errors - } - // printf("Exiting quad_ufunc_promoter (promotion failed)\n"); - return -1; + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); // Do not propagate normal promotion errors } - // printf("Common type after promotion: %s\n", get_dtype_name(common)); + printf("Exiting quad_ufunc_promoter (promotion failed)\n"); + return -1; } + printf("Common type after promotion: %s\n", get_dtype_name(common)); } // Set all new_op_dtypes to the common dtype @@ -331,30 +383,29 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], // If signature is specified for this argument, use it Py_INCREF(signature[i]); new_op_dtypes[i] = signature[i]; - // printf("new_op_dtypes[%d] set to %s (from signature)\n", i, - // get_dtype_name(new_op_dtypes[i])); + printf("new_op_dtypes[%d] set to %s (from signature)\n", i, + get_dtype_name(new_op_dtypes[i])); } else { // Otherwise, use the common dtype Py_INCREF(common); new_op_dtypes[i] = common; - // printf("new_op_dtypes[%d] set to %s (from common)\n", i, - // get_dtype_name(new_op_dtypes[i])); + printf("new_op_dtypes[%d] set to %s (from common)\n", i, + get_dtype_name(new_op_dtypes[i])); } } Py_XDECREF(common); - // printf("Exiting quad_ufunc_promoter\n"); + printf("Exiting quad_ufunc_promoter\n"); return 0; } -template +template int create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name) { PyObject *ufunc = PyObject_GetAttrString(numpy, ufunc_name); if (ufunc == NULL) { - Py_DecRef(ufunc); return -1; } @@ -362,7 +413,8 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name) PyType_Slot slots[] = { {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_generic_binop_strided_loop}, + {NPY_METH_strided_loop, + (void *)&quad_generic_binop_strided_loop}, {0, NULL}}; PyArrayMethod_Spec Spec = { @@ -404,28 +456,28 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name) int init_quad_binary_ops(PyObject *numpy) { - if (create_quad_binary_ufunc(numpy, "add") < 0) { + if (create_quad_binary_ufunc(numpy, "add") < 0) { return -1; } - if (create_quad_binary_ufunc(numpy, "subtract") < 0) { + if (create_quad_binary_ufunc(numpy, "subtract") < 0) { return -1; } - if (create_quad_binary_ufunc(numpy, "multiply") < 0) { + if (create_quad_binary_ufunc(numpy, "multiply") < 0) { return -1; } - if (create_quad_binary_ufunc(numpy, "divide") < 0) { + if (create_quad_binary_ufunc(numpy, "divide") < 0) { return -1; } - if (create_quad_binary_ufunc(numpy, "power") < 0) { + if (create_quad_binary_ufunc(numpy, "power") < 0) { return -1; } - if (create_quad_binary_ufunc(numpy, "mod") < 0) { + if (create_quad_binary_ufunc(numpy, "mod") < 0) { return -1; } - if (create_quad_binary_ufunc(numpy, "minimum") < 0) { + if (create_quad_binary_ufunc(numpy, "minimum") < 0) { return -1; } - if (create_quad_binary_ufunc(numpy, "maximum") < 0) { + if (create_quad_binary_ufunc(numpy, "maximum") < 0) { return -1; } return 0; @@ -433,7 +485,7 @@ init_quad_binary_ops(PyObject *numpy) // comparison functions -template +template int quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[], npy_intp const dimensions[], npy_intp const strides[], @@ -446,8 +498,19 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[ npy_intp in2_stride = strides[1]; npy_intp out_stride = strides[2]; + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + while (N--) { - *((npy_bool *)out_ptr) = comp((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr); + if (backend == BACKEND_SLEEF) { + *((npy_bool *)out_ptr) = + sleef_comp((const Sleef_quad *)in1_ptr, (const Sleef_quad *)in2_ptr); + } + else { + *((npy_bool *)out_ptr) = + ld_comp((const long double *)in1_ptr, (const long double *)in2_ptr); + } in1_ptr += in1_stride; in2_ptr += in2_stride; @@ -472,7 +535,7 @@ comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], return 0; } -template +template int create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) { @@ -483,8 +546,10 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType}; - PyType_Slot slots[] = {{NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop}, - {0, NULL}}; + PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop}, + {0, NULL}}; PyArrayMethod_Spec Spec = { .name = "quad_comp", @@ -501,7 +566,7 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) } PyObject *promoter_capsule = - PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL); + PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL); if (promoter_capsule == NULL) { return -1; } @@ -526,22 +591,23 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) int init_quad_comps(PyObject *numpy) { - if (create_quad_comparison_ufunc(numpy, "equal") < 0) { + if (create_quad_comparison_ufunc(numpy, "equal") < 0) { return -1; } - if (create_quad_comparison_ufunc(numpy, "not_equal") < 0) { + if (create_quad_comparison_ufunc(numpy, "not_equal") < 0) { return -1; } - if (create_quad_comparison_ufunc(numpy, "less") < 0) { + if (create_quad_comparison_ufunc(numpy, "less") < 0) { return -1; } - if (create_quad_comparison_ufunc(numpy, "less_equal") < 0) { + if (create_quad_comparison_ufunc(numpy, "less_equal") < 0) { return -1; } - if (create_quad_comparison_ufunc(numpy, "greater") < 0) { + if (create_quad_comparison_ufunc(numpy, "greater") < 0) { return -1; } - if (create_quad_comparison_ufunc(numpy, "greater_equal") < 0) { + if (create_quad_comparison_ufunc(numpy, "greater_equal") < + 0) { return -1; } @@ -552,18 +618,23 @@ int init_quad_umath(void) { PyObject *numpy = PyImport_ImportModule("numpy"); - if (!numpy) + if (!numpy) { + PyErr_SetString(PyExc_ImportError, "Failed to import numpy module"); return -1; + } if (init_quad_unary_ops(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad unary operations"); goto err; } if (init_quad_binary_ops(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad binary operations"); goto err; } if (init_quad_comps(numpy) < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize quad comparison operations"); goto err; } diff --git a/quaddtype/reinstall.sh b/quaddtype/reinstall.sh index f552d7a7..5131f15a 100755 --- a/quaddtype/reinstall.sh +++ b/quaddtype/reinstall.sh @@ -9,5 +9,5 @@ fi #meson setup build -Db_sanitize=address,undefined python -m pip uninstall -y numpy_quaddtype -python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug" -#python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' \ No newline at end of file +# python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' -Csetup-args="-Dbuildtype=debug" +python -m pip install . -v --no-build-isolation -Cbuilddir=build -C'compile-args=-v' \ No newline at end of file diff --git a/temp.py b/temp.py new file mode 100644 index 00000000..6ce4fa9f --- /dev/null +++ b/temp.py @@ -0,0 +1,207 @@ +import numpy_quaddtype as npq +import numpy as np + + +def test_scalar_ops(backend): + print(f"\nTesting scalar operations for {backend} backend:") + + # Create QuadPrecision instances + q1 = npq.QuadPrecision( + "3.14159265358979323846264338327950288", backend=backend) + q2 = npq.QuadPrecision( + "-2.71828182845904523536028747135266250", backend=backend) + + # Test unary operations + print("\nUnary operations:") + print(f" Negation of q1: {-q1}") + print(f" Absolute value of q2: {abs(q2)}") + + # Test binary operations + print("\nBinary operations:") + print(f" Addition: {q1 + q2}") + print(f" Subtraction: {q1 - q2}") + print(f" Multiplication: {q1 * q2}") + print(f" Division: {q1 / q2}") + + # Test comparison operations + print("\nComparison operations:") + print(f" q1 == q2: {q1 == q2}") + print(f" q1 != q2: {q1 != q2}") + print(f" q1 < q2: {q1 < q2}") + print(f" q1 <= q2: {q1 <= q2}") + print(f" q1 > q2: {q1 > q2}") + print(f" q1 >= q2: {q1 >= q2}") + + # Test operations with Python numbers + print("\nOperations with Python numbers:") + print(f" q1 + 1: {q1 + 1}") + print(f" q1 - 2.5: {q1 - 2.5}") + print(f" q1 * 3: {q1 * 3}") + print(f" q1 / 2: {q1 / 2}") + + # Test boolean conversion + print("\nBoolean conversion:") + print(f" bool(q1): {bool(q1)}") + print( + f" bool(npq.QuadPrecision('0', backend=backend)): {bool(npq.QuadPrecision('0', backend=backend))}") + + +def test_casting(backend): + print(f"\nTesting {backend} backend:") + + # Create QuadPrecision instances + q1 = npq.QuadPrecision( + "3.14159265358979323846264338327950288", backend=backend) + q2 = npq.QuadPrecision( + "-2.71828182845904523536028747135266250", backend=backend) + + # Test casting from QuadPrecision to numpy dtypes + print("Casting from QuadPrecision to numpy dtypes:") + print(f" float32: {np.float32(q1)}") + print(f" float64: {np.float64(q1)}") + print(f" int64: {np.int64(q1)}") + print(f" uint64: {np.uint64(q1)}") + + # Test casting from numpy dtypes to QuadPrecision + print("\nCasting from numpy dtypes to QuadPrecision:") + print( + f" float32: {np.float32(3.14159).astype(npq.QuadPrecDType(backend=backend))}") + print( + f" float64: {np.float64(2.71828182845904).astype(npq.QuadPrecDType(backend=backend))}") + print( + f" int64: {np.int64(-1234567890).astype(npq.QuadPrecDType(backend=backend))}") + print( + f" uint64: {np.uint64(9876543210).astype(npq.QuadPrecDType(backend=backend))}") + + # Test array operations + print("\nArray operations:") + q_array = np.array([q1, q2], dtype=npq.QuadPrecDType(backend=backend)) + print(f" QuadPrecision array: {q_array}") + + np_array = np.array([3.14, -2.71, 1.41, -1.73], dtype=np.float64) + q_from_np = np_array.astype(npq.QuadPrecDType(backend=backend)) + print(f" Numpy to QuadPrecision: {q_from_np}") + + back_to_np = np.array(q_from_np, dtype=np.float64) + print(f" QuadPrecision to Numpy: {back_to_np}") + + # Test precision maintenance + large_int = 12345678901234567890 + q_large = np.array([large_int], dtype=np.uint64).astype( + npq.QuadPrecDType(backend=backend))[0] + print(f"\nPrecision test:") + print(f" Original large int: {large_int}") + print(f" QuadPrecision: {q_large}") + print(f" Back to int: {np.int64(q_large)}") + + # Test edge cases + + +def test_edge_cases(backend): + print(f"\nTesting negative numbers for {backend} backend:") + + # Test various negative numbers + test_values = [ + -1.0, + -1e10, + -1e100, + -1e300, + np.nextafter(np.finfo(np.float64).min, 0), + np.finfo(np.float64).min + ] + + for value in test_values: + q_value = npq.QuadPrecision(str(value), backend=backend) + print(f" Original: {value}") + print(f" QuadPrecision: {q_value}") + print(f" Back to float64: {np.float64(q_value)}") + print() + + # Test value beyond float64 precision + beyond_float64_precision = "1.7976931348623157081452742373170435e+308" + q_beyond = npq.QuadPrecision(beyond_float64_precision, backend=backend) + print(f" Beyond float64 precision: {q_beyond}") + q_float64_max = npq.QuadPrecision( + str(np.finfo(np.float64).max), backend=backend) + diff = q_beyond - q_float64_max + print(f" Difference from float64 max: {diff}") + print( + f" Difference is positive: {diff > npq.QuadPrecision('0', backend=backend)}") + + # Test epsilon (smallest representable difference between two numbers) + q_epsilon = npq.QuadPrecision( + str(np.finfo(np.float64).eps), backend=backend) + print(f" Float64 epsilon in QuadPrecision: {q_epsilon}") + q_one = npq.QuadPrecision("1", backend=backend) + q_one_plus_epsilon = q_one + q_epsilon + print(f" 1 + epsilon != 1: {q_one_plus_epsilon != q_one}") + print(f" (1 + epsilon) - 1: {q_one_plus_epsilon - q_one}") + + +def test_ufuncs(backend): + print(f"\nTesting ufuncs for {backend} backend:") + + # Create QuadPrecision arrays + q_array1 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend)) + q_array2 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend)) + + # Test unary ufuncs + print("\nUnary ufuncs:") + print(f" negative: {np.negative(q_array1)}") + print(f" absolute: {np.absolute(q_array1)}") + print(f" rint: {np.rint(q_array1)}") + print(f" floor: {np.floor(q_array1)}") + print(f" ceil: {np.ceil(q_array1)}") + print(f" trunc: {np.trunc(q_array1)}") + print(f" sqrt: {np.sqrt(q_array1)}") + print(f" square: {np.square(q_array1)}") + print(f" log: {np.log(q_array1)}") + print(f" log2: {np.log2(q_array1)}") + print(f" log10: {np.log10(q_array1)}") + print(f" exp: {np.exp(q_array1)}") + print(f" exp2: {np.exp2(q_array1)}") + + # Test binary ufuncs + print("\nBinary ufuncs:") + print(f" add: {np.add(q_array1, q_array2)}") + print(f" subtract: {np.subtract(q_array1, q_array2)}") + print(f" multiply: {np.multiply(q_array1, q_array2)}") + print(f" divide: {np.divide(q_array1, q_array2)}") + print(f" power: {np.power(q_array1, q_array2)}") + print(f" mod: {np.mod(q_array1, q_array2)}") + print(f" minimum: {np.minimum(q_array1, q_array2)}") + print(f" maximum: {np.maximum(q_array1, q_array2)}") + + # Test comparison ufuncs + print("\nComparison ufuncs:") + print(f" equal: {np.equal(q_array1, q_array2)}") + print(f" not_equal: {np.not_equal(q_array1, q_array2)}") + print(f" less: {np.less(q_array1, q_array2)}") + print(f" less_equal: {np.less_equal(q_array1, q_array2)}") + print(f" greater: {np.greater(q_array1, q_array2)}") + print(f" greater_equal: {np.greater_equal(q_array1, q_array2)}") + + # Test mixed operations with numpy arrays + print(f"Testing backend: {backend}") + print("\nMixed operations with numpy arrays:") + np_array = np.array([1.0, 2.0, 3.0], dtype=np.float64) + print(f" add: {np.add(q_array1, np_array)}") + print(f" multiply: {np.multiply(q_array1, np_array)}") + print(f" divide: {np.divide(q_array1, np_array)}") + + # Test reduction operations + print("\nReduction operations:") + print(f" sum: {np.sum(q_array1)}") + print(f" prod: {np.prod(q_array1)}") + print(f" min: {np.min(q_array1)}") + print(f" max: {np.max(q_array1)}") + + +# Run tests for both backends +for backend in ['longdouble']: + test_scalar_ops(backend) + test_casting(backend) + test_edge_cases(backend) + test_ufuncs(backend) + +print("All tests completed successfully") From d664a598858032a54ba2fa1c66724f22bafefec2 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Mon, 2 Sep 2024 00:23:12 +0530 Subject: [PATCH 06/32] added backend parameter in default descriptor --- quaddtype/numpy_quaddtype/src/dtype.c | 7 +- quaddtype/numpy_quaddtype/src/umath.cpp | 113 +++++++++++++----------- 2 files changed, 64 insertions(+), 56 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c index b655f112..c58f8633 100644 --- a/quaddtype/numpy_quaddtype/src/dtype.c +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -154,9 +154,12 @@ quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr) } static PyArray_Descr * -quadprec_default_descr(PyArray_DTypeMeta *NPY_UNUSED(cls)) +quadprec_default_descr(PyArray_DTypeMeta *cls) { - return (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF); + QuadPrecDTypeObject *temp = (QuadPrecDTypeObject *)cls; + const char *s1 = (temp->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; + printf("called with backend: %s\n", s1); + return (PyArray_Descr *)new_quaddtype_instance(temp->backend); } static PyType_Slot QuadPrecDType_Slots[] = { diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 0d64ff7a..783f38a2 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -21,6 +21,60 @@ extern "C" { #include "umath.h" #include "ops.hpp" +// helper debugging function +static const char * +get_dtype_name(PyArray_DTypeMeta *dtype) +{ + if (dtype == &QuadPrecDType) { + return "QuadPrecDType"; + } + else if (dtype == &PyArray_BoolDType) { + return "BoolDType"; + } + else if (dtype == &PyArray_ByteDType) { + return "ByteDType"; + } + else if (dtype == &PyArray_UByteDType) { + return "UByteDType"; + } + else if (dtype == &PyArray_ShortDType) { + return "ShortDType"; + } + else if (dtype == &PyArray_UShortDType) { + return "UShortDType"; + } + else if (dtype == &PyArray_IntDType) { + return "IntDType"; + } + else if (dtype == &PyArray_UIntDType) { + return "UIntDType"; + } + else if (dtype == &PyArray_LongDType) { + return "LongDType"; + } + else if (dtype == &PyArray_ULongDType) { + return "ULongDType"; + } + else if (dtype == &PyArray_LongLongDType) { + return "LongLongDType"; + } + else if (dtype == &PyArray_ULongLongDType) { + return "ULongLongDType"; + } + else if (dtype == &PyArray_FloatDType) { + return "FloatDType"; + } + else if (dtype == &PyArray_DoubleDType) { + return "DoubleDType"; + } + else if (dtype == &PyArray_LongDoubleDType) { + return "LongDoubleDType"; + } + else { + return "UnknownDType"; + } +} + static NPY_CASTING quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], @@ -170,6 +224,7 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) { + printf("Descriptor Resolver is calledn\n"); Py_INCREF(given_descrs[0]); loop_descrs[0] = given_descrs[0]; Py_INCREF(given_descrs[1]); @@ -177,6 +232,10 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; + const char *s1 = (descr_in1->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; + const char *s2 = (descr_in2->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; + printf("1: %s\n", s1); + printf("2: %s\n", s2); if (descr_in1->backend != descr_in2->backend) { PyErr_SetString(PyExc_TypeError, @@ -241,60 +300,6 @@ quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data return 0; } -// helper debugging function -static const char * -get_dtype_name(PyArray_DTypeMeta *dtype) -{ - if (dtype == &QuadPrecDType) { - return "QuadPrecDType"; - } - else if (dtype == &PyArray_BoolDType) { - return "BoolDType"; - } - else if (dtype == &PyArray_ByteDType) { - return "ByteDType"; - } - else if (dtype == &PyArray_UByteDType) { - return "UByteDType"; - } - else if (dtype == &PyArray_ShortDType) { - return "ShortDType"; - } - else if (dtype == &PyArray_UShortDType) { - return "UShortDType"; - } - else if (dtype == &PyArray_IntDType) { - return "IntDType"; - } - else if (dtype == &PyArray_UIntDType) { - return "UIntDType"; - } - else if (dtype == &PyArray_LongDType) { - return "LongDType"; - } - else if (dtype == &PyArray_ULongDType) { - return "ULongDType"; - } - else if (dtype == &PyArray_LongLongDType) { - return "LongLongDType"; - } - else if (dtype == &PyArray_ULongLongDType) { - return "ULongLongDType"; - } - else if (dtype == &PyArray_FloatDType) { - return "FloatDType"; - } - else if (dtype == &PyArray_DoubleDType) { - return "DoubleDType"; - } - else if (dtype == &PyArray_LongDoubleDType) { - return "LongDoubleDType"; - } - else { - return "UnknownDType"; - } -} - static int quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) From 7a85fbf154d778787fa04725d3c7bd5005d1e1d8 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Mon, 2 Sep 2024 00:24:29 +0530 Subject: [PATCH 07/32] removed todo comment: umath.cpp --- quaddtype/numpy_quaddtype/src/umath.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 783f38a2..fc996f95 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -330,7 +330,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], for (int i = 0; i < nin; i++) { if (op_dtypes[i] == &QuadPrecDType) { has_quad = true; - // todo: Why below line always picking the backend as SLEEF QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)PyArray_GetDefaultDescr(op_dtypes[i]); From e8cba6a3f0d108ae015253a9164fa0763d0870ca Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Thu, 5 Sep 2024 14:58:47 +0530 Subject: [PATCH 08/32] temporary solution to handle both backends --- quaddtype/numpy_quaddtype/src/casts.cpp | 6 ++ quaddtype/numpy_quaddtype/src/dtype.c | 21 +++++-- quaddtype/numpy_quaddtype/src/umath.cpp | 84 ++++++++++++++----------- 3 files changed, 70 insertions(+), 41 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index 473fd9af..6fdf1825 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -272,6 +272,7 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], npy_intp *view_offset) { + printf("cast.cpp: numpy_to_quad_resolve_descriptors is called\n"); if (given_descrs[1] == NULL) { loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF); if (loop_descrs[1] == nullptr) { @@ -279,6 +280,7 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta } } else { + printf("cast.cpp: numpy_to_quad_resolve_descriptors, I am in ELSE condition\n"); Py_INCREF(given_descrs[1]); loop_descrs[1] = given_descrs[1]; } @@ -297,8 +299,12 @@ numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], char *in_ptr = data[0]; char *out_ptr = data[1]; + QuadPrecDTypeObject *descr_out1 = (QuadPrecDTypeObject *)context->descriptors[0]; + printf("The type of context->descriptor[0] is: %s\n", Py_TYPE(descr_out1)->tp_name); QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1]; + printf("The type of context->descriptor[1] is: %s\n", Py_TYPE(descr_out)->tp_name); QuadBackendType backend = descr_out->backend; + printf("cast.cpp: numpy_to_quad_strided_loop with backend: %d\n", backend); size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); while (N--) { diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c index c58f8633..35b7a36e 100644 --- a/quaddtype/numpy_quaddtype/src/dtype.c +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -49,6 +49,13 @@ quad_store(char *data_ptr, void *x, QuadBackendType backend) QuadPrecDTypeObject * new_quaddtype_instance(QuadBackendType backend) { + // if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE) + // { + // PyErr_SetString(PyExc_TypeError, + // "Backend must be sleef or longdouble"); + // return NULL; + // } + printf("New Quandtype instance is created with backend: %d\n", backend); QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new( (PyTypeObject *)&QuadPrecDType, NULL, NULL); if (new == NULL) { @@ -63,6 +70,7 @@ new_quaddtype_instance(QuadBackendType backend) static QuadPrecDTypeObject * ensure_canonical(QuadPrecDTypeObject *self) { + printf("Ensure Canonical is called\n"); Py_INCREF(self); return self; } @@ -70,6 +78,7 @@ ensure_canonical(QuadPrecDTypeObject *self) static QuadPrecDTypeObject * common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) { + printf("Common Instance is called\n"); if (dtype1->backend != dtype2->backend) { PyErr_SetString(PyExc_TypeError, "Cannot find common instance for QuadPrecDTypes with different backends"); @@ -82,6 +91,7 @@ common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) static PyArray_DTypeMeta * common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) { + printf("Common dtype is called\n"); // Promote integer and floating-point types to QuadPrecDType if (other->type_num >= 0 && (PyTypeNum_ISINTEGER(other->type_num) || PyTypeNum_ISFLOAT(other->type_num))) { @@ -105,7 +115,9 @@ quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), P PyErr_SetString(PyExc_TypeError, "Can only store QuadPrecision in a QuadPrecDType array."); return NULL; } + QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; + printf("dtype.c: quadprec_discover_descriptor_from_pyobject is called with backend %d\n", quad_obj->backend); return (PyArray_Descr *)new_quaddtype_instance(quad_obj->backend); } @@ -156,10 +168,11 @@ quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr) static PyArray_Descr * quadprec_default_descr(PyArray_DTypeMeta *cls) { - QuadPrecDTypeObject *temp = (QuadPrecDTypeObject *)cls; - const char *s1 = (temp->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; - printf("called with backend: %s\n", s1); - return (PyArray_Descr *)new_quaddtype_instance(temp->backend); + QuadPrecDTypeObject * a = (QuadPrecDTypeObject *)cls; + printf("Default descriptor called with backend: %d\n", a->backend); + QuadPrecDTypeObject * temp = new_quaddtype_instance(a->backend); + printf("Default descriptor made backend: %d\n", temp->backend); + return (PyArray_Descr *)temp; } static PyType_Slot QuadPrecDType_Slots[] = { diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index fc996f95..2adabc1f 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -175,6 +175,9 @@ init_quad_unary_ops(PyObject *numpy) if (create_quad_unary_ufunc(numpy, "negative") < 0) { return -1; } + if (create_quad_unary_ufunc(numpy, "positive") < 0) { + return -1; + } if (create_quad_unary_ufunc(numpy, "absolute") < 0) { return -1; } @@ -224,44 +227,62 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) { - printf("Descriptor Resolver is calledn\n"); - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - Py_INCREF(given_descrs[1]); - loop_descrs[1] = given_descrs[1]; + printf("Descriptor Resolver is called\n"); QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; + QuadBackendType target_backend; + const char *s1 = (descr_in1->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; const char *s2 = (descr_in2->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; - printf("1: %s\n", s1); - printf("2: %s\n", s2); + printf("1: %s %d %s\n", s1, descr_in1->backend, Py_TYPE(given_descrs[0])->tp_name); + printf("2: %s %d %s\n", s2, descr_in2->backend, Py_TYPE(given_descrs[1])->tp_name); + // Determine target backend and if casting is needed + NPY_CASTING casting = NPY_NO_CASTING; if (descr_in1->backend != descr_in2->backend) { - PyErr_SetString(PyExc_TypeError, - "Cannot operate on QuadPrecision objects with different backends"); - return (NPY_CASTING)-1; + target_backend = BACKEND_LONGDOUBLE; + casting = NPY_SAFE_CASTING; + printf("Different backends detected. Casting to LONGDOUBLE.\n"); + } else { + target_backend = descr_in1->backend; + printf("Unified backend: %s\n", (target_backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"); + } + + // Set up input descriptors, casting if necessary + for (int i = 0; i < 2; i++) { + if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) { + loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[i]) { + return (NPY_CASTING)-1; + } + } else { + Py_INCREF(given_descrs[i]); + loop_descrs[i] = given_descrs[i]; + } } + // Set up output descriptor if (given_descrs[2] == NULL) { - loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(descr_in1->backend); + loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); if (!loop_descrs[2]) { return (NPY_CASTING)-1; } - } - else { - Py_INCREF(given_descrs[2]); - loop_descrs[2] = given_descrs[2]; - } - - QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)loop_descrs[2]; - if (descr_out->backend != descr_in1->backend) { - PyErr_SetString(PyExc_TypeError, - "Output QuadPrecision object must have the same backend as inputs"); - return (NPY_CASTING)-1; + } else { + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2]; + if (descr_out->backend != target_backend) { + loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[2]) { + return (NPY_CASTING)-1; + } + } else { + Py_INCREF(given_descrs[2]); + loop_descrs[2] = given_descrs[2]; + } } - return NPY_NO_CASTING; + printf("Casting result: %d\n", casting); + return casting; } template @@ -270,6 +291,7 @@ quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data npy_intp const dimensions[], npy_intp const strides[], NpyAuxData *auxdata) { + printf("Umath: Generic Strided loop is calledn\n"); npy_intp N = dimensions[0]; char *in1_ptr = data[0], *in2_ptr = data[1]; char *out_ptr = data[2]; @@ -328,22 +350,10 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], // Check if any input or signature is QuadPrecision for (int i = 0; i < nin; i++) { + printf("iterating on dtype : %s\n", get_dtype_name(op_dtypes[i])); if (op_dtypes[i] == &QuadPrecDType) { has_quad = true; - QuadPrecDTypeObject *descr = - (QuadPrecDTypeObject *)PyArray_GetDefaultDescr(op_dtypes[i]); - - const char *s = (descr->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; - printf("QuadPrecision detected in input %d or signature with backend: %s\n", i, s); - if (backend == BACKEND_INVALID) - backend = descr->backend; - else if (backend != BACKEND_INVALID && backend != descr->backend) { - PyErr_SetString(PyExc_TypeError, - "Cannot mix QuadPrecDType arrays with different backends"); - return -1; - } - Py_DECREF(descr); - break; + printf("QuadPrecision detected in input %d\n", i); } } From 01e9b7ee9864cea1e80796b5cd7414372473590f Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Fri, 6 Sep 2024 12:04:01 +0530 Subject: [PATCH 09/32] post-cleaning --- quaddtype/meson.build | 56 ----- quaddtype/numpy_quaddtype/__init__.py | 5 +- quaddtype/numpy_quaddtype/src/casts.cpp | 8 +- quaddtype/numpy_quaddtype/src/dtype.c | 27 +-- .../numpy_quaddtype/src/quaddtype_main.c | 17 ++ quaddtype/numpy_quaddtype/src/umath.cpp | 35 +-- temp.py | 207 ------------------ 7 files changed, 43 insertions(+), 312 deletions(-) delete mode 100644 quaddtype/meson.build delete mode 100644 temp.py diff --git a/quaddtype/meson.build b/quaddtype/meson.build deleted file mode 100644 index 1b9f6248..00000000 --- a/quaddtype/meson.build +++ /dev/null @@ -1,56 +0,0 @@ -project('numpy_quaddtype', 'c', 'cpp', default_options : ['cpp_std=c++17', 'b_pie=true']) - -py_mod = import('python') -py = py_mod.find_installation() - -c = meson.get_compiler('c') - -sleef_dep = c.find_library('sleef') -sleefquad_dep = c.find_library('sleefquad') - -incdir_numpy = run_command(py, - [ - '-c', - 'import numpy; import os; print(os.path.relpath(numpy.get_include()))' - ], - check: true -).stdout().strip() - -includes = include_directories( - [ - incdir_numpy, - 'numpy_quaddtype/src', - ] -) - -srcs = [ - 'numpy_quaddtype/src/casts.h', - 'numpy_quaddtype/src/casts.cpp', - 'numpy_quaddtype/src/scalar.h', - 'numpy_quaddtype/src/scalar.c', - 'numpy_quaddtype/src/dtype.h', - 'numpy_quaddtype/src/dtype.c', - 'numpy_quaddtype/src/quaddtype_main.c', - 'numpy_quaddtype/src/scalar_ops.h', - 'numpy_quaddtype/src/scalar_ops.cpp', - 'numpy_quaddtype/src/ops.hpp', - # 'numpy_quaddtype/src/umath.h', - # 'numpy_quaddtype/src/umath.cpp' -] - -py.install_sources( - [ - 'numpy_quaddtype/__init__.py', - ], - subdir: 'numpy_quaddtype', - pure: false -) - -py.extension_module('_quaddtype_main', -srcs, -c_args: ['-g', '-O0', '-lsleef', '-lsleefquad'], -dependencies: [sleef_dep, sleefquad_dep], -install: true, -subdir: 'numpy_quaddtype', -include_directories: includes -) \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py index 5ee5ab30..5aa50441 100644 --- a/quaddtype/numpy_quaddtype/__init__.py +++ b/quaddtype/numpy_quaddtype/__init__.py @@ -1,10 +1,11 @@ from ._quaddtype_main import ( QuadPrecision, - QuadPrecDType + QuadPrecDType, + is_longdouble_128 ) __all__ = ['QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision', - 'SleefQuadPrecDType', 'LongDoubleQuadPrecDType'] + 'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128'] def SleefQuadPrecision(value): diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index 6fdf1825..61809012 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -272,7 +272,7 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], npy_intp *view_offset) { - printf("cast.cpp: numpy_to_quad_resolve_descriptors is called\n"); + if (given_descrs[1] == NULL) { loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF); if (loop_descrs[1] == nullptr) { @@ -280,7 +280,7 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta } } else { - printf("cast.cpp: numpy_to_quad_resolve_descriptors, I am in ELSE condition\n"); + Py_INCREF(given_descrs[1]); loop_descrs[1] = given_descrs[1]; } @@ -299,12 +299,8 @@ numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], char *in_ptr = data[0]; char *out_ptr = data[1]; - QuadPrecDTypeObject *descr_out1 = (QuadPrecDTypeObject *)context->descriptors[0]; - printf("The type of context->descriptor[0] is: %s\n", Py_TYPE(descr_out1)->tp_name); QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1]; - printf("The type of context->descriptor[1] is: %s\n", Py_TYPE(descr_out)->tp_name); QuadBackendType backend = descr_out->backend; - printf("cast.cpp: numpy_to_quad_strided_loop with backend: %d\n", backend); size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); while (N--) { diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c index 35b7a36e..36bca565 100644 --- a/quaddtype/numpy_quaddtype/src/dtype.c +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -49,13 +49,13 @@ quad_store(char *data_ptr, void *x, QuadBackendType backend) QuadPrecDTypeObject * new_quaddtype_instance(QuadBackendType backend) { - // if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE) - // { - // PyErr_SetString(PyExc_TypeError, - // "Backend must be sleef or longdouble"); - // return NULL; - // } - printf("New Quandtype instance is created with backend: %d\n", backend); + if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE) + { + PyErr_SetString(PyExc_TypeError, + "Backend must be sleef or longdouble"); + return NULL; + } + QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new( (PyTypeObject *)&QuadPrecDType, NULL, NULL); if (new == NULL) { @@ -70,7 +70,7 @@ new_quaddtype_instance(QuadBackendType backend) static QuadPrecDTypeObject * ensure_canonical(QuadPrecDTypeObject *self) { - printf("Ensure Canonical is called\n"); + Py_INCREF(self); return self; } @@ -78,7 +78,7 @@ ensure_canonical(QuadPrecDTypeObject *self) static QuadPrecDTypeObject * common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) { - printf("Common Instance is called\n"); + if (dtype1->backend != dtype2->backend) { PyErr_SetString(PyExc_TypeError, "Cannot find common instance for QuadPrecDTypes with different backends"); @@ -91,7 +91,7 @@ common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) static PyArray_DTypeMeta * common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) { - printf("Common dtype is called\n"); + // Promote integer and floating-point types to QuadPrecDType if (other->type_num >= 0 && (PyTypeNum_ISINTEGER(other->type_num) || PyTypeNum_ISFLOAT(other->type_num))) { @@ -117,7 +117,7 @@ quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), P } QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; - printf("dtype.c: quadprec_discover_descriptor_from_pyobject is called with backend %d\n", quad_obj->backend); + return (PyArray_Descr *)new_quaddtype_instance(quad_obj->backend); } @@ -168,10 +168,7 @@ quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr) static PyArray_Descr * quadprec_default_descr(PyArray_DTypeMeta *cls) { - QuadPrecDTypeObject * a = (QuadPrecDTypeObject *)cls; - printf("Default descriptor called with backend: %d\n", a->backend); - QuadPrecDTypeObject * temp = new_quaddtype_instance(a->backend); - printf("Default descriptor made backend: %d\n", temp->backend); + QuadPrecDTypeObject * temp = new_quaddtype_instance(BACKEND_SLEEF); return (PyArray_Descr *)temp; } diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c index 994d1eba..89ec16f0 100644 --- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c +++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c @@ -13,12 +13,29 @@ #include "dtype.h" #include "umath.h" #include "quad_common.h" +#include "float.h" + +static PyObject* py_is_longdouble_128(PyObject* self, PyObject* args) { + if(sizeof(long double) == 16 && + LDBL_MANT_DIG == 113 && + LDBL_MAX_EXP == 16384) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} + +static PyMethodDef module_methods[] = { + {"is_longdouble_128", py_is_longdouble_128, METH_NOARGS, "Check if long double is 128-bit"}, + {NULL, NULL, 0, NULL} +}; static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, .m_name = "_quaddtype_main", .m_doc = "Quad (128-bit) floating point Data Type for NumPy with multiple backends", .m_size = -1, + .m_methods = module_methods }; PyMODINIT_FUNC diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 2adabc1f..2444047c 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -227,7 +227,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) { - printf("Descriptor Resolver is called\n"); QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; @@ -235,18 +234,14 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp const char *s1 = (descr_in1->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; const char *s2 = (descr_in2->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; - printf("1: %s %d %s\n", s1, descr_in1->backend, Py_TYPE(given_descrs[0])->tp_name); - printf("2: %s %d %s\n", s2, descr_in2->backend, Py_TYPE(given_descrs[1])->tp_name); // Determine target backend and if casting is needed NPY_CASTING casting = NPY_NO_CASTING; if (descr_in1->backend != descr_in2->backend) { target_backend = BACKEND_LONGDOUBLE; casting = NPY_SAFE_CASTING; - printf("Different backends detected. Casting to LONGDOUBLE.\n"); } else { target_backend = descr_in1->backend; - printf("Unified backend: %s\n", (target_backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"); } // Set up input descriptors, casting if necessary @@ -280,8 +275,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp loop_descrs[2] = given_descrs[2]; } } - - printf("Casting result: %d\n", casting); return casting; } @@ -291,7 +284,6 @@ quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data npy_intp const dimensions[], npy_intp const strides[], NpyAuxData *auxdata) { - printf("Umath: Generic Strided loop is calledn\n"); npy_intp N = dimensions[0]; char *in1_ptr = data[0], *in2_ptr = data[1]; char *out_ptr = data[2]; @@ -326,10 +318,6 @@ static int quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) { - printf("quad_ufunc_promoter called for ufunc: %s\n", ufunc->name); - printf("Entering quad_ufunc_promoter\n"); - printf("Ufunc name: %s\n", ufunc->name); - printf("nin: %d, nargs: %d\n", ufunc->nin, ufunc->nargs); int nin = ufunc->nin; int nargs = ufunc->nargs; @@ -343,23 +331,23 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], for (int i = 0; i < 3; i++) { Py_INCREF(op_dtypes[1]); new_op_dtypes[i] = op_dtypes[1]; - printf("new_op_dtypes[%d] set to %s\n", i, get_dtype_name(new_op_dtypes[i])); + } return 0; } // Check if any input or signature is QuadPrecision for (int i = 0; i < nin; i++) { - printf("iterating on dtype : %s\n", get_dtype_name(op_dtypes[i])); + if (op_dtypes[i] == &QuadPrecDType) { has_quad = true; - printf("QuadPrecision detected in input %d\n", i); + } } if (has_quad) { common = &QuadPrecDType; - printf("Using QuadPrecDType as common type\n"); + } else { for (int i = nin; i < nargs; i++) { @@ -367,11 +355,11 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], if (common == NULL) { Py_INCREF(signature[i]); common = signature[i]; - printf("Common type set to %s from signature\n", get_dtype_name(common)); + } else if (common != signature[i]) { Py_CLEAR(common); // Not homogeneous, unset common - printf("Output signature not homogeneous, cleared common type\n"); + break; } } @@ -379,16 +367,15 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], } // If no common output dtype, use standard promotion for inputs if (common == NULL) { - printf("Using standard promotion for inputs\n"); common = PyArray_PromoteDTypeSequence(nin, op_dtypes); if (common == NULL) { if (PyErr_ExceptionMatches(PyExc_TypeError)) { PyErr_Clear(); // Do not propagate normal promotion errors } - printf("Exiting quad_ufunc_promoter (promotion failed)\n"); + return -1; } - printf("Common type after promotion: %s\n", get_dtype_name(common)); + } // Set all new_op_dtypes to the common dtype @@ -397,20 +384,16 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], // If signature is specified for this argument, use it Py_INCREF(signature[i]); new_op_dtypes[i] = signature[i]; - printf("new_op_dtypes[%d] set to %s (from signature)\n", i, - get_dtype_name(new_op_dtypes[i])); } else { // Otherwise, use the common dtype Py_INCREF(common); new_op_dtypes[i] = common; - printf("new_op_dtypes[%d] set to %s (from common)\n", i, - get_dtype_name(new_op_dtypes[i])); } } Py_XDECREF(common); - printf("Exiting quad_ufunc_promoter\n"); + return 0; } diff --git a/temp.py b/temp.py deleted file mode 100644 index 6ce4fa9f..00000000 --- a/temp.py +++ /dev/null @@ -1,207 +0,0 @@ -import numpy_quaddtype as npq -import numpy as np - - -def test_scalar_ops(backend): - print(f"\nTesting scalar operations for {backend} backend:") - - # Create QuadPrecision instances - q1 = npq.QuadPrecision( - "3.14159265358979323846264338327950288", backend=backend) - q2 = npq.QuadPrecision( - "-2.71828182845904523536028747135266250", backend=backend) - - # Test unary operations - print("\nUnary operations:") - print(f" Negation of q1: {-q1}") - print(f" Absolute value of q2: {abs(q2)}") - - # Test binary operations - print("\nBinary operations:") - print(f" Addition: {q1 + q2}") - print(f" Subtraction: {q1 - q2}") - print(f" Multiplication: {q1 * q2}") - print(f" Division: {q1 / q2}") - - # Test comparison operations - print("\nComparison operations:") - print(f" q1 == q2: {q1 == q2}") - print(f" q1 != q2: {q1 != q2}") - print(f" q1 < q2: {q1 < q2}") - print(f" q1 <= q2: {q1 <= q2}") - print(f" q1 > q2: {q1 > q2}") - print(f" q1 >= q2: {q1 >= q2}") - - # Test operations with Python numbers - print("\nOperations with Python numbers:") - print(f" q1 + 1: {q1 + 1}") - print(f" q1 - 2.5: {q1 - 2.5}") - print(f" q1 * 3: {q1 * 3}") - print(f" q1 / 2: {q1 / 2}") - - # Test boolean conversion - print("\nBoolean conversion:") - print(f" bool(q1): {bool(q1)}") - print( - f" bool(npq.QuadPrecision('0', backend=backend)): {bool(npq.QuadPrecision('0', backend=backend))}") - - -def test_casting(backend): - print(f"\nTesting {backend} backend:") - - # Create QuadPrecision instances - q1 = npq.QuadPrecision( - "3.14159265358979323846264338327950288", backend=backend) - q2 = npq.QuadPrecision( - "-2.71828182845904523536028747135266250", backend=backend) - - # Test casting from QuadPrecision to numpy dtypes - print("Casting from QuadPrecision to numpy dtypes:") - print(f" float32: {np.float32(q1)}") - print(f" float64: {np.float64(q1)}") - print(f" int64: {np.int64(q1)}") - print(f" uint64: {np.uint64(q1)}") - - # Test casting from numpy dtypes to QuadPrecision - print("\nCasting from numpy dtypes to QuadPrecision:") - print( - f" float32: {np.float32(3.14159).astype(npq.QuadPrecDType(backend=backend))}") - print( - f" float64: {np.float64(2.71828182845904).astype(npq.QuadPrecDType(backend=backend))}") - print( - f" int64: {np.int64(-1234567890).astype(npq.QuadPrecDType(backend=backend))}") - print( - f" uint64: {np.uint64(9876543210).astype(npq.QuadPrecDType(backend=backend))}") - - # Test array operations - print("\nArray operations:") - q_array = np.array([q1, q2], dtype=npq.QuadPrecDType(backend=backend)) - print(f" QuadPrecision array: {q_array}") - - np_array = np.array([3.14, -2.71, 1.41, -1.73], dtype=np.float64) - q_from_np = np_array.astype(npq.QuadPrecDType(backend=backend)) - print(f" Numpy to QuadPrecision: {q_from_np}") - - back_to_np = np.array(q_from_np, dtype=np.float64) - print(f" QuadPrecision to Numpy: {back_to_np}") - - # Test precision maintenance - large_int = 12345678901234567890 - q_large = np.array([large_int], dtype=np.uint64).astype( - npq.QuadPrecDType(backend=backend))[0] - print(f"\nPrecision test:") - print(f" Original large int: {large_int}") - print(f" QuadPrecision: {q_large}") - print(f" Back to int: {np.int64(q_large)}") - - # Test edge cases - - -def test_edge_cases(backend): - print(f"\nTesting negative numbers for {backend} backend:") - - # Test various negative numbers - test_values = [ - -1.0, - -1e10, - -1e100, - -1e300, - np.nextafter(np.finfo(np.float64).min, 0), - np.finfo(np.float64).min - ] - - for value in test_values: - q_value = npq.QuadPrecision(str(value), backend=backend) - print(f" Original: {value}") - print(f" QuadPrecision: {q_value}") - print(f" Back to float64: {np.float64(q_value)}") - print() - - # Test value beyond float64 precision - beyond_float64_precision = "1.7976931348623157081452742373170435e+308" - q_beyond = npq.QuadPrecision(beyond_float64_precision, backend=backend) - print(f" Beyond float64 precision: {q_beyond}") - q_float64_max = npq.QuadPrecision( - str(np.finfo(np.float64).max), backend=backend) - diff = q_beyond - q_float64_max - print(f" Difference from float64 max: {diff}") - print( - f" Difference is positive: {diff > npq.QuadPrecision('0', backend=backend)}") - - # Test epsilon (smallest representable difference between two numbers) - q_epsilon = npq.QuadPrecision( - str(np.finfo(np.float64).eps), backend=backend) - print(f" Float64 epsilon in QuadPrecision: {q_epsilon}") - q_one = npq.QuadPrecision("1", backend=backend) - q_one_plus_epsilon = q_one + q_epsilon - print(f" 1 + epsilon != 1: {q_one_plus_epsilon != q_one}") - print(f" (1 + epsilon) - 1: {q_one_plus_epsilon - q_one}") - - -def test_ufuncs(backend): - print(f"\nTesting ufuncs for {backend} backend:") - - # Create QuadPrecision arrays - q_array1 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend)) - q_array2 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend)) - - # Test unary ufuncs - print("\nUnary ufuncs:") - print(f" negative: {np.negative(q_array1)}") - print(f" absolute: {np.absolute(q_array1)}") - print(f" rint: {np.rint(q_array1)}") - print(f" floor: {np.floor(q_array1)}") - print(f" ceil: {np.ceil(q_array1)}") - print(f" trunc: {np.trunc(q_array1)}") - print(f" sqrt: {np.sqrt(q_array1)}") - print(f" square: {np.square(q_array1)}") - print(f" log: {np.log(q_array1)}") - print(f" log2: {np.log2(q_array1)}") - print(f" log10: {np.log10(q_array1)}") - print(f" exp: {np.exp(q_array1)}") - print(f" exp2: {np.exp2(q_array1)}") - - # Test binary ufuncs - print("\nBinary ufuncs:") - print(f" add: {np.add(q_array1, q_array2)}") - print(f" subtract: {np.subtract(q_array1, q_array2)}") - print(f" multiply: {np.multiply(q_array1, q_array2)}") - print(f" divide: {np.divide(q_array1, q_array2)}") - print(f" power: {np.power(q_array1, q_array2)}") - print(f" mod: {np.mod(q_array1, q_array2)}") - print(f" minimum: {np.minimum(q_array1, q_array2)}") - print(f" maximum: {np.maximum(q_array1, q_array2)}") - - # Test comparison ufuncs - print("\nComparison ufuncs:") - print(f" equal: {np.equal(q_array1, q_array2)}") - print(f" not_equal: {np.not_equal(q_array1, q_array2)}") - print(f" less: {np.less(q_array1, q_array2)}") - print(f" less_equal: {np.less_equal(q_array1, q_array2)}") - print(f" greater: {np.greater(q_array1, q_array2)}") - print(f" greater_equal: {np.greater_equal(q_array1, q_array2)}") - - # Test mixed operations with numpy arrays - print(f"Testing backend: {backend}") - print("\nMixed operations with numpy arrays:") - np_array = np.array([1.0, 2.0, 3.0], dtype=np.float64) - print(f" add: {np.add(q_array1, np_array)}") - print(f" multiply: {np.multiply(q_array1, np_array)}") - print(f" divide: {np.divide(q_array1, np_array)}") - - # Test reduction operations - print("\nReduction operations:") - print(f" sum: {np.sum(q_array1)}") - print(f" prod: {np.prod(q_array1)}") - print(f" min: {np.min(q_array1)}") - print(f" max: {np.max(q_array1)}") - - -# Run tests for both backends -for backend in ['longdouble']: - test_scalar_ops(backend) - test_casting(backend) - test_edge_cases(backend) - test_ufuncs(backend) - -print("All tests completed successfully") From 22982a43f361e6513c7f0e6fd2b81a2250ce58a0 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Fri, 6 Sep 2024 19:09:12 +0530 Subject: [PATCH 10/32] adding dragon4 --- quaddtype/numpy_quaddtype/src/dragon4.c | 2057 +++++++++++++++++ quaddtype/numpy_quaddtype/src/dragon4.h | 70 + quaddtype/numpy_quaddtype/src/dtype.c | 12 +- .../numpy_quaddtype/src/quaddtype_main.c | 1 + quaddtype/numpy_quaddtype/src/scalar.c | 61 +- quaddtype/numpy_quaddtype/src/scalar.h | 3 + quaddtype/numpy_quaddtype/src/umath.cpp | 4 - 7 files changed, 2201 insertions(+), 7 deletions(-) create mode 100644 quaddtype/numpy_quaddtype/src/dragon4.c create mode 100644 quaddtype/numpy_quaddtype/src/dragon4.h diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c new file mode 100644 index 00000000..313ff312 --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/dragon4.c @@ -0,0 +1,2057 @@ +#include +#include +#include +#include +#include +#include +#include + +#define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API +#define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API +#define NPY_NO_DEPRECATED_API NPY_2_0_API_VERSION +#define NPY_TARGET_VERSION NPY_2_0_API_VERSION +#define NO_IMPORT_ARRAY +#define NO_IMPORT_UFUNC + +#include "dragon4.h" +#include "dtype.h" +#include "scalar.h" + +#if 0 +#define DEBUG_ASSERT(stmnt) assert(stmnt) +#else +#define DEBUG_ASSERT(stmnt) do {} while(0) +#endif + +#define c_BigInt_MaxBlocks 1023 +#define BIGINT_DRAGON4_GROUPSIZE 7 + +typedef struct BigInt { + npy_uint32 length; + npy_uint32 blocks[c_BigInt_MaxBlocks]; +} BigInt; + +typedef struct { + BigInt bigints[BIGINT_DRAGON4_GROUPSIZE]; + char repr[16384]; +} Dragon4_Scratch; + +static NPY_TLS Dragon4_Scratch _bigint_static; + +static inline npy_uint64 +bitmask_u64(npy_uint32 n) +{ + return ~(~((npy_uint64)0) << n); +} + +static inline npy_uint32 +bitmask_u32(npy_uint32 n) +{ + return ~(~((npy_uint32)0) << n); +} + +/* result = result * 10 */ +static void +BigInt_Multiply10(BigInt *result) +{ + /* multiply all the blocks */ + npy_uint64 carry = 0; + + npy_uint32 *cur = result->blocks; + npy_uint32 *end = result->blocks + result->length; + for ( ; cur != end; ++cur) { + npy_uint64 product = (npy_uint64)(*cur) * 10ull + carry; + (*cur) = (npy_uint32)(product & bitmask_u64(32)); + carry = product >> 32; + } + + if (carry != 0) { + /* grow the array */ + DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks); + *cur = (npy_uint32)carry; + ++result->length; + } +} + +static npy_uint32 g_PowerOf10_U32[] = +{ + 1, /* 10 ^ 0 */ + 10, /* 10 ^ 1 */ + 100, /* 10 ^ 2 */ + 1000, /* 10 ^ 3 */ + 10000, /* 10 ^ 4 */ + 100000, /* 10 ^ 5 */ + 1000000, /* 10 ^ 6 */ + 10000000, /* 10 ^ 7 */ +}; + +/* + * Note: This has a lot of wasted space in the big integer structures of the + * early table entries. It wouldn't be terribly hard to make the multiply + * function work on integer pointers with an array length instead of + * the BigInt struct which would allow us to store a minimal amount of + * data here. + */ +static BigInt g_PowerOf10_Big[] = +{ + /* 10 ^ 8 */ + { 1, { 100000000 } }, + /* 10 ^ 16 */ + { 2, { 0x6fc10000, 0x002386f2 } }, + /* 10 ^ 32 */ + { 4, { 0x00000000, 0x85acef81, 0x2d6d415b, 0x000004ee, } }, + /* 10 ^ 64 */ + { 7, { 0x00000000, 0x00000000, 0xbf6a1f01, 0x6e38ed64, 0xdaa797ed, + 0xe93ff9f4, 0x00184f03, } }, + /* 10 ^ 128 */ + { 14, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2e953e01, + 0x03df9909, 0x0f1538fd, 0x2374e42f, 0xd3cff5ec, 0xc404dc08, + 0xbccdb0da, 0xa6337f19, 0xe91f2603, 0x0000024e, } }, + /* 10 ^ 256 */ + { 27, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x982e7c01, 0xbed3875b, + 0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70, 0xd595d80f, + 0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e, + 0xcc5573c0, 0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc, + 0x5fdcefce, 0x000553f7, } }, + /* 10 ^ 512 */ + { 54, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0xfc6cf801, 0x77f27267, 0x8f9546dc, 0x5d96976f, + 0xb83a8a97, 0xc31e1ad9, 0x46c40513, 0x94e65747, 0xc88976c1, + 0x4475b579, 0x28f8733b, 0xaa1da1bf, 0x703ed321, 0x1e25cfea, + 0xb21a2f22, 0xbc51fb2e, 0x96e14f5d, 0xbfa3edac, 0x329c57ae, + 0xe7fc7153, 0xc3fc0695, 0x85a91924, 0xf95f635e, 0xb2908ee0, + 0x93abade4, 0x1366732a, 0x9449775c, 0x69be5b0e, 0x7343afac, + 0xb099bc81, 0x45a71d46, 0xa2699748, 0x8cb07303, 0x8a0b1f13, + 0x8cab8a97, 0xc1d238d9, 0x633415d4, 0x0000001c, } }, + /* 10 ^ 1024 */ + { 107, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x2919f001, 0xf55b2b72, 0x6e7c215b, + 0x1ec29f86, 0x991c4e87, 0x15c51a88, 0x140ac535, 0x4c7d1e1a, + 0xcc2cd819, 0x0ed1440e, 0x896634ee, 0x7de16cfb, 0x1e43f61f, + 0x9fce837d, 0x231d2b9c, 0x233e55c7, 0x65dc60d7, 0xf451218b, + 0x1c5cd134, 0xc9635986, 0x922bbb9f, 0xa7e89431, 0x9f9f2a07, + 0x62be695a, 0x8e1042c4, 0x045b7a74, 0x1abe1de3, 0x8ad822a5, + 0xba34c411, 0xd814b505, 0xbf3fdeb3, 0x8fc51a16, 0xb1b896bc, + 0xf56deeec, 0x31fb6bfd, 0xb6f4654b, 0x101a3616, 0x6b7595fb, + 0xdc1a47fe, 0x80d98089, 0x80bda5a5, 0x9a202882, 0x31eb0f66, + 0xfc8f1f90, 0x976a3310, 0xe26a7b7e, 0xdf68368a, 0x3ce3a0b8, + 0x8e4262ce, 0x75a351a2, 0x6cb0b6c9, 0x44597583, 0x31b5653f, + 0xc356e38a, 0x35faaba6, 0x0190fba0, 0x9fc4ed52, 0x88bc491b, + 0x1640114a, 0x005b8041, 0xf4f3235e, 0x1e8d4649, 0x36a8de06, + 0x73c55349, 0xa7e6bd2a, 0xc1a6970c, 0x47187094, 0xd2db49ef, + 0x926c3f5b, 0xae6209d4, 0x2d433949, 0x34f4a3c6, 0xd4305d94, + 0xd9d61a05, 0x00000325, } }, + /* 10 ^ 2048 */ + { 213, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1333e001, + 0xe3096865, 0xb27d4d3f, 0x49e28dcf, 0xec2e4721, 0xee87e354, + 0xb6067584, 0x368b8abb, 0xa5e5a191, 0x2ed56d55, 0xfd827773, + 0xea50d142, 0x51b78db2, 0x98342c9e, 0xc850dabc, 0x866ed6f1, + 0x19342c12, 0x92794987, 0xd2f869c2, 0x66912e4a, 0x71c7fd8f, + 0x57a7842d, 0x235552eb, 0xfb7fedcc, 0xf3861ce0, 0x38209ce1, + 0x9713b449, 0x34c10134, 0x8c6c54de, 0xa7a8289c, 0x2dbb6643, + 0xe3cb64f3, 0x8074ff01, 0xe3892ee9, 0x10c17f94, 0xa8f16f92, + 0xa8281ed6, 0x967abbb3, 0x5a151440, 0x9952fbed, 0x13b41e44, + 0xafe609c3, 0xa2bca416, 0xf111821f, 0xfb1264b4, 0x91bac974, + 0xd6c7d6ab, 0x8e48ff35, 0x4419bd43, 0xc4a65665, 0x685e5510, + 0x33554c36, 0xab498697, 0x0dbd21fe, 0x3cfe491d, 0x982da466, + 0xcbea4ca7, 0x9e110c7b, 0x79c56b8a, 0x5fc5a047, 0x84d80e2e, + 0x1aa9f444, 0x730f203c, 0x6a57b1ab, 0xd752f7a6, 0x87a7dc62, + 0x944545ff, 0x40660460, 0x77c1a42f, 0xc9ac375d, 0xe866d7ef, + 0x744695f0, 0x81428c85, 0xa1fc6b96, 0xd7917c7b, 0x7bf03c19, + 0x5b33eb41, 0x5715f791, 0x8f6cae5f, 0xdb0708fd, 0xb125ac8e, + 0x785ce6b7, 0x56c6815b, 0x6f46eadb, 0x4eeebeee, 0x195355d8, + 0xa244de3c, 0x9d7389c0, 0x53761abd, 0xcf99d019, 0xde9ec24b, + 0x0d76ce39, 0x70beb181, 0x2e55ecee, 0xd5f86079, 0xf56d9d4b, + 0xfb8886fb, 0x13ef5a83, 0x408f43c5, 0x3f3389a4, 0xfad37943, + 0x58ccf45c, 0xf82df846, 0x415c7f3e, 0x2915e818, 0x8b3d5cf4, + 0x6a445f27, 0xf8dbb57a, 0xca8f0070, 0x8ad803ec, 0xb2e87c34, + 0x038f9245, 0xbedd8a6c, 0xc7c9dee0, 0x0eac7d56, 0x2ad3fa14, + 0xe0de0840, 0xf775677c, 0xf1bd0ad5, 0x92be221e, 0x87fa1fb9, + 0xce9d04a4, 0xd2c36fa9, 0x3f6f7024, 0xb028af62, 0x907855ee, + 0xd83e49d6, 0x4efac5dc, 0xe7151aab, 0x77cd8c6b, 0x0a753b7d, + 0x0af908b4, 0x8c983623, 0xe50f3027, 0x94222771, 0x1d08e2d6, + 0xf7e928e6, 0xf2ee5ca6, 0x1b61b93c, 0x11eb962b, 0x9648b21c, + 0xce2bcba1, 0x34f77154, 0x7bbebe30, 0xe526a319, 0x8ce329ac, + 0xde4a74d2, 0xb5dc53d5, 0x0009e8b3, } }, + /* 10 ^ 4096 */ + { 426, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x2a67c001, 0xd4724e8d, + 0x8efe7ae7, 0xf89a1e90, 0xef084117, 0x54e05154, 0x13b1bb51, + 0x506be829, 0xfb29b172, 0xe599574e, 0xf0da6146, 0x806c0ed3, + 0xb86ae5be, 0x45155e93, 0xc0591cc2, 0x7e1e7c34, 0x7c4823da, + 0x1d1f4cce, 0x9b8ba1e8, 0xd6bfdf75, 0xe341be10, 0xc2dfae78, + 0x016b67b2, 0x0f237f1a, 0x3dbeabcd, 0xaf6a2574, 0xcab3e6d7, + 0x142e0e80, 0x61959127, 0x2c234811, 0x87009701, 0xcb4bf982, + 0xf8169c84, 0x88052f8c, 0x68dde6d4, 0xbc131761, 0xff0b0905, + 0x54ab9c41, 0x7613b224, 0x1a1c304e, 0x3bfe167b, 0x441c2d47, + 0x4f6cea9c, 0x78f06181, 0xeb659fb8, 0x30c7ae41, 0x947e0d0e, + 0xa1ebcad7, 0xd97d9556, 0x2130504d, 0x1a8309cb, 0xf2acd507, + 0x3f8ec72a, 0xfd82373a, 0x95a842bc, 0x280f4d32, 0xf3618ac0, + 0x811a4f04, 0x6dc3a5b4, 0xd3967a1b, 0x15b8c898, 0xdcfe388f, + 0x454eb2a0, 0x8738b909, 0x10c4e996, 0x2bd9cc11, 0x3297cd0c, + 0x655fec30, 0xae0725b1, 0xf4090ee8, 0x037d19ee, 0x398c6fed, + 0x3b9af26b, 0xc994a450, 0xb5341743, 0x75a697b2, 0xac50b9c1, + 0x3ccb5b92, 0xffe06205, 0xa8329761, 0xdfea5242, 0xeb83cadb, + 0xe79dadf7, 0x3c20ee69, 0x1e0a6817, 0x7021b97a, 0x743074fa, + 0x176ca776, 0x77fb8af6, 0xeca19beb, 0x92baf1de, 0xaf63b712, + 0xde35c88b, 0xa4eb8f8c, 0xe137d5e9, 0x40b464a0, 0x87d1cde8, + 0x42923bbd, 0xcd8f62ff, 0x2e2690f3, 0x095edc16, 0x59c89f1b, + 0x1fa8fd5d, 0x5138753d, 0x390a2b29, 0x80152f18, 0x2dd8d925, + 0xf984d83e, 0x7a872e74, 0xc19e1faf, 0xed4d542d, 0xecf9b5d0, + 0x9462ea75, 0xc53c0adf, 0x0caea134, 0x37a2d439, 0xc8fa2e8a, + 0x2181327e, 0x6e7bb827, 0x2d240820, 0x50be10e0, 0x5893d4b8, + 0xab312bb9, 0x1f2b2322, 0x440b3f25, 0xbf627ede, 0x72dac789, + 0xb608b895, 0x78787e2a, 0x86deb3f0, 0x6fee7aab, 0xbb9373f4, + 0x27ecf57b, 0xf7d8b57e, 0xfca26a9f, 0x3d04e8d2, 0xc9df13cb, + 0x3172826a, 0xcd9e8d7c, 0xa8fcd8e0, 0xb2c39497, 0x307641d9, + 0x1cc939c1, 0x2608c4cf, 0xb6d1c7bf, 0x3d326a7e, 0xeeaf19e6, + 0x8e13e25f, 0xee63302b, 0x2dfe6d97, 0x25971d58, 0xe41d3cc4, + 0x0a80627c, 0xab8db59a, 0x9eea37c8, 0xe90afb77, 0x90ca19cf, + 0x9ee3352c, 0x3613c850, 0xfe78d682, 0x788f6e50, 0x5b060904, + 0xb71bd1a4, 0x3fecb534, 0xb32c450c, 0x20c33857, 0xa6e9cfda, + 0x0239f4ce, 0x48497187, 0xa19adb95, 0xb492ed8a, 0x95aca6a8, + 0x4dcd6cd9, 0xcf1b2350, 0xfbe8b12a, 0x1a67778c, 0x38eb3acc, + 0xc32da383, 0xfb126ab1, 0xa03f40a8, 0xed5bf546, 0xe9ce4724, + 0x4c4a74fd, 0x73a130d8, 0xd9960e2d, 0xa2ebd6c1, 0x94ab6feb, + 0x6f233b7c, 0x49126080, 0x8e7b9a73, 0x4b8c9091, 0xd298f999, + 0x35e836b5, 0xa96ddeff, 0x96119b31, 0x6b0dd9bc, 0xc6cc3f8d, + 0x282566fb, 0x72b882e7, 0xd6769f3b, 0xa674343d, 0x00fc509b, + 0xdcbf7789, 0xd6266a3f, 0xae9641fd, 0x4e89541b, 0x11953407, + 0x53400d03, 0x8e0dd75a, 0xe5b53345, 0x108f19ad, 0x108b89bc, + 0x41a4c954, 0xe03b2b63, 0x437b3d7f, 0x97aced8e, 0xcbd66670, + 0x2c5508c2, 0x650ebc69, 0x5c4f2ef0, 0x904ff6bf, 0x9985a2df, + 0x9faddd9e, 0x5ed8d239, 0x25585832, 0xe3e51cb9, 0x0ff4f1d4, + 0x56c02d9a, 0x8c4ef804, 0xc1a08a13, 0x13fd01c8, 0xe6d27671, + 0xa7c234f4, 0x9d0176cc, 0xd0d73df2, 0x4d8bfa89, 0x544f10cd, + 0x2b17e0b2, 0xb70a5c7d, 0xfd86fe49, 0xdf373f41, 0x214495bb, + 0x84e857fd, 0x00d313d5, 0x0496fcbe, 0xa4ba4744, 0xe8cac982, + 0xaec29e6e, 0x87ec7038, 0x7000a519, 0xaeee333b, 0xff66e42c, + 0x8afd6b25, 0x03b4f63b, 0xbd7991dc, 0x5ab8d9c7, 0x2ed4684e, + 0x48741a6c, 0xaf06940d, 0x2fdc6349, 0xb03d7ecd, 0xe974996f, + 0xac7867f9, 0x52ec8721, 0xbcdd9d4a, 0x8edd2d00, 0x3557de06, + 0x41c759f8, 0x3956d4b9, 0xa75409f2, 0x123cd8a1, 0xb6100fab, + 0x3e7b21e2, 0x2e8d623b, 0x92959da2, 0xbca35f77, 0x200c03a5, + 0x35fcb457, 0x1bb6c6e4, 0xf74eb928, 0x3d5d0b54, 0x87cc1d21, + 0x4964046f, 0x18ae4240, 0xd868b275, 0x8bd2b496, 0x1c5563f4, + 0xc234d8f5, 0xf868e970, 0xf9151fff, 0xae7be4a2, 0x271133ee, + 0xbb0fd922, 0x25254932, 0xa60a9fc0, 0x104bcd64, 0x30290145, + 0x00000062, } }, +}; + +static int +BigInt_IsZero(const BigInt *i) +{ + return i->length == 0; +} + +/* + * Returns 1 if the value is even + */ +static int +BigInt_IsEven(const BigInt *i) +{ + return (i->length == 0) || ( (i->blocks[0] % 2) == 0); +} + +static void +BigInt_Copy(BigInt *dst, const BigInt *src) +{ + npy_uint32 length = src->length; + npy_uint32 * dstp = dst->blocks; + const npy_uint32 *srcp; + for (srcp = src->blocks; srcp != src->blocks + length; ++dstp, ++srcp) { + *dstp = *srcp; + } + dst->length = length; +} + +/* result = result << shift */ +static void +BigInt_ShiftLeft(BigInt *result, npy_uint32 shift) +{ + npy_uint32 shiftBlocks = shift / 32; + npy_uint32 shiftBits = shift % 32; + + /* process blocks high to low so that we can safely process in place */ + const npy_uint32 *pInBlocks = result->blocks; + npy_int32 inLength = result->length; + npy_uint32 *pInCur, *pOutCur; + + DEBUG_ASSERT(inLength + shiftBlocks < c_BigInt_MaxBlocks); + DEBUG_ASSERT(shift != 0); + + /* check if the shift is block aligned */ + if (shiftBits == 0) { + npy_uint32 i; + + /* copy blocks from high to low */ + for (pInCur = result->blocks + result->length, + pOutCur = pInCur + shiftBlocks; + pInCur >= pInBlocks; + --pInCur, --pOutCur) { + *pOutCur = *pInCur; + } + + /* zero the remaining low blocks */ + for (i = 0; i < shiftBlocks; ++i) { + result->blocks[i] = 0; + } + + result->length += shiftBlocks; + } + /* else we need to shift partial blocks */ + else { + npy_uint32 i; + npy_int32 inBlockIdx = inLength - 1; + npy_uint32 outBlockIdx = inLength + shiftBlocks; + + /* output the initial blocks */ + const npy_uint32 lowBitsShift = (32 - shiftBits); + npy_uint32 highBits = 0; + npy_uint32 block = result->blocks[inBlockIdx]; + npy_uint32 lowBits = block >> lowBitsShift; + + /* set the length to hold the shifted blocks */ + DEBUG_ASSERT(outBlockIdx < c_BigInt_MaxBlocks); + result->length = outBlockIdx + 1; + + while (inBlockIdx > 0) { + result->blocks[outBlockIdx] = highBits | lowBits; + highBits = block << shiftBits; + + --inBlockIdx; + --outBlockIdx; + + block = result->blocks[inBlockIdx]; + lowBits = block >> lowBitsShift; + } + + /* output the final blocks */ + DEBUG_ASSERT(outBlockIdx == shiftBlocks + 1); + result->blocks[outBlockIdx] = highBits | lowBits; + result->blocks[outBlockIdx-1] = block << shiftBits; + + /* zero the remaining low blocks */ + for (i = 0; i < shiftBlocks; ++i) { + result->blocks[i] = 0; + } + + /* check if the terminating block has no set bits */ + if (result->blocks[result->length - 1] == 0) { + --result->length; + } + } +} + +static void +BigInt_Set_uint32(BigInt *i, npy_uint32 val) +{ + if (val != 0) { + i->blocks[0] = val; + i->length = 1; + } + else { + i->length = 0; + } +} + +/* result = 2^exponent */ +static inline void +BigInt_Pow2(BigInt *result, npy_uint32 exponent) +{ + npy_uint32 bitIdx; + npy_uint32 blockIdx = exponent / 32; + npy_uint32 i; + + DEBUG_ASSERT(blockIdx < c_BigInt_MaxBlocks); + + for (i = 0; i <= blockIdx; ++i) { + result->blocks[i] = 0; + } + + result->length = blockIdx + 1; + + bitIdx = (exponent % 32); + result->blocks[blockIdx] |= ((npy_uint32)1 << bitIdx); +} + +static void +BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo) +{ + if (hi > bitmask_u64(32)) { + i->length = 4; + } + else if (hi != 0) { + i->length = 3; + } + else if (lo > bitmask_u64(32)) { + i->length = 2; + } + else if (lo != 0) { + i->length = 1; + } + else { + i->length = 0; + } + + /* Note deliberate fallthrough in this switch */ + switch (i->length) { + case 4: + i->blocks[3] = (hi >> 32) & bitmask_u64(32); + case 3: + i->blocks[2] = hi & bitmask_u64(32); + case 2: + i->blocks[1] = (lo >> 32) & bitmask_u64(32); + case 1: + i->blocks[0] = lo & bitmask_u64(32); + } +} + +/* result = lhs * rhs */ +static void +BigInt_Multiply_int(BigInt *result, const BigInt *lhs, npy_uint32 rhs) +{ + /* perform long multiplication */ + npy_uint32 carry = 0; + npy_uint32 *resultCur = result->blocks; + const npy_uint32 *pLhsCur = lhs->blocks; + const npy_uint32 *pLhsEnd = lhs->blocks + lhs->length; + for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) { + npy_uint64 product = (npy_uint64)(*pLhsCur) * rhs + carry; + *resultCur = (npy_uint32)(product & bitmask_u64(32)); + carry = product >> 32; + } + + /* if there is a remaining carry, grow the array */ + if (carry != 0) { + /* grow the array */ + DEBUG_ASSERT(lhs->length + 1 <= c_BigInt_MaxBlocks); + *resultCur = (npy_uint32)carry; + result->length = lhs->length + 1; + } + else { + result->length = lhs->length; + } +} + +/* + * result = lhs * rhs + */ +static void +BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs) +{ + const BigInt *large; + const BigInt *small; + npy_uint32 maxResultLen; + npy_uint32 *cur, *end, *resultStart; + const npy_uint32 *smallCur; + + DEBUG_ASSERT(result != lhs && result != rhs); + + /* determine which operand has the smaller length */ + if (lhs->length < rhs->length) { + small = lhs; + large = rhs; + } + else { + small = rhs; + large = lhs; + } + + /* set the maximum possible result length */ + maxResultLen = large->length + small->length; + DEBUG_ASSERT(maxResultLen <= c_BigInt_MaxBlocks); + + /* clear the result data */ + for (cur = result->blocks, end = cur + maxResultLen; cur != end; ++cur) { + *cur = 0; + } + + /* perform standard long multiplication for each small block */ + resultStart = result->blocks; + for (smallCur = small->blocks; + smallCur != small->blocks + small->length; + ++smallCur, ++resultStart) { + /* + * if non-zero, multiply against all the large blocks and add into the + * result + */ + const npy_uint32 multiplier = *smallCur; + if (multiplier != 0) { + const npy_uint32 *largeCur = large->blocks; + npy_uint32 *resultCur = resultStart; + npy_uint64 carry = 0; + do { + npy_uint64 product = (*resultCur) + + (*largeCur)*(npy_uint64)multiplier + carry; + carry = product >> 32; + *resultCur = product & bitmask_u64(32); + ++largeCur; + ++resultCur; + } while(largeCur != large->blocks + large->length); + + DEBUG_ASSERT(resultCur < result->blocks + maxResultLen); + *resultCur = (npy_uint32)(carry & bitmask_u64(32)); + } + } + + /* check if the terminating block has no set bits */ + if (maxResultLen > 0 && result->blocks[maxResultLen - 1] == 0) { + result->length = maxResultLen-1; + } + else { + result->length = maxResultLen; + } +} + +/* in = in * 10^exponent */ +static void +BigInt_MultiplyPow10(BigInt *in, npy_uint32 exponent, BigInt *temp) +{ + /* use two temporary values to reduce large integer copy operations */ + BigInt *curTemp, *pNextTemp; + npy_uint32 smallExponent; + npy_uint32 tableIdx = 0; + + /* make sure the exponent is within the bounds of the lookup table data */ + DEBUG_ASSERT(exponent < 8192); + + /* + * initialize the result by looking up a 32-bit power of 10 corresponding to + * the first 3 bits + */ + smallExponent = exponent & bitmask_u32(3); + if (smallExponent != 0) { + BigInt_Multiply_int(temp, in, g_PowerOf10_U32[smallExponent]); + curTemp = temp; + pNextTemp = in; + } + else { + curTemp = in; + pNextTemp = temp; + } + + /* remove the low bits that we used for the 32-bit lookup table */ + exponent >>= 3; + + /* while there are remaining bits in the exponent to be processed */ + while (exponent != 0) { + /* if the current bit is set, multiply by this power of 10 */ + if (exponent & 1) { + BigInt *pSwap; + + /* multiply into the next temporary */ + BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]); + + /* swap to the next temporary */ + pSwap = curTemp; + curTemp = pNextTemp; + pNextTemp = pSwap; + } + + /* advance to the next bit */ + ++tableIdx; + exponent >>= 1; + } + + /* output the result */ + if (curTemp != in){ + BigInt_Copy(in, curTemp); + } +} + +/* result = 10^exponent */ +static void +BigInt_Pow10(BigInt *result, npy_uint32 exponent, BigInt *temp) +{ + /* use two temporary values to reduce large integer copy operations */ + BigInt *curTemp = result; + BigInt *pNextTemp = temp; + npy_uint32 smallExponent; + npy_uint32 tableIdx = 0; + + /* make sure the exponent is within the bounds of the lookup table data */ + DEBUG_ASSERT(exponent < 8192); + + /* + * initialize the result by looking up a 32-bit power of 10 corresponding to + * the first 3 bits + */ + smallExponent = exponent & bitmask_u32(3); + BigInt_Set_uint32(curTemp, g_PowerOf10_U32[smallExponent]); + + /* remove the low bits that we used for the 32-bit lookup table */ + exponent >>= 3; + + /* while there are remaining bits in the exponent to be processed */ + while (exponent != 0) { + /* if the current bit is set, multiply by this power of 10 */ + if (exponent & 1) { + BigInt *pSwap; + + /* multiply into the next temporary */ + BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]); + + /* swap to the next temporary */ + pSwap = curTemp; + curTemp = pNextTemp; + pNextTemp = pSwap; + } + + /* advance to the next bit */ + ++tableIdx; + exponent >>= 1; + } + + /* output the result */ + if (curTemp != result) { + BigInt_Copy(result, curTemp); + } +} + +/* result = lhs + rhs */ +static void +BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs) +{ + /* determine which operand has the smaller length */ + const BigInt *large, *small; + npy_uint64 carry = 0; + const npy_uint32 *largeCur, *smallCur, *largeEnd, *smallEnd; + npy_uint32 *resultCur; + + if (lhs->length < rhs->length) { + small = lhs; + large = rhs; + } + else { + small = rhs; + large = lhs; + } + + /* The output will be at least as long as the largest input */ + result->length = large->length; + + /* Add each block and add carry the overflow to the next block */ + largeCur = large->blocks; + largeEnd = largeCur + large->length; + smallCur = small->blocks; + smallEnd = smallCur + small->length; + resultCur = result->blocks; + while (smallCur != smallEnd) { + npy_uint64 sum = carry + (npy_uint64)(*largeCur) + + (npy_uint64)(*smallCur); + carry = sum >> 32; + *resultCur = sum & bitmask_u64(32); + ++largeCur; + ++smallCur; + ++resultCur; + } + + /* Add the carry to any blocks that only exist in the large operand */ + while (largeCur != largeEnd) { + npy_uint64 sum = carry + (npy_uint64)(*largeCur); + carry = sum >> 32; + (*resultCur) = sum & bitmask_u64(32); + ++largeCur; + ++resultCur; + } + + /* If there's still a carry, append a new block */ + if (carry != 0) { + DEBUG_ASSERT(carry == 1); + DEBUG_ASSERT((npy_uint32)(resultCur - result->blocks) == + large->length && (large->length < c_BigInt_MaxBlocks)); + *resultCur = 1; + result->length = large->length + 1; + } + else { + result->length = large->length; + } +} + +/* result = in * 2 */ +static void +BigInt_Multiply2(BigInt *result, const BigInt *in) +{ + /* shift all the blocks by one */ + npy_uint32 carry = 0; + + npy_uint32 *resultCur = result->blocks; + const npy_uint32 *pLhsCur = in->blocks; + const npy_uint32 *pLhsEnd = in->blocks + in->length; + for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) { + npy_uint32 cur = *pLhsCur; + *resultCur = (cur << 1) | carry; + carry = cur >> 31; + } + + if (carry != 0) { + /* grow the array */ + DEBUG_ASSERT(in->length + 1 <= c_BigInt_MaxBlocks); + *resultCur = carry; + result->length = in->length + 1; + } + else { + result->length = in->length; + } +} + +/* result = result * 2 */ +static void +BigInt_Multiply2_inplace(BigInt *result) +{ + /* shift all the blocks by one */ + npy_uint32 carry = 0; + + npy_uint32 *cur = result->blocks; + npy_uint32 *end = result->blocks + result->length; + for ( ; cur != end; ++cur) { + npy_uint32 tmpcur = *cur; + *cur = (tmpcur << 1) | carry; + carry = tmpcur >> 31; + } + + if (carry != 0) { + /* grow the array */ + DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks); + *cur = carry; + ++result->length; + } +} + +static npy_int32 +BigInt_Compare(const BigInt *lhs, const BigInt *rhs) +{ + int i; + + /* A bigger length implies a bigger number. */ + npy_int32 lengthDiff = lhs->length - rhs->length; + if (lengthDiff != 0) { + return lengthDiff; + } + + /* Compare blocks one by one from high to low. */ + for (i = lhs->length - 1; i >= 0; --i) { + if (lhs->blocks[i] == rhs->blocks[i]) { + continue; + } + else if (lhs->blocks[i] > rhs->blocks[i]) { + return 1; + } + else { + return -1; + } + } + + /* no blocks differed */ + return 0; +} + +static npy_uint32 +BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor) +{ + npy_uint32 length, quotient; + const npy_uint32 *finalDivisorBlock; + npy_uint32 *finalDividendBlock; + + /* + * Check that the divisor has been correctly shifted into range and that it + * is not smaller than the dividend in length. + */ + DEBUG_ASSERT(!divisor->length == 0 && + divisor->blocks[divisor->length-1] >= 8 && + divisor->blocks[divisor->length-1] < bitmask_u64(32) && + dividend->length <= divisor->length); + + /* + * If the dividend is smaller than the divisor, the quotient is zero and the + * divisor is already the remainder. + */ + length = divisor->length; + if (dividend->length < divisor->length) { + return 0; + } + + finalDivisorBlock = divisor->blocks + length - 1; + finalDividendBlock = dividend->blocks + length - 1; + + /* + * Compute an estimated quotient based on the high block value. This will + * either match the actual quotient or undershoot by one. + */ + quotient = *finalDividendBlock / (*finalDivisorBlock + 1); + DEBUG_ASSERT(quotient <= 9); + + /* Divide out the estimated quotient */ + if (quotient != 0) { + /* dividend = dividend - divisor*quotient */ + const npy_uint32 *divisorCur = divisor->blocks; + npy_uint32 *dividendCur = dividend->blocks; + + npy_uint64 borrow = 0; + npy_uint64 carry = 0; + do { + npy_uint64 difference, product; + + product = (npy_uint64)*divisorCur * (npy_uint64)quotient + carry; + carry = product >> 32; + + difference = (npy_uint64)*dividendCur + - (product & bitmask_u64(32)) - borrow; + borrow = (difference >> 32) & 1; + + *dividendCur = difference & bitmask_u64(32); + + ++divisorCur; + ++dividendCur; + } while(divisorCur <= finalDivisorBlock); + + /* remove all leading zero blocks from dividend */ + while (length > 0 && dividend->blocks[length - 1] == 0) { + --length; + } + + dividend->length = length; + } + + /* + * If the dividend is still larger than the divisor, we overshot our + * estimate quotient. To correct, we increment the quotient and subtract one + * more divisor from the dividend. + */ + if (BigInt_Compare(dividend, divisor) >= 0) { + /* dividend = dividend - divisor */ + const npy_uint32 *divisorCur = divisor->blocks; + npy_uint32 *dividendCur = dividend->blocks; + npy_uint64 borrow = 0; + + ++quotient; + + do { + npy_uint64 difference = (npy_uint64)*dividendCur + - (npy_uint64)*divisorCur - borrow; + borrow = (difference >> 32) & 1; + + *dividendCur = difference & bitmask_u64(32); + + ++divisorCur; + ++dividendCur; + } while(divisorCur <= finalDivisorBlock); + + /* remove all leading zero blocks from dividend */ + while (length > 0 && dividend->blocks[length - 1] == 0) { + --length; + } + + dividend->length = length; + } + + return quotient; +} + +static npy_uint32 +LogBase2_32(npy_uint32 val) +{ + static const npy_uint8 logTable[256] = + { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + }; + + npy_uint32 temp; + + temp = val >> 24; + if (temp) { + return 24 + logTable[temp]; + } + + temp = val >> 16; + if (temp) { + return 16 + logTable[temp]; + } + + temp = val >> 8; + if (temp) { + return 8 + logTable[temp]; + } + + return logTable[val]; +} + +static npy_uint32 +LogBase2_64(npy_uint64 val) +{ + npy_uint64 temp; + + temp = val >> 32; + if (temp) { + return 32 + LogBase2_32((npy_uint32)temp); + } + + return LogBase2_32((npy_uint32)val); +} + +static npy_uint32 +LogBase2_128(npy_uint64 hi, npy_uint64 lo) +{ + if (hi) { + return 64 + LogBase2_64(hi); + } + + return LogBase2_64(lo); +} + +static npy_uint32 +PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, + npy_uint32 mantissaHexWidth, char signbit) +{ + npy_uint32 maxPrintLen = bufferSize-1; + npy_uint32 pos = 0; + + DEBUG_ASSERT(bufferSize > 0); + + /* Check for infinity */ + if (mantissa == 0) { + npy_uint32 printLen; + + /* only print sign for inf values (though nan can have a sign set) */ + if (signbit == '+') { + if (pos < maxPrintLen-1) { + buffer[pos++] = '+'; + } + } + else if (signbit == '-') { + if (pos < maxPrintLen-1) { + buffer[pos++] = '-'; + } + } + + /* copy and make sure the buffer is terminated */ + printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos; + memcpy(buffer + pos, "inf", printLen); + buffer[pos + printLen] = '\0'; + return pos + printLen; + } + else { + /* copy and make sure the buffer is terminated */ + npy_uint32 printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos; + memcpy(buffer + pos, "nan", printLen); + buffer[pos + printLen] = '\0'; + + /* + * For numpy we ignore unusual mantissa values for nan, but keep this + * code in case we change our mind later. + * + * // append HEX value + * if (maxPrintLen > 3) { + * printLen += PrintHex(buffer+3, bufferSize-3, mantissa, + * mantissaHexWidth); + * } + */ + + return pos + printLen; + } +} + +static npy_uint32 +Dragon4(BigInt *bigints, const npy_int32 exponent, + const npy_uint32 mantissaBit, const npy_bool hasUnequalMargins, + const DigitMode digitMode, const CutoffMode cutoffMode, + npy_int32 cutoff_max, npy_int32 cutoff_min, char *pOutBuffer, + npy_uint32 bufferSize, npy_int32 *pOutExponent) +{ + char *curDigit = pOutBuffer; + + /* + * We compute values in integer format by rescaling as + * mantissa = scaledValue / scale + * marginLow = scaledMarginLow / scale + * marginHigh = scaledMarginHigh / scale + * Here, marginLow and marginHigh represent 1/2 of the distance to the next + * floating point value above/below the mantissa. + * + * scaledMarginHigh will point to scaledMarginLow in the case they must be + * equal to each other, otherwise it will point to optionalMarginHigh. + */ + BigInt *mantissa = &bigints[0]; /* the only initialized bigint */ + BigInt *scale = &bigints[1]; + BigInt *scaledValue = &bigints[2]; + BigInt *scaledMarginLow = &bigints[3]; + BigInt *scaledMarginHigh; + BigInt *optionalMarginHigh = &bigints[4]; + + BigInt *temp1 = &bigints[5]; + BigInt *temp2 = &bigints[6]; + + const npy_float64 log10_2 = 0.30102999566398119521373889472449; + npy_int32 digitExponent, hiBlock; + npy_int32 cutoff_max_Exponent, cutoff_min_Exponent; + npy_uint32 outputDigit; /* current digit being output */ + npy_uint32 outputLen; + npy_bool isEven = BigInt_IsEven(mantissa); + npy_int32 cmp; + + /* values used to determine how to round */ + npy_bool low, high, roundDown; + + DEBUG_ASSERT(bufferSize > 0); + + /* if the mantissa is zero, the value is zero regardless of the exponent */ + if (BigInt_IsZero(mantissa)) { + *curDigit = '0'; + *pOutExponent = 0; + return 1; + } + + BigInt_Copy(scaledValue, mantissa); + + if (hasUnequalMargins) { + /* if we have no fractional component */ + if (exponent > 0) { + /* + * 1) Expand the input value by multiplying out the mantissa and + * exponent. This represents the input value in its whole number + * representation. + * 2) Apply an additional scale of 2 such that later comparisons + * against the margin values are simplified. + * 3) Set the margin value to the lowest mantissa bit's scale. + */ + + /* scaledValue = 2 * 2 * mantissa*2^exponent */ + BigInt_ShiftLeft(scaledValue, exponent + 2); + /* scale = 2 * 2 * 1 */ + BigInt_Set_uint32(scale, 4); + /* scaledMarginLow = 2 * 2^(exponent-1) */ + BigInt_Pow2(scaledMarginLow, exponent); + /* scaledMarginHigh = 2 * 2 * 2^(exponent-1) */ + BigInt_Pow2(optionalMarginHigh, exponent + 1); + } + /* else we have a fractional exponent */ + else { + /* + * In order to track the mantissa data as an integer, we store it as + * is with a large scale + */ + + /* scaledValue = 2 * 2 * mantissa */ + BigInt_ShiftLeft(scaledValue, 2); + /* scale = 2 * 2 * 2^(-exponent) */ + BigInt_Pow2(scale, -exponent + 2); + /* scaledMarginLow = 2 * 2^(-1) */ + BigInt_Set_uint32(scaledMarginLow, 1); + /* scaledMarginHigh = 2 * 2 * 2^(-1) */ + BigInt_Set_uint32(optionalMarginHigh, 2); + } + + /* the high and low margins are different */ + scaledMarginHigh = optionalMarginHigh; + } + else { + /* if we have no fractional component */ + if (exponent > 0) { + /* scaledValue = 2 * mantissa*2^exponent */ + BigInt_ShiftLeft(scaledValue, exponent + 1); + /* scale = 2 * 1 */ + BigInt_Set_uint32(scale, 2); + /* scaledMarginLow = 2 * 2^(exponent-1) */ + BigInt_Pow2(scaledMarginLow, exponent); + } + /* else we have a fractional exponent */ + else { + /* + * In order to track the mantissa data as an integer, we store it as + * is with a large scale + */ + + /* scaledValue = 2 * mantissa */ + BigInt_ShiftLeft(scaledValue, 1); + /* scale = 2 * 2^(-exponent) */ + BigInt_Pow2(scale, -exponent + 1); + /* scaledMarginLow = 2 * 2^(-1) */ + BigInt_Set_uint32(scaledMarginLow, 1); + } + + /* the high and low margins are equal */ + scaledMarginHigh = scaledMarginLow; + } + + /* + * Compute an estimate for digitExponent that will be correct or undershoot + * by one. This optimization is based on the paper "Printing Floating-Point + * Numbers Quickly and Accurately" by Burger and Dybvig + * https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656 + * We perform an additional subtraction of 0.69 to increase the frequency of + * a failed estimate because that lets us take a faster branch in the code. + * 0.69 is chosen because 0.69 + log10(2) is less than one by a reasonable + * epsilon that will account for any floating point error. + * + * We want to set digitExponent to floor(log10(v)) + 1 + * v = mantissa*2^exponent + * log2(v) = log2(mantissa) + exponent; + * log10(v) = log2(v) * log10(2) + * floor(log2(v)) = mantissaBit + exponent; + * log10(v) - log10(2) < (mantissaBit + exponent) * log10(2) <= log10(v) + * log10(v) < (mantissaBit + exponent) * log10(2) + log10(2) + * <= log10(v) + log10(2) + * floor(log10(v)) < ceil((mantissaBit + exponent) * log10(2)) + * <= floor(log10(v)) + 1 + * + * Warning: This calculation assumes npy_float64 is an IEEE-binary64 + * float. This line may need to be updated if this is not the case. + */ + digitExponent = (npy_int32)( + ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69)); + + /* + * if the digit exponent is smaller than the smallest desired digit for + * fractional cutoff, pull the digit back into legal range at which point we + * will round to the appropriate value. Note that while our value for + * digitExponent is still an estimate, this is safe because it only + * increases the number. This will either correct digitExponent to an + * accurate value or it will clamp it above the accurate value. + */ + if (cutoff_max >= 0 && cutoffMode == CutoffMode_FractionLength && + digitExponent <= -cutoff_max) { + digitExponent = -cutoff_max + 1; + } + + + /* Divide value by 10^digitExponent. */ + if (digitExponent > 0) { + /* A positive exponent creates a division so we multiply the scale. */ + BigInt_MultiplyPow10(scale, digitExponent, temp1); + } + else if (digitExponent < 0) { + /* + * A negative exponent creates a multiplication so we multiply up the + * scaledValue, scaledMarginLow and scaledMarginHigh. + */ + BigInt *temp=temp1, *pow10=temp2; + BigInt_Pow10(pow10, -digitExponent, temp); + + BigInt_Multiply(temp, scaledValue, pow10); + BigInt_Copy(scaledValue, temp); + + BigInt_Multiply(temp, scaledMarginLow, pow10); + BigInt_Copy(scaledMarginLow, temp); + + if (scaledMarginHigh != scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + + /* If (value >= 1), our estimate for digitExponent was too low */ + if (BigInt_Compare(scaledValue, scale) >= 0) { + /* + * The exponent estimate was incorrect. + * Increment the exponent and don't perform the premultiply needed + * for the first loop iteration. + */ + digitExponent = digitExponent + 1; + } + else { + /* + * The exponent estimate was correct. + * Multiply larger by the output base to prepare for the first loop + * iteration. + */ + BigInt_Multiply10(scaledValue); + BigInt_Multiply10(scaledMarginLow); + if (scaledMarginHigh != scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + + /* + * Compute the cutoff_max exponent (the exponent of the final digit to + * print). Default to the maximum size of the output buffer. + */ + cutoff_max_Exponent = digitExponent - bufferSize; + if (cutoff_max >= 0) { + npy_int32 desiredCutoffExponent; + + if (cutoffMode == CutoffMode_TotalLength) { + desiredCutoffExponent = digitExponent - cutoff_max; + if (desiredCutoffExponent > cutoff_max_Exponent) { + cutoff_max_Exponent = desiredCutoffExponent; + } + } + /* Otherwise it's CutoffMode_FractionLength. Print cutoff_max digits + * past the decimal point or until we reach the buffer size + */ + else { + desiredCutoffExponent = -cutoff_max; + if (desiredCutoffExponent > cutoff_max_Exponent) { + cutoff_max_Exponent = desiredCutoffExponent; + } + } + } + /* Also compute the cutoff_min exponent. */ + cutoff_min_Exponent = digitExponent; + if (cutoff_min >= 0) { + npy_int32 desiredCutoffExponent; + + if (cutoffMode == CutoffMode_TotalLength) { + desiredCutoffExponent = digitExponent - cutoff_min; + if (desiredCutoffExponent < cutoff_min_Exponent) { + cutoff_min_Exponent = desiredCutoffExponent; + } + } + else { + desiredCutoffExponent = -cutoff_min; + if (desiredCutoffExponent < cutoff_min_Exponent) { + cutoff_min_Exponent = desiredCutoffExponent; + } + } + } + + /* Output the exponent of the first digit we will print */ + *pOutExponent = digitExponent-1; + + /* + * In preparation for calling BigInt_DivideWithRemainder_MaxQuotient9(), we + * need to scale up our values such that the highest block of the + * denominator is greater than or equal to 8. We also need to guarantee that + * the numerator can never have a length greater than the denominator after + * each loop iteration. This requires the highest block of the denominator + * to be less than or equal to 429496729 which is the highest number that + * can be multiplied by 10 without overflowing to a new block. + */ + DEBUG_ASSERT(scale->length > 0); + hiBlock = scale->blocks[scale->length - 1]; + if (hiBlock < 8 || hiBlock > 429496729) { + npy_uint32 hiBlockLog2, shift; + + /* + * Perform a bit shift on all values to get the highest block of the + * denominator into the range [8,429496729]. We are more likely to make + * accurate quotient estimations in + * BigInt_DivideWithRemainder_MaxQuotient9() with higher denominator + * values so we shift the denominator to place the highest bit at index + * 27 of the highest block. This is safe because (2^28 - 1) = 268435455 + * which is less than 429496729. This means that all values with a + * highest bit at index 27 are within range. + */ + hiBlockLog2 = LogBase2_32(hiBlock); + DEBUG_ASSERT(hiBlockLog2 < 3 || hiBlockLog2 > 27); + shift = (32 + 27 - hiBlockLog2) % 32; + + BigInt_ShiftLeft(scale, shift); + BigInt_ShiftLeft(scaledValue, shift); + BigInt_ShiftLeft(scaledMarginLow, shift); + if (scaledMarginHigh != scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + + if (digitMode == DigitMode_Unique) { + /* + * For the unique cutoff mode, we will try to print until we have + * reached a level of precision that uniquely distinguishes this value + * from its neighbors. If we run out of space in the output buffer, we + * terminate early. + */ + for (;;) { + BigInt *scaledValueHigh = temp1; + + digitExponent = digitExponent-1; + + /* divide out the scale to extract the digit */ + outputDigit = + BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); + DEBUG_ASSERT(outputDigit < 10); + + /* update the high end of the value */ + BigInt_Add(scaledValueHigh, scaledValue, scaledMarginHigh); + + /* + * stop looping if we are far enough away from our neighboring + * values (and we have printed at least the requested minimum + * digits) or if we have reached the cutoff digit + */ + cmp = BigInt_Compare(scaledValue, scaledMarginLow); + low = isEven ? (cmp <= 0) : (cmp < 0); + cmp = BigInt_Compare(scaledValueHigh, scale); + high = isEven ? (cmp >= 0) : (cmp > 0); + if (((low | high) & (digitExponent <= cutoff_min_Exponent)) | + (digitExponent == cutoff_max_Exponent)) { + break; + } + + /* store the output digit */ + *curDigit = (char)('0' + outputDigit); + ++curDigit; + + /* multiply larger by the output base */ + BigInt_Multiply10(scaledValue); + BigInt_Multiply10(scaledMarginLow); + if (scaledMarginHigh != scaledMarginLow) { + BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); + } + } + } + else { + /* + * For exact digit mode, we will try to print until we + * have exhausted all precision (i.e. all remaining digits are zeros) or + * until we reach the desired cutoff digit. + */ + low = NPY_FALSE; + high = NPY_FALSE; + + for (;;) { + digitExponent = digitExponent-1; + + /* divide out the scale to extract the digit */ + outputDigit = + BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); + DEBUG_ASSERT(outputDigit < 10); + + if ((scaledValue->length == 0) | + (digitExponent == cutoff_max_Exponent)) { + break; + } + + /* store the output digit */ + *curDigit = (char)('0' + outputDigit); + ++curDigit; + + /* multiply larger by the output base */ + BigInt_Multiply10(scaledValue); + } + } + + /* default to rounding down the final digit if value got too close to 0 */ + roundDown = low; + + /* if it is legal to round up and down */ + if (low == high) { + npy_int32 compare; + + /* + * round to the closest digit by comparing value with 0.5. To do this we + * need to convert the inequality to large integer values. + * compare( value, 0.5 ) + * compare( scale * value, scale * 0.5 ) + * compare( 2 * scale * value, scale ) + */ + BigInt_Multiply2_inplace(scaledValue); + compare = BigInt_Compare(scaledValue, scale); + roundDown = compare < 0; + + /* + * if we are directly in the middle, round towards the even digit (i.e. + * IEEE rounding rules) + */ + if (compare == 0) { + roundDown = (outputDigit & 1) == 0; + } + } + + /* print the rounded digit */ + if (roundDown) { + *curDigit = (char)('0' + outputDigit); + ++curDigit; + } + else { + /* handle rounding up */ + if (outputDigit == 9) { + /* find the first non-nine prior digit */ + for (;;) { + /* if we are at the first digit */ + if (curDigit == pOutBuffer) { + /* output 1 at the next highest exponent */ + *curDigit = '1'; + ++curDigit; + *pOutExponent += 1; + break; + } + + --curDigit; + if (*curDigit != '9') { + /* increment the digit */ + *curDigit += 1; + ++curDigit; + break; + } + } + } + else { + /* values in the range [0,8] can perform a simple round up */ + *curDigit = (char)('0' + outputDigit + 1); + ++curDigit; + } + } + + /* return the number of digits output */ + outputLen = (npy_uint32)(curDigit - pOutBuffer); + DEBUG_ASSERT(outputLen <= bufferSize); + return outputLen; +} + +static npy_uint32 +FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, + npy_int32 exponent, char signbit, npy_uint32 mantissaBit, + npy_bool hasUnequalMargins, DigitMode digit_mode, + CutoffMode cutoff_mode, npy_int32 precision, + npy_int32 min_digits, TrimMode trim_mode, + npy_int32 digits_left, npy_int32 digits_right) +{ + npy_int32 printExponent; + npy_int32 numDigits, numWholeDigits=0, has_sign=0; + npy_int32 add_digits; + + npy_int32 maxPrintLen = (npy_int32)bufferSize - 1, pos = 0; + + /* track the # of digits past the decimal point that have been printed */ + npy_int32 numFractionDigits = 0, desiredFractionalDigits; + + DEBUG_ASSERT(bufferSize > 0); + + if (digit_mode != DigitMode_Unique) { + DEBUG_ASSERT(precision >= 0); + } + + if (signbit == '+' && pos < maxPrintLen) { + buffer[pos++] = '+'; + has_sign = 1; + } + else if (signbit == '-' && pos < maxPrintLen) { + buffer[pos++] = '-'; + has_sign = 1; + } + + numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, + digit_mode, cutoff_mode, precision, min_digits, + buffer + has_sign, maxPrintLen - has_sign, + &printExponent); + + DEBUG_ASSERT(numDigits > 0); + DEBUG_ASSERT(numDigits <= bufferSize); + + /* if output has a whole number */ + if (printExponent >= 0) { + /* leave the whole number at the start of the buffer */ + numWholeDigits = printExponent+1; + if (numDigits <= numWholeDigits) { + npy_int32 count = numWholeDigits - numDigits; + pos += numDigits; + + /* don't overflow the buffer */ + if (pos + count > maxPrintLen) { + count = maxPrintLen - pos; + } + + /* add trailing zeros up to the decimal point */ + numDigits += count; + for ( ; count > 0; count--) { + buffer[pos++] = '0'; + } + } + /* insert the decimal point prior to the fraction */ + else if (numDigits > numWholeDigits) { + npy_int32 maxFractionDigits; + + numFractionDigits = numDigits - numWholeDigits; + maxFractionDigits = maxPrintLen - numWholeDigits - 1 - pos; + if (numFractionDigits > maxFractionDigits) { + numFractionDigits = maxFractionDigits; + } + + memmove(buffer + pos + numWholeDigits + 1, + buffer + pos + numWholeDigits, numFractionDigits); + pos += numWholeDigits; + buffer[pos] = '.'; + numDigits = numWholeDigits + 1 + numFractionDigits; + pos += 1 + numFractionDigits; + } + } + else { + /* shift out the fraction to make room for the leading zeros */ + npy_int32 numFractionZeros = 0; + if (pos + 2 < maxPrintLen) { + npy_int32 maxFractionZeros, digitsStartIdx, maxFractionDigits, i; + + maxFractionZeros = maxPrintLen - 2 - pos; + numFractionZeros = -(printExponent + 1); + if (numFractionZeros > maxFractionZeros) { + numFractionZeros = maxFractionZeros; + } + + digitsStartIdx = 2 + numFractionZeros; + + /* + * shift the significant digits right such that there is room for + * leading zeros + */ + numFractionDigits = numDigits; + maxFractionDigits = maxPrintLen - digitsStartIdx - pos; + if (numFractionDigits > maxFractionDigits) { + numFractionDigits = maxFractionDigits; + } + + memmove(buffer + pos + digitsStartIdx, buffer + pos, + numFractionDigits); + + /* insert the leading zeros */ + for (i = 2; i < digitsStartIdx; ++i) { + buffer[pos + i] = '0'; + } + + /* update the counts */ + numFractionDigits += numFractionZeros; + numDigits = numFractionDigits; + } + + /* add the decimal point */ + if (pos + 1 < maxPrintLen) { + buffer[pos+1] = '.'; + } + + /* add the initial zero */ + if (pos < maxPrintLen) { + buffer[pos] = '0'; + numDigits += 1; + } + numWholeDigits = 1; + pos += 2 + numFractionDigits; + } + + /* always add decimal point, except for DprZeros mode */ + if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && + pos < maxPrintLen) { + buffer[pos++] = '.'; + } + + add_digits = digit_mode == DigitMode_Unique ? min_digits : precision; + desiredFractionalDigits = add_digits < 0 ? 0 : add_digits; + if (cutoff_mode == CutoffMode_TotalLength) { + desiredFractionalDigits = add_digits - numWholeDigits; + } + + if (trim_mode == TrimMode_LeaveOneZero) { + /* if we didn't print any fractional digits, add a trailing 0 */ + if (numFractionDigits == 0 && pos < maxPrintLen) { + buffer[pos++] = '0'; + numFractionDigits++; + } + } + else if (trim_mode == TrimMode_None && + desiredFractionalDigits > numFractionDigits && + pos < maxPrintLen) { + /* add trailing zeros up to add_digits length */ + /* compute the number of trailing zeros needed */ + npy_int32 count = desiredFractionalDigits - numFractionDigits; + if (pos + count > maxPrintLen) { + count = maxPrintLen - pos; + } + numFractionDigits += count; + + for ( ; count > 0; count--) { + buffer[pos++] = '0'; + } + } + /* else, for trim_mode Zeros or DptZeros, there is nothing more to add */ + + /* + * when rounding, we may still end up with trailing zeros. Remove them + * depending on trim settings. + */ + if (trim_mode != TrimMode_None && numFractionDigits > 0) { + while (buffer[pos-1] == '0') { + pos--; + numFractionDigits--; + } + if (buffer[pos-1] == '.') { + /* in TrimMode_LeaveOneZero, add trailing 0 back */ + if (trim_mode == TrimMode_LeaveOneZero){ + buffer[pos++] = '0'; + numFractionDigits++; + } + /* in TrimMode_DptZeros, remove trailing decimal point */ + else if (trim_mode == TrimMode_DptZeros) { + pos--; + } + } + } + + /* add any whitespace padding to right side */ + if (digits_right >= numFractionDigits) { + npy_int32 count = digits_right - numFractionDigits; + + /* in trim_mode DptZeros, if right padding, add a space for the . */ + if (trim_mode == TrimMode_DptZeros && numFractionDigits == 0 + && pos < maxPrintLen) { + buffer[pos++] = ' '; + } + + if (pos + count > maxPrintLen) { + count = maxPrintLen - pos; + } + + for ( ; count > 0; count--) { + buffer[pos++] = ' '; + } + } + /* add any whitespace padding to left side */ + if (digits_left > numWholeDigits + has_sign) { + npy_int32 shift = digits_left - (numWholeDigits + has_sign); + npy_int32 count = pos; + + if (count + shift > maxPrintLen) { + count = maxPrintLen - shift; + } + + if (count > 0) { + memmove(buffer + shift, buffer, count); + } + pos = shift + count; + for ( ; shift > 0; shift--) { + buffer[shift - 1] = ' '; + } + } + + /* terminate the buffer */ + DEBUG_ASSERT(pos <= maxPrintLen); + buffer[pos] = '\0'; + + return pos; +} + +static npy_uint32 +FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, + npy_int32 exponent, char signbit, npy_uint32 mantissaBit, + npy_bool hasUnequalMargins, DigitMode digit_mode, + npy_int32 precision, npy_int32 min_digits, TrimMode trim_mode, + npy_int32 digits_left, npy_int32 exp_digits) +{ + npy_int32 printExponent; + npy_int32 numDigits; + char *pCurOut; + npy_int32 numFractionDigits; + npy_int32 leftchars; + npy_int32 add_digits; + + if (digit_mode != DigitMode_Unique) { + DEBUG_ASSERT(precision >= 0); + } + + DEBUG_ASSERT(bufferSize > 0); + + pCurOut = buffer; + + /* add any whitespace padding to left side */ + leftchars = 1 + (signbit == '-' || signbit == '+'); + if (digits_left > leftchars) { + int i; + for (i = 0; i < digits_left - leftchars && bufferSize > 1; i++) { + *pCurOut = ' '; + pCurOut++; + --bufferSize; + } + } + + if (signbit == '+' && bufferSize > 1) { + *pCurOut = '+'; + pCurOut++; + --bufferSize; + } + else if (signbit == '-' && bufferSize > 1) { + *pCurOut = '-'; + pCurOut++; + --bufferSize; + } + + numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, + digit_mode, CutoffMode_TotalLength, + precision < 0 ? -1 : precision + 1, + min_digits < 0 ? -1 : min_digits + 1, + pCurOut, bufferSize, &printExponent); + + DEBUG_ASSERT(numDigits > 0); + DEBUG_ASSERT(numDigits <= bufferSize); + + /* keep the whole number as the first digit */ + if (bufferSize > 1) { + pCurOut += 1; + bufferSize -= 1; + } + + /* insert the decimal point prior to the fractional number */ + numFractionDigits = numDigits-1; + if (numFractionDigits > 0 && bufferSize > 1) { + npy_int32 maxFractionDigits = (npy_int32)bufferSize - 2; + + if (numFractionDigits > maxFractionDigits) { + numFractionDigits = maxFractionDigits; + } + + memmove(pCurOut + 1, pCurOut, numFractionDigits); + pCurOut[0] = '.'; + pCurOut += (1 + numFractionDigits); + bufferSize -= (1 + numFractionDigits); + } + + /* always add decimal point, except for DprZeros mode */ + if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && + bufferSize > 1) { + *pCurOut = '.'; + ++pCurOut; + --bufferSize; + } + + add_digits = digit_mode == DigitMode_Unique ? min_digits : precision; + add_digits = add_digits < 0 ? 0 : add_digits; + if (trim_mode == TrimMode_LeaveOneZero) { + /* if we didn't print any fractional digits, add the 0 */ + if (numFractionDigits == 0 && bufferSize > 1) { + *pCurOut = '0'; + ++pCurOut; + --bufferSize; + ++numFractionDigits; + } + } + else if (trim_mode == TrimMode_None) { + /* add trailing zeros up to add_digits length */ + if (add_digits > (npy_int32)numFractionDigits) { + char *pEnd; + /* compute the number of trailing zeros needed */ + npy_int32 numZeros = (add_digits - numFractionDigits); + + if (numZeros > (npy_int32)bufferSize - 1) { + numZeros = (npy_int32)bufferSize - 1; + } + + for (pEnd = pCurOut + numZeros; pCurOut < pEnd; ++pCurOut) { + *pCurOut = '0'; + ++numFractionDigits; + } + } + } + /* else, for trim_mode Zeros or DptZeros, there is nothing more to add */ + + /* + * when rounding, we may still end up with trailing zeros. Remove them + * depending on trim settings. + */ + if (trim_mode != TrimMode_None && numFractionDigits > 0) { + --pCurOut; + while (*pCurOut == '0') { + --pCurOut; + ++bufferSize; + --numFractionDigits; + } + if (trim_mode == TrimMode_LeaveOneZero && *pCurOut == '.') { + ++pCurOut; + *pCurOut = '0'; + --bufferSize; + ++numFractionDigits; + } + ++pCurOut; + } + + /* print the exponent into a local buffer and copy into output buffer */ + if (bufferSize > 1) { + char exponentBuffer[7]; + npy_int32 digits[5]; + npy_int32 i, exp_size, count; + + if (exp_digits > 5) { + exp_digits = 5; + } + if (exp_digits < 0) { + exp_digits = 2; + } + + exponentBuffer[0] = 'e'; + if (printExponent >= 0) { + exponentBuffer[1] = '+'; + } + else { + exponentBuffer[1] = '-'; + printExponent = -printExponent; + } + + DEBUG_ASSERT(printExponent < 100000); + + /* get exp digits */ + for (i = 0; i < 5; i++) { + digits[i] = printExponent % 10; + printExponent /= 10; + } + /* count back over leading zeros */ + for (i = 5; i > exp_digits && digits[i-1] == 0; i--) { + } + exp_size = i; + /* write remaining digits to tmp buf */ + for (i = exp_size; i > 0; i--) { + exponentBuffer[2 + (exp_size-i)] = (char)('0' + digits[i-1]); + } + + /* copy the exponent buffer into the output */ + count = exp_size + 2; + if (count > (npy_int32)bufferSize - 1) { + count = (npy_int32)bufferSize - 1; + } + memcpy(pCurOut, exponentBuffer, count); + pCurOut += count; + bufferSize -= count; + } + + + DEBUG_ASSERT(bufferSize > 0); + pCurOut[0] = '\0'; + + return pCurOut - buffer; +} + +static npy_uint32 +Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, + npy_int32 exponent, char signbit, npy_uint32 mantissaBit, + npy_bool hasUnequalMargins, Dragon4_Options *opt) +{ + /* format the value */ + if (opt->scientific) { + return FormatScientific(buffer, bufferSize, mantissa, exponent, + signbit, mantissaBit, hasUnequalMargins, + opt->digit_mode, opt->precision, + opt->min_digits, opt->trim_mode, + opt->digits_left, opt->exp_digits); + } + else { + return FormatPositional(buffer, bufferSize, mantissa, exponent, + signbit, mantissaBit, hasUnequalMargins, + opt->digit_mode, opt->cutoff_mode, + opt->precision, opt->min_digits, opt->trim_mode, + opt->digits_left, opt->digits_right); + } +} + + +static npy_uint32 +Dragon4_PrintFloat_Sleef_quad(Sleef_quad *value, Dragon4_Options *opt) +{ + char *buffer = _bigint_static.repr; + const npy_uint32 bufferSize = sizeof(_bigint_static.repr); + BigInt *bigints = _bigint_static.bigints; + + npy_uint32 floatExponent, floatSign; + npy_uint64 mantissa_hi, mantissa_lo; + npy_int32 exponent; + npy_uint32 mantissaBit; + npy_bool hasUnequalMargins; + char signbit = '\0'; + + // Extract the bits from the SLEEF quad value + union { + Sleef_quad q; + struct { + npy_uint64 lo; + npy_uint64 hi; + } i; + } u; + u.q = *value; + + // Extract mantissa, exponent, and sign + mantissa_hi = u.i.hi & bitmask_u64(48); + mantissa_lo = u.i.lo; + floatExponent = (u.i.hi >> 48) & bitmask_u32(15); + floatSign = u.i.hi >> 63; + + /* output the sign */ + if (floatSign != 0) { + signbit = '-'; + } + else if (opt->sign) { + signbit = '+'; + } + + /* if this is a special value */ + if (floatExponent == bitmask_u32(15)) { + npy_uint64 mantissa_zero = mantissa_hi == 0 && mantissa_lo == 0; + return PrintInfNan(buffer, bufferSize, !mantissa_zero, 16, signbit); + } + /* else this is a number */ + + /* factor the value into its parts */ + if (floatExponent != 0) { + /* normal */ + mantissa_hi = (1ull << 48) | mantissa_hi; + /* mantissa_lo is unchanged */ + exponent = floatExponent - 16383 - 112; + mantissaBit = 112; + hasUnequalMargins = (floatExponent != 1) && (mantissa_hi == 0 && + mantissa_lo == 0); + } + else { + /* subnormal */ + exponent = 1 - 16383 - 112; + mantissaBit = LogBase2_128(mantissa_hi, mantissa_lo); + hasUnequalMargins = NPY_FALSE; + } + + BigInt_Set_2x_uint64(&bigints[0], mantissa_hi, mantissa_lo); + return Format_floatbits(buffer, bufferSize, bigints, exponent, + signbit, mantissaBit, hasUnequalMargins, opt); +} + + +PyObject * +Dragon4_Positional_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt) +{ + PyObject *ret; + if (Dragon4_PrintFloat_Sleef_quad(val, opt) < 0) { + return NULL; + } + ret = PyUnicode_FromString(_bigint_static.repr); + return ret; +} + +PyObject * +Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode, + CutoffMode cutoff_mode, int precision, int min_digits, + int sign, TrimMode trim, int pad_left, int pad_right) +{ + Dragon4_Options opt; + + opt.scientific = 0; + opt.digit_mode = digit_mode; + opt.cutoff_mode = cutoff_mode; + opt.precision = precision; + opt.min_digits = min_digits; + opt.sign = sign; + opt.trim_mode = trim; + opt.digits_left = pad_left; + opt.digits_right = pad_right; + opt.exp_digits = -1; + + return Dragon4_Positional_QuadDType_opt(val, &opt); +} + +PyObject * +Dragon4_Scientific_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt) +{ + PyObject *ret; + if (Dragon4_PrintFloat_Sleef_quad(val, opt) < 0) { + return NULL; + } + ret = PyUnicode_FromString(_bigint_static.repr); + return ret; +} + +PyObject * +Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precision, + int min_digits, int sign, TrimMode trim, int pad_left, + int exp_digits) +{ + Dragon4_Options opt; + + opt.scientific = 1; + opt.digit_mode = digit_mode; + opt.cutoff_mode = CutoffMode_TotalLength; + opt.precision = precision; + opt.min_digits = min_digits; + opt.sign = sign; + opt.trim_mode = trim; + opt.digits_left = pad_left; + opt.digits_right = -1; + opt.exp_digits = exp_digits; + + return Dragon4_Scientific_QuadDType_opt(val, &opt); +} + + +PyObject * +Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, + int precision, int min_digits, int sign, TrimMode trim, + int pad_left, int pad_right) +{ + npy_double v; + + if (PyArray_IsScalar(obj, QuadPrecDType)) { + QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; + if (quad_obj->backend == BACKEND_SLEEF) { + return Dragon4_Positional_QuadDType(&quad_obj->value.sleef_value, digit_mode, cutoff_mode, precision, min_digits, sign, trim, pad_left, pad_right); + } else { + Sleef_quad sleef_val = Sleef_cast_from_doubleq1(quad_obj->value.longdouble_value); + return Dragon4_Positional_QuadDType(&sleef_val, digit_mode, cutoff_mode, precision, min_digits, sign, trim, pad_left, pad_right); + } + } +} + +PyObject * +Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision, + int min_digits, int sign, TrimMode trim, int pad_left, + int exp_digits) +{ + npy_double val; + + if (PyArray_IsScalar(obj, QuadPrecDType)) { + QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; + if (quad_obj->backend == BACKEND_SLEEF) { + return Dragon4_Scientific_QuadDType(&quad_obj->value.sleef_value, digit_mode, precision, min_digits, sign, trim, pad_left, exp_digits); + } else { + Sleef_quad sleef_val = Sleef_cast_from_doubleq1(quad_obj->value.longdouble_value); + return Dragon4_Scientific_QuadDType(&sleef_val, digit_mode, precision, min_digits, sign, trim, pad_left, exp_digits); + } + } + +} \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/dragon4.h b/quaddtype/numpy_quaddtype/src/dragon4.h new file mode 100644 index 00000000..1977595e --- /dev/null +++ b/quaddtype/numpy_quaddtype/src/dragon4.h @@ -0,0 +1,70 @@ +#ifndef _QUADDTYPE_DRAGON4_H +#define _QUADDTYPE_DRAGON4_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "numpy/arrayobject.h" +#include +#include "quad_common.h" + +typedef enum DigitMode +{ + /* Round digits to print shortest uniquely identifiable number. */ + DigitMode_Unique, + /* Output the digits of the number as if with infinite precision */ + DigitMode_Exact, +} DigitMode; + +typedef enum CutoffMode +{ + /* up to cutoffNumber significant digits */ + CutoffMode_TotalLength, + /* up to cutoffNumber significant digits past the decimal point */ + CutoffMode_FractionLength, +} CutoffMode; + +typedef enum TrimMode +{ + TrimMode_None, /* don't trim zeros, always leave a decimal point */ + TrimMode_LeaveOneZero, /* trim all but the zero before the decimal point */ + TrimMode_Zeros, /* trim all trailing zeros, leave decimal point */ + TrimMode_DptZeros, /* trim trailing zeros & trailing decimal point */ +} TrimMode; + +typedef struct { + int scientific; + DigitMode digit_mode; + CutoffMode cutoff_mode; + int precision; + int min_digits; + int sign; + TrimMode trim_mode; + int digits_left; + int digits_right; + int exp_digits; +} Dragon4_Options; + +PyObject *Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode, + CutoffMode cutoff_mode, int precision, int min_digits, + int sign, TrimMode trim, int pad_left, int pad_right); + +PyObject *Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, + int precision, int min_digits, int sign, TrimMode trim, + int pad_left, int exp_digits); + +PyObject *Dragon4_Positional(PyObject *obj, DigitMode digit_mode, + CutoffMode cutoff_mode, int precision, int min_digits, + int sign, TrimMode trim, int pad_left, int pad_right); + +PyObject *Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision, + int min_digits, int sign, TrimMode trim, int pad_left, + int exp_digits); + +#ifdef __cplusplus +} +#endif + +#endif /* _QUADDTYPE_DRAGON4_H */ \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c index 36bca565..4524f3db 100644 --- a/quaddtype/numpy_quaddtype/src/dtype.c +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -15,6 +15,7 @@ #include "scalar.h" #include "casts.h" #include "dtype.h" +#include "dragon4.h" static inline int quad_load(void *x, char *data_ptr, QuadBackendType backend) @@ -212,13 +213,22 @@ QuadPrecDType_repr(QuadPrecDTypeObject *self) return PyUnicode_FromFormat("QuadPrecDType(backend='%s')", backend_str); } +static PyObject * +QuadPrecDType_str(QuadPrecDTypeObject *self) +{ + const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble"; + return PyUnicode_FromFormat("QuadPrecDType(backend='%s')", backend_str); +} + + + PyArray_DTypeMeta QuadPrecDType = { {{ PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecDType", .tp_basicsize = sizeof(QuadPrecDTypeObject), .tp_new = QuadPrecDType_new, .tp_repr = (reprfunc)QuadPrecDType_repr, - .tp_str = (reprfunc)QuadPrecDType_repr, + .tp_str = (reprfunc)QuadPrecDType_str, }}, }; diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c index 89ec16f0..7c89d57f 100644 --- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c +++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c @@ -15,6 +15,7 @@ #include "quad_common.h" #include "float.h" + static PyObject* py_is_longdouble_128(PyObject* self, PyObject* args) { if(sizeof(long double) == 16 && LDBL_MANT_DIG == 113 && diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index 0b2967e5..7ed731d0 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -13,6 +13,7 @@ #include "scalar.h" #include "scalar_ops.h" +#include "dragon4.h" QuadPrecisionObject * QuadPrecision_raw_new(QuadBackendType backend) @@ -128,6 +129,29 @@ QuadPrecision_new(PyTypeObject *cls, PyObject *args, PyObject *kwargs) return (PyObject *)QuadPrecision_from_object(value, backend); } +static PyObject * +QuadPrecision_str_dragon4(QuadPrecisionObject *self) +{ + Dragon4_Options opt = { + .scientific = 0, + .digit_mode = DigitMode_Unique, + .cutoff_mode = CutoffMode_TotalLength, + .precision = SLEEF_QUAD_DIG, + .sign = 1, + .trim_mode = TrimMode_LeaveOneZero, + .digits_left = 1, + .digits_right = SLEEF_QUAD_DIG + }; + + if (self->backend == BACKEND_SLEEF) { + return Dragon4_Positional_QuadDType(&self->value.sleef_value, opt.digit_mode, opt.cutoff_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.digits_right); + } + else { + Sleef_quad sleef_val = Sleef_cast_from_doubleq1(self->value.longdouble_value); + return Dragon4_Positional_QuadDType(&sleef_val, opt.digit_mode, opt.cutoff_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.digits_right); + } +} + static PyObject * QuadPrecision_str(QuadPrecisionObject *self) { @@ -154,6 +178,39 @@ QuadPrecision_repr(QuadPrecisionObject *self) return res; } +static PyObject * +QuadPrecision_repr_dragon4(QuadPrecisionObject *self) +{ + Dragon4_Options opt = { + .scientific = 1, + .digit_mode = DigitMode_Unique, + .cutoff_mode = CutoffMode_TotalLength, + .precision = SLEEF_QUAD_DIG, + .sign = 1, + .trim_mode = TrimMode_LeaveOneZero, + .digits_left = 1, + .exp_digits = 3 + }; + + PyObject *str; + if (self->backend == BACKEND_SLEEF) { + str = Dragon4_Scientific_QuadDType(&self->value.sleef_value, opt.digit_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.exp_digits); + } + else { + Sleef_quad sleef_val = Sleef_cast_from_doubleq1(self->value.longdouble_value); + str = Dragon4_Scientific_QuadDType(&sleef_val, opt.digit_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.exp_digits); + } + + if (str == NULL) { + return NULL; + } + + const char *backend_str = (self->backend == BACKEND_SLEEF) ? "sleef" : "longdouble"; + PyObject *res = PyUnicode_FromFormat("QuadPrecision('%S', backend='%s')", str, backend_str); + Py_DECREF(str); + return res; +} + static void QuadPrecision_dealloc(QuadPrecisionObject *self) { @@ -166,8 +223,8 @@ PyTypeObject QuadPrecision_Type = { .tp_itemsize = 0, .tp_new = QuadPrecision_new, .tp_dealloc = (destructor)QuadPrecision_dealloc, - .tp_repr = (reprfunc)QuadPrecision_repr, - .tp_str = (reprfunc)QuadPrecision_str, + .tp_repr = (reprfunc)QuadPrecision_repr_dragon4, + .tp_str = (reprfunc)QuadPrecision_str_dragon4, .tp_as_number = &quad_as_scalar, .tp_richcompare = (richcmpfunc)quad_richcompare diff --git a/quaddtype/numpy_quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h index 344fef09..4fac1adf 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.h +++ b/quaddtype/numpy_quaddtype/src/scalar.h @@ -31,6 +31,9 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend); int init_quadprecision_scalar(void); +#define PyArray_IsScalar(obj, QuadPrecDType) PyObject_TypeCheck(obj, &QuadPrecision_Type) +#define PyArrayScalar_VAL(obj, QuadPrecDType) (((QuadPrecisionObject *)obj)->value) + #ifdef __cplusplus } #endif diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 2444047c..95afed91 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -232,9 +232,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; QuadBackendType target_backend; - const char *s1 = (descr_in1->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; - const char *s2 = (descr_in2->backend == BACKEND_SLEEF) ? "SLEEF" : "LONGDOUBLE"; - // Determine target backend and if casting is needed NPY_CASTING casting = NPY_NO_CASTING; if (descr_in1->backend != descr_in2->backend) { @@ -323,7 +320,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], int nargs = ufunc->nargs; PyArray_DTypeMeta *common = NULL; bool has_quad = false; - QuadBackendType backend = BACKEND_INVALID; // Initialize to an invalid state // Handle the special case for reductions if (op_dtypes[0] == NULL) { From 7b43f3ba1c4cddf27d94b2b02c2951484bcb60d2 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sat, 7 Sep 2024 12:58:45 +0530 Subject: [PATCH 11/32] fixing reduction redorderable issue --- quaddtype/meson.build | 58 +++++++++++++++++++++++++ quaddtype/numpy_quaddtype/src/umath.cpp | 2 +- 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 quaddtype/meson.build diff --git a/quaddtype/meson.build b/quaddtype/meson.build new file mode 100644 index 00000000..fde838c0 --- /dev/null +++ b/quaddtype/meson.build @@ -0,0 +1,58 @@ +project('numpy_quaddtype', 'c', 'cpp', default_options : ['cpp_std=c++17', 'b_pie=true']) + +py_mod = import('python') +py = py_mod.find_installation() + +c = meson.get_compiler('c') + +sleef_dep = c.find_library('sleef') +sleefquad_dep = c.find_library('sleefquad') + +incdir_numpy = run_command(py, + [ + '-c', + 'import numpy; import os; print(os.path.relpath(numpy.get_include()))' + ], + check: true +).stdout().strip() + +includes = include_directories( + [ + incdir_numpy, + 'numpy_quaddtype/src', + ] +) + +srcs = [ + 'numpy_quaddtype/src/casts.h', + 'numpy_quaddtype/src/casts.cpp', + 'numpy_quaddtype/src/scalar.h', + 'numpy_quaddtype/src/scalar.c', + 'numpy_quaddtype/src/dtype.h', + 'numpy_quaddtype/src/dtype.c', + 'numpy_quaddtype/src/quaddtype_main.c', + 'numpy_quaddtype/src/scalar_ops.h', + 'numpy_quaddtype/src/scalar_ops.cpp', + 'numpy_quaddtype/src/ops.hpp', + 'numpy_quaddtype/src/umath.h', + 'numpy_quaddtype/src/umath.cpp', + 'numpy_quaddtype/src/dragon4.h', + 'numpy_quaddtype/src/dragon4.c' +] + +py.install_sources( + [ + 'numpy_quaddtype/__init__.py', + ], + subdir: 'numpy_quaddtype', + pure: false +) + +py.extension_module('_quaddtype_main', +srcs, +c_args: ['-g', '-O0', '-lsleef', '-lsleefquad'], +dependencies: [sleef_dep, sleefquad_dep], +install: true, +subdir: 'numpy_quaddtype', +include_directories: includes +) \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 95afed91..de2d982d 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -415,7 +415,7 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name) .nin = 2, .nout = 1, .casting = NPY_NO_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)0, + .flags = NPY_METH_IS_REORDERABLE, .dtypes = dtypes, .slots = slots, }; From 04480ce60aed5480dbde68df3e36e856e6b53f4e Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sat, 7 Sep 2024 18:23:20 +0530 Subject: [PATCH 12/32] np.dot WIP --- quaddtype/numpy_quaddtype/src/scalar.c | 4 +- quaddtype/numpy_quaddtype/src/scalar_ops.cpp | 23 ++ temp.py | 283 +++++++++++++++++++ 3 files changed, 308 insertions(+), 2 deletions(-) create mode 100644 temp.py diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index 7ed731d0..f1cbc6d4 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -226,12 +226,12 @@ PyTypeObject QuadPrecision_Type = { .tp_repr = (reprfunc)QuadPrecision_repr_dragon4, .tp_str = (reprfunc)QuadPrecision_str_dragon4, .tp_as_number = &quad_as_scalar, - .tp_richcompare = (richcmpfunc)quad_richcompare - + .tp_richcompare = (richcmpfunc)quad_richcompare, }; int init_quadprecision_scalar(void) { + QuadPrecision_Type.tp_base = &PyFloat_Type; // this is not working (subclassing to np.floating) return PyType_Ready(&QuadPrecision_Type); } \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp index 156e4288..2faf810d 100644 --- a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp +++ b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp @@ -207,6 +207,27 @@ quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op) return PyBool_FromLong(cmp); } +static PyObject * +QuadPrecision_float(QuadPrecisionObject *self) +{ + if (self->backend == BACKEND_SLEEF) { + return PyFloat_FromDouble(Sleef_cast_to_doubleq1(self->value.sleef_value)); + } else { + return PyFloat_FromDouble((double)self->value.longdouble_value); + } +} + +static PyObject * +QuadPrecision_int(QuadPrecisionObject *self) +{ + if (self->backend == BACKEND_SLEEF) { + return PyLong_FromLongLong(Sleef_cast_to_int64q1(self->value.sleef_value)); + } else { + return PyLong_FromLongLong((long long)self->value.longdouble_value); + } +} + + PyNumberMethods quad_as_scalar = { .nb_add = (binaryfunc)quad_binary_func, .nb_subtract = (binaryfunc)quad_binary_func, @@ -216,5 +237,7 @@ PyNumberMethods quad_as_scalar = { .nb_positive = (unaryfunc)quad_unary_func, .nb_absolute = (unaryfunc)quad_unary_func, .nb_bool = (inquiry)quad_nonzero, + .nb_int = (unaryfunc)QuadPrecision_int, + .nb_float = (unaryfunc)QuadPrecision_float, .nb_true_divide = (binaryfunc)quad_binary_func, }; \ No newline at end of file diff --git a/temp.py b/temp.py new file mode 100644 index 00000000..3ef85e95 --- /dev/null +++ b/temp.py @@ -0,0 +1,283 @@ +import numpy_quaddtype as npq +import numpy as np + + +def test_scalar_ops(backend): + print(f"\nTesting scalar operations for {backend} backend:") + + # Create QuadPrecision instances + q1 = npq.QuadPrecision( + "3.14159265358979323846264338327950288", backend=backend) + q2 = npq.QuadPrecision( + "-2.71828182845904523536028747135266250", backend=backend) + + # Test unary operations + print("\nUnary operations:") + print(f" Negation of q1: {-q1}") + print(f" Absolute value of q2: {abs(q2)}") + + # Test binary operations + print("\nBinary operations:") + print(f" Addition: {q1 + q2}") + print(f" Subtraction: {q1 - q2}") + print(f" Multiplication: {q1 * q2}") + print(f" Division: {q1 / q2}") + + # Test comparison operations + print("\nComparison operations:") + print(f" q1 == q2: {q1 == q2}") + print(f" q1 != q2: {q1 != q2}") + print(f" q1 < q2: {q1 < q2}") + print(f" q1 <= q2: {q1 <= q2}") + print(f" q1 > q2: {q1 > q2}") + print(f" q1 >= q2: {q1 >= q2}") + + # Test operations with Python numbers + print("\nOperations with Python numbers:") + print(f" q1 + 1: {q1 + 1}") + print(f" q1 - 2.5: {q1 - 2.5}") + print(f" q1 * 3: {q1 * 3}") + print(f" q1 / 2: {q1 / 2}") + + # Test boolean conversion + print("\nBoolean conversion:") + print(f" bool(q1): {np.bool(q1)}") + print( + f" bool(npq.QuadPrecision('0', backend=backend)): {np.bool(npq.QuadPrecision('0', backend=backend))}") + + +def test_casting(backend): + print(f"\nTesting {backend} backend:") + + # Create QuadPrecision instances + q1 = npq.QuadPrecision( + "3.14159265358979323846264338327950288", backend=backend) + q2 = npq.QuadPrecision( + "-2.71828182845904523536028747135266250", backend=backend) + + # Test casting from QuadPrecision to numpy dtypes + print("Casting from QuadPrecision to numpy dtypes:") + print(f" float32: {np.float32(q1)}") + print(f" float64: {np.float64(q1)}") + print(f" int64: {np.int64(q1)}") + print(f" uint64: {np.uint64(q1)}") + + # Test casting from numpy dtypes to QuadPrecision + print("\nCasting from numpy dtypes to QuadPrecision:") + print( + f" float32: {np.float32(3.14159).astype(npq.QuadPrecDType(backend=backend))}") + print( + f" float64: {np.float64(2.71828182845904).astype(npq.QuadPrecDType(backend=backend))}") + print( + f" int64: {np.int64(-1234567890).astype(npq.QuadPrecDType(backend=backend))}") + print( + f" uint64: {np.uint64(9876543210).astype(npq.QuadPrecDType(backend=backend))}") + + # Test array operations + print("\nArray operations:") + q_array = np.array([q1, q2], dtype=npq.QuadPrecDType(backend=backend)) + print(f" QuadPrecision array: {q_array}") + + np_array = np.array([3.14, -2.71, 1.41, -1.73], dtype=np.float64) + q_from_np = np_array.astype(npq.QuadPrecDType(backend=backend)) + print(f" Numpy to QuadPrecision: {q_from_np}") + + back_to_np = np.array(q_from_np, dtype=np.float64) + print(f" QuadPrecision to Numpy: {back_to_np}") + + # Test precision maintenance + large_int = 12345678901234567890 + q_large = np.array([large_int], dtype=np.uint64).astype( + npq.QuadPrecDType(backend=backend))[0] + print(f"\nPrecision test:") + print(f" Original large int: {large_int}") + print(f" QuadPrecision: {q_large}") + print(f" Back to int: {np.uint64(q_large)}") + + # Test edge cases + + +def test_edge_cases(backend): + print(f"\nTesting negative numbers for {backend} backend:") + + # Test various negative numbers + test_values = [ + -1.0, + -1e10, + -1e100, + -1e300, + np.nextafter(np.finfo(np.float64).min, 0), + np.finfo(np.float64).min + ] + + for value in test_values: + q_value = npq.QuadPrecision(str(value), backend=backend) + print(f" Original: {value}") + print(f" QuadPrecision: {q_value}") + print(f" Back to float64: {np.float64(q_value)}") + print() + + # Test value beyond float64 precision + beyond_float64_precision = "1.7976931348623157081452742373170435e+308" + q_beyond = npq.QuadPrecision(beyond_float64_precision, backend=backend) + print(f" Beyond float64 precision: {q_beyond}") + q_float64_max = npq.QuadPrecision( + str(np.finfo(np.float64).max), backend=backend) + diff = q_beyond - q_float64_max + print(f" Difference from float64 max: {diff}") + print( + f" Difference is positive: {diff > npq.QuadPrecision('0', backend=backend)}") + + # Test epsilon (smallest representable difference between two numbers) + q_epsilon = npq.QuadPrecision( + str(np.finfo(np.float64).eps), backend=backend) + print(f" Float64 epsilon in QuadPrecision: {q_epsilon}") + q_one = npq.QuadPrecision("1", backend=backend) + q_one_plus_epsilon = q_one + q_epsilon + print(f" 1 + epsilon != 1: {q_one_plus_epsilon != q_one}") + print(f" (1 + epsilon) - 1: {q_one_plus_epsilon - q_one}") + + +def test_ufuncs(backend): + print(f"\nTesting ufuncs for {backend} backend:") + + # Create QuadPrecision arrays + q_array1 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend)) + q_array2 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend)) + + # Test unary ufuncs + print("\nUnary unfuncs:") + print(f" negative: {np.negative(q_array1)}") + print(f" absolute: {np.absolute(q_array1)}") + print(f" rint: {np.rint(q_array1)}") + print(f" floor: {np.floor(q_array1)}") + print(f" ceil: {np.ceil(q_array1)}") + print(f" trunc: {np.trunc(q_array1)}") + print(f" sqrt: {np.sqrt(q_array1)}") + print(f" square: {np.square(q_array1)}") + print(f" log: {np.log(q_array1)}") + print(f" log2: {np.log2(q_array1)}") + print(f" log10: {np.log10(q_array1)}") + print(f" exp: {np.exp(q_array1)}") + print(f" exp2: {np.exp2(q_array1)}") + + # Test binary ufuncs + print("\nBinary ufuncs:") + print(f" add: {np.add(q_array1, q_array2)}") + print(f" subtract: {np.subtract(q_array1, q_array2)}") + print(f" multiply: {np.multiply(q_array1, q_array2)}") + print(f" divide: {np.divide(q_array1, q_array2)}") + print(f" power: {np.power(q_array1, q_array2)}") + print(f" mod: {np.mod(q_array1, q_array2)}") + print(f" minimum: {np.minimum(q_array1, q_array2)}") + print(f" maximum: {np.maximum(q_array1, q_array2)}") + + # Test comparison ufuncs + print("\nComparison ufuncs:") + print(f" equal: {np.equal(q_array1, q_array2)}") + print(f" not_equal: {np.not_equal(q_array1, q_array2)}") + print(f" less: {np.less(q_array1, q_array2)}") + print(f" less_equal: {np.less_equal(q_array1, q_array2)}") + print(f" greater: {np.greater(q_array1, q_array2)}") + print(f" greater_equal: {np.greater_equal(q_array1, q_array2)}") + + # Test mixed operations with numpy arrays + print(f"Testing backend: {backend}") + print("\nMixed operations with numpy arrays:") + np_array = np.array([1.0, 2.0, 3.0], dtype=np.float64) + print(f" add: {np.add(q_array1, np_array)}") + print(f" multiply: {np.multiply(q_array1, np_array)}") + print(f" divide: {np.divide(q_array1, np_array)}") + + # Test reduction operations + print("\nReduction operations:") + print(f" sum: {np.sum(q_array1)}") + print(f" prod: {np.prod(q_array1)}") + print(f" min: {np.min(q_array1)}") + print(f" max: {np.max(q_array1)}") + +from numpy_quaddtype import QuadPrecision, QuadPrecDType + +def test_quad_precision(): + print("Testing QuadPrecision scalar:") + + # Test different initializations + values = [ + 0, + 1, + -1, + 3.14159265358979323846, + 1e100, + 1e-100, + float('inf'), + float('-inf'), + float('nan') + ] + + for val in values: + q = QuadPrecision(val) + print(f"Value: {val}") + print(f" str: {str(q)}") + print(f" repr: {repr(q)}") + + # Test different backends + print("\nTesting backends:") + q_sleef = QuadPrecision(3.14159265358979323846, backend='sleef') + q_longdouble = QuadPrecision(3.14159265358979323846, backend='longdouble') + print(f"Sleef: {q_sleef}") + print(f"Long double: {q_longdouble}") + +def test_quad_dtype(): + print("\nTesting QuadPrecDType:") + + # Create an array with QuadPrecDType + arr = np.array([0, 1, -1, 3.14159265358979323846, 1e100, 1e-100], + dtype=QuadPrecDType()) + + print("Array elements:") + for elem in arr: + print(f" {elem}") + + print("\nFull array:") + print(arr) + + # Test different backends + print("\nTesting backends in arrays:") + arr_sleef = np.array([3.14159265358979323846], dtype=QuadPrecDType(backend='sleef')) + arr_longdouble = np.array([3.14159265358979323846], dtype=QuadPrecDType(backend='longdouble')) + print(f"Sleef array: {arr_sleef}") + print(f"Long double array: {arr_longdouble}") + +def test_operations(): + print("\nTesting basic operations:") + a = QuadPrecision(3.14159265358979323846) + b = QuadPrecision(2.71828182845904523536) + + print(f"a = {a}") + print(f"b = {b}") + print(f"a + b = {a + b}") + print(f"a - b = {a - b}") + print(f"a * b = {a * b}") + print(f"a / b = {a / b}") + +def test(): + # Run tests for both backends + for backend in ['sleef', 'longdouble']: + test_scalar_ops(backend) + test_casting(backend) + test_edge_cases(backend) + test_ufuncs(backend) + test_quad_precision() + test_quad_dtype() + test_operations() + print("*"*50) + + print("All tests completed successfully") + +def dot(a, b): + r = np.dot(a, b) + return r + +if __name__ == "__main__": + a = np.array([1, 2, 3], dtype=QuadPrecDType()) + print(dot(a, a)) \ No newline at end of file From 755e2cdc141531378e4aae3feb78a5c23ee4c9ae Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Tue, 10 Sep 2024 08:30:15 +0530 Subject: [PATCH 13/32] fixing base --- quaddtype/numpy_quaddtype/src/dragon4.c | 724 +++++++++++------------- quaddtype/numpy_quaddtype/src/dtype.c | 16 +- 2 files changed, 348 insertions(+), 392 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c index 313ff312..1ac9420b 100644 --- a/quaddtype/numpy_quaddtype/src/dragon4.c +++ b/quaddtype/numpy_quaddtype/src/dragon4.c @@ -20,10 +20,12 @@ #if 0 #define DEBUG_ASSERT(stmnt) assert(stmnt) #else -#define DEBUG_ASSERT(stmnt) do {} while(0) +#define DEBUG_ASSERT(stmnt) \ + do { \ + } while (0) #endif -#define c_BigInt_MaxBlocks 1023 +#define c_BigInt_MaxBlocks 1023 #define BIGINT_DRAGON4_GROUPSIZE 7 typedef struct BigInt { @@ -59,7 +61,7 @@ BigInt_Multiply10(BigInt *result) npy_uint32 *cur = result->blocks; npy_uint32 *end = result->blocks + result->length; - for ( ; cur != end; ++cur) { + for (; cur != end; ++cur) { npy_uint64 product = (npy_uint64)(*cur) * 10ull + carry; (*cur) = (npy_uint32)(product & bitmask_u64(32)); carry = product >> 32; @@ -73,16 +75,15 @@ BigInt_Multiply10(BigInt *result) } } -static npy_uint32 g_PowerOf10_U32[] = -{ - 1, /* 10 ^ 0 */ - 10, /* 10 ^ 1 */ - 100, /* 10 ^ 2 */ - 1000, /* 10 ^ 3 */ - 10000, /* 10 ^ 4 */ - 100000, /* 10 ^ 5 */ - 1000000, /* 10 ^ 6 */ - 10000000, /* 10 ^ 7 */ +static npy_uint32 g_PowerOf10_U32[] = { + 1, /* 10 ^ 0 */ + 10, /* 10 ^ 1 */ + 100, /* 10 ^ 2 */ + 1000, /* 10 ^ 3 */ + 10000, /* 10 ^ 4 */ + 100000, /* 10 ^ 5 */ + 1000000, /* 10 ^ 6 */ + 10000000, /* 10 ^ 7 */ }; /* @@ -92,194 +93,188 @@ static npy_uint32 g_PowerOf10_U32[] = * the BigInt struct which would allow us to store a minimal amount of * data here. */ -static BigInt g_PowerOf10_Big[] = -{ - /* 10 ^ 8 */ - { 1, { 100000000 } }, - /* 10 ^ 16 */ - { 2, { 0x6fc10000, 0x002386f2 } }, - /* 10 ^ 32 */ - { 4, { 0x00000000, 0x85acef81, 0x2d6d415b, 0x000004ee, } }, - /* 10 ^ 64 */ - { 7, { 0x00000000, 0x00000000, 0xbf6a1f01, 0x6e38ed64, 0xdaa797ed, - 0xe93ff9f4, 0x00184f03, } }, - /* 10 ^ 128 */ - { 14, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2e953e01, - 0x03df9909, 0x0f1538fd, 0x2374e42f, 0xd3cff5ec, 0xc404dc08, - 0xbccdb0da, 0xa6337f19, 0xe91f2603, 0x0000024e, } }, - /* 10 ^ 256 */ - { 27, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x982e7c01, 0xbed3875b, - 0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70, 0xd595d80f, - 0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e, - 0xcc5573c0, 0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc, - 0x5fdcefce, 0x000553f7, } }, - /* 10 ^ 512 */ - { 54, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0xfc6cf801, 0x77f27267, 0x8f9546dc, 0x5d96976f, - 0xb83a8a97, 0xc31e1ad9, 0x46c40513, 0x94e65747, 0xc88976c1, - 0x4475b579, 0x28f8733b, 0xaa1da1bf, 0x703ed321, 0x1e25cfea, - 0xb21a2f22, 0xbc51fb2e, 0x96e14f5d, 0xbfa3edac, 0x329c57ae, - 0xe7fc7153, 0xc3fc0695, 0x85a91924, 0xf95f635e, 0xb2908ee0, - 0x93abade4, 0x1366732a, 0x9449775c, 0x69be5b0e, 0x7343afac, - 0xb099bc81, 0x45a71d46, 0xa2699748, 0x8cb07303, 0x8a0b1f13, - 0x8cab8a97, 0xc1d238d9, 0x633415d4, 0x0000001c, } }, - /* 10 ^ 1024 */ - { 107, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x2919f001, 0xf55b2b72, 0x6e7c215b, - 0x1ec29f86, 0x991c4e87, 0x15c51a88, 0x140ac535, 0x4c7d1e1a, - 0xcc2cd819, 0x0ed1440e, 0x896634ee, 0x7de16cfb, 0x1e43f61f, - 0x9fce837d, 0x231d2b9c, 0x233e55c7, 0x65dc60d7, 0xf451218b, - 0x1c5cd134, 0xc9635986, 0x922bbb9f, 0xa7e89431, 0x9f9f2a07, - 0x62be695a, 0x8e1042c4, 0x045b7a74, 0x1abe1de3, 0x8ad822a5, - 0xba34c411, 0xd814b505, 0xbf3fdeb3, 0x8fc51a16, 0xb1b896bc, - 0xf56deeec, 0x31fb6bfd, 0xb6f4654b, 0x101a3616, 0x6b7595fb, - 0xdc1a47fe, 0x80d98089, 0x80bda5a5, 0x9a202882, 0x31eb0f66, - 0xfc8f1f90, 0x976a3310, 0xe26a7b7e, 0xdf68368a, 0x3ce3a0b8, - 0x8e4262ce, 0x75a351a2, 0x6cb0b6c9, 0x44597583, 0x31b5653f, - 0xc356e38a, 0x35faaba6, 0x0190fba0, 0x9fc4ed52, 0x88bc491b, - 0x1640114a, 0x005b8041, 0xf4f3235e, 0x1e8d4649, 0x36a8de06, - 0x73c55349, 0xa7e6bd2a, 0xc1a6970c, 0x47187094, 0xd2db49ef, - 0x926c3f5b, 0xae6209d4, 0x2d433949, 0x34f4a3c6, 0xd4305d94, - 0xd9d61a05, 0x00000325, } }, - /* 10 ^ 2048 */ - { 213, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1333e001, - 0xe3096865, 0xb27d4d3f, 0x49e28dcf, 0xec2e4721, 0xee87e354, - 0xb6067584, 0x368b8abb, 0xa5e5a191, 0x2ed56d55, 0xfd827773, - 0xea50d142, 0x51b78db2, 0x98342c9e, 0xc850dabc, 0x866ed6f1, - 0x19342c12, 0x92794987, 0xd2f869c2, 0x66912e4a, 0x71c7fd8f, - 0x57a7842d, 0x235552eb, 0xfb7fedcc, 0xf3861ce0, 0x38209ce1, - 0x9713b449, 0x34c10134, 0x8c6c54de, 0xa7a8289c, 0x2dbb6643, - 0xe3cb64f3, 0x8074ff01, 0xe3892ee9, 0x10c17f94, 0xa8f16f92, - 0xa8281ed6, 0x967abbb3, 0x5a151440, 0x9952fbed, 0x13b41e44, - 0xafe609c3, 0xa2bca416, 0xf111821f, 0xfb1264b4, 0x91bac974, - 0xd6c7d6ab, 0x8e48ff35, 0x4419bd43, 0xc4a65665, 0x685e5510, - 0x33554c36, 0xab498697, 0x0dbd21fe, 0x3cfe491d, 0x982da466, - 0xcbea4ca7, 0x9e110c7b, 0x79c56b8a, 0x5fc5a047, 0x84d80e2e, - 0x1aa9f444, 0x730f203c, 0x6a57b1ab, 0xd752f7a6, 0x87a7dc62, - 0x944545ff, 0x40660460, 0x77c1a42f, 0xc9ac375d, 0xe866d7ef, - 0x744695f0, 0x81428c85, 0xa1fc6b96, 0xd7917c7b, 0x7bf03c19, - 0x5b33eb41, 0x5715f791, 0x8f6cae5f, 0xdb0708fd, 0xb125ac8e, - 0x785ce6b7, 0x56c6815b, 0x6f46eadb, 0x4eeebeee, 0x195355d8, - 0xa244de3c, 0x9d7389c0, 0x53761abd, 0xcf99d019, 0xde9ec24b, - 0x0d76ce39, 0x70beb181, 0x2e55ecee, 0xd5f86079, 0xf56d9d4b, - 0xfb8886fb, 0x13ef5a83, 0x408f43c5, 0x3f3389a4, 0xfad37943, - 0x58ccf45c, 0xf82df846, 0x415c7f3e, 0x2915e818, 0x8b3d5cf4, - 0x6a445f27, 0xf8dbb57a, 0xca8f0070, 0x8ad803ec, 0xb2e87c34, - 0x038f9245, 0xbedd8a6c, 0xc7c9dee0, 0x0eac7d56, 0x2ad3fa14, - 0xe0de0840, 0xf775677c, 0xf1bd0ad5, 0x92be221e, 0x87fa1fb9, - 0xce9d04a4, 0xd2c36fa9, 0x3f6f7024, 0xb028af62, 0x907855ee, - 0xd83e49d6, 0x4efac5dc, 0xe7151aab, 0x77cd8c6b, 0x0a753b7d, - 0x0af908b4, 0x8c983623, 0xe50f3027, 0x94222771, 0x1d08e2d6, - 0xf7e928e6, 0xf2ee5ca6, 0x1b61b93c, 0x11eb962b, 0x9648b21c, - 0xce2bcba1, 0x34f77154, 0x7bbebe30, 0xe526a319, 0x8ce329ac, - 0xde4a74d2, 0xb5dc53d5, 0x0009e8b3, } }, - /* 10 ^ 4096 */ - { 426, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x2a67c001, 0xd4724e8d, - 0x8efe7ae7, 0xf89a1e90, 0xef084117, 0x54e05154, 0x13b1bb51, - 0x506be829, 0xfb29b172, 0xe599574e, 0xf0da6146, 0x806c0ed3, - 0xb86ae5be, 0x45155e93, 0xc0591cc2, 0x7e1e7c34, 0x7c4823da, - 0x1d1f4cce, 0x9b8ba1e8, 0xd6bfdf75, 0xe341be10, 0xc2dfae78, - 0x016b67b2, 0x0f237f1a, 0x3dbeabcd, 0xaf6a2574, 0xcab3e6d7, - 0x142e0e80, 0x61959127, 0x2c234811, 0x87009701, 0xcb4bf982, - 0xf8169c84, 0x88052f8c, 0x68dde6d4, 0xbc131761, 0xff0b0905, - 0x54ab9c41, 0x7613b224, 0x1a1c304e, 0x3bfe167b, 0x441c2d47, - 0x4f6cea9c, 0x78f06181, 0xeb659fb8, 0x30c7ae41, 0x947e0d0e, - 0xa1ebcad7, 0xd97d9556, 0x2130504d, 0x1a8309cb, 0xf2acd507, - 0x3f8ec72a, 0xfd82373a, 0x95a842bc, 0x280f4d32, 0xf3618ac0, - 0x811a4f04, 0x6dc3a5b4, 0xd3967a1b, 0x15b8c898, 0xdcfe388f, - 0x454eb2a0, 0x8738b909, 0x10c4e996, 0x2bd9cc11, 0x3297cd0c, - 0x655fec30, 0xae0725b1, 0xf4090ee8, 0x037d19ee, 0x398c6fed, - 0x3b9af26b, 0xc994a450, 0xb5341743, 0x75a697b2, 0xac50b9c1, - 0x3ccb5b92, 0xffe06205, 0xa8329761, 0xdfea5242, 0xeb83cadb, - 0xe79dadf7, 0x3c20ee69, 0x1e0a6817, 0x7021b97a, 0x743074fa, - 0x176ca776, 0x77fb8af6, 0xeca19beb, 0x92baf1de, 0xaf63b712, - 0xde35c88b, 0xa4eb8f8c, 0xe137d5e9, 0x40b464a0, 0x87d1cde8, - 0x42923bbd, 0xcd8f62ff, 0x2e2690f3, 0x095edc16, 0x59c89f1b, - 0x1fa8fd5d, 0x5138753d, 0x390a2b29, 0x80152f18, 0x2dd8d925, - 0xf984d83e, 0x7a872e74, 0xc19e1faf, 0xed4d542d, 0xecf9b5d0, - 0x9462ea75, 0xc53c0adf, 0x0caea134, 0x37a2d439, 0xc8fa2e8a, - 0x2181327e, 0x6e7bb827, 0x2d240820, 0x50be10e0, 0x5893d4b8, - 0xab312bb9, 0x1f2b2322, 0x440b3f25, 0xbf627ede, 0x72dac789, - 0xb608b895, 0x78787e2a, 0x86deb3f0, 0x6fee7aab, 0xbb9373f4, - 0x27ecf57b, 0xf7d8b57e, 0xfca26a9f, 0x3d04e8d2, 0xc9df13cb, - 0x3172826a, 0xcd9e8d7c, 0xa8fcd8e0, 0xb2c39497, 0x307641d9, - 0x1cc939c1, 0x2608c4cf, 0xb6d1c7bf, 0x3d326a7e, 0xeeaf19e6, - 0x8e13e25f, 0xee63302b, 0x2dfe6d97, 0x25971d58, 0xe41d3cc4, - 0x0a80627c, 0xab8db59a, 0x9eea37c8, 0xe90afb77, 0x90ca19cf, - 0x9ee3352c, 0x3613c850, 0xfe78d682, 0x788f6e50, 0x5b060904, - 0xb71bd1a4, 0x3fecb534, 0xb32c450c, 0x20c33857, 0xa6e9cfda, - 0x0239f4ce, 0x48497187, 0xa19adb95, 0xb492ed8a, 0x95aca6a8, - 0x4dcd6cd9, 0xcf1b2350, 0xfbe8b12a, 0x1a67778c, 0x38eb3acc, - 0xc32da383, 0xfb126ab1, 0xa03f40a8, 0xed5bf546, 0xe9ce4724, - 0x4c4a74fd, 0x73a130d8, 0xd9960e2d, 0xa2ebd6c1, 0x94ab6feb, - 0x6f233b7c, 0x49126080, 0x8e7b9a73, 0x4b8c9091, 0xd298f999, - 0x35e836b5, 0xa96ddeff, 0x96119b31, 0x6b0dd9bc, 0xc6cc3f8d, - 0x282566fb, 0x72b882e7, 0xd6769f3b, 0xa674343d, 0x00fc509b, - 0xdcbf7789, 0xd6266a3f, 0xae9641fd, 0x4e89541b, 0x11953407, - 0x53400d03, 0x8e0dd75a, 0xe5b53345, 0x108f19ad, 0x108b89bc, - 0x41a4c954, 0xe03b2b63, 0x437b3d7f, 0x97aced8e, 0xcbd66670, - 0x2c5508c2, 0x650ebc69, 0x5c4f2ef0, 0x904ff6bf, 0x9985a2df, - 0x9faddd9e, 0x5ed8d239, 0x25585832, 0xe3e51cb9, 0x0ff4f1d4, - 0x56c02d9a, 0x8c4ef804, 0xc1a08a13, 0x13fd01c8, 0xe6d27671, - 0xa7c234f4, 0x9d0176cc, 0xd0d73df2, 0x4d8bfa89, 0x544f10cd, - 0x2b17e0b2, 0xb70a5c7d, 0xfd86fe49, 0xdf373f41, 0x214495bb, - 0x84e857fd, 0x00d313d5, 0x0496fcbe, 0xa4ba4744, 0xe8cac982, - 0xaec29e6e, 0x87ec7038, 0x7000a519, 0xaeee333b, 0xff66e42c, - 0x8afd6b25, 0x03b4f63b, 0xbd7991dc, 0x5ab8d9c7, 0x2ed4684e, - 0x48741a6c, 0xaf06940d, 0x2fdc6349, 0xb03d7ecd, 0xe974996f, - 0xac7867f9, 0x52ec8721, 0xbcdd9d4a, 0x8edd2d00, 0x3557de06, - 0x41c759f8, 0x3956d4b9, 0xa75409f2, 0x123cd8a1, 0xb6100fab, - 0x3e7b21e2, 0x2e8d623b, 0x92959da2, 0xbca35f77, 0x200c03a5, - 0x35fcb457, 0x1bb6c6e4, 0xf74eb928, 0x3d5d0b54, 0x87cc1d21, - 0x4964046f, 0x18ae4240, 0xd868b275, 0x8bd2b496, 0x1c5563f4, - 0xc234d8f5, 0xf868e970, 0xf9151fff, 0xae7be4a2, 0x271133ee, - 0xbb0fd922, 0x25254932, 0xa60a9fc0, 0x104bcd64, 0x30290145, - 0x00000062, } }, +static BigInt g_PowerOf10_Big[] = { + /* 10 ^ 8 */ + {1, {100000000}}, + /* 10 ^ 16 */ + {2, {0x6fc10000, 0x002386f2}}, + /* 10 ^ 32 */ + {4, + { + 0x00000000, + 0x85acef81, + 0x2d6d415b, + 0x000004ee, + }}, + /* 10 ^ 64 */ + {7, + { + 0x00000000, + 0x00000000, + 0xbf6a1f01, + 0x6e38ed64, + 0xdaa797ed, + 0xe93ff9f4, + 0x00184f03, + }}, + /* 10 ^ 128 */ + {14, + { + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x2e953e01, + 0x03df9909, + 0x0f1538fd, + 0x2374e42f, + 0xd3cff5ec, + 0xc404dc08, + 0xbccdb0da, + 0xa6337f19, + 0xe91f2603, + 0x0000024e, + }}, + /* 10 ^ 256 */ + {27, + { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x982e7c01, 0xbed3875b, 0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70, + 0xd595d80f, 0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e, 0xcc5573c0, + 0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc, 0x5fdcefce, 0x000553f7, + }}, + /* 10 ^ 512 */ + {54, + { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xfc6cf801, 0x77f27267, 0x8f9546dc, 0x5d96976f, 0xb83a8a97, + 0xc31e1ad9, 0x46c40513, 0x94e65747, 0xc88976c1, 0x4475b579, 0x28f8733b, 0xaa1da1bf, + 0x703ed321, 0x1e25cfea, 0xb21a2f22, 0xbc51fb2e, 0x96e14f5d, 0xbfa3edac, 0x329c57ae, + 0xe7fc7153, 0xc3fc0695, 0x85a91924, 0xf95f635e, 0xb2908ee0, 0x93abade4, 0x1366732a, + 0x9449775c, 0x69be5b0e, 0x7343afac, 0xb099bc81, 0x45a71d46, 0xa2699748, 0x8cb07303, + 0x8a0b1f13, 0x8cab8a97, 0xc1d238d9, 0x633415d4, 0x0000001c, + }}, + /* 10 ^ 1024 */ + {107, + { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2919f001, 0xf55b2b72, 0x6e7c215b, + 0x1ec29f86, 0x991c4e87, 0x15c51a88, 0x140ac535, 0x4c7d1e1a, 0xcc2cd819, 0x0ed1440e, + 0x896634ee, 0x7de16cfb, 0x1e43f61f, 0x9fce837d, 0x231d2b9c, 0x233e55c7, 0x65dc60d7, + 0xf451218b, 0x1c5cd134, 0xc9635986, 0x922bbb9f, 0xa7e89431, 0x9f9f2a07, 0x62be695a, + 0x8e1042c4, 0x045b7a74, 0x1abe1de3, 0x8ad822a5, 0xba34c411, 0xd814b505, 0xbf3fdeb3, + 0x8fc51a16, 0xb1b896bc, 0xf56deeec, 0x31fb6bfd, 0xb6f4654b, 0x101a3616, 0x6b7595fb, + 0xdc1a47fe, 0x80d98089, 0x80bda5a5, 0x9a202882, 0x31eb0f66, 0xfc8f1f90, 0x976a3310, + 0xe26a7b7e, 0xdf68368a, 0x3ce3a0b8, 0x8e4262ce, 0x75a351a2, 0x6cb0b6c9, 0x44597583, + 0x31b5653f, 0xc356e38a, 0x35faaba6, 0x0190fba0, 0x9fc4ed52, 0x88bc491b, 0x1640114a, + 0x005b8041, 0xf4f3235e, 0x1e8d4649, 0x36a8de06, 0x73c55349, 0xa7e6bd2a, 0xc1a6970c, + 0x47187094, 0xd2db49ef, 0x926c3f5b, 0xae6209d4, 0x2d433949, 0x34f4a3c6, 0xd4305d94, + 0xd9d61a05, 0x00000325, + }}, + /* 10 ^ 2048 */ + {213, + { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x1333e001, 0xe3096865, 0xb27d4d3f, 0x49e28dcf, 0xec2e4721, 0xee87e354, + 0xb6067584, 0x368b8abb, 0xa5e5a191, 0x2ed56d55, 0xfd827773, 0xea50d142, 0x51b78db2, + 0x98342c9e, 0xc850dabc, 0x866ed6f1, 0x19342c12, 0x92794987, 0xd2f869c2, 0x66912e4a, + 0x71c7fd8f, 0x57a7842d, 0x235552eb, 0xfb7fedcc, 0xf3861ce0, 0x38209ce1, 0x9713b449, + 0x34c10134, 0x8c6c54de, 0xa7a8289c, 0x2dbb6643, 0xe3cb64f3, 0x8074ff01, 0xe3892ee9, + 0x10c17f94, 0xa8f16f92, 0xa8281ed6, 0x967abbb3, 0x5a151440, 0x9952fbed, 0x13b41e44, + 0xafe609c3, 0xa2bca416, 0xf111821f, 0xfb1264b4, 0x91bac974, 0xd6c7d6ab, 0x8e48ff35, + 0x4419bd43, 0xc4a65665, 0x685e5510, 0x33554c36, 0xab498697, 0x0dbd21fe, 0x3cfe491d, + 0x982da466, 0xcbea4ca7, 0x9e110c7b, 0x79c56b8a, 0x5fc5a047, 0x84d80e2e, 0x1aa9f444, + 0x730f203c, 0x6a57b1ab, 0xd752f7a6, 0x87a7dc62, 0x944545ff, 0x40660460, 0x77c1a42f, + 0xc9ac375d, 0xe866d7ef, 0x744695f0, 0x81428c85, 0xa1fc6b96, 0xd7917c7b, 0x7bf03c19, + 0x5b33eb41, 0x5715f791, 0x8f6cae5f, 0xdb0708fd, 0xb125ac8e, 0x785ce6b7, 0x56c6815b, + 0x6f46eadb, 0x4eeebeee, 0x195355d8, 0xa244de3c, 0x9d7389c0, 0x53761abd, 0xcf99d019, + 0xde9ec24b, 0x0d76ce39, 0x70beb181, 0x2e55ecee, 0xd5f86079, 0xf56d9d4b, 0xfb8886fb, + 0x13ef5a83, 0x408f43c5, 0x3f3389a4, 0xfad37943, 0x58ccf45c, 0xf82df846, 0x415c7f3e, + 0x2915e818, 0x8b3d5cf4, 0x6a445f27, 0xf8dbb57a, 0xca8f0070, 0x8ad803ec, 0xb2e87c34, + 0x038f9245, 0xbedd8a6c, 0xc7c9dee0, 0x0eac7d56, 0x2ad3fa14, 0xe0de0840, 0xf775677c, + 0xf1bd0ad5, 0x92be221e, 0x87fa1fb9, 0xce9d04a4, 0xd2c36fa9, 0x3f6f7024, 0xb028af62, + 0x907855ee, 0xd83e49d6, 0x4efac5dc, 0xe7151aab, 0x77cd8c6b, 0x0a753b7d, 0x0af908b4, + 0x8c983623, 0xe50f3027, 0x94222771, 0x1d08e2d6, 0xf7e928e6, 0xf2ee5ca6, 0x1b61b93c, + 0x11eb962b, 0x9648b21c, 0xce2bcba1, 0x34f77154, 0x7bbebe30, 0xe526a319, 0x8ce329ac, + 0xde4a74d2, 0xb5dc53d5, 0x0009e8b3, + }}, + /* 10 ^ 4096 */ + {426, + { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x2a67c001, 0xd4724e8d, 0x8efe7ae7, 0xf89a1e90, 0xef084117, + 0x54e05154, 0x13b1bb51, 0x506be829, 0xfb29b172, 0xe599574e, 0xf0da6146, 0x806c0ed3, + 0xb86ae5be, 0x45155e93, 0xc0591cc2, 0x7e1e7c34, 0x7c4823da, 0x1d1f4cce, 0x9b8ba1e8, + 0xd6bfdf75, 0xe341be10, 0xc2dfae78, 0x016b67b2, 0x0f237f1a, 0x3dbeabcd, 0xaf6a2574, + 0xcab3e6d7, 0x142e0e80, 0x61959127, 0x2c234811, 0x87009701, 0xcb4bf982, 0xf8169c84, + 0x88052f8c, 0x68dde6d4, 0xbc131761, 0xff0b0905, 0x54ab9c41, 0x7613b224, 0x1a1c304e, + 0x3bfe167b, 0x441c2d47, 0x4f6cea9c, 0x78f06181, 0xeb659fb8, 0x30c7ae41, 0x947e0d0e, + 0xa1ebcad7, 0xd97d9556, 0x2130504d, 0x1a8309cb, 0xf2acd507, 0x3f8ec72a, 0xfd82373a, + 0x95a842bc, 0x280f4d32, 0xf3618ac0, 0x811a4f04, 0x6dc3a5b4, 0xd3967a1b, 0x15b8c898, + 0xdcfe388f, 0x454eb2a0, 0x8738b909, 0x10c4e996, 0x2bd9cc11, 0x3297cd0c, 0x655fec30, + 0xae0725b1, 0xf4090ee8, 0x037d19ee, 0x398c6fed, 0x3b9af26b, 0xc994a450, 0xb5341743, + 0x75a697b2, 0xac50b9c1, 0x3ccb5b92, 0xffe06205, 0xa8329761, 0xdfea5242, 0xeb83cadb, + 0xe79dadf7, 0x3c20ee69, 0x1e0a6817, 0x7021b97a, 0x743074fa, 0x176ca776, 0x77fb8af6, + 0xeca19beb, 0x92baf1de, 0xaf63b712, 0xde35c88b, 0xa4eb8f8c, 0xe137d5e9, 0x40b464a0, + 0x87d1cde8, 0x42923bbd, 0xcd8f62ff, 0x2e2690f3, 0x095edc16, 0x59c89f1b, 0x1fa8fd5d, + 0x5138753d, 0x390a2b29, 0x80152f18, 0x2dd8d925, 0xf984d83e, 0x7a872e74, 0xc19e1faf, + 0xed4d542d, 0xecf9b5d0, 0x9462ea75, 0xc53c0adf, 0x0caea134, 0x37a2d439, 0xc8fa2e8a, + 0x2181327e, 0x6e7bb827, 0x2d240820, 0x50be10e0, 0x5893d4b8, 0xab312bb9, 0x1f2b2322, + 0x440b3f25, 0xbf627ede, 0x72dac789, 0xb608b895, 0x78787e2a, 0x86deb3f0, 0x6fee7aab, + 0xbb9373f4, 0x27ecf57b, 0xf7d8b57e, 0xfca26a9f, 0x3d04e8d2, 0xc9df13cb, 0x3172826a, + 0xcd9e8d7c, 0xa8fcd8e0, 0xb2c39497, 0x307641d9, 0x1cc939c1, 0x2608c4cf, 0xb6d1c7bf, + 0x3d326a7e, 0xeeaf19e6, 0x8e13e25f, 0xee63302b, 0x2dfe6d97, 0x25971d58, 0xe41d3cc4, + 0x0a80627c, 0xab8db59a, 0x9eea37c8, 0xe90afb77, 0x90ca19cf, 0x9ee3352c, 0x3613c850, + 0xfe78d682, 0x788f6e50, 0x5b060904, 0xb71bd1a4, 0x3fecb534, 0xb32c450c, 0x20c33857, + 0xa6e9cfda, 0x0239f4ce, 0x48497187, 0xa19adb95, 0xb492ed8a, 0x95aca6a8, 0x4dcd6cd9, + 0xcf1b2350, 0xfbe8b12a, 0x1a67778c, 0x38eb3acc, 0xc32da383, 0xfb126ab1, 0xa03f40a8, + 0xed5bf546, 0xe9ce4724, 0x4c4a74fd, 0x73a130d8, 0xd9960e2d, 0xa2ebd6c1, 0x94ab6feb, + 0x6f233b7c, 0x49126080, 0x8e7b9a73, 0x4b8c9091, 0xd298f999, 0x35e836b5, 0xa96ddeff, + 0x96119b31, 0x6b0dd9bc, 0xc6cc3f8d, 0x282566fb, 0x72b882e7, 0xd6769f3b, 0xa674343d, + 0x00fc509b, 0xdcbf7789, 0xd6266a3f, 0xae9641fd, 0x4e89541b, 0x11953407, 0x53400d03, + 0x8e0dd75a, 0xe5b53345, 0x108f19ad, 0x108b89bc, 0x41a4c954, 0xe03b2b63, 0x437b3d7f, + 0x97aced8e, 0xcbd66670, 0x2c5508c2, 0x650ebc69, 0x5c4f2ef0, 0x904ff6bf, 0x9985a2df, + 0x9faddd9e, 0x5ed8d239, 0x25585832, 0xe3e51cb9, 0x0ff4f1d4, 0x56c02d9a, 0x8c4ef804, + 0xc1a08a13, 0x13fd01c8, 0xe6d27671, 0xa7c234f4, 0x9d0176cc, 0xd0d73df2, 0x4d8bfa89, + 0x544f10cd, 0x2b17e0b2, 0xb70a5c7d, 0xfd86fe49, 0xdf373f41, 0x214495bb, 0x84e857fd, + 0x00d313d5, 0x0496fcbe, 0xa4ba4744, 0xe8cac982, 0xaec29e6e, 0x87ec7038, 0x7000a519, + 0xaeee333b, 0xff66e42c, 0x8afd6b25, 0x03b4f63b, 0xbd7991dc, 0x5ab8d9c7, 0x2ed4684e, + 0x48741a6c, 0xaf06940d, 0x2fdc6349, 0xb03d7ecd, 0xe974996f, 0xac7867f9, 0x52ec8721, + 0xbcdd9d4a, 0x8edd2d00, 0x3557de06, 0x41c759f8, 0x3956d4b9, 0xa75409f2, 0x123cd8a1, + 0xb6100fab, 0x3e7b21e2, 0x2e8d623b, 0x92959da2, 0xbca35f77, 0x200c03a5, 0x35fcb457, + 0x1bb6c6e4, 0xf74eb928, 0x3d5d0b54, 0x87cc1d21, 0x4964046f, 0x18ae4240, 0xd868b275, + 0x8bd2b496, 0x1c5563f4, 0xc234d8f5, 0xf868e970, 0xf9151fff, 0xae7be4a2, 0x271133ee, + 0xbb0fd922, 0x25254932, 0xa60a9fc0, 0x104bcd64, 0x30290145, 0x00000062, + }}, }; static int @@ -294,14 +289,14 @@ BigInt_IsZero(const BigInt *i) static int BigInt_IsEven(const BigInt *i) { - return (i->length == 0) || ( (i->blocks[0] % 2) == 0); + return (i->length == 0) || ((i->blocks[0] % 2) == 0); } static void BigInt_Copy(BigInt *dst, const BigInt *src) { npy_uint32 length = src->length; - npy_uint32 * dstp = dst->blocks; + npy_uint32 *dstp = dst->blocks; const npy_uint32 *srcp; for (srcp = src->blocks; srcp != src->blocks + length; ++dstp, ++srcp) { *dstp = *srcp; @@ -329,15 +324,13 @@ BigInt_ShiftLeft(BigInt *result, npy_uint32 shift) npy_uint32 i; /* copy blocks from high to low */ - for (pInCur = result->blocks + result->length, - pOutCur = pInCur + shiftBlocks; - pInCur >= pInBlocks; - --pInCur, --pOutCur) { + for (pInCur = result->blocks + result->length, pOutCur = pInCur + shiftBlocks; + pInCur >= pInBlocks; --pInCur, --pOutCur) { *pOutCur = *pInCur; } /* zero the remaining low blocks */ - for (i = 0; i < shiftBlocks; ++i) { + for (i = 0; i < shiftBlocks; ++i) { result->blocks[i] = 0; } @@ -373,7 +366,7 @@ BigInt_ShiftLeft(BigInt *result, npy_uint32 shift) /* output the final blocks */ DEBUG_ASSERT(outBlockIdx == shiftBlocks + 1); result->blocks[outBlockIdx] = highBits | lowBits; - result->blocks[outBlockIdx-1] = block << shiftBits; + result->blocks[outBlockIdx - 1] = block << shiftBits; /* zero the remaining low blocks */ for (i = 0; i < shiftBlocks; ++i) { @@ -460,7 +453,7 @@ BigInt_Multiply_int(BigInt *result, const BigInt *lhs, npy_uint32 rhs) npy_uint32 *resultCur = result->blocks; const npy_uint32 *pLhsCur = lhs->blocks; const npy_uint32 *pLhsEnd = lhs->blocks + lhs->length; - for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) { + for (; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) { npy_uint64 product = (npy_uint64)(*pLhsCur) * rhs + carry; *resultCur = (npy_uint32)(product & bitmask_u64(32)); carry = product >> 32; @@ -513,9 +506,8 @@ BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs) /* perform standard long multiplication for each small block */ resultStart = result->blocks; - for (smallCur = small->blocks; - smallCur != small->blocks + small->length; - ++smallCur, ++resultStart) { + for (smallCur = small->blocks; smallCur != small->blocks + small->length; + ++smallCur, ++resultStart) { /* * if non-zero, multiply against all the large blocks and add into the * result @@ -526,13 +518,12 @@ BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs) npy_uint32 *resultCur = resultStart; npy_uint64 carry = 0; do { - npy_uint64 product = (*resultCur) + - (*largeCur)*(npy_uint64)multiplier + carry; + npy_uint64 product = (*resultCur) + (*largeCur) * (npy_uint64)multiplier + carry; carry = product >> 32; *resultCur = product & bitmask_u64(32); ++largeCur; ++resultCur; - } while(largeCur != large->blocks + large->length); + } while (largeCur != large->blocks + large->length); DEBUG_ASSERT(resultCur < result->blocks + maxResultLen); *resultCur = (npy_uint32)(carry & bitmask_u64(32)); @@ -541,7 +532,7 @@ BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs) /* check if the terminating block has no set bits */ if (maxResultLen > 0 && result->blocks[maxResultLen - 1] == 0) { - result->length = maxResultLen-1; + result->length = maxResultLen - 1; } else { result->length = maxResultLen; @@ -599,7 +590,7 @@ BigInt_MultiplyPow10(BigInt *in, npy_uint32 exponent, BigInt *temp) } /* output the result */ - if (curTemp != in){ + if (curTemp != in) { BigInt_Copy(in, curTemp); } } @@ -676,14 +667,13 @@ BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs) result->length = large->length; /* Add each block and add carry the overflow to the next block */ - largeCur = large->blocks; - largeEnd = largeCur + large->length; - smallCur = small->blocks; - smallEnd = smallCur + small->length; + largeCur = large->blocks; + largeEnd = largeCur + large->length; + smallCur = small->blocks; + smallEnd = smallCur + small->length; resultCur = result->blocks; while (smallCur != smallEnd) { - npy_uint64 sum = carry + (npy_uint64)(*largeCur) + - (npy_uint64)(*smallCur); + npy_uint64 sum = carry + (npy_uint64)(*largeCur) + (npy_uint64)(*smallCur); carry = sum >> 32; *resultCur = sum & bitmask_u64(32); ++largeCur; @@ -703,8 +693,8 @@ BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs) /* If there's still a carry, append a new block */ if (carry != 0) { DEBUG_ASSERT(carry == 1); - DEBUG_ASSERT((npy_uint32)(resultCur - result->blocks) == - large->length && (large->length < c_BigInt_MaxBlocks)); + DEBUG_ASSERT((npy_uint32)(resultCur - result->blocks) == large->length && + (large->length < c_BigInt_MaxBlocks)); *resultCur = 1; result->length = large->length + 1; } @@ -723,7 +713,7 @@ BigInt_Multiply2(BigInt *result, const BigInt *in) npy_uint32 *resultCur = result->blocks; const npy_uint32 *pLhsCur = in->blocks; const npy_uint32 *pLhsEnd = in->blocks + in->length; - for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) { + for (; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) { npy_uint32 cur = *pLhsCur; *resultCur = (cur << 1) | carry; carry = cur >> 31; @@ -749,7 +739,7 @@ BigInt_Multiply2_inplace(BigInt *result) npy_uint32 *cur = result->blocks; npy_uint32 *end = result->blocks + result->length; - for ( ; cur != end; ++cur) { + for (; cur != end; ++cur) { npy_uint32 tmpcur = *cur; *cur = (tmpcur << 1) | carry; carry = tmpcur >> 31; @@ -802,10 +792,9 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor) * Check that the divisor has been correctly shifted into range and that it * is not smaller than the dividend in length. */ - DEBUG_ASSERT(!divisor->length == 0 && - divisor->blocks[divisor->length-1] >= 8 && - divisor->blocks[divisor->length-1] < bitmask_u64(32) && - dividend->length <= divisor->length); + DEBUG_ASSERT(!divisor->length == 0 && divisor->blocks[divisor->length - 1] >= 8 && + divisor->blocks[divisor->length - 1] < bitmask_u64(32) && + dividend->length <= divisor->length); /* * If the dividend is smaller than the divisor, the quotient is zero and the @@ -840,15 +829,14 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor) product = (npy_uint64)*divisorCur * (npy_uint64)quotient + carry; carry = product >> 32; - difference = (npy_uint64)*dividendCur - - (product & bitmask_u64(32)) - borrow; + difference = (npy_uint64)*dividendCur - (product & bitmask_u64(32)) - borrow; borrow = (difference >> 32) & 1; *dividendCur = difference & bitmask_u64(32); ++divisorCur; ++dividendCur; - } while(divisorCur <= finalDivisorBlock); + } while (divisorCur <= finalDivisorBlock); /* remove all leading zero blocks from dividend */ while (length > 0 && dividend->blocks[length - 1] == 0) { @@ -872,15 +860,14 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor) ++quotient; do { - npy_uint64 difference = (npy_uint64)*dividendCur - - (npy_uint64)*divisorCur - borrow; + npy_uint64 difference = (npy_uint64)*dividendCur - (npy_uint64)*divisorCur - borrow; borrow = (difference >> 32) & 1; *dividendCur = difference & bitmask_u64(32); ++divisorCur; ++dividendCur; - } while(divisorCur <= finalDivisorBlock); + } while (divisorCur <= finalDivisorBlock); /* remove all leading zero blocks from dividend */ while (length > 0 && dividend->blocks[length - 1] == 0) { @@ -896,25 +883,16 @@ BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor) static npy_uint32 LogBase2_32(npy_uint32 val) { - static const npy_uint8 logTable[256] = - { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - }; + static const npy_uint8 logTable[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; npy_uint32 temp; @@ -960,10 +938,10 @@ LogBase2_128(npy_uint64 hi, npy_uint64 lo) } static npy_uint32 -PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, - npy_uint32 mantissaHexWidth, char signbit) +PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, npy_uint32 mantissaHexWidth, + char signbit) { - npy_uint32 maxPrintLen = bufferSize-1; + npy_uint32 maxPrintLen = bufferSize - 1; npy_uint32 pos = 0; DEBUG_ASSERT(bufferSize > 0); @@ -974,12 +952,12 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, /* only print sign for inf values (though nan can have a sign set) */ if (signbit == '+') { - if (pos < maxPrintLen-1) { + if (pos < maxPrintLen - 1) { buffer[pos++] = '+'; } } else if (signbit == '-') { - if (pos < maxPrintLen-1) { + if (pos < maxPrintLen - 1) { buffer[pos++] = '-'; } } @@ -1012,11 +990,10 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, } static npy_uint32 -Dragon4(BigInt *bigints, const npy_int32 exponent, - const npy_uint32 mantissaBit, const npy_bool hasUnequalMargins, - const DigitMode digitMode, const CutoffMode cutoffMode, - npy_int32 cutoff_max, npy_int32 cutoff_min, char *pOutBuffer, - npy_uint32 bufferSize, npy_int32 *pOutExponent) +Dragon4(BigInt *bigints, const npy_int32 exponent, const npy_uint32 mantissaBit, + const npy_bool hasUnequalMargins, const DigitMode digitMode, const CutoffMode cutoffMode, + npy_int32 cutoff_max, npy_int32 cutoff_min, char *pOutBuffer, npy_uint32 bufferSize, + npy_int32 *pOutExponent) { char *curDigit = pOutBuffer; @@ -1031,7 +1008,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, * scaledMarginHigh will point to scaledMarginLow in the case they must be * equal to each other, otherwise it will point to optionalMarginHigh. */ - BigInt *mantissa = &bigints[0]; /* the only initialized bigint */ + BigInt *mantissa = &bigints[0]; /* the only initialized bigint */ BigInt *scale = &bigints[1]; BigInt *scaledValue = &bigints[2]; BigInt *scaledMarginLow = &bigints[3]; @@ -1044,7 +1021,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, const npy_float64 log10_2 = 0.30102999566398119521373889472449; npy_int32 digitExponent, hiBlock; npy_int32 cutoff_max_Exponent, cutoff_min_Exponent; - npy_uint32 outputDigit; /* current digit being output */ + npy_uint32 outputDigit; /* current digit being output */ npy_uint32 outputLen; npy_bool isEven = BigInt_IsEven(mantissa); npy_int32 cmp; @@ -1078,7 +1055,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, /* scaledValue = 2 * 2 * mantissa*2^exponent */ BigInt_ShiftLeft(scaledValue, exponent + 2); /* scale = 2 * 2 * 1 */ - BigInt_Set_uint32(scale, 4); + BigInt_Set_uint32(scale, 4); /* scaledMarginLow = 2 * 2^(exponent-1) */ BigInt_Pow2(scaledMarginLow, exponent); /* scaledMarginHigh = 2 * 2 * 2^(exponent-1) */ @@ -1157,8 +1134,8 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, * Warning: This calculation assumes npy_float64 is an IEEE-binary64 * float. This line may need to be updated if this is not the case. */ - digitExponent = (npy_int32)( - ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69)); + digitExponent = + (npy_int32)(ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69)); /* * if the digit exponent is smaller than the smallest desired digit for @@ -1169,11 +1146,10 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, * accurate value or it will clamp it above the accurate value. */ if (cutoff_max >= 0 && cutoffMode == CutoffMode_FractionLength && - digitExponent <= -cutoff_max) { + digitExponent <= -cutoff_max) { digitExponent = -cutoff_max + 1; } - /* Divide value by 10^digitExponent. */ if (digitExponent > 0) { /* A positive exponent creates a division so we multiply the scale. */ @@ -1184,7 +1160,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, * A negative exponent creates a multiplication so we multiply up the * scaledValue, scaledMarginLow and scaledMarginHigh. */ - BigInt *temp=temp1, *pow10=temp2; + BigInt *temp = temp1, *pow10 = temp2; BigInt_Pow10(pow10, -digitExponent, temp); BigInt_Multiply(temp, scaledValue, pow10); @@ -1264,7 +1240,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, } /* Output the exponent of the first digit we will print */ - *pOutExponent = digitExponent-1; + *pOutExponent = digitExponent - 1; /* * In preparation for calling BigInt_DivideWithRemainder_MaxQuotient9(), we @@ -1312,11 +1288,10 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, for (;;) { BigInt *scaledValueHigh = temp1; - digitExponent = digitExponent-1; + digitExponent = digitExponent - 1; /* divide out the scale to extract the digit */ - outputDigit = - BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); + outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); DEBUG_ASSERT(outputDigit < 10); /* update the high end of the value */ @@ -1332,7 +1307,7 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, cmp = BigInt_Compare(scaledValueHigh, scale); high = isEven ? (cmp >= 0) : (cmp > 0); if (((low | high) & (digitExponent <= cutoff_min_Exponent)) | - (digitExponent == cutoff_max_Exponent)) { + (digitExponent == cutoff_max_Exponent)) { break; } @@ -1358,15 +1333,13 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, high = NPY_FALSE; for (;;) { - digitExponent = digitExponent-1; + digitExponent = digitExponent - 1; /* divide out the scale to extract the digit */ - outputDigit = - BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); + outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); DEBUG_ASSERT(outputDigit < 10); - if ((scaledValue->length == 0) | - (digitExponent == cutoff_max_Exponent)) { + if ((scaledValue->length == 0) | (digitExponent == cutoff_max_Exponent)) { break; } @@ -1448,15 +1421,14 @@ Dragon4(BigInt *bigints, const npy_int32 exponent, } static npy_uint32 -FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, - npy_int32 exponent, char signbit, npy_uint32 mantissaBit, - npy_bool hasUnequalMargins, DigitMode digit_mode, - CutoffMode cutoff_mode, npy_int32 precision, - npy_int32 min_digits, TrimMode trim_mode, - npy_int32 digits_left, npy_int32 digits_right) +FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, + char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, + DigitMode digit_mode, CutoffMode cutoff_mode, npy_int32 precision, + npy_int32 min_digits, TrimMode trim_mode, npy_int32 digits_left, + npy_int32 digits_right) { npy_int32 printExponent; - npy_int32 numDigits, numWholeDigits=0, has_sign=0; + npy_int32 numDigits, numWholeDigits = 0, has_sign = 0; npy_int32 add_digits; npy_int32 maxPrintLen = (npy_int32)bufferSize - 1, pos = 0; @@ -1479,9 +1451,8 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, has_sign = 1; } - numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, - digit_mode, cutoff_mode, precision, min_digits, - buffer + has_sign, maxPrintLen - has_sign, + numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, digit_mode, cutoff_mode, + precision, min_digits, buffer + has_sign, maxPrintLen - has_sign, &printExponent); DEBUG_ASSERT(numDigits > 0); @@ -1490,7 +1461,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, /* if output has a whole number */ if (printExponent >= 0) { /* leave the whole number at the start of the buffer */ - numWholeDigits = printExponent+1; + numWholeDigits = printExponent + 1; if (numDigits <= numWholeDigits) { npy_int32 count = numWholeDigits - numDigits; pos += numDigits; @@ -1502,7 +1473,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, /* add trailing zeros up to the decimal point */ numDigits += count; - for ( ; count > 0; count--) { + for (; count > 0; count--) { buffer[pos++] = '0'; } } @@ -1516,8 +1487,8 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, numFractionDigits = maxFractionDigits; } - memmove(buffer + pos + numWholeDigits + 1, - buffer + pos + numWholeDigits, numFractionDigits); + memmove(buffer + pos + numWholeDigits + 1, buffer + pos + numWholeDigits, + numFractionDigits); pos += numWholeDigits; buffer[pos] = '.'; numDigits = numWholeDigits + 1 + numFractionDigits; @@ -1548,8 +1519,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, numFractionDigits = maxFractionDigits; } - memmove(buffer + pos + digitsStartIdx, buffer + pos, - numFractionDigits); + memmove(buffer + pos + digitsStartIdx, buffer + pos, numFractionDigits); /* insert the leading zeros */ for (i = 2; i < digitsStartIdx; ++i) { @@ -1563,7 +1533,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, /* add the decimal point */ if (pos + 1 < maxPrintLen) { - buffer[pos+1] = '.'; + buffer[pos + 1] = '.'; } /* add the initial zero */ @@ -1576,8 +1546,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, } /* always add decimal point, except for DprZeros mode */ - if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && - pos < maxPrintLen) { + if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && pos < maxPrintLen) { buffer[pos++] = '.'; } @@ -1594,8 +1563,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, numFractionDigits++; } } - else if (trim_mode == TrimMode_None && - desiredFractionalDigits > numFractionDigits && + else if (trim_mode == TrimMode_None && desiredFractionalDigits > numFractionDigits && pos < maxPrintLen) { /* add trailing zeros up to add_digits length */ /* compute the number of trailing zeros needed */ @@ -1605,7 +1573,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, } numFractionDigits += count; - for ( ; count > 0; count--) { + for (; count > 0; count--) { buffer[pos++] = '0'; } } @@ -1616,19 +1584,19 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, * depending on trim settings. */ if (trim_mode != TrimMode_None && numFractionDigits > 0) { - while (buffer[pos-1] == '0') { + while (buffer[pos - 1] == '0') { pos--; numFractionDigits--; } - if (buffer[pos-1] == '.') { + if (buffer[pos - 1] == '.') { /* in TrimMode_LeaveOneZero, add trailing 0 back */ - if (trim_mode == TrimMode_LeaveOneZero){ + if (trim_mode == TrimMode_LeaveOneZero) { buffer[pos++] = '0'; numFractionDigits++; } /* in TrimMode_DptZeros, remove trailing decimal point */ else if (trim_mode == TrimMode_DptZeros) { - pos--; + pos--; } } } @@ -1638,8 +1606,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 count = digits_right - numFractionDigits; /* in trim_mode DptZeros, if right padding, add a space for the . */ - if (trim_mode == TrimMode_DptZeros && numFractionDigits == 0 - && pos < maxPrintLen) { + if (trim_mode == TrimMode_DptZeros && numFractionDigits == 0 && pos < maxPrintLen) { buffer[pos++] = ' '; } @@ -1647,7 +1614,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, count = maxPrintLen - pos; } - for ( ; count > 0; count--) { + for (; count > 0; count--) { buffer[pos++] = ' '; } } @@ -1664,7 +1631,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, memmove(buffer + shift, buffer, count); } pos = shift + count; - for ( ; shift > 0; shift--) { + for (; shift > 0; shift--) { buffer[shift - 1] = ' '; } } @@ -1677,11 +1644,10 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, } static npy_uint32 -FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, - npy_int32 exponent, char signbit, npy_uint32 mantissaBit, - npy_bool hasUnequalMargins, DigitMode digit_mode, - npy_int32 precision, npy_int32 min_digits, TrimMode trim_mode, - npy_int32 digits_left, npy_int32 exp_digits) +FormatScientific(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, + char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, + DigitMode digit_mode, npy_int32 precision, npy_int32 min_digits, + TrimMode trim_mode, npy_int32 digits_left, npy_int32 exp_digits) { npy_int32 printExponent; npy_int32 numDigits; @@ -1714,17 +1680,15 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, pCurOut++; --bufferSize; } - else if (signbit == '-' && bufferSize > 1) { + else if (signbit == '-' && bufferSize > 1) { *pCurOut = '-'; pCurOut++; --bufferSize; } - numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, - digit_mode, CutoffMode_TotalLength, - precision < 0 ? -1 : precision + 1, - min_digits < 0 ? -1 : min_digits + 1, - pCurOut, bufferSize, &printExponent); + numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, digit_mode, + CutoffMode_TotalLength, precision < 0 ? -1 : precision + 1, + min_digits < 0 ? -1 : min_digits + 1, pCurOut, bufferSize, &printExponent); DEBUG_ASSERT(numDigits > 0); DEBUG_ASSERT(numDigits <= bufferSize); @@ -1736,12 +1700,12 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, } /* insert the decimal point prior to the fractional number */ - numFractionDigits = numDigits-1; + numFractionDigits = numDigits - 1; if (numFractionDigits > 0 && bufferSize > 1) { npy_int32 maxFractionDigits = (npy_int32)bufferSize - 2; if (numFractionDigits > maxFractionDigits) { - numFractionDigits = maxFractionDigits; + numFractionDigits = maxFractionDigits; } memmove(pCurOut + 1, pCurOut, numFractionDigits); @@ -1751,8 +1715,7 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, } /* always add decimal point, except for DprZeros mode */ - if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && - bufferSize > 1) { + if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && bufferSize > 1) { *pCurOut = '.'; ++pCurOut; --bufferSize; @@ -1838,12 +1801,12 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, printExponent /= 10; } /* count back over leading zeros */ - for (i = 5; i > exp_digits && digits[i-1] == 0; i--) { + for (i = 5; i > exp_digits && digits[i - 1] == 0; i--) { } exp_size = i; /* write remaining digits to tmp buf */ for (i = exp_size; i > 0; i--) { - exponentBuffer[2 + (exp_size-i)] = (char)('0' + digits[i-1]); + exponentBuffer[2 + (exp_size - i)] = (char)('0' + digits[i - 1]); } /* copy the exponent buffer into the output */ @@ -1856,7 +1819,6 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, bufferSize -= count; } - DEBUG_ASSERT(bufferSize > 0); pCurOut[0] = '\0'; @@ -1864,28 +1826,24 @@ FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, } static npy_uint32 -Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, - npy_int32 exponent, char signbit, npy_uint32 mantissaBit, - npy_bool hasUnequalMargins, Dragon4_Options *opt) +Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, + char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, + Dragon4_Options *opt) { /* format the value */ if (opt->scientific) { - return FormatScientific(buffer, bufferSize, mantissa, exponent, - signbit, mantissaBit, hasUnequalMargins, - opt->digit_mode, opt->precision, - opt->min_digits, opt->trim_mode, - opt->digits_left, opt->exp_digits); + return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, mantissaBit, + hasUnequalMargins, opt->digit_mode, opt->precision, opt->min_digits, + opt->trim_mode, opt->digits_left, opt->exp_digits); } else { - return FormatPositional(buffer, bufferSize, mantissa, exponent, - signbit, mantissaBit, hasUnequalMargins, - opt->digit_mode, opt->cutoff_mode, - opt->precision, opt->min_digits, opt->trim_mode, - opt->digits_left, opt->digits_right); + return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit, mantissaBit, + hasUnequalMargins, opt->digit_mode, opt->cutoff_mode, + opt->precision, opt->min_digits, opt->trim_mode, opt->digits_left, + opt->digits_right); } } - static npy_uint32 Dragon4_PrintFloat_Sleef_quad(Sleef_quad *value, Dragon4_Options *opt) { @@ -1920,9 +1878,9 @@ Dragon4_PrintFloat_Sleef_quad(Sleef_quad *value, Dragon4_Options *opt) if (floatSign != 0) { signbit = '-'; } - else if (opt->sign) { - signbit = '+'; - } + // else if (opt->sign) { + // signbit = '+'; + // } /* if this is a special value */ if (floatExponent == bitmask_u32(15)) { @@ -1934,26 +1892,24 @@ Dragon4_PrintFloat_Sleef_quad(Sleef_quad *value, Dragon4_Options *opt) /* factor the value into its parts */ if (floatExponent != 0) { /* normal */ - mantissa_hi = (1ull << 48) | mantissa_hi; + mantissa_hi = (1ull << 48) | mantissa_hi; /* mantissa_lo is unchanged */ - exponent = floatExponent - 16383 - 112; - mantissaBit = 112; - hasUnequalMargins = (floatExponent != 1) && (mantissa_hi == 0 && - mantissa_lo == 0); + exponent = floatExponent - 16383 - 112; + mantissaBit = 112; + hasUnequalMargins = (floatExponent != 1) && (mantissa_hi == 0 && mantissa_lo == 0); } else { /* subnormal */ - exponent = 1 - 16383 - 112; - mantissaBit = LogBase2_128(mantissa_hi, mantissa_lo); - hasUnequalMargins = NPY_FALSE; + exponent = 1 - 16383 - 112; + mantissaBit = LogBase2_128(mantissa_hi, mantissa_lo); + hasUnequalMargins = NPY_FALSE; } BigInt_Set_2x_uint64(&bigints[0], mantissa_hi, mantissa_lo); - return Format_floatbits(buffer, bufferSize, bigints, exponent, - signbit, mantissaBit, hasUnequalMargins, opt); + return Format_floatbits(buffer, bufferSize, bigints, exponent, signbit, mantissaBit, + hasUnequalMargins, opt); } - PyObject * Dragon4_Positional_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt) { @@ -1966,12 +1922,12 @@ Dragon4_Positional_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt) } PyObject * -Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode, - CutoffMode cutoff_mode, int precision, int min_digits, - int sign, TrimMode trim, int pad_left, int pad_right) +Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode, CutoffMode cutoff_mode, + int precision, int min_digits, int sign, TrimMode trim, int pad_left, + int pad_right) { Dragon4_Options opt; - + opt.scientific = 0; opt.digit_mode = digit_mode; opt.cutoff_mode = cutoff_mode; @@ -1998,9 +1954,8 @@ Dragon4_Scientific_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt) } PyObject * -Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precision, - int min_digits, int sign, TrimMode trim, int pad_left, - int exp_digits) +Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precision, int min_digits, + int sign, TrimMode trim, int pad_left, int exp_digits) { Dragon4_Options opt; @@ -2018,40 +1973,47 @@ Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precisio return Dragon4_Scientific_QuadDType_opt(val, &opt); } - PyObject * -Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, - int precision, int min_digits, int sign, TrimMode trim, - int pad_left, int pad_right) +Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, int precision, + int min_digits, int sign, TrimMode trim, int pad_left, int pad_right) { npy_double v; if (PyArray_IsScalar(obj, QuadPrecDType)) { QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; if (quad_obj->backend == BACKEND_SLEEF) { - return Dragon4_Positional_QuadDType(&quad_obj->value.sleef_value, digit_mode, cutoff_mode, precision, min_digits, sign, trim, pad_left, pad_right); - } else { + return Dragon4_Positional_QuadDType(&quad_obj->value.sleef_value, digit_mode, + cutoff_mode, precision, min_digits, sign, trim, + pad_left, pad_right); + } + else { Sleef_quad sleef_val = Sleef_cast_from_doubleq1(quad_obj->value.longdouble_value); - return Dragon4_Positional_QuadDType(&sleef_val, digit_mode, cutoff_mode, precision, min_digits, sign, trim, pad_left, pad_right); + return Dragon4_Positional_QuadDType(&sleef_val, digit_mode, cutoff_mode, precision, + min_digits, sign, trim, pad_left, pad_right); } } + + return NULL; } PyObject * -Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision, - int min_digits, int sign, TrimMode trim, int pad_left, - int exp_digits) +Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision, int min_digits, int sign, + TrimMode trim, int pad_left, int exp_digits) { npy_double val; if (PyArray_IsScalar(obj, QuadPrecDType)) { QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; if (quad_obj->backend == BACKEND_SLEEF) { - return Dragon4_Scientific_QuadDType(&quad_obj->value.sleef_value, digit_mode, precision, min_digits, sign, trim, pad_left, exp_digits); - } else { + return Dragon4_Scientific_QuadDType(&quad_obj->value.sleef_value, digit_mode, precision, + min_digits, sign, trim, pad_left, exp_digits); + } + else { Sleef_quad sleef_val = Sleef_cast_from_doubleq1(quad_obj->value.longdouble_value); - return Dragon4_Scientific_QuadDType(&sleef_val, digit_mode, precision, min_digits, sign, trim, pad_left, exp_digits); + return Dragon4_Scientific_QuadDType(&sleef_val, digit_mode, precision, min_digits, sign, + trim, pad_left, exp_digits); } } + return NULL; } \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c index 4524f3db..bdc6a649 100644 --- a/quaddtype/numpy_quaddtype/src/dtype.c +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -50,10 +50,8 @@ quad_store(char *data_ptr, void *x, QuadBackendType backend) QuadPrecDTypeObject * new_quaddtype_instance(QuadBackendType backend) { - if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE) - { - PyErr_SetString(PyExc_TypeError, - "Backend must be sleef or longdouble"); + if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE) { + PyErr_SetString(PyExc_TypeError, "Backend must be sleef or longdouble"); return NULL; } @@ -71,7 +69,6 @@ new_quaddtype_instance(QuadBackendType backend) static QuadPrecDTypeObject * ensure_canonical(QuadPrecDTypeObject *self) { - Py_INCREF(self); return self; } @@ -79,7 +76,6 @@ ensure_canonical(QuadPrecDTypeObject *self) static QuadPrecDTypeObject * common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) { - if (dtype1->backend != dtype2->backend) { PyErr_SetString(PyExc_TypeError, "Cannot find common instance for QuadPrecDTypes with different backends"); @@ -92,7 +88,6 @@ common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) static PyArray_DTypeMeta * common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) { - // Promote integer and floating-point types to QuadPrecDType if (other->type_num >= 0 && (PyTypeNum_ISINTEGER(other->type_num) || PyTypeNum_ISFLOAT(other->type_num))) { @@ -116,7 +111,7 @@ quadprec_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), P PyErr_SetString(PyExc_TypeError, "Can only store QuadPrecision in a QuadPrecDType array."); return NULL; } - + QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)obj; return (PyArray_Descr *)new_quaddtype_instance(quad_obj->backend); @@ -169,7 +164,7 @@ quadprec_getitem(QuadPrecDTypeObject *descr, char *dataptr) static PyArray_Descr * quadprec_default_descr(PyArray_DTypeMeta *cls) { - QuadPrecDTypeObject * temp = new_quaddtype_instance(BACKEND_SLEEF); + QuadPrecDTypeObject *temp = new_quaddtype_instance(BACKEND_SLEEF); return (PyArray_Descr *)temp; } @@ -181,6 +176,7 @@ static PyType_Slot QuadPrecDType_Slots[] = { {NPY_DT_setitem, &quadprec_setitem}, {NPY_DT_getitem, &quadprec_getitem}, {NPY_DT_default_descr, &quadprec_default_descr}, + {NPY_DT_PyArray_ArrFuncs_dotfunc, NULL}, {0, NULL}}; static PyObject * @@ -220,8 +216,6 @@ QuadPrecDType_str(QuadPrecDTypeObject *self) return PyUnicode_FromFormat("QuadPrecDType(backend='%s')", backend_str); } - - PyArray_DTypeMeta QuadPrecDType = { {{ PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecDType", From 0ac54649088cfb08dbd461287132f839aac91b1d Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Mon, 16 Sep 2024 15:16:12 +0530 Subject: [PATCH 14/32] added more unary func --- quaddtype/numpy_quaddtype/src/dtype.c | 8 +- quaddtype/numpy_quaddtype/src/ops.hpp | 99 +++++++++++++++++++++++++ quaddtype/numpy_quaddtype/src/scalar.c | 2 +- quaddtype/numpy_quaddtype/src/umath.cpp | 22 +++++- 4 files changed, 126 insertions(+), 5 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c index bdc6a649..839c6ab4 100644 --- a/quaddtype/numpy_quaddtype/src/dtype.c +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -50,9 +50,11 @@ quad_store(char *data_ptr, void *x, QuadBackendType backend) QuadPrecDTypeObject * new_quaddtype_instance(QuadBackendType backend) { + QuadBackendType target_backend = backend; if (backend != BACKEND_SLEEF && backend != BACKEND_LONGDOUBLE) { PyErr_SetString(PyExc_TypeError, "Backend must be sleef or longdouble"); return NULL; + // target_backend = BACKEND_SLEEF; } QuadPrecDTypeObject *new = (QuadPrecDTypeObject *)PyArrayDescr_Type.tp_new( @@ -60,9 +62,9 @@ new_quaddtype_instance(QuadBackendType backend) if (new == NULL) { return NULL; } - new->base.elsize = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); - new->base.alignment = (backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double); - new->backend = backend; + new->base.elsize = (target_backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + new->base.alignment = (target_backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double); + new->backend = target_backend; return new; } diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp index 9b3eeed1..83c7ce3c 100644 --- a/quaddtype/numpy_quaddtype/src/ops.hpp +++ b/quaddtype/numpy_quaddtype/src/ops.hpp @@ -110,6 +110,49 @@ quad_exp2(Sleef_quad *op, Sleef_quad *out) return 0; } +static inline int +quad_sin(Sleef_quad *op, Sleef_quad *out) +{ + *out = Sleef_sinq1_u10(*op); + return 0; +} + +static inline int +quad_cos(Sleef_quad *op, Sleef_quad *out) +{ + *out = Sleef_cosq1_u10(*op); + return 0; +} + +static inline int +quad_tan(Sleef_quad *op, Sleef_quad *out) +{ + *out = Sleef_tanq1_u10(*op); + return 0; +} + +static inline int +quad_asin(Sleef_quad *op, Sleef_quad *out) +{ + *out = Sleef_asinq1_u10(*op); + return 0; +} + +static inline int +quad_acos(Sleef_quad *op, Sleef_quad *out) +{ + *out = Sleef_acosq1_u10(*op); + return 0; +} + +static inline int +quad_atan(Sleef_quad *op, Sleef_quad *out) +{ + *out = Sleef_atanq1_u10(*op); + return 0; +} + + // Unary long double operations typedef int (*unary_op_longdouble_def)(long double *, long double *); @@ -218,6 +261,48 @@ ld_exp2(long double *op, long double *out) return 0; } +static inline int +ld_sin(long double *op, long double *out) +{ + *out = sinl(*op); + return 0; +} + +static inline int +ld_cos(long double *op, long double *out) +{ + *out = cosl(*op); + return 0; +} + +static inline int +ld_tan(long double *op, long double *out) +{ + *out = tanl(*op); + return 0; +} + +static inline int +ld_asin(long double *op, long double *out) +{ + *out = asinl(*op); + return 0; +} + +static inline int +ld_acos(long double *op, long double *out) +{ + *out = acosl(*op); + return 0; +} + +static inline int +ld_atan(long double *op, long double *out) +{ + *out = atanl(*op); + return 0; +} + // Binary Quad operations typedef int (*binary_op_quad_def)(Sleef_quad *, Sleef_quad *, Sleef_quad *); @@ -277,6 +362,13 @@ quad_maximum(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) return 0; } +static inline int +quad_atan2(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) +{ + *out = Sleef_atan2q1_u10(*in1, *in2); + return 0; +} + // Binary long double operations typedef int (*binary_op_longdouble_def)(long double *, long double *, long double *); @@ -336,6 +428,13 @@ ld_maximum(long double *out, long double *in1, long double *in2) return 0; } +static inline int +ld_atan2(long double *out, long double *in1, long double *in2) +{ + *out = atan2l(*in1, *in2); + return 0; +} + // comparison quad functions typedef npy_bool (*cmp_quad_def)(const Sleef_quad *, const Sleef_quad *); diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index f1cbc6d4..1a3ffa3a 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -232,6 +232,6 @@ PyTypeObject QuadPrecision_Type = { int init_quadprecision_scalar(void) { - QuadPrecision_Type.tp_base = &PyFloat_Type; // this is not working (subclassing to np.floating) + // QuadPrecision_Type.tp_base = &PyFloatingArrType_Type; return PyType_Ready(&QuadPrecision_Type); } \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index de2d982d..5e9e7619 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -217,6 +217,24 @@ init_quad_unary_ops(PyObject *numpy) if (create_quad_unary_ufunc(numpy, "exp2") < 0) { return -1; } + if (create_quad_unary_ufunc(numpy, "sin") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "cos") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "tan") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arcsin") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arccos") < 0) { + return -1; + } + if (create_quad_unary_ufunc(numpy, "arctan") < 0) { + return -1; + } return 0; } @@ -315,7 +333,6 @@ static int quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) { - int nin = ufunc->nin; int nargs = ufunc->nargs; PyArray_DTypeMeta *common = NULL; @@ -473,6 +490,9 @@ init_quad_binary_ops(PyObject *numpy) if (create_quad_binary_ufunc(numpy, "maximum") < 0) { return -1; } + if (create_quad_binary_ufunc(numpy, "arctan2") < 0) { + return -1; + } return 0; } From 25dd6b3a5d2cdeb51bbc32650b240daa13a8ea76 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Mon, 16 Sep 2024 18:02:42 +0530 Subject: [PATCH 15/32] removing testing files --- temp.py | 283 -------------------------------------------------------- 1 file changed, 283 deletions(-) delete mode 100644 temp.py diff --git a/temp.py b/temp.py deleted file mode 100644 index 3ef85e95..00000000 --- a/temp.py +++ /dev/null @@ -1,283 +0,0 @@ -import numpy_quaddtype as npq -import numpy as np - - -def test_scalar_ops(backend): - print(f"\nTesting scalar operations for {backend} backend:") - - # Create QuadPrecision instances - q1 = npq.QuadPrecision( - "3.14159265358979323846264338327950288", backend=backend) - q2 = npq.QuadPrecision( - "-2.71828182845904523536028747135266250", backend=backend) - - # Test unary operations - print("\nUnary operations:") - print(f" Negation of q1: {-q1}") - print(f" Absolute value of q2: {abs(q2)}") - - # Test binary operations - print("\nBinary operations:") - print(f" Addition: {q1 + q2}") - print(f" Subtraction: {q1 - q2}") - print(f" Multiplication: {q1 * q2}") - print(f" Division: {q1 / q2}") - - # Test comparison operations - print("\nComparison operations:") - print(f" q1 == q2: {q1 == q2}") - print(f" q1 != q2: {q1 != q2}") - print(f" q1 < q2: {q1 < q2}") - print(f" q1 <= q2: {q1 <= q2}") - print(f" q1 > q2: {q1 > q2}") - print(f" q1 >= q2: {q1 >= q2}") - - # Test operations with Python numbers - print("\nOperations with Python numbers:") - print(f" q1 + 1: {q1 + 1}") - print(f" q1 - 2.5: {q1 - 2.5}") - print(f" q1 * 3: {q1 * 3}") - print(f" q1 / 2: {q1 / 2}") - - # Test boolean conversion - print("\nBoolean conversion:") - print(f" bool(q1): {np.bool(q1)}") - print( - f" bool(npq.QuadPrecision('0', backend=backend)): {np.bool(npq.QuadPrecision('0', backend=backend))}") - - -def test_casting(backend): - print(f"\nTesting {backend} backend:") - - # Create QuadPrecision instances - q1 = npq.QuadPrecision( - "3.14159265358979323846264338327950288", backend=backend) - q2 = npq.QuadPrecision( - "-2.71828182845904523536028747135266250", backend=backend) - - # Test casting from QuadPrecision to numpy dtypes - print("Casting from QuadPrecision to numpy dtypes:") - print(f" float32: {np.float32(q1)}") - print(f" float64: {np.float64(q1)}") - print(f" int64: {np.int64(q1)}") - print(f" uint64: {np.uint64(q1)}") - - # Test casting from numpy dtypes to QuadPrecision - print("\nCasting from numpy dtypes to QuadPrecision:") - print( - f" float32: {np.float32(3.14159).astype(npq.QuadPrecDType(backend=backend))}") - print( - f" float64: {np.float64(2.71828182845904).astype(npq.QuadPrecDType(backend=backend))}") - print( - f" int64: {np.int64(-1234567890).astype(npq.QuadPrecDType(backend=backend))}") - print( - f" uint64: {np.uint64(9876543210).astype(npq.QuadPrecDType(backend=backend))}") - - # Test array operations - print("\nArray operations:") - q_array = np.array([q1, q2], dtype=npq.QuadPrecDType(backend=backend)) - print(f" QuadPrecision array: {q_array}") - - np_array = np.array([3.14, -2.71, 1.41, -1.73], dtype=np.float64) - q_from_np = np_array.astype(npq.QuadPrecDType(backend=backend)) - print(f" Numpy to QuadPrecision: {q_from_np}") - - back_to_np = np.array(q_from_np, dtype=np.float64) - print(f" QuadPrecision to Numpy: {back_to_np}") - - # Test precision maintenance - large_int = 12345678901234567890 - q_large = np.array([large_int], dtype=np.uint64).astype( - npq.QuadPrecDType(backend=backend))[0] - print(f"\nPrecision test:") - print(f" Original large int: {large_int}") - print(f" QuadPrecision: {q_large}") - print(f" Back to int: {np.uint64(q_large)}") - - # Test edge cases - - -def test_edge_cases(backend): - print(f"\nTesting negative numbers for {backend} backend:") - - # Test various negative numbers - test_values = [ - -1.0, - -1e10, - -1e100, - -1e300, - np.nextafter(np.finfo(np.float64).min, 0), - np.finfo(np.float64).min - ] - - for value in test_values: - q_value = npq.QuadPrecision(str(value), backend=backend) - print(f" Original: {value}") - print(f" QuadPrecision: {q_value}") - print(f" Back to float64: {np.float64(q_value)}") - print() - - # Test value beyond float64 precision - beyond_float64_precision = "1.7976931348623157081452742373170435e+308" - q_beyond = npq.QuadPrecision(beyond_float64_precision, backend=backend) - print(f" Beyond float64 precision: {q_beyond}") - q_float64_max = npq.QuadPrecision( - str(np.finfo(np.float64).max), backend=backend) - diff = q_beyond - q_float64_max - print(f" Difference from float64 max: {diff}") - print( - f" Difference is positive: {diff > npq.QuadPrecision('0', backend=backend)}") - - # Test epsilon (smallest representable difference between two numbers) - q_epsilon = npq.QuadPrecision( - str(np.finfo(np.float64).eps), backend=backend) - print(f" Float64 epsilon in QuadPrecision: {q_epsilon}") - q_one = npq.QuadPrecision("1", backend=backend) - q_one_plus_epsilon = q_one + q_epsilon - print(f" 1 + epsilon != 1: {q_one_plus_epsilon != q_one}") - print(f" (1 + epsilon) - 1: {q_one_plus_epsilon - q_one}") - - -def test_ufuncs(backend): - print(f"\nTesting ufuncs for {backend} backend:") - - # Create QuadPrecision arrays - q_array1 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend)) - q_array2 = np.array([1, 2, 3], dtype=npq.QuadPrecDType(backend=backend)) - - # Test unary ufuncs - print("\nUnary unfuncs:") - print(f" negative: {np.negative(q_array1)}") - print(f" absolute: {np.absolute(q_array1)}") - print(f" rint: {np.rint(q_array1)}") - print(f" floor: {np.floor(q_array1)}") - print(f" ceil: {np.ceil(q_array1)}") - print(f" trunc: {np.trunc(q_array1)}") - print(f" sqrt: {np.sqrt(q_array1)}") - print(f" square: {np.square(q_array1)}") - print(f" log: {np.log(q_array1)}") - print(f" log2: {np.log2(q_array1)}") - print(f" log10: {np.log10(q_array1)}") - print(f" exp: {np.exp(q_array1)}") - print(f" exp2: {np.exp2(q_array1)}") - - # Test binary ufuncs - print("\nBinary ufuncs:") - print(f" add: {np.add(q_array1, q_array2)}") - print(f" subtract: {np.subtract(q_array1, q_array2)}") - print(f" multiply: {np.multiply(q_array1, q_array2)}") - print(f" divide: {np.divide(q_array1, q_array2)}") - print(f" power: {np.power(q_array1, q_array2)}") - print(f" mod: {np.mod(q_array1, q_array2)}") - print(f" minimum: {np.minimum(q_array1, q_array2)}") - print(f" maximum: {np.maximum(q_array1, q_array2)}") - - # Test comparison ufuncs - print("\nComparison ufuncs:") - print(f" equal: {np.equal(q_array1, q_array2)}") - print(f" not_equal: {np.not_equal(q_array1, q_array2)}") - print(f" less: {np.less(q_array1, q_array2)}") - print(f" less_equal: {np.less_equal(q_array1, q_array2)}") - print(f" greater: {np.greater(q_array1, q_array2)}") - print(f" greater_equal: {np.greater_equal(q_array1, q_array2)}") - - # Test mixed operations with numpy arrays - print(f"Testing backend: {backend}") - print("\nMixed operations with numpy arrays:") - np_array = np.array([1.0, 2.0, 3.0], dtype=np.float64) - print(f" add: {np.add(q_array1, np_array)}") - print(f" multiply: {np.multiply(q_array1, np_array)}") - print(f" divide: {np.divide(q_array1, np_array)}") - - # Test reduction operations - print("\nReduction operations:") - print(f" sum: {np.sum(q_array1)}") - print(f" prod: {np.prod(q_array1)}") - print(f" min: {np.min(q_array1)}") - print(f" max: {np.max(q_array1)}") - -from numpy_quaddtype import QuadPrecision, QuadPrecDType - -def test_quad_precision(): - print("Testing QuadPrecision scalar:") - - # Test different initializations - values = [ - 0, - 1, - -1, - 3.14159265358979323846, - 1e100, - 1e-100, - float('inf'), - float('-inf'), - float('nan') - ] - - for val in values: - q = QuadPrecision(val) - print(f"Value: {val}") - print(f" str: {str(q)}") - print(f" repr: {repr(q)}") - - # Test different backends - print("\nTesting backends:") - q_sleef = QuadPrecision(3.14159265358979323846, backend='sleef') - q_longdouble = QuadPrecision(3.14159265358979323846, backend='longdouble') - print(f"Sleef: {q_sleef}") - print(f"Long double: {q_longdouble}") - -def test_quad_dtype(): - print("\nTesting QuadPrecDType:") - - # Create an array with QuadPrecDType - arr = np.array([0, 1, -1, 3.14159265358979323846, 1e100, 1e-100], - dtype=QuadPrecDType()) - - print("Array elements:") - for elem in arr: - print(f" {elem}") - - print("\nFull array:") - print(arr) - - # Test different backends - print("\nTesting backends in arrays:") - arr_sleef = np.array([3.14159265358979323846], dtype=QuadPrecDType(backend='sleef')) - arr_longdouble = np.array([3.14159265358979323846], dtype=QuadPrecDType(backend='longdouble')) - print(f"Sleef array: {arr_sleef}") - print(f"Long double array: {arr_longdouble}") - -def test_operations(): - print("\nTesting basic operations:") - a = QuadPrecision(3.14159265358979323846) - b = QuadPrecision(2.71828182845904523536) - - print(f"a = {a}") - print(f"b = {b}") - print(f"a + b = {a + b}") - print(f"a - b = {a - b}") - print(f"a * b = {a * b}") - print(f"a / b = {a / b}") - -def test(): - # Run tests for both backends - for backend in ['sleef', 'longdouble']: - test_scalar_ops(backend) - test_casting(backend) - test_edge_cases(backend) - test_ufuncs(backend) - test_quad_precision() - test_quad_dtype() - test_operations() - print("*"*50) - - print("All tests completed successfully") - -def dot(a, b): - r = np.dot(a, b) - return r - -if __name__ == "__main__": - a = np.array([1, 2, 3], dtype=QuadPrecDType()) - print(dot(a, a)) \ No newline at end of file From b719ba46e0bcee70459402d11a483f1ce9b4991e Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Mon, 16 Sep 2024 18:35:54 +0530 Subject: [PATCH 16/32] fixing import statements --- quaddtype/numpy_quaddtype/src/umath.cpp | 4 ++-- quaddtype/tests/test_quaddtype.py | 8 +------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 5e9e7619..91794dc6 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -333,6 +333,7 @@ static int quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) { + printf("calling promoter for ufunc %s\n", ufunc->name); int nin = ufunc->nin; int nargs = ufunc->nargs; PyArray_DTypeMeta *common = NULL; @@ -560,7 +561,6 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType}; PyType_Slot slots[] = { - {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors}, {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop}, {0, NULL}}; @@ -579,7 +579,7 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) } PyObject *promoter_capsule = - PyCapsule_New((void *)&quad_ufunc_promoter, "numpy._ufunc_promoter", NULL); + PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL); if (promoter_capsule == NULL) { return -1; } diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index 20ccd91c..11cf7671 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -3,7 +3,7 @@ import numpy as np import operator -from quaddtype import QuadPrecDType, QuadPrecision +from numpy_quaddtype import QuadPrecDType, QuadPrecision def test_create_scalar_simple(): @@ -17,12 +17,6 @@ def test_basic_equality(): "12.0") == QuadPrecision("12.00") -@pytest.mark.parametrize("val", ["123532.543", "12893283.5"]) -def test_scalar_repr(val): - expected = f"QuadPrecision('{str(QuadPrecision(val))}')" - assert repr(QuadPrecision(val)) == expected - - @pytest.mark.parametrize("op", ["add", "sub", "mul", "truediv", "pow"]) @pytest.mark.parametrize("other", ["3.0", "12.5", "100.0"]) def test_binary_ops(op, other): From fe6cabc031789dad74019f06c17b1d08febdbe64 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Mon, 16 Sep 2024 18:40:47 +0530 Subject: [PATCH 17/32] removed unnecessary prints --- quaddtype/numpy_quaddtype/src/umath.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 91794dc6..8d965fd4 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -333,7 +333,6 @@ static int quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) { - printf("calling promoter for ufunc %s\n", ufunc->name); int nin = ufunc->nin; int nargs = ufunc->nargs; PyArray_DTypeMeta *common = NULL; From 0fba040ebd5d62c1afad9d8856b637363298d354 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Tue, 17 Sep 2024 18:01:14 +0530 Subject: [PATCH 18/32] added separate aligned and unaligned ufunc support --- quaddtype/numpy_quaddtype/src/umath.cpp | 155 +++++++++++++++++++----- 1 file changed, 126 insertions(+), 29 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 8d965fd4..7b908134 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -104,9 +104,9 @@ quad_unary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtype template int -quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) +quad_generic_unary_op_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) { npy_intp N = dimensions[0]; char *in_ptr = data[0]; @@ -135,6 +135,34 @@ quad_generic_unary_op_strided_loop(PyArrayMethod_Context *context, char *const d return 0; } +template +int +quad_generic_unary_op_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + + while (N--) { + if (backend == BACKEND_SLEEF) { + sleef_op((Sleef_quad *)in_ptr, (Sleef_quad *)out_ptr); + } + else { + longdouble_op((long double *)in_ptr, (long double *)out_ptr); + } + in_ptr += in_stride; + out_ptr += out_stride; + } + return 0; +} + template int create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) @@ -149,7 +177,9 @@ create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) PyType_Slot slots[] = { {NPY_METH_resolve_descriptors, (void *)&quad_unary_op_resolve_descriptors}, {NPY_METH_strided_loop, - (void *)&quad_generic_unary_op_strided_loop}, + (void *)&quad_generic_unary_op_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_unary_op_strided_loop_unaligned}, {0, NULL}}; PyArrayMethod_Spec Spec = { @@ -157,7 +187,7 @@ create_quad_unary_ufunc(PyObject *numpy, const char *ufunc_name) .nin = 1, .nout = 1, .casting = NPY_NO_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)0, + .flags = NPY_METH_SUPPORTS_UNALIGNED, .dtypes = dtypes, .slots = slots, }; @@ -245,7 +275,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp PyArray_Descr *const given_descrs[], PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) { - QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; QuadBackendType target_backend; @@ -255,7 +284,8 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp if (descr_in1->backend != descr_in2->backend) { target_backend = BACKEND_LONGDOUBLE; casting = NPY_SAFE_CASTING; - } else { + } + else { target_backend = descr_in1->backend; } @@ -266,7 +296,8 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp if (!loop_descrs[i]) { return (NPY_CASTING)-1; } - } else { + } + else { Py_INCREF(given_descrs[i]); loop_descrs[i] = given_descrs[i]; } @@ -278,14 +309,16 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp if (!loop_descrs[2]) { return (NPY_CASTING)-1; } - } else { + } + else { QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)given_descrs[2]; if (descr_out->backend != target_backend) { loop_descrs[2] = (PyArray_Descr *)new_quaddtype_instance(target_backend); if (!loop_descrs[2]) { return (NPY_CASTING)-1; } - } else { + } + else { Py_INCREF(given_descrs[2]); loop_descrs[2] = given_descrs[2]; } @@ -295,9 +328,9 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp template int -quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) +quad_generic_binop_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) { npy_intp N = dimensions[0]; char *in1_ptr = data[0], *in2_ptr = data[1]; @@ -329,6 +362,37 @@ quad_generic_binop_strided_loop(PyArrayMethod_Context *context, char *const data return 0; } +template +int +quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + + while (N--) { + if (backend == BACKEND_SLEEF) { + sleef_op((Sleef_quad *)out_ptr, (Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr); + } + else { + longdouble_op((long double *)out_ptr, (long double *)in1_ptr, (long double *)in2_ptr); + } + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + static int quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) @@ -344,23 +408,19 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], for (int i = 0; i < 3; i++) { Py_INCREF(op_dtypes[1]); new_op_dtypes[i] = op_dtypes[1]; - } return 0; } // Check if any input or signature is QuadPrecision for (int i = 0; i < nin; i++) { - if (op_dtypes[i] == &QuadPrecDType) { has_quad = true; - } } if (has_quad) { common = &QuadPrecDType; - } else { for (int i = nin; i < nargs; i++) { @@ -368,7 +428,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], if (common == NULL) { Py_INCREF(signature[i]); common = signature[i]; - } else if (common != signature[i]) { Py_CLEAR(common); // Not homogeneous, unset common @@ -388,7 +447,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], return -1; } - } // Set all new_op_dtypes to the common dtype @@ -424,7 +482,9 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name) PyType_Slot slots[] = { {NPY_METH_resolve_descriptors, (void *)&quad_binary_op_resolve_descriptors}, {NPY_METH_strided_loop, - (void *)&quad_generic_binop_strided_loop}, + (void *)&quad_generic_binop_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_binop_strided_loop_unaligned}, {0, NULL}}; PyArrayMethod_Spec Spec = { @@ -432,7 +492,7 @@ create_quad_binary_ufunc(PyObject *numpy, const char *ufunc_name) .nin = 2, .nout = 1, .casting = NPY_NO_CASTING, - .flags = NPY_METH_IS_REORDERABLE, + .flags = (NPY_ARRAYMETHOD_FLAGS)(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_IS_REORDERABLE), .dtypes = dtypes, .slots = slots, }; @@ -500,9 +560,9 @@ init_quad_binary_ops(PyObject *numpy) template int -quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) +quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) { npy_intp N = dimensions[0]; char *in1_ptr = data[0], *in2_ptr = data[1]; @@ -513,7 +573,6 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[ QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; QuadBackendType backend = descr->backend; - size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); while (N--) { if (backend == BACKEND_SLEEF) { @@ -532,6 +591,42 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[ return 0; } +template +int +quad_generic_comp_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + quad_value in1, in2; + while (N--) { + memcpy(&in1, in1_ptr, elem_size); + memcpy(&in2, in2_ptr, elem_size); + + if (backend == BACKEND_SLEEF) { + *((npy_bool *)out_ptr) = sleef_comp(&in1.sleef_value, &in2.sleef_value); + } + else { + *((npy_bool *)out_ptr) = ld_comp(&in1.longdouble_value, &in2.longdouble_value); + } + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + NPY_NO_EXPORT int comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) @@ -559,16 +654,18 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType}; - PyType_Slot slots[] = { - {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop}, - {0, NULL}}; + PyType_Slot slots[] = {{NPY_METH_strided_loop, + (void *)&quad_generic_comp_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_comp_strided_loop_unaligned}, + {0, NULL}}; PyArrayMethod_Spec Spec = { .name = "quad_comp", .nin = 2, .nout = 1, .casting = NPY_NO_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)0, + .flags = NPY_METH_SUPPORTS_UNALIGNED, .dtypes = dtypes, .slots = slots, }; From 4f0a6044619a80ab8e5ff0b140d7b7dc91cdf273 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Tue, 17 Sep 2024 21:51:17 +0530 Subject: [PATCH 19/32] WIP --- quaddtype/numpy_quaddtype/src/casts.cpp | 4 +- quaddtype/numpy_quaddtype/src/dtype.c | 3 +- quaddtype/numpy_quaddtype/src/ops.hpp | 1 - quaddtype/numpy_quaddtype/src/scalar.c | 55 +++++++++++++----------- quaddtype/numpy_quaddtype/src/umath.cpp | 57 ++++++------------------- 5 files changed, 46 insertions(+), 74 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index 61809012..a626b366 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -272,15 +272,15 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], npy_intp *view_offset) { - + // todo: here it is converting this to SLEEF, losing data and getting 0 if (given_descrs[1] == NULL) { + printf("called\n"); loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF); if (loop_descrs[1] == nullptr) { return (NPY_CASTING)-1; } } else { - Py_INCREF(given_descrs[1]); loop_descrs[1] = given_descrs[1]; } diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c index 839c6ab4..4c3f73dd 100644 --- a/quaddtype/numpy_quaddtype/src/dtype.c +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -63,7 +63,8 @@ new_quaddtype_instance(QuadBackendType backend) return NULL; } new->base.elsize = (target_backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); - new->base.alignment = (target_backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double); + new->base.alignment = + (target_backend == BACKEND_SLEEF) ? _Alignof(Sleef_quad) : _Alignof(long double); new->backend = target_backend; return new; } diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp index 83c7ce3c..dcbb902e 100644 --- a/quaddtype/numpy_quaddtype/src/ops.hpp +++ b/quaddtype/numpy_quaddtype/src/ops.hpp @@ -152,7 +152,6 @@ quad_atan(Sleef_quad *op, Sleef_quad *out) return 0; } - // Unary long double operations typedef int (*unary_op_longdouble_def)(long double *, long double *); diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index 1a3ffa3a..5b09abe2 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -76,8 +76,7 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend) self->value.longdouble_value = (long double)val; } } - else - { + else { PyObject *type_str = PyObject_Str((PyObject *)Py_TYPE(value)); if (type_str != NULL) { const char *type_cstr = PyUnicode_AsUTF8(type_str); @@ -132,23 +131,25 @@ QuadPrecision_new(PyTypeObject *cls, PyObject *args, PyObject *kwargs) static PyObject * QuadPrecision_str_dragon4(QuadPrecisionObject *self) { - Dragon4_Options opt = { - .scientific = 0, - .digit_mode = DigitMode_Unique, - .cutoff_mode = CutoffMode_TotalLength, - .precision = SLEEF_QUAD_DIG, - .sign = 1, - .trim_mode = TrimMode_LeaveOneZero, - .digits_left = 1, - .digits_right = SLEEF_QUAD_DIG - }; + Dragon4_Options opt = {.scientific = 0, + .digit_mode = DigitMode_Unique, + .cutoff_mode = CutoffMode_TotalLength, + .precision = SLEEF_QUAD_DIG, + .sign = 1, + .trim_mode = TrimMode_LeaveOneZero, + .digits_left = 1, + .digits_right = SLEEF_QUAD_DIG}; if (self->backend == BACKEND_SLEEF) { - return Dragon4_Positional_QuadDType(&self->value.sleef_value, opt.digit_mode, opt.cutoff_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.digits_right); + return Dragon4_Positional_QuadDType( + &self->value.sleef_value, opt.digit_mode, opt.cutoff_mode, opt.precision, + opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.digits_right); } else { Sleef_quad sleef_val = Sleef_cast_from_doubleq1(self->value.longdouble_value); - return Dragon4_Positional_QuadDType(&sleef_val, opt.digit_mode, opt.cutoff_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.digits_right); + return Dragon4_Positional_QuadDType(&sleef_val, opt.digit_mode, opt.cutoff_mode, + opt.precision, opt.min_digits, opt.sign, opt.trim_mode, + opt.digits_left, opt.digits_right); } } @@ -181,24 +182,26 @@ QuadPrecision_repr(QuadPrecisionObject *self) static PyObject * QuadPrecision_repr_dragon4(QuadPrecisionObject *self) { - Dragon4_Options opt = { - .scientific = 1, - .digit_mode = DigitMode_Unique, - .cutoff_mode = CutoffMode_TotalLength, - .precision = SLEEF_QUAD_DIG, - .sign = 1, - .trim_mode = TrimMode_LeaveOneZero, - .digits_left = 1, - .exp_digits = 3 - }; + Dragon4_Options opt = {.scientific = 1, + .digit_mode = DigitMode_Unique, + .cutoff_mode = CutoffMode_TotalLength, + .precision = SLEEF_QUAD_DIG, + .sign = 1, + .trim_mode = TrimMode_LeaveOneZero, + .digits_left = 1, + .exp_digits = 3}; PyObject *str; if (self->backend == BACKEND_SLEEF) { - str = Dragon4_Scientific_QuadDType(&self->value.sleef_value, opt.digit_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.exp_digits); + str = Dragon4_Scientific_QuadDType(&self->value.sleef_value, opt.digit_mode, opt.precision, + opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, + opt.exp_digits); } else { Sleef_quad sleef_val = Sleef_cast_from_doubleq1(self->value.longdouble_value); - str = Dragon4_Scientific_QuadDType(&sleef_val, opt.digit_mode, opt.precision, opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, opt.exp_digits); + str = Dragon4_Scientific_QuadDType(&sleef_val, opt.digit_mode, opt.precision, + opt.min_digits, opt.sign, opt.trim_mode, opt.digits_left, + opt.exp_digits); } if (str == NULL) { diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 7b908134..828ace2b 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -397,11 +397,12 @@ static int quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) { + printf("called comparison promoter\n"); int nin = ufunc->nin; int nargs = ufunc->nargs; PyArray_DTypeMeta *common = NULL; bool has_quad = false; - + printf("dtyp1: %s dtype2: %s\n", get_dtype_name(op_dtypes[0]), get_dtype_name(op_dtypes[1])); // Handle the special case for reductions if (op_dtypes[0] == NULL) { assert(nin == 2 && ufunc->nout == 1); /* must be reduction */ @@ -415,6 +416,7 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], // Check if any input or signature is QuadPrecision for (int i = 0; i < nin; i++) { if (op_dtypes[i] == &QuadPrecDType) { + printf("Quaddtype found at index: %d\n", i); has_quad = true; } } @@ -431,7 +433,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], } else if (common != signature[i]) { Py_CLEAR(common); // Not homogeneous, unset common - break; } } @@ -459,6 +460,7 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], else { // Otherwise, use the common dtype Py_INCREF(common); + printf("setting output to %s dtype\n", get_dtype_name(common)); new_op_dtypes[i] = common; } } @@ -560,42 +562,9 @@ init_quad_binary_ops(PyObject *numpy) template int -quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) -{ - npy_intp N = dimensions[0]; - char *in1_ptr = data[0], *in2_ptr = data[1]; - char *out_ptr = data[2]; - npy_intp in1_stride = strides[0]; - npy_intp in2_stride = strides[1]; - npy_intp out_stride = strides[2]; - - QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; - QuadBackendType backend = descr->backend; - - while (N--) { - if (backend == BACKEND_SLEEF) { - *((npy_bool *)out_ptr) = - sleef_comp((const Sleef_quad *)in1_ptr, (const Sleef_quad *)in2_ptr); - } - else { - *((npy_bool *)out_ptr) = - ld_comp((const long double *)in1_ptr, (const long double *)in2_ptr); - } - - in1_ptr += in1_stride; - in2_ptr += in2_stride; - out_ptr += out_stride; - } - return 0; -} - -template -int -quad_generic_comp_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) +quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) { npy_intp N = dimensions[0]; char *in1_ptr = data[0], *in2_ptr = data[1]; @@ -617,6 +586,7 @@ quad_generic_comp_strided_loop_unaligned(PyArrayMethod_Context *context, char *c *((npy_bool *)out_ptr) = sleef_comp(&in1.sleef_value, &in2.sleef_value); } else { + printf("%Lf % Lf\n", in1.longdouble_value, in2.longdouble_value); *((npy_bool *)out_ptr) = ld_comp(&in1.longdouble_value, &in2.longdouble_value); } @@ -632,7 +602,6 @@ comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) { PyArray_DTypeMeta *new_signature[NPY_MAXARGS]; - memcpy(new_signature, signature, 3 * sizeof(PyArray_DTypeMeta *)); new_signature[2] = NULL; int res = quad_ufunc_promoter(ufunc, op_dtypes, new_signature, new_op_dtypes); @@ -654,11 +623,11 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType}; - PyType_Slot slots[] = {{NPY_METH_strided_loop, - (void *)&quad_generic_comp_strided_loop_aligned}, - {NPY_METH_unaligned_strided_loop, - (void *)&quad_generic_comp_strided_loop_unaligned}, - {0, NULL}}; + PyType_Slot slots[] = { + {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop}, + {NPY_METH_unaligned_strided_loop, + (void *)&quad_generic_comp_strided_loop}, + {0, NULL}}; PyArrayMethod_Spec Spec = { .name = "quad_comp", From 707d5d6123e59d3c2a1631998e8cdb447ec06724 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Wed, 18 Sep 2024 13:47:32 +0530 Subject: [PATCH 20/32] fixed longdouble comparison casting issue --- quaddtype/numpy_quaddtype/src/casts.cpp | 1 - quaddtype/numpy_quaddtype/src/umath.cpp | 62 +++++++++++++++++++++---- 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index a626b366..aa38e476 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -274,7 +274,6 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta { // todo: here it is converting this to SLEEF, losing data and getting 0 if (given_descrs[1] == NULL) { - printf("called\n"); loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF); if (loop_descrs[1] == nullptr) { return (NPY_CASTING)-1; diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 828ace2b..4e7fe949 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -282,6 +282,7 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp // Determine target backend and if casting is needed NPY_CASTING casting = NPY_NO_CASTING; if (descr_in1->backend != descr_in2->backend) { + target_backend = BACKEND_LONGDOUBLE; casting = NPY_SAFE_CASTING; } @@ -397,12 +398,12 @@ static int quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) { - printf("called comparison promoter\n"); + int nin = ufunc->nin; int nargs = ufunc->nargs; PyArray_DTypeMeta *common = NULL; bool has_quad = false; - printf("dtyp1: %s dtype2: %s\n", get_dtype_name(op_dtypes[0]), get_dtype_name(op_dtypes[1])); + // Handle the special case for reductions if (op_dtypes[0] == NULL) { assert(nin == 2 && ufunc->nout == 1); /* must be reduction */ @@ -416,7 +417,7 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], // Check if any input or signature is QuadPrecision for (int i = 0; i < nin; i++) { if (op_dtypes[i] == &QuadPrecDType) { - printf("Quaddtype found at index: %d\n", i); + has_quad = true; } } @@ -460,7 +461,7 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], else { // Otherwise, use the common dtype Py_INCREF(common); - printf("setting output to %s dtype\n", get_dtype_name(common)); + new_op_dtypes[i] = common; } } @@ -560,6 +561,47 @@ init_quad_binary_ops(PyObject *numpy) // comparison functions +static NPY_CASTING +quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], + PyArray_Descr *const given_descrs[], + PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) +{ + QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; + QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; + QuadBackendType target_backend; + + // As dealing with different backends then cast to boolean + NPY_CASTING casting = NPY_NO_CASTING; + if (descr_in1->backend != descr_in2->backend) { + target_backend = BACKEND_LONGDOUBLE; + casting = NPY_SAFE_CASTING; + } + else { + target_backend = descr_in1->backend; + } + + // Set up input descriptors, casting if necessary + for (int i = 0; i < 2; i++) { + if (((QuadPrecDTypeObject *)given_descrs[i])->backend != target_backend) { + loop_descrs[i] = (PyArray_Descr *)new_quaddtype_instance(target_backend); + if (!loop_descrs[i]) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[i]); + loop_descrs[i] = given_descrs[i]; + } + } + + // Set up output descriptor + loop_descrs[2] = PyArray_DescrFromType(NPY_BOOL); + if (!loop_descrs[2]) { + return (NPY_CASTING)-1; + } + return casting; +} + template int quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[], @@ -581,15 +623,18 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[ while (N--) { memcpy(&in1, in1_ptr, elem_size); memcpy(&in2, in2_ptr, elem_size); + npy_bool result; if (backend == BACKEND_SLEEF) { - *((npy_bool *)out_ptr) = sleef_comp(&in1.sleef_value, &in2.sleef_value); + result = sleef_comp(&in1.sleef_value, &in2.sleef_value); } else { - printf("%Lf % Lf\n", in1.longdouble_value, in2.longdouble_value); - *((npy_bool *)out_ptr) = ld_comp(&in1.longdouble_value, &in2.longdouble_value); + + result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); } + *((npy_bool *)out_ptr) = result; + in1_ptr += in1_stride; in2_ptr += in2_stride; out_ptr += out_stride; @@ -624,6 +669,7 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) PyArray_DTypeMeta *dtypes[3] = {&QuadPrecDType, &QuadPrecDType, &PyArray_BoolDType}; PyType_Slot slots[] = { + {NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors}, {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop}, {NPY_METH_unaligned_strided_loop, (void *)&quad_generic_comp_strided_loop}, @@ -633,7 +679,7 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) .name = "quad_comp", .nin = 2, .nout = 1, - .casting = NPY_NO_CASTING, + .casting = NPY_SAFE_CASTING, .flags = NPY_METH_SUPPORTS_UNALIGNED, .dtypes = dtypes, .slots = slots, From 0a75b9123e2e884fb7b150264088b08f122c7688 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Wed, 18 Sep 2024 15:39:46 +0530 Subject: [PATCH 21/32] aligned and unaligned comparison loops --- quaddtype/README.md | 9 +++-- quaddtype/numpy_quaddtype/src/umath.cpp | 48 +++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/quaddtype/README.md b/quaddtype/README.md index 97425387..60693cb9 100644 --- a/quaddtype/README.md +++ b/quaddtype/README.md @@ -9,9 +9,14 @@ pip install -i https://test.pypi.org/simple/ quaddtype ## Usage -``` +```python import numpy as np -from quaddtype import QuadPrecDType, QuadPrecision +from numpy_quaddtype import QuadPrecDType, QuadPrecision +# using sleef backend (default) np.array([1,2,3], dtype=QuadPrecDType()) +np.array([1,2,3], dtype=QuadPrecDType("sleef")) + +# using longdouble backend +np.array([1,2,3], dtype=QuadPrecDType("longdouble")) ``` diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 4e7fe949..587c0e93 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -633,7 +633,51 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[ result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); } - *((npy_bool *)out_ptr) = result; + memcpy(out_ptr, &result, sizeof(npy_bool)); + + in1_ptr += in1_stride; + in2_ptr += in2_stride; + out_ptr += out_stride; + } + return 0; +} + + +template +int +quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *in1_ptr = data[0], *in2_ptr = data[1]; + char *out_ptr = data[2]; + npy_intp in1_stride = strides[0]; + npy_intp in2_stride = strides[1]; + npy_intp out_stride = strides[2]; + + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = descr->backend; + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); + + quad_value in1, in2; + while (N--) + { + quad_value in1 = *(quad_value *)in1_ptr; + quad_value in2 = *(quad_value *)in2_ptr; + + npy_bool result; + + if (backend == BACKEND_SLEEF) + { + result = sleef_comp(&in1.sleef_value, &in2.sleef_value); + } + else + { + result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); + } + + *(npy_bool *)out_ptr = result; in1_ptr += in1_stride; in2_ptr += in2_stride; @@ -670,7 +714,7 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) PyType_Slot slots[] = { {NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop}, + {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop_aligned}, {NPY_METH_unaligned_strided_loop, (void *)&quad_generic_comp_strided_loop}, {0, NULL}}; From 0052e160b42dfb2f02728d2d1df91d901ac4f7d7 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Thu, 19 Sep 2024 13:17:01 +0530 Subject: [PATCH 22/32] Creating Quad with Quad --- quaddtype/numpy_quaddtype/src/scalar.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index 5b09abe2..0dcf5f57 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -76,6 +76,19 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend) self->value.longdouble_value = (long double)val; } } + else if (Py_TYPE(value) == &QuadPrecision_Type) + { + // todo: not working for ld backend, getting garbage value not sure why? + QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)value; + // printf("%d %d\n", quad_obj->backend, backend); + // printf("%Lf\n", quad_obj->value.longdouble_value); + if (quad_obj->backend == BACKEND_SLEEF) { + self->value.sleef_value = quad_obj->value.sleef_value; + } + else { + self->value.longdouble_value = quad_obj->value.longdouble_value; + } + } else { PyObject *type_str = PyObject_Str((PyObject *)Py_TYPE(value)); if (type_str != NULL) { From 082b64b1a25baa5a2e36c244459f2016819b3e5d Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Thu, 19 Sep 2024 21:43:48 +0530 Subject: [PATCH 23/32] exposing constants --- quaddtype/numpy_quaddtype/__init__.py | 6 +++++- quaddtype/numpy_quaddtype/src/scalar.c | 22 ++++++++++++++++++++++ quaddtype/numpy_quaddtype/src/scalar.h | 3 +++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py index 5aa50441..4ea5a2aa 100644 --- a/quaddtype/numpy_quaddtype/__init__.py +++ b/quaddtype/numpy_quaddtype/__init__.py @@ -5,7 +5,11 @@ ) __all__ = ['QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision', - 'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128'] + 'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128', 'pi', 'e'] + + +pi = QuadPrecision.pi +e = QuadPrecision.e def SleefQuadPrecision(value): diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index 0dcf5f57..84f545ae 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -233,6 +233,27 @@ QuadPrecision_dealloc(QuadPrecisionObject *self) Py_TYPE(self)->tp_free((PyObject *)self); } +PyObject* QuadPrecision_get_pi(PyObject* self, void* closure) { + QuadPrecisionObject* new = QuadPrecision_raw_new(BACKEND_SLEEF); + if (new == NULL) return NULL; + new->value.sleef_value = SLEEF_M_PIq; + return (PyObject*)new; +} + +PyObject* QuadPrecision_get_e(PyObject* self, void* closure) { + QuadPrecisionObject* new = QuadPrecision_raw_new(BACKEND_SLEEF); + if (new == NULL) return NULL; + new->value.sleef_value = SLEEF_M_Eq; + return (PyObject*)new; +} + +// Add this to the existing QuadPrecision_Type definition +static PyGetSetDef QuadPrecision_getset[] = { + {"pi", (getter)QuadPrecision_get_pi, NULL, "Pi constant", NULL}, + {"e", (getter)QuadPrecision_get_e, NULL, "Euler's number", NULL}, + {NULL} /* Sentinel */ +}; + PyTypeObject QuadPrecision_Type = { PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecision", .tp_basicsize = sizeof(QuadPrecisionObject), @@ -243,6 +264,7 @@ PyTypeObject QuadPrecision_Type = { .tp_str = (reprfunc)QuadPrecision_str_dragon4, .tp_as_number = &quad_as_scalar, .tp_richcompare = (richcmpfunc)quad_richcompare, + .tp_getset = QuadPrecision_getset, }; int diff --git a/quaddtype/numpy_quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h index 4fac1adf..2faf85c9 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.h +++ b/quaddtype/numpy_quaddtype/src/scalar.h @@ -31,6 +31,9 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend); int init_quadprecision_scalar(void); +PyObject* QuadPrecision_get_pi(PyObject* self, void* closure); +PyObject* QuadPrecision_get_e(PyObject* self, void* closure); + #define PyArray_IsScalar(obj, QuadPrecDType) PyObject_TypeCheck(obj, &QuadPrecision_Type) #define PyArrayScalar_VAL(obj, QuadPrecDType) (((QuadPrecisionObject *)obj)->value) From e53edbadb1baf4972259028d2e10a059d2fc1d01 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Fri, 20 Sep 2024 17:03:24 +0530 Subject: [PATCH 24/32] resolved reviews --- quaddtype/numpy_quaddtype/src/dragon4.c | 5 +++++ quaddtype/numpy_quaddtype/src/dtype.c | 11 +++++++--- quaddtype/numpy_quaddtype/src/scalar.c | 29 ++++++++++++++----------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c index 1ac9420b..34ad4cbb 100644 --- a/quaddtype/numpy_quaddtype/src/dragon4.c +++ b/quaddtype/numpy_quaddtype/src/dragon4.c @@ -1,3 +1,8 @@ +/* +This code was extracted from NumPy and the original author was Allan Haldane(@ahaldane) +Modifications are specific to support the SLEEF_QUAD +*/ + #include #include #include diff --git a/quaddtype/numpy_quaddtype/src/dtype.c b/quaddtype/numpy_quaddtype/src/dtype.c index 4c3f73dd..7d0fad37 100644 --- a/quaddtype/numpy_quaddtype/src/dtype.c +++ b/quaddtype/numpy_quaddtype/src/dtype.c @@ -79,10 +79,15 @@ ensure_canonical(QuadPrecDTypeObject *self) static QuadPrecDTypeObject * common_instance(QuadPrecDTypeObject *dtype1, QuadPrecDTypeObject *dtype2) { + // if backend mismatch then return SLEEF one (safe to cast ld to quad) if (dtype1->backend != dtype2->backend) { - PyErr_SetString(PyExc_TypeError, - "Cannot find common instance for QuadPrecDTypes with different backends"); - return NULL; + if (dtype1->backend == BACKEND_SLEEF) { + Py_INCREF(dtype1); + return dtype1; + } + + Py_INCREF(dtype2); + return dtype2; } Py_INCREF(dtype1); return dtype1; diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index 0dcf5f57..c6bac92f 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -76,39 +76,42 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend) self->value.longdouble_value = (long double)val; } } - else if (Py_TYPE(value) == &QuadPrecision_Type) - { - // todo: not working for ld backend, getting garbage value not sure why? + else if (Py_TYPE(value) == &QuadPrecision_Type) { + Py_DECREF(self); // discard the default one QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)value; - // printf("%d %d\n", quad_obj->backend, backend); - // printf("%Lf\n", quad_obj->value.longdouble_value); + + // create a new one with the same backend + QuadPrecisionObject *self = QuadPrecision_raw_new(quad_obj->backend); if (quad_obj->backend == BACKEND_SLEEF) { self->value.sleef_value = quad_obj->value.sleef_value; } else { self->value.longdouble_value = quad_obj->value.longdouble_value; } + + return self; } else { PyObject *type_str = PyObject_Str((PyObject *)Py_TYPE(value)); if (type_str != NULL) { const char *type_cstr = PyUnicode_AsUTF8(type_str); if (type_cstr != NULL) { - PyErr_Format( - PyExc_TypeError, - "QuadPrecision value must be a float, int or string, but got %s instead", - type_cstr); + PyErr_Format(PyExc_TypeError, + "QuadPrecision value must be a quad, float, int or string, but got %s " + "instead", + type_cstr); } else { - PyErr_SetString(PyExc_TypeError, - "QuadPrecision value must be a float, int or string, but got an " - "unknown type instead"); + PyErr_SetString( + PyExc_TypeError, + "QuadPrecision value must be a quad, float, int or string, but got an " + "unknown type instead"); } Py_DECREF(type_str); } else { PyErr_SetString(PyExc_TypeError, - "QuadPrecision value must be a float, int or string, but got an " + "QuadPrecision value must be a quad, float, int or string, but got an " "unknown type instead"); } Py_DECREF(self); From bbc99a61d105a0177130552f8ac8427985751baa Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Fri, 20 Sep 2024 19:27:55 +0530 Subject: [PATCH 25/32] fixing constant error --- quaddtype/numpy_quaddtype/__init__.py | 19 ++++++---- .../numpy_quaddtype/src/quaddtype_main.c | 15 ++++++++ quaddtype/numpy_quaddtype/src/scalar.c | 37 ++++++++++++++++++- quaddtype/numpy_quaddtype/src/scalar.h | 18 +++++++++ quaddtype/numpy_quaddtype/src/umath.cpp | 3 -- 5 files changed, 80 insertions(+), 12 deletions(-) diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py index 5aa50441..294d7248 100644 --- a/quaddtype/numpy_quaddtype/__init__.py +++ b/quaddtype/numpy_quaddtype/__init__.py @@ -1,24 +1,27 @@ from ._quaddtype_main import ( QuadPrecision, QuadPrecDType, - is_longdouble_128 + is_longdouble_128, + pi, e, log2e, log10e, ln2, ln10, + sqrt2, sqrt3, egamma, phi, quad_max, quad_min, quad_epsilon, quad_denorm_min ) -__all__ = ['QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision', - 'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128'] - +__all__ = [ + 'QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision', + 'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128', + 'pi', 'e', 'log2e', 'log10e', 'ln2', 'ln10', + 'sqrt2', 'sqrt3', 'egamma', 'phi', + 'quad_max', 'quad_min', 'quad_epsilon', 'quad_denorm_min' +] def SleefQuadPrecision(value): return QuadPrecision(value, backend='sleef') - def LongDoubleQuadPrecision(value): return QuadPrecision(value, backend='longdouble') - def SleefQuadPrecDType(): return QuadPrecDType(backend='sleef') - def LongDoubleQuadPrecDType(): - return QuadPrecDType(backend='longdouble') + return QuadPrecDType(backend='longdouble') \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c index 7c89d57f..d5f0c729 100644 --- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c +++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c @@ -65,6 +65,21 @@ PyInit__quaddtype_main(void) goto error; } + if (PyModule_AddObject(m, "pi", (PyObject *)QuadPrecision_pi) < 0) goto error; + if (PyModule_AddObject(m, "e", (PyObject *)QuadPrecision_e) < 0) goto error; + if (PyModule_AddObject(m, "log2e", (PyObject *)QuadPrecision_log2e) < 0) goto error; + if (PyModule_AddObject(m, "log10e", (PyObject *)QuadPrecision_log10e) < 0) goto error; + if (PyModule_AddObject(m, "ln2", (PyObject *)QuadPrecision_ln2) < 0) goto error; + if (PyModule_AddObject(m, "ln10", (PyObject *)QuadPrecision_ln10) < 0) goto error; + if (PyModule_AddObject(m, "sqrt2", (PyObject *)QuadPrecision_sqrt2) < 0) goto error; + if (PyModule_AddObject(m, "sqrt3", (PyObject *)QuadPrecision_sqrt3) < 0) goto error; + if (PyModule_AddObject(m, "egamma", (PyObject *)QuadPrecision_egamma) < 0) goto error; + if (PyModule_AddObject(m, "phi", (PyObject *)QuadPrecision_phi) < 0) goto error; + if (PyModule_AddObject(m, "quad_max", (PyObject *)QuadPrecision_quad_max) < 0) goto error; + if (PyModule_AddObject(m, "quad_min", (PyObject *)QuadPrecision_quad_min) < 0) goto error; + if (PyModule_AddObject(m, "quad_epsilon", (PyObject *)QuadPrecision_quad_epsilon) < 0) goto error; + if (PyModule_AddObject(m, "quad_denorm_min", (PyObject *)QuadPrecision_quad_denorm_min) < 0) goto error; + return m; error: diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index c6bac92f..cafa1d01 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -248,9 +248,44 @@ PyTypeObject QuadPrecision_Type = { .tp_richcompare = (richcmpfunc)quad_richcompare, }; +QuadPrecisionObject* initialize_constants(const Sleef_quad value, QuadBackendType backend) +{ + QuadPrecisionObject * obj = QuadPrecision_raw_new(backend); + if (backend == BACKEND_SLEEF) { + obj->value.sleef_value = value; + } + else { + obj->value.longdouble_value = Sleef_cast_to_doubleq1(value); + } + + return obj; +} + int init_quadprecision_scalar(void) { - // QuadPrecision_Type.tp_base = &PyFloatingArrType_Type; + QuadPrecisionObject* QuadPrecision_pi = initialize_constants(SLEEF_M_PIq, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_e = initialize_constants(SLEEF_M_Eq, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_log2e = initialize_constants(SLEEF_M_LOG2Eq, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_log10e = initialize_constants(SLEEF_M_LOG10Eq, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_ln2 = initialize_constants(SLEEF_M_LN2q, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_ln10 = initialize_constants(SLEEF_M_LN10q, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_sqrt2 = initialize_constants(SLEEF_M_SQRT2q, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_sqrt3 = initialize_constants(SLEEF_M_SQRT3q, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_egamma = initialize_constants(SLEEF_M_EGAMMAq, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_phi = initialize_constants(SLEEF_M_PHIq, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_quad_max = initialize_constants(SLEEF_QUAD_MAX, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_quad_min = initialize_constants(SLEEF_QUAD_MIN, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_quad_epsilon = initialize_constants(SLEEF_QUAD_EPSILON, BACKEND_SLEEF); + QuadPrecisionObject* QuadPrecision_quad_denorm_min = initialize_constants(SLEEF_QUAD_DENORM_MIN, BACKEND_SLEEF); + + if (!QuadPrecision_pi || !QuadPrecision_e || !QuadPrecision_log2e || !QuadPrecision_log10e || + !QuadPrecision_ln2 || !QuadPrecision_ln10|| !QuadPrecision_sqrt2 || !QuadPrecision_sqrt3 || + !QuadPrecision_egamma || !QuadPrecision_phi || !QuadPrecision_quad_max || !QuadPrecision_quad_min || + !QuadPrecision_quad_epsilon || !QuadPrecision_quad_denorm_min) { + PyErr_SetString(PyExc_RuntimeError, "Failed to initialize QuadPrecision constants"); + return -1; + } + return PyType_Ready(&QuadPrecision_Type); } \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h index 4fac1adf..b2cb3b58 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.h +++ b/quaddtype/numpy_quaddtype/src/scalar.h @@ -34,6 +34,24 @@ init_quadprecision_scalar(void); #define PyArray_IsScalar(obj, QuadPrecDType) PyObject_TypeCheck(obj, &QuadPrecision_Type) #define PyArrayScalar_VAL(obj, QuadPrecDType) (((QuadPrecisionObject *)obj)->value) +QuadPrecisionObject* initialize_constants(const Sleef_quad value, QuadBackendType backend); + +// constant objects +extern QuadPrecisionObject *QuadPrecision_pi; +extern QuadPrecisionObject *QuadPrecision_e; +extern QuadPrecisionObject *QuadPrecision_log2e; +extern QuadPrecisionObject *QuadPrecision_log10e; +extern QuadPrecisionObject *QuadPrecision_ln2; +extern QuadPrecisionObject *QuadPrecision_ln10; +extern QuadPrecisionObject *QuadPrecision_sqrt2; +extern QuadPrecisionObject *QuadPrecision_sqrt3; +extern QuadPrecisionObject *QuadPrecision_egamma; +extern QuadPrecisionObject *QuadPrecision_phi; +extern QuadPrecisionObject *QuadPrecision_quad_max; +extern QuadPrecisionObject *QuadPrecision_quad_min; +extern QuadPrecisionObject *QuadPrecision_quad_epsilon; +extern QuadPrecisionObject *QuadPrecision_quad_denorm_min; + #ifdef __cplusplus } #endif diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 587c0e93..7271443e 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -658,9 +658,6 @@ quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *con QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; QuadBackendType backend = descr->backend; - size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); - - quad_value in1, in2; while (N--) { quad_value in1 = *(quad_value *)in1_ptr; From 41ca3f981c4081ddfa246640862f28a5e12bea18 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Fri, 20 Sep 2024 20:58:08 +0530 Subject: [PATCH 26/32] added 128-bit common constants --- quaddtype/numpy_quaddtype/__init__.py | 21 ++++--- .../numpy_quaddtype/src/quaddtype_main.c | 58 ++++++++++++++----- quaddtype/numpy_quaddtype/src/scalar.c | 58 ------------------- quaddtype/numpy_quaddtype/src/scalar.h | 18 ------ 4 files changed, 57 insertions(+), 98 deletions(-) diff --git a/quaddtype/numpy_quaddtype/__init__.py b/quaddtype/numpy_quaddtype/__init__.py index 294d7248..e469a4c1 100644 --- a/quaddtype/numpy_quaddtype/__init__.py +++ b/quaddtype/numpy_quaddtype/__init__.py @@ -2,16 +2,13 @@ QuadPrecision, QuadPrecDType, is_longdouble_128, - pi, e, log2e, log10e, ln2, ln10, - sqrt2, sqrt3, egamma, phi, quad_max, quad_min, quad_epsilon, quad_denorm_min + get_sleef_constant ) __all__ = [ 'QuadPrecision', 'QuadPrecDType', 'SleefQuadPrecision', 'LongDoubleQuadPrecision', - 'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128', - 'pi', 'e', 'log2e', 'log10e', 'ln2', 'ln10', - 'sqrt2', 'sqrt3', 'egamma', 'phi', - 'quad_max', 'quad_min', 'quad_epsilon', 'quad_denorm_min' + 'SleefQuadPrecDType', 'LongDoubleQuadPrecDType', 'is_longdouble_128', 'pi', 'e', + 'log2e', 'log10e', 'ln2', 'ln10', 'max_value', 'min_value', 'epsilon' ] def SleefQuadPrecision(value): @@ -24,4 +21,14 @@ def SleefQuadPrecDType(): return QuadPrecDType(backend='sleef') def LongDoubleQuadPrecDType(): - return QuadPrecDType(backend='longdouble') \ No newline at end of file + return QuadPrecDType(backend='longdouble') + +pi = get_sleef_constant("pi") +e = get_sleef_constant("e") +log2e = get_sleef_constant("log2e") +log10e = get_sleef_constant("log10e") +ln2 = get_sleef_constant("ln2") +ln10 = get_sleef_constant("ln10") +max_value = get_sleef_constant("quad_max") +min_value = get_sleef_constant("quad_min") +epsilon = get_sleef_constant("epsilon") \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/quaddtype_main.c b/quaddtype/numpy_quaddtype/src/quaddtype_main.c index d5f0c729..f2935299 100644 --- a/quaddtype/numpy_quaddtype/src/quaddtype_main.c +++ b/quaddtype/numpy_quaddtype/src/quaddtype_main.c @@ -1,4 +1,7 @@ #include +#include +#include +#include #define PY_ARRAY_UNIQUE_SYMBOL QuadPrecType_ARRAY_API #define PY_UFUNC_UNIQUE_SYMBOL QuadPrecType_UFUNC_API @@ -26,8 +29,48 @@ static PyObject* py_is_longdouble_128(PyObject* self, PyObject* args) { } } +static PyObject* get_sleef_constant(PyObject* self, PyObject* args) { + const char* constant_name; + if (!PyArg_ParseTuple(args, "s", &constant_name)) { + return NULL; + } + + QuadPrecisionObject* result = QuadPrecision_raw_new(BACKEND_SLEEF); + if (result == NULL) { + return NULL; + } + + if (strcmp(constant_name, "pi") == 0) { + result->value.sleef_value = SLEEF_M_PIq; + } else if (strcmp(constant_name, "e") == 0) { + result->value.sleef_value = SLEEF_M_Eq; + } else if (strcmp(constant_name, "log2e") == 0) { + result->value.sleef_value = SLEEF_M_LOG2Eq; + } else if (strcmp(constant_name, "log10e") == 0) { + result->value.sleef_value = SLEEF_M_LOG10Eq; + } else if (strcmp(constant_name, "ln2") == 0) { + result->value.sleef_value = SLEEF_M_LN2q; + } else if (strcmp(constant_name, "ln10") == 0) { + result->value.sleef_value = SLEEF_M_LN10q; + } else if (strcmp(constant_name, "quad_max") == 0) { + result->value.sleef_value = SLEEF_QUAD_MAX; + } else if (strcmp(constant_name, "quad_min") == 0) { + result->value.sleef_value = SLEEF_QUAD_MIN; + } else if (strcmp(constant_name, "epsilon") == 0) { + result->value.sleef_value = SLEEF_QUAD_EPSILON; + } + else { + PyErr_SetString(PyExc_ValueError, "Unknown constant name"); + Py_DECREF(result); + return NULL; + } + + return (PyObject*)result; +} + static PyMethodDef module_methods[] = { {"is_longdouble_128", py_is_longdouble_128, METH_NOARGS, "Check if long double is 128-bit"}, + {"get_sleef_constant", get_sleef_constant, METH_VARARGS, "Get Sleef constant by name"}, {NULL, NULL, 0, NULL} }; @@ -65,21 +108,6 @@ PyInit__quaddtype_main(void) goto error; } - if (PyModule_AddObject(m, "pi", (PyObject *)QuadPrecision_pi) < 0) goto error; - if (PyModule_AddObject(m, "e", (PyObject *)QuadPrecision_e) < 0) goto error; - if (PyModule_AddObject(m, "log2e", (PyObject *)QuadPrecision_log2e) < 0) goto error; - if (PyModule_AddObject(m, "log10e", (PyObject *)QuadPrecision_log10e) < 0) goto error; - if (PyModule_AddObject(m, "ln2", (PyObject *)QuadPrecision_ln2) < 0) goto error; - if (PyModule_AddObject(m, "ln10", (PyObject *)QuadPrecision_ln10) < 0) goto error; - if (PyModule_AddObject(m, "sqrt2", (PyObject *)QuadPrecision_sqrt2) < 0) goto error; - if (PyModule_AddObject(m, "sqrt3", (PyObject *)QuadPrecision_sqrt3) < 0) goto error; - if (PyModule_AddObject(m, "egamma", (PyObject *)QuadPrecision_egamma) < 0) goto error; - if (PyModule_AddObject(m, "phi", (PyObject *)QuadPrecision_phi) < 0) goto error; - if (PyModule_AddObject(m, "quad_max", (PyObject *)QuadPrecision_quad_max) < 0) goto error; - if (PyModule_AddObject(m, "quad_min", (PyObject *)QuadPrecision_quad_min) < 0) goto error; - if (PyModule_AddObject(m, "quad_epsilon", (PyObject *)QuadPrecision_quad_epsilon) < 0) goto error; - if (PyModule_AddObject(m, "quad_denorm_min", (PyObject *)QuadPrecision_quad_denorm_min) < 0) goto error; - return m; error: diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index dc5ff623..c65e0889 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -236,27 +236,6 @@ QuadPrecision_dealloc(QuadPrecisionObject *self) Py_TYPE(self)->tp_free((PyObject *)self); } -PyObject* QuadPrecision_get_pi(PyObject* self, void* closure) { - QuadPrecisionObject* new = QuadPrecision_raw_new(BACKEND_SLEEF); - if (new == NULL) return NULL; - new->value.sleef_value = SLEEF_M_PIq; - return (PyObject*)new; -} - -PyObject* QuadPrecision_get_e(PyObject* self, void* closure) { - QuadPrecisionObject* new = QuadPrecision_raw_new(BACKEND_SLEEF); - if (new == NULL) return NULL; - new->value.sleef_value = SLEEF_M_Eq; - return (PyObject*)new; -} - -// Add this to the existing QuadPrecision_Type definition -static PyGetSetDef QuadPrecision_getset[] = { - {"pi", (getter)QuadPrecision_get_pi, NULL, "Pi constant", NULL}, - {"e", (getter)QuadPrecision_get_e, NULL, "Euler's number", NULL}, - {NULL} /* Sentinel */ -}; - PyTypeObject QuadPrecision_Type = { PyVarObject_HEAD_INIT(NULL, 0).tp_name = "numpy_quaddtype.QuadPrecision", .tp_basicsize = sizeof(QuadPrecisionObject), @@ -267,47 +246,10 @@ PyTypeObject QuadPrecision_Type = { .tp_str = (reprfunc)QuadPrecision_str_dragon4, .tp_as_number = &quad_as_scalar, .tp_richcompare = (richcmpfunc)quad_richcompare, - .tp_getset = QuadPrecision_getset, }; -QuadPrecisionObject* initialize_constants(const Sleef_quad value, QuadBackendType backend) -{ - QuadPrecisionObject * obj = QuadPrecision_raw_new(backend); - if (backend == BACKEND_SLEEF) { - obj->value.sleef_value = value; - } - else { - obj->value.longdouble_value = Sleef_cast_to_doubleq1(value); - } - - return obj; -} - int init_quadprecision_scalar(void) { - QuadPrecisionObject* QuadPrecision_pi = initialize_constants(SLEEF_M_PIq, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_e = initialize_constants(SLEEF_M_Eq, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_log2e = initialize_constants(SLEEF_M_LOG2Eq, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_log10e = initialize_constants(SLEEF_M_LOG10Eq, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_ln2 = initialize_constants(SLEEF_M_LN2q, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_ln10 = initialize_constants(SLEEF_M_LN10q, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_sqrt2 = initialize_constants(SLEEF_M_SQRT2q, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_sqrt3 = initialize_constants(SLEEF_M_SQRT3q, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_egamma = initialize_constants(SLEEF_M_EGAMMAq, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_phi = initialize_constants(SLEEF_M_PHIq, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_quad_max = initialize_constants(SLEEF_QUAD_MAX, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_quad_min = initialize_constants(SLEEF_QUAD_MIN, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_quad_epsilon = initialize_constants(SLEEF_QUAD_EPSILON, BACKEND_SLEEF); - QuadPrecisionObject* QuadPrecision_quad_denorm_min = initialize_constants(SLEEF_QUAD_DENORM_MIN, BACKEND_SLEEF); - - if (!QuadPrecision_pi || !QuadPrecision_e || !QuadPrecision_log2e || !QuadPrecision_log10e || - !QuadPrecision_ln2 || !QuadPrecision_ln10|| !QuadPrecision_sqrt2 || !QuadPrecision_sqrt3 || - !QuadPrecision_egamma || !QuadPrecision_phi || !QuadPrecision_quad_max || !QuadPrecision_quad_min || - !QuadPrecision_quad_epsilon || !QuadPrecision_quad_denorm_min) { - PyErr_SetString(PyExc_RuntimeError, "Failed to initialize QuadPrecision constants"); - return -1; - } - return PyType_Ready(&QuadPrecision_Type); } \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h index 61dea172..2faf85c9 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.h +++ b/quaddtype/numpy_quaddtype/src/scalar.h @@ -37,24 +37,6 @@ PyObject* QuadPrecision_get_e(PyObject* self, void* closure); #define PyArray_IsScalar(obj, QuadPrecDType) PyObject_TypeCheck(obj, &QuadPrecision_Type) #define PyArrayScalar_VAL(obj, QuadPrecDType) (((QuadPrecisionObject *)obj)->value) -QuadPrecisionObject* initialize_constants(const Sleef_quad value, QuadBackendType backend); - -// constant objects -extern QuadPrecisionObject *QuadPrecision_pi; -extern QuadPrecisionObject *QuadPrecision_e; -extern QuadPrecisionObject *QuadPrecision_log2e; -extern QuadPrecisionObject *QuadPrecision_log10e; -extern QuadPrecisionObject *QuadPrecision_ln2; -extern QuadPrecisionObject *QuadPrecision_ln10; -extern QuadPrecisionObject *QuadPrecision_sqrt2; -extern QuadPrecisionObject *QuadPrecision_sqrt3; -extern QuadPrecisionObject *QuadPrecision_egamma; -extern QuadPrecisionObject *QuadPrecision_phi; -extern QuadPrecisionObject *QuadPrecision_quad_max; -extern QuadPrecisionObject *QuadPrecision_quad_min; -extern QuadPrecisionObject *QuadPrecision_quad_epsilon; -extern QuadPrecisionObject *QuadPrecision_quad_denorm_min; - #ifdef __cplusplus } #endif From 2fd9cad004d6bf48749c016555469c5ff4c3b53c Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Fri, 20 Sep 2024 21:00:45 +0530 Subject: [PATCH 27/32] refactoring --- quaddtype/numpy_quaddtype/src/scalar.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/scalar.h b/quaddtype/numpy_quaddtype/src/scalar.h index 2faf85c9..4fac1adf 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.h +++ b/quaddtype/numpy_quaddtype/src/scalar.h @@ -31,9 +31,6 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend); int init_quadprecision_scalar(void); -PyObject* QuadPrecision_get_pi(PyObject* self, void* closure); -PyObject* QuadPrecision_get_e(PyObject* self, void* closure); - #define PyArray_IsScalar(obj, QuadPrecDType) PyObject_TypeCheck(obj, &QuadPrecision_Type) #define PyArrayScalar_VAL(obj, QuadPrecDType) (((QuadPrecisionObject *)obj)->value) From 672be17ab0c9d84f37dac190eada407cdd687525 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sat, 21 Sep 2024 00:01:39 +0530 Subject: [PATCH 28/32] added aligned and unaligned casting loops --- quaddtype/numpy_quaddtype/src/casts.cpp | 137 ++++++++++++++++++++---- quaddtype/numpy_quaddtype/src/scalar.c | 1 - 2 files changed, 119 insertions(+), 19 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index aa38e476..ed2f4477 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -49,9 +49,9 @@ quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), } static int -quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) +quad_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) { npy_intp N = dimensions[0]; char *in_ptr = data[0]; @@ -79,6 +79,44 @@ quad_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], return 0; } +static int +quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1]; + + if (descr_in->backend != descr_out->backend) { + PyErr_SetString(PyExc_TypeError, + "Cannot convert between different quad-precision backends"); + return -1; + } + + if (descr_in->backend == BACKEND_SLEEF) { + while (N--) { + *(Sleef_quad *)out_ptr = *(Sleef_quad *)in_ptr; + in_ptr += in_stride; + out_ptr += out_stride; + } + } + else { + while (N--) { + *(long double *)out_ptr = *(long double *)in_ptr; + in_ptr += in_stride; + out_ptr += out_stride; + } + } + + return 0; +} + // Casting from other types to QuadDType template @@ -290,9 +328,9 @@ numpy_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta template static int -numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) +numpy_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) { npy_intp N = dimensions[0]; char *in_ptr = data[0]; @@ -316,6 +354,36 @@ numpy_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], return 0; } +template +static int +numpy_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1]; + QuadBackendType backend = descr_out->backend; + + while (N--) { + T in_val = *(T *)in_ptr; + quad_value out_val = to_quad(in_val, backend); + + if (backend == BACKEND_SLEEF) { + *(Sleef_quad *)(out_ptr) = out_val.sleef_value; + } + else { + *(long double *)(out_ptr) = out_val.longdouble_value; + } + + in_ptr += strides[0]; + out_ptr += strides[1]; + } + return 0; +} + // Casting from QuadDType to other types template @@ -493,9 +561,9 @@ quad_to_numpy_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta template static int -quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - void *NPY_UNUSED(auxdata)) +quad_to_numpy_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) { npy_intp N = dimensions[0]; char *in_ptr = data[0]; @@ -519,6 +587,37 @@ quad_to_numpy_strided_loop(PyArrayMethod_Context *context, char *const data[], return 0; } +template +static int +quad_to_numpy_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + + QuadPrecDTypeObject *quad_descr = (QuadPrecDTypeObject *)context->descriptors[0]; + QuadBackendType backend = quad_descr->backend; + + while (N--) { + quad_value in_val; + if (backend == BACKEND_SLEEF) { + in_val.sleef_value = *(Sleef_quad *)in_ptr; + } + else { + in_val.longdouble_value = *(long double *)in_ptr; + } + + T out_val = from_quad(in_val, backend); + *(T *)(out_ptr) = out_val; + + in_ptr += strides[0]; + out_ptr += strides[1]; + } + return 0; +} + static PyArrayMethod_Spec *specs[NUM_CASTS + 1]; // +1 for NULL terminator static size_t spec_count = 0; @@ -537,9 +636,10 @@ add_cast_from(PyArray_DTypeMeta *to) { PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, to}; - PyType_Slot *slots = new PyType_Slot[3]{ + PyType_Slot *slots = new PyType_Slot[]{ {NPY_METH_resolve_descriptors, (void *)&quad_to_numpy_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop}, + {NPY_METH_strided_loop, (void *)&quad_to_numpy_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, (void *)&quad_to_numpy_strided_loop_unaligned}, {0, nullptr}}; PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ @@ -547,7 +647,7 @@ add_cast_from(PyArray_DTypeMeta *to) .nin = 1, .nout = 1, .casting = NPY_UNSAFE_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)0, + .flags = NPY_METH_SUPPORTS_UNALIGNED, .dtypes = dtypes, .slots = slots, }; @@ -560,9 +660,10 @@ add_cast_to(PyArray_DTypeMeta *from) { PyArray_DTypeMeta **dtypes = new PyArray_DTypeMeta *[2]{from, &QuadPrecDType}; - PyType_Slot *slots = new PyType_Slot[3]{ + PyType_Slot *slots = new PyType_Slot[]{ {NPY_METH_resolve_descriptors, (void *)&numpy_to_quad_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop}, + {NPY_METH_strided_loop, (void *)&numpy_to_quad_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, (void *)&numpy_to_quad_strided_loop_unaligned}, {0, nullptr}}; PyArrayMethod_Spec *spec = new PyArrayMethod_Spec{ @@ -570,7 +671,7 @@ add_cast_to(PyArray_DTypeMeta *from) .nin = 1, .nout = 1, .casting = NPY_SAFE_CASTING, - .flags = (NPY_ARRAYMETHOD_FLAGS)0, + .flags = NPY_METH_SUPPORTS_UNALIGNED, .dtypes = dtypes, .slots = slots, }; @@ -583,10 +684,10 @@ init_casts_internal(void) { PyArray_DTypeMeta **quad2quad_dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, &QuadPrecDType}; - PyType_Slot *quad2quad_slots = new PyType_Slot[4]{ + PyType_Slot *quad2quad_slots = new PyType_Slot[]{ {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop}, - {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop}, + {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop_aligned}, + {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop_unaligned}, {0, nullptr}}; PyArrayMethod_Spec *quad2quad_spec = new PyArrayMethod_Spec{ diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c index c65e0889..9bec08a6 100644 --- a/quaddtype/numpy_quaddtype/src/scalar.c +++ b/quaddtype/numpy_quaddtype/src/scalar.c @@ -79,7 +79,6 @@ QuadPrecision_from_object(PyObject *value, QuadBackendType backend) else if (Py_TYPE(value) == &QuadPrecision_Type) { Py_DECREF(self); // discard the default one QuadPrecisionObject *quad_obj = (QuadPrecisionObject *)value; - // create a new one with the same backend QuadPrecisionObject *self = QuadPrecision_raw_new(quad_obj->backend); if (quad_obj->backend == BACKEND_SLEEF) { From 600b2c8d96836ec3719fe9242ae9171f5067b182 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sat, 21 Sep 2024 03:06:36 +0530 Subject: [PATCH 29/32] fixed inter-backend cast segment fault --- quaddtype/numpy_quaddtype/src/casts.cpp | 77 +++++++++++++++----- quaddtype/numpy_quaddtype/src/scalar_ops.cpp | 8 +- quaddtype/numpy_quaddtype/src/umath.cpp | 38 ++++------ 3 files changed, 77 insertions(+), 46 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index ed2f4477..c662b4d5 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -28,9 +28,10 @@ quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), QuadPrecDTypeObject *given_descrs[2], QuadPrecDTypeObject *loop_descrs[2], npy_intp *view_offset) { - if (given_descrs[0]->backend != given_descrs[1]->backend) { - return NPY_UNSAFE_CASTING; - } + NPY_CASTING casting = NPY_NO_CASTING; + + if (given_descrs[0]->backend != given_descrs[1]->backend) + casting = NPY_UNSAFE_CASTING; Py_INCREF(given_descrs[0]); loop_descrs[0] = given_descrs[0]; @@ -45,7 +46,7 @@ quad_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), } *view_offset = 0; - return NPY_NO_CASTING; + return casting; } static int @@ -62,10 +63,26 @@ quad_to_quad_strided_loop_unaligned(PyArrayMethod_Context *context, char *const QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0]; QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1]; + // inter-backend casting if (descr_in->backend != descr_out->backend) { - PyErr_SetString(PyExc_TypeError, - "Cannot convert between different quad-precision backends"); - return -1; + while (N--) { + quad_value in_val, out_val; + if (descr_in->backend == BACKEND_SLEEF) { + memcpy(&in_val.sleef_value, in_ptr, sizeof(Sleef_quad)); + out_val.longdouble_value = Sleef_cast_to_doubleq1(in_val.sleef_value); + } + else { + memcpy(&in_val.longdouble_value, in_ptr, sizeof(long double)); + out_val.sleef_value = Sleef_cast_from_doubleq1(in_val.longdouble_value); + } + memcpy(out_ptr, &out_val, + (descr_out->backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) + : sizeof(long double)); + in_ptr += in_stride; + out_ptr += out_stride; + } + + return 0; } size_t elem_size = @@ -93,10 +110,26 @@ quad_to_quad_strided_loop_aligned(PyArrayMethod_Context *context, char *const da QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)context->descriptors[0]; QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)context->descriptors[1]; + // inter-backend casting if (descr_in->backend != descr_out->backend) { - PyErr_SetString(PyExc_TypeError, - "Cannot convert between different quad-precision backends"); - return -1; + if (descr_in->backend == BACKEND_SLEEF) { + while (N--) { + Sleef_quad in_val = *(Sleef_quad *)in_ptr; + *(long double *)out_ptr = Sleef_cast_to_doubleq1(in_val); + in_ptr += in_stride; + out_ptr += out_stride; + } + } + else { + while (N--) { + long double in_val = *(long double *)in_ptr; + *(Sleef_quad *)out_ptr = Sleef_cast_from_doubleq1(in_val); + in_ptr += in_stride; + out_ptr += out_stride; + } + } + + return 0; } if (descr_in->backend == BACKEND_SLEEF) { @@ -627,6 +660,11 @@ add_spec(PyArrayMethod_Spec *spec) if (spec_count < NUM_CASTS) { specs[spec_count++] = spec; } + else { + delete[] spec->dtypes; + delete[] spec->slots; + delete spec; + } } // functions to add casts @@ -682,9 +720,8 @@ add_cast_to(PyArray_DTypeMeta *from) PyArrayMethod_Spec ** init_casts_internal(void) { - PyArray_DTypeMeta **quad2quad_dtypes = - new PyArray_DTypeMeta *[2]{&QuadPrecDType, &QuadPrecDType}; - PyType_Slot *quad2quad_slots = new PyType_Slot[]{ + PyArray_DTypeMeta **quad2quad_dtypes = new PyArray_DTypeMeta *[2]{nullptr, nullptr}; + PyType_Slot *quad2quad_slots = new PyType_Slot[4]{ {NPY_METH_resolve_descriptors, (void *)&quad_to_quad_resolve_descriptors}, {NPY_METH_strided_loop, (void *)&quad_to_quad_strided_loop_aligned}, {NPY_METH_unaligned_strided_loop, (void *)&quad_to_quad_strided_loop_unaligned}, @@ -694,7 +731,7 @@ init_casts_internal(void) .name = "cast_QuadPrec_to_QuadPrec", .nin = 1, .nout = 1, - .casting = NPY_NO_CASTING, + .casting = NPY_UNSAFE_CASTING, // since SLEEF -> ld might lose precision .flags = NPY_METH_SUPPORTS_UNALIGNED, .dtypes = quad2quad_dtypes, .slots = quad2quad_slots, @@ -749,13 +786,13 @@ init_casts(void) void free_casts(void) { - for (auto cast : specs) { - if (cast == nullptr) { - continue; + for (size_t i = 0; i < spec_count; i++) { + if (specs[i]) { + delete[] specs[i]->dtypes; + delete[] specs[i]->slots; + delete specs[i]; + specs[i] = nullptr; } - delete[] cast->dtypes; - delete[] cast->slots; - delete cast; } spec_count = 0; } \ No newline at end of file diff --git a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp index 2faf810d..98261fe9 100644 --- a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp +++ b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp @@ -124,7 +124,6 @@ quad_binary_func(PyObject *op1, PyObject *op2) return (PyObject *)res; } -// todo: add support with float and int PyObject * quad_richcompare(QuadPrecisionObject *self, PyObject *other, int cmp_op) { @@ -212,7 +211,8 @@ QuadPrecision_float(QuadPrecisionObject *self) { if (self->backend == BACKEND_SLEEF) { return PyFloat_FromDouble(Sleef_cast_to_doubleq1(self->value.sleef_value)); - } else { + } + else { return PyFloat_FromDouble((double)self->value.longdouble_value); } } @@ -222,12 +222,12 @@ QuadPrecision_int(QuadPrecisionObject *self) { if (self->backend == BACKEND_SLEEF) { return PyLong_FromLongLong(Sleef_cast_to_int64q1(self->value.sleef_value)); - } else { + } + else { return PyLong_FromLongLong((long long)self->value.longdouble_value); } } - PyNumberMethods quad_as_scalar = { .nb_add = (binaryfunc)quad_binary_func, .nb_subtract = (binaryfunc)quad_binary_func, diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index 7271443e..f5665451 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -282,7 +282,6 @@ quad_binary_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtyp // Determine target backend and if casting is needed NPY_CASTING casting = NPY_NO_CASTING; if (descr_in1->backend != descr_in2->backend) { - target_backend = BACKEND_LONGDOUBLE; casting = NPY_SAFE_CASTING; } @@ -398,12 +397,11 @@ static int quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], PyArray_DTypeMeta *new_op_dtypes[]) { - int nin = ufunc->nin; int nargs = ufunc->nargs; PyArray_DTypeMeta *common = NULL; bool has_quad = false; - + // Handle the special case for reductions if (op_dtypes[0] == NULL) { assert(nin == 2 && ufunc->nout == 1); /* must be reduction */ @@ -417,7 +415,6 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], // Check if any input or signature is QuadPrecision for (int i = 0; i < nin; i++) { if (op_dtypes[i] == &QuadPrecDType) { - has_quad = true; } } @@ -461,7 +458,7 @@ quad_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], else { // Otherwise, use the common dtype Py_INCREF(common); - + new_op_dtypes[i] = common; } } @@ -563,13 +560,14 @@ init_quad_binary_ops(PyObject *numpy) static NPY_CASTING quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], - PyArray_Descr *const given_descrs[], - PyArray_Descr *loop_descrs[], npy_intp *NPY_UNUSED(view_offset)) + PyArray_Descr *const given_descrs[], + PyArray_Descr *loop_descrs[], + npy_intp *NPY_UNUSED(view_offset)) { QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; QuadBackendType target_backend; - + // As dealing with different backends then cast to boolean NPY_CASTING casting = NPY_NO_CASTING; if (descr_in1->backend != descr_in2->backend) { @@ -599,7 +597,7 @@ quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const if (!loop_descrs[2]) { return (NPY_CASTING)-1; } - return casting; + return casting; } template @@ -626,10 +624,9 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[ npy_bool result; if (backend == BACKEND_SLEEF) { - result = sleef_comp(&in1.sleef_value, &in2.sleef_value); + result = sleef_comp(&in1.sleef_value, &in2.sleef_value); } else { - result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); } @@ -642,12 +639,11 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[ return 0; } - template int quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *auxdata) + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *auxdata) { npy_intp N = dimensions[0]; char *in1_ptr = data[0], *in2_ptr = data[1]; @@ -658,19 +654,16 @@ quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *con QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0]; QuadBackendType backend = descr->backend; - while (N--) - { + while (N--) { quad_value in1 = *(quad_value *)in1_ptr; quad_value in2 = *(quad_value *)in2_ptr; npy_bool result; - if (backend == BACKEND_SLEEF) - { + if (backend == BACKEND_SLEEF) { result = sleef_comp(&in1.sleef_value, &in2.sleef_value); - } - else - { + } + else { result = ld_comp(&in1.longdouble_value, &in2.longdouble_value); } @@ -711,7 +704,8 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) PyType_Slot slots[] = { {NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors}, - {NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop_aligned}, + {NPY_METH_strided_loop, + (void *)&quad_generic_comp_strided_loop_aligned}, {NPY_METH_unaligned_strided_loop, (void *)&quad_generic_comp_strided_loop}, {0, NULL}}; From 77e25b6aeaafbe8a4354466c1e384c46cfe37777 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sun, 22 Sep 2024 00:53:57 +0530 Subject: [PATCH 30/32] made ufunc return actual value instead of 0 --- quaddtype/numpy_quaddtype/src/ops.hpp | 428 ++++++++----------- quaddtype/numpy_quaddtype/src/scalar_ops.cpp | 20 +- quaddtype/numpy_quaddtype/src/umath.cpp | 16 +- 3 files changed, 202 insertions(+), 262 deletions(-) diff --git a/quaddtype/numpy_quaddtype/src/ops.hpp b/quaddtype/numpy_quaddtype/src/ops.hpp index dcbb902e..b32ae386 100644 --- a/quaddtype/numpy_quaddtype/src/ops.hpp +++ b/quaddtype/numpy_quaddtype/src/ops.hpp @@ -3,435 +3,375 @@ #include // Unary Quad Operations -typedef int (*unary_op_quad_def)(Sleef_quad *, Sleef_quad *); +typedef Sleef_quad (*unary_op_quad_def)(Sleef_quad *); -static int -quad_negative(Sleef_quad *op, Sleef_quad *out) +static Sleef_quad +quad_negative(Sleef_quad *op) { - *out = Sleef_negq1(*op); - return 0; + return Sleef_negq1(*op); } -static int -quad_positive(Sleef_quad *op, Sleef_quad *out) +static Sleef_quad +quad_positive(Sleef_quad *op) { - *out = *op; - return 0; + return *op; } -static inline int -quad_absolute(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_absolute(Sleef_quad *op) { - *out = Sleef_fabsq1(*op); - return 0; + return Sleef_fabsq1(*op); } -static inline int -quad_rint(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_rint(Sleef_quad *op) { - *out = Sleef_rintq1(*op); - return 0; + return Sleef_rintq1(*op); } -static inline int -quad_trunc(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_trunc(Sleef_quad *op) { - *out = Sleef_truncq1(*op); - return 0; + return Sleef_truncq1(*op); } -static inline int -quad_floor(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_floor(Sleef_quad *op) { - *out = Sleef_floorq1(*op); - return 0; + return Sleef_floorq1(*op); } -static inline int -quad_ceil(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_ceil(Sleef_quad *op) { - *out = Sleef_ceilq1(*op); - return 0; + return Sleef_ceilq1(*op); } -static inline int -quad_sqrt(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_sqrt(Sleef_quad *op) { - *out = Sleef_sqrtq1_u05(*op); - return 0; + return Sleef_sqrtq1_u05(*op); } -static inline int -quad_square(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_square(Sleef_quad *op) { - *out = Sleef_mulq1_u05(*op, *op); - return 0; + return Sleef_mulq1_u05(*op, *op); } -static inline int -quad_log(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_log(Sleef_quad *op) { - *out = Sleef_logq1_u10(*op); - return 0; + return Sleef_logq1_u10(*op); } -static inline int -quad_log2(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_log2(Sleef_quad *op) { - *out = Sleef_log2q1_u10(*op); - return 0; + return Sleef_log2q1_u10(*op); } -static inline int -quad_log10(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_log10(Sleef_quad *op) { - *out = Sleef_log10q1_u10(*op); - return 0; + return Sleef_log10q1_u10(*op); } -static inline int -quad_log1p(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_log1p(Sleef_quad *op) { - *out = Sleef_log1pq1_u10(*op); - return 0; + return Sleef_log1pq1_u10(*op); } -static inline int -quad_exp(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_exp(Sleef_quad *op) { - *out = Sleef_expq1_u10(*op); - return 0; + return Sleef_expq1_u10(*op); } -static inline int -quad_exp2(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_exp2(Sleef_quad *op) { - *out = Sleef_exp2q1_u10(*op); - return 0; + return Sleef_exp2q1_u10(*op); } -static inline int -quad_sin(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_sin(Sleef_quad *op) { - *out = Sleef_sinq1_u10(*op); - return 0; + return Sleef_sinq1_u10(*op); } -static inline int -quad_cos(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_cos(Sleef_quad *op) { - *out = Sleef_cosq1_u10(*op); - return 0; + return Sleef_cosq1_u10(*op); } -static inline int -quad_tan(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_tan(Sleef_quad *op) { - *out = Sleef_tanq1_u10(*op); - return 0; + return Sleef_tanq1_u10(*op); } -static inline int -quad_asin(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_asin(Sleef_quad *op) { - *out = Sleef_asinq1_u10(*op); - return 0; + return Sleef_asinq1_u10(*op); } -static inline int -quad_acos(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_acos(Sleef_quad *op) { - *out = Sleef_acosq1_u10(*op); - return 0; + return Sleef_acosq1_u10(*op); } -static inline int -quad_atan(Sleef_quad *op, Sleef_quad *out) +static inline Sleef_quad +quad_atan(Sleef_quad *op) { - *out = Sleef_atanq1_u10(*op); - return 0; + return Sleef_atanq1_u10(*op); } // Unary long double operations -typedef int (*unary_op_longdouble_def)(long double *, long double *); +typedef long double (*unary_op_longdouble_def)(long double *); -static int -ld_negative(long double *op, long double *out) +static inline long double +ld_negative(long double *op) { - *out = -(*op); - return 0; + return -(*op); } -static int -ld_positive(long double *op, long double *out) +static inline long double +ld_positive(long double *op) { - *out = *op; - return 0; + return *op; } -static inline int -ld_absolute(long double *op, long double *out) +static inline long double +ld_absolute(long double *op) { - *out = fabsl(*op); - return 0; + return fabsl(*op); } -static inline int -ld_rint(long double *op, long double *out) +static inline long double +ld_rint(long double *op) { - *out = rintl(*op); - return 0; + return rintl(*op); } -static inline int -ld_trunc(long double *op, long double *out) +static inline long double +ld_trunc(long double *op) { - *out = truncl(*op); - return 0; + return truncl(*op); } -static inline int -ld_floor(long double *op, long double *out) +static inline long double +ld_floor(long double *op) { - *out = floorl(*op); - return 0; + return floorl(*op); } -static inline int -ld_ceil(long double *op, long double *out) +static inline long double +ld_ceil(long double *op) { - *out = ceill(*op); - return 0; + return ceill(*op); } -static inline int -ld_sqrt(long double *op, long double *out) +static inline long double +ld_sqrt(long double *op) { - *out = sqrtl(*op); - return 0; + return sqrtl(*op); } -static inline int -ld_square(long double *op, long double *out) +static inline long double +ld_square(long double *op) { - *out = (*op) * (*op); - return 0; + return (*op) * (*op); } -static inline int -ld_log(long double *op, long double *out) +static inline long double +ld_log(long double *op) { - *out = logl(*op); - return 0; + return logl(*op); } -static inline int -ld_log2(long double *op, long double *out) +static inline long double +ld_log2(long double *op) { - *out = log2l(*op); - return 0; + return log2l(*op); } -static inline int -ld_log10(long double *op, long double *out) +static inline long double +ld_log10(long double *op) { - *out = log10l(*op); - return 0; + return log10l(*op); } -static inline int -ld_log1p(long double *op, long double *out) +static inline long double +ld_log1p(long double *op) { - *out = log1pl(*op); - return 0; + return log1pl(*op); } -static inline int -ld_exp(long double *op, long double *out) +static inline long double +ld_exp(long double *op) { - *out = expl(*op); - return 0; + return expl(*op); } -static inline int -ld_exp2(long double *op, long double *out) +static inline long double +ld_exp2(long double *op) { - *out = exp2l(*op); - return 0; + return exp2l(*op); } -static inline int -ld_sin(long double *op, long double *out) +static inline long double +ld_sin(long double *op) { - *out = sinl(*op); - return 0; + return sinl(*op); } -static inline int -ld_cos(long double *op, long double *out) +static inline long double +ld_cos(long double *op) { - *out = cosl(*op); - return 0; + return cosl(*op); } -static inline int -ld_tan(long double *op, long double *out) +static inline long double +ld_tan(long double *op) { - *out = tanl(*op); - return 0; + return tanl(*op); } -static inline int -ld_asin(long double *op, long double *out) +static inline long double +ld_asin(long double *op) { - *out = asinl(*op); - return 0; + return asinl(*op); } -static inline int -ld_acos(long double *op, long double *out) +static inline long double +ld_acos(long double *op) { - *out = acosl(*op); - return 0; + return acosl(*op); } -static inline int -ld_atan(long double *op, long double *out) +static inline long double +ld_atan(long double *op) { - *out = atanl(*op); - return 0; + return atanl(*op); } // Binary Quad operations -typedef int (*binary_op_quad_def)(Sleef_quad *, Sleef_quad *, Sleef_quad *); +typedef Sleef_quad (*binary_op_quad_def)(Sleef_quad *, Sleef_quad *); -static inline int -quad_add(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) +static inline Sleef_quad +quad_add(Sleef_quad *in1, Sleef_quad *in2) { - *out = Sleef_addq1_u05(*in1, *in2); - return 0; + return Sleef_addq1_u05(*in1, *in2); } -static inline int -quad_sub(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) +static inline Sleef_quad +quad_sub(Sleef_quad *in1, Sleef_quad *in2) { - *out = Sleef_subq1_u05(*in1, *in2); - return 0; + return Sleef_subq1_u05(*in1, *in2); } -static inline int -quad_mul(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b) +static inline Sleef_quad +quad_mul(Sleef_quad *a, Sleef_quad *b) { - *res = Sleef_mulq1_u05(*a, *b); - return 0; + return Sleef_mulq1_u05(*a, *b); } -static inline int -quad_div(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b) +static inline Sleef_quad +quad_div(Sleef_quad *a, Sleef_quad *b) { - *res = Sleef_divq1_u05(*a, *b); - return 0; + return Sleef_divq1_u05(*a, *b); } -static inline int -quad_pow(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b) +static inline Sleef_quad +quad_pow(Sleef_quad *a, Sleef_quad *b) { - *res = Sleef_powq1_u10(*a, *b); - return 0; + return Sleef_powq1_u10(*a, *b); } -static inline int -quad_mod(Sleef_quad *res, Sleef_quad *a, Sleef_quad *b) +static inline Sleef_quad +quad_mod(Sleef_quad *a, Sleef_quad *b) { - *res = Sleef_fmodq1(*a, *b); - return 0; + return Sleef_fmodq1(*a, *b); } -static inline int -quad_minimum(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) +static inline Sleef_quad +quad_minimum(Sleef_quad *in1, Sleef_quad *in2) { - *out = Sleef_icmpleq1(*in1, *in2) ? *in1 : *in2; - return 0; + return Sleef_icmpleq1(*in1, *in2) ? *in1 : *in2; } -static inline int -quad_maximum(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) +static inline Sleef_quad +quad_maximum(Sleef_quad *in1, Sleef_quad *in2) { - *out = Sleef_icmpgeq1(*in1, *in2) ? *in1 : *in2; - return 0; + return Sleef_icmpgeq1(*in1, *in2) ? *in1 : *in2; } -static inline int -quad_atan2(Sleef_quad *out, Sleef_quad *in1, Sleef_quad *in2) +static inline Sleef_quad +quad_atan2(Sleef_quad *in1, Sleef_quad *in2) { - *out = Sleef_atan2q1_u10(*in1, *in2); - return 0; + return Sleef_atan2q1_u10(*in1, *in2); } // Binary long double operations -typedef int (*binary_op_longdouble_def)(long double *, long double *, long double *); +typedef long double (*binary_op_longdouble_def)(long double *, long double *); -static inline int -ld_add(long double *out, long double *in1, long double *in2) +static inline long double +ld_add(long double *in1, long double *in2) { - *out = (*in1) + (*in2); - return 0; + return (*in1) + (*in2); } -static inline int -ld_sub(long double *out, long double *in1, long double *in2) +static inline long double +ld_sub(long double *in1, long double *in2) { - *out = (*in1) - (*in2); - return 0; + return (*in1) - (*in2); } -static inline int -ld_mul(long double *res, long double *a, long double *b) +static inline long double +ld_mul(long double *a, long double *b) { - *res = (*a) * (*b); - return 0; + return (*a) * (*b); } -static inline int -ld_div(long double *res, long double *a, long double *b) +static inline long double +ld_div(long double *a, long double *b) { - *res = (*a) / (*b); - return 0; + return (*a) / (*b); } -static inline int -ld_pow(long double *res, long double *a, long double *b) +static inline long double +ld_pow(long double *a, long double *b) { - *res = powl(*a, *b); - return 0; + return powl(*a, *b); } -static inline int -ld_mod(long double *res, long double *a, long double *b) +static inline long double +ld_mod(long double *a, long double *b) { - *res = fmodl(*a, *b); - return 0; + return fmodl(*a, *b); } -static inline int -ld_minimum(long double *out, long double *in1, long double *in2) +static inline long double +ld_minimum(long double *in1, long double *in2) { - *out = (*in1 < *in2) ? *in1 : *in2; - return 0; + return (*in1 < *in2) ? *in1 : *in2; } -static inline int -ld_maximum(long double *out, long double *in1, long double *in2) +static inline long double +ld_maximum(long double *in1, long double *in2) { - *out = (*in1 > *in2) ? *in1 : *in2; - return 0; + return (*in1 > *in2) ? *in1 : *in2; } -static inline int -ld_atan2(long double *out, long double *in1, long double *in2) +static inline long double +ld_atan2(long double *in1, long double *in2) { - *out = atan2l(*in1, *in2); - return 0; + return atan2l(*in1, *in2); } // comparison quad functions diff --git a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp index 98261fe9..5888ad60 100644 --- a/quaddtype/numpy_quaddtype/src/scalar_ops.cpp +++ b/quaddtype/numpy_quaddtype/src/scalar_ops.cpp @@ -28,10 +28,10 @@ quad_unary_func(QuadPrecisionObject *self) } if (self->backend == BACKEND_SLEEF) { - sleef_op(&self->value.sleef_value, &res->value.sleef_value); + res->value.sleef_value = sleef_op(&self->value.sleef_value); } else { - longdouble_op(&self->value.longdouble_value, &res->value.longdouble_value); + res->value.longdouble_value = longdouble_op(&self->value.longdouble_value); } return (PyObject *)res; } @@ -100,22 +100,22 @@ quad_binary_func(PyObject *op1, PyObject *op2) if (backend == BACKEND_SLEEF) { if (is_forward) { - sleef_op(&res->value.sleef_value, &self->value.sleef_value, - &other_quad->value.sleef_value); + res->value.sleef_value = + sleef_op(&self->value.sleef_value, &other_quad->value.sleef_value); } else { - sleef_op(&res->value.sleef_value, &other_quad->value.sleef_value, - &self->value.sleef_value); + res->value.sleef_value = + sleef_op(&other_quad->value.sleef_value, &self->value.sleef_value); } } else { if (is_forward) { - longdouble_op(&res->value.longdouble_value, &self->value.longdouble_value, - &other_quad->value.longdouble_value); + res->value.longdouble_value = longdouble_op(&self->value.longdouble_value, + &other_quad->value.longdouble_value); } else { - longdouble_op(&res->value.longdouble_value, &other_quad->value.longdouble_value, - &self->value.longdouble_value); + res->value.longdouble_value = longdouble_op(&other_quad->value.longdouble_value, + &self->value.longdouble_value); } } diff --git a/quaddtype/numpy_quaddtype/src/umath.cpp b/quaddtype/numpy_quaddtype/src/umath.cpp index f5665451..0058236a 100644 --- a/quaddtype/numpy_quaddtype/src/umath.cpp +++ b/quaddtype/numpy_quaddtype/src/umath.cpp @@ -122,10 +122,10 @@ quad_generic_unary_op_strided_loop_unaligned(PyArrayMethod_Context *context, cha while (N--) { memcpy(&in, in_ptr, elem_size); if (backend == BACKEND_SLEEF) { - sleef_op(&in.sleef_value, &out.sleef_value); + out.sleef_value = sleef_op(&in.sleef_value); } else { - longdouble_op(&in.longdouble_value, &out.longdouble_value); + out.longdouble_value = longdouble_op(&in.longdouble_value); } memcpy(out_ptr, &out, elem_size); @@ -152,10 +152,10 @@ quad_generic_unary_op_strided_loop_aligned(PyArrayMethod_Context *context, char while (N--) { if (backend == BACKEND_SLEEF) { - sleef_op((Sleef_quad *)in_ptr, (Sleef_quad *)out_ptr); + *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in_ptr); } else { - longdouble_op((long double *)in_ptr, (long double *)out_ptr); + *(long double *)out_ptr = longdouble_op((long double *)in_ptr); } in_ptr += in_stride; out_ptr += out_stride; @@ -348,10 +348,10 @@ quad_generic_binop_strided_loop_unaligned(PyArrayMethod_Context *context, char * memcpy(&in1, in1_ptr, elem_size); memcpy(&in2, in2_ptr, elem_size); if (backend == BACKEND_SLEEF) { - sleef_op(&out.sleef_value, &in1.sleef_value, &in2.sleef_value); + out.sleef_value = sleef_op(&in1.sleef_value, &in2.sleef_value); } else { - longdouble_op(&out.longdouble_value, &in1.longdouble_value, &in2.longdouble_value); + out.longdouble_value = longdouble_op(&in1.longdouble_value, &in2.longdouble_value); } memcpy(out_ptr, &out, elem_size); @@ -380,10 +380,10 @@ quad_generic_binop_strided_loop_aligned(PyArrayMethod_Context *context, char *co while (N--) { if (backend == BACKEND_SLEEF) { - sleef_op((Sleef_quad *)out_ptr, (Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr); + *(Sleef_quad *)out_ptr = sleef_op((Sleef_quad *)in1_ptr, (Sleef_quad *)in2_ptr); } else { - longdouble_op((long double *)out_ptr, (long double *)in1_ptr, (long double *)in2_ptr); + *(long double *)out_ptr = longdouble_op((long double *)in1_ptr, (long double *)in2_ptr); } in1_ptr += in1_stride; From 87a789184f961c71645ceaec1e0600562d8e9fe3 Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sun, 22 Sep 2024 02:16:24 +0530 Subject: [PATCH 31/32] updated meson.build --- quaddtype/meson.build | 1 + 1 file changed, 1 insertion(+) diff --git a/quaddtype/meson.build b/quaddtype/meson.build index fde838c0..9e6b2f8e 100644 --- a/quaddtype/meson.build +++ b/quaddtype/meson.build @@ -24,6 +24,7 @@ includes = include_directories( ) srcs = [ + 'numpy_quaddtype/src/quad_common.h' 'numpy_quaddtype/src/casts.h', 'numpy_quaddtype/src/casts.cpp', 'numpy_quaddtype/src/scalar.h', From 1a6618a47decb932d360470018511bb7dd9c10ff Mon Sep 17 00:00:00 2001 From: swayaminsync Date: Sun, 22 Sep 2024 02:19:59 +0530 Subject: [PATCH 32/32] missed the comma :) --- quaddtype/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quaddtype/meson.build b/quaddtype/meson.build index 9e6b2f8e..d6e651b8 100644 --- a/quaddtype/meson.build +++ b/quaddtype/meson.build @@ -24,7 +24,7 @@ includes = include_directories( ) srcs = [ - 'numpy_quaddtype/src/quad_common.h' + 'numpy_quaddtype/src/quad_common.h', 'numpy_quaddtype/src/casts.h', 'numpy_quaddtype/src/casts.cpp', 'numpy_quaddtype/src/scalar.h',